From 3749f6f435e79624f72841e866245d84195551cd Mon Sep 17 00:00:00 2001 From: Harinarayanan K K Date: Thu, 18 Jun 2015 16:03:38 +0530 Subject: Added support for Main Profile toolsets in encoder. Added support for CABAC entropy coding. Added support for B slices. Fixed an issue in rate control constant QP mode. Change-Id: Ib759d35e8e943f941aa9b8bbff0362d92c619994 --- common/ih264_defs.h | 3 + common/ih264_structs.h | 10 +- common/ithread.c | 2 +- encoder.mk | 3 + encoder/arm/ih264e_function_selector.c | 7 +- encoder/arm/ih264e_function_selector_a9q.c | 291 ++-- encoder/arm/ih264e_function_selector_av8.c | 21 +- encoder/ih264e_api.c | 119 +- encoder/ih264e_bitstream.c | 3 +- encoder/ih264e_bitstream.h | 65 +- encoder/ih264e_cabac.c | 819 +++++++++ encoder/ih264e_cabac.h | 452 +++++ encoder/ih264e_cabac_encode.c | 2391 ++++++++++++++++++++++++++ encoder/ih264e_cabac_init.c | 226 +++ encoder/ih264e_cabac_structs.h | 221 +++ encoder/ih264e_cavlc.c | 335 +++- encoder/ih264e_cavlc.h | 44 +- encoder/ih264e_core_coding.c | 5 +- encoder/ih264e_deblk.c | 178 +- encoder/ih264e_defs.h | 52 +- encoder/ih264e_encode.c | 436 +++-- encoder/ih264e_encode_header.c | 79 +- encoder/ih264e_fmt_conv.c | 5 +- encoder/ih264e_function_selector_generic.c | 30 +- encoder/ih264e_globals.c | 187 +- encoder/ih264e_half_pel.c | 1 - encoder/ih264e_intra_modes_eval.c | 9 +- encoder/ih264e_mc.c | 355 ++-- encoder/ih264e_me.c | 1741 ++++++++++++++----- encoder/ih264e_me.h | 441 +++-- encoder/ih264e_modify_frm_rate.c | 3 + encoder/ih264e_process.c | 430 +++-- encoder/ih264e_process.h | 30 - encoder/ih264e_rate_control.c | 141 +- encoder/ih264e_rate_control.h | 35 +- encoder/ih264e_rc_mem_interface.c | 8 +- encoder/ih264e_structs.h | 169 +- encoder/ih264e_time_stamp.c | 3 + encoder/ih264e_utils.c | 559 ++++-- encoder/ih264e_utils.h | 30 + encoder/ime.c | 238 +-- encoder/ime.h | 51 +- encoder/ime_defs.h | 3 + encoder/ime_distortion_metrics.c | 1 + encoder/ime_distortion_metrics.h | 2 +- 
encoder/ime_structs.h | 37 +- encoder/irc_common.h | 2 +- encoder/irc_picture_type.c | 78 +- encoder/irc_picture_type.h | 1 + encoder/irc_rate_control_api.c | 4 +- encoder/irc_rate_control_api.h | 1 + encoder/ive2.h | 17 +- encoder/mips/ih264e_function_selector.c | 5 +- encoder/x86/ih264e_function_selector.c | 7 +- encoder/x86/ih264e_function_selector_sse42.c | 13 +- encoder/x86/ih264e_function_selector_ssse3.c | 13 +- encoder/x86/ih264e_half_pel_ssse3.c | 1 - encoder/x86/ih264e_intra_modes_eval_ssse3.c | 6 +- test/Android.mk | 1 - test/encoder/app.h | 15 +- test/encoder/main.c | 330 ++-- test/encoder/recon.c | 5 +- 62 files changed, 8627 insertions(+), 2143 deletions(-) create mode 100644 encoder/ih264e_cabac.c create mode 100644 encoder/ih264e_cabac.h create mode 100644 encoder/ih264e_cabac_encode.c create mode 100644 encoder/ih264e_cabac_init.c create mode 100644 encoder/ih264e_cabac_structs.h diff --git a/common/ih264_defs.h b/common/ih264_defs.h index 6bf74d1..b26a5a4 100644 --- a/common/ih264_defs.h +++ b/common/ih264_defs.h @@ -270,6 +270,9 @@ typedef enum P8x8 = 6, PSKIP = 7, IPCM = 8, + B16x16 = 9, + BSKIP = 10, + BDIRECT = 11, MAX_MBTYPES, }MBTYPES_T; diff --git a/common/ih264_structs.h b/common/ih264_structs.h index fa4e142..0a7c940 100644 --- a/common/ih264_structs.h +++ b/common/ih264_structs.h @@ -1353,6 +1353,11 @@ typedef struct */ UWORD8 u1_ref_idx_reordering_flag_l0; + /* + * ref_pic_list_reordering_flag_l1 + */ + UWORD8 u1_ref_idx_reordering_flag_l1; + /** * Reference prediction list modification */ @@ -1368,11 +1373,6 @@ typedef struct */ ref_list_t as_ref_pic_list1[MAX_DPB_SIZE]; - /* - * weighted_bipred_idc - */ - WORD8 u1_weighted_bipred_idc; - /* * no_output_of_prior_pics_flag */ diff --git a/common/ithread.c b/common/ithread.c index f7335d9..25a8cd0 100644 --- a/common/ithread.c +++ b/common/ithread.c @@ -71,7 +71,7 @@ #endif -#if defined(X86_MSVC) || defined (X86_MINGW) +#ifdef X86_MSVC #include #define SEM_MAX_COUNT 100 diff --git 
a/encoder.mk b/encoder.mk index 5829118..11bd802 100644 --- a/encoder.mk +++ b/encoder.mk @@ -53,6 +53,9 @@ libavce_srcs_c += encoder/ih264e_utils.c libavce_srcs_c += encoder/ih264e_version.c libavce_srcs_c += encoder/ih264e_bitstream.c libavce_srcs_c += encoder/ih264e_cavlc.c +libavce_srcs_c += encoder/ih264e_cabac_init.c +libavce_srcs_c += encoder/ih264e_cabac.c +libavce_srcs_c += encoder/ih264e_cabac_encode.c libavce_srcs_c += encoder/ih264e_encode_header.c libavce_srcs_c += encoder/ih264e_function_selector_generic.c libavce_srcs_c += encoder/ih264e_fmt_conv.c diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c index e4f67a0..0486200 100644 --- a/encoder/arm/ih264e_function_selector.c +++ b/encoder/arm/ih264e_function_selector.c @@ -58,8 +58,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -68,14 +68,15 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "ih264_macros.h" #include "ih264_platform_macros.h" -#include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_platform_macros.h" /** diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c index 8b2879b..30d7795 100644 --- a/encoder/arm/ih264e_function_selector_a9q.c +++ b/encoder/arm/ih264e_function_selector_a9q.c @@ -58,8 +58,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include 
"ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -68,23 +68,18 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_platform_macros.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_trans_quant_itrans_iquant.h" -#include "ih264e_defs.h" -#include "ih264e_structs.h" -#include "ih264_deblk_edge_filters.h" +#include "ih264e_cabac.h" #include "ih264e_core_coding.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" -#include "ih264_padding.h" #include "ih264e_intra_modes_eval.h" -#include "ih264_mem_fns.h" #include "ih264e_fmt_conv.h" #include "ih264e_half_pel.h" @@ -109,144 +104,144 @@ void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec) { WORD32 i= 0; - - /* curr proc ctxt */ - process_ctxt_t *ps_proc = NULL; - me_ctxt_t *ps_me_ctxt = NULL; - - /* Init function pointers for intra pred leaf level functions luma - * Intra 16x16 */ - ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q; - ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q; - ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q; - ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q; - - /* Init function pointers for intra pred leaf level functions luma - * Intra 4x4 */ - ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q; - ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q; - ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q; - ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; - ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; - ps_codec->apf_intra_pred_4_l[5] = 
ih264_intra_pred_luma_4x4_mode_vert_r_a9q; - ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q; - ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q; - ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q; - - /* Init function pointers for intra pred leaf level functions luma - * Intra 8x8 */ - ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q; - ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q; - ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; - ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; - ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q; - ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q; - ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q; - ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q; - - /* Init function pointers for intra pred leaf level functions chroma - * Intra 8x8 */ - ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q; - ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q; - ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q; - ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q; - - /* Init forward transform fn ptr */ - ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8; - ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9; - ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9; - ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9; - ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9; - - /* Init inverse transform fn ptr */ - ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8; - ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9; - 
ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9; - ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9; - ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9; - ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9; - ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9; - ps_codec->pf_interleave_copy = ih264_interleave_copy_a9; - - /* Init fn ptr luma core coding */ - ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16; - ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4; - ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16; - - /* Init fn ptr chroma core coding */ - ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8; - ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8; - - /* Init fn ptr luma deblocking */ - ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9; - ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9; - ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9; - ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9; - - /* Init fn ptr chroma deblocking */ - ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9; - ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9; - ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9; - ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9; - - /* write mb syntax layer */ - ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; - ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; - - /* Padding Functions */ - ps_codec->pf_pad_top = ih264_pad_top_a9q; - ps_codec->pf_pad_bottom = ih264_pad_bottom; - ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q; - ps_codec->pf_pad_left_chroma = 
ih264_pad_left_chroma_a9q; - ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q; - ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q; - - /* Inter pred leaf level functions */ - ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q; - ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q; - ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q; - ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q; - ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q; - - /* sad me level functions */ - ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; - ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; - ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q; - - /* memor handling operations */ - ps_codec->pf_mem_cpy = ih264_memcpy_a9q; - ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q; - ps_codec->pf_mem_set = ih264_memset_a9q; - ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q; - - /* sad me level functions */ - for(i = 0; i < (MAX_PROCESS_CTXT); i++) - { - ps_proc = &ps_codec->as_process[i]; - ps_me_ctxt = &ps_proc->s_me_ctxt; - ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; - ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; - ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q; - ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q; - ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q; - ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q; - ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q; - ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q; - } - - /* intra mode eval -encoder level function */ - ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q; - ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q; - 
ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q; - - /* csc */ - ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q; - ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q; - - /* Halp pel generation function - encoder level*/ - ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q; - ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q; - - return ; + /* curr proc ctxt */ + process_ctxt_t *ps_proc = NULL; + me_ctxt_t *ps_me_ctxt = NULL; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 16x16 */ + ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q; + ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q; + ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q; + ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 4x4 */ + ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q; + ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q; + ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q; + ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q; + ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q; + ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q; + ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions luma + * Intra 8x8 */ + ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_8_l[2] = 
ih264_intra_pred_luma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q; + ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q; + ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q; + ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q; + ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q; + ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q; + + /* Init function pointers for intra pred leaf level functions chroma + * Intra 8x8 */ + ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q; + ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q; + ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q; + ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q; + + /* Init forward transform fn ptr */ + ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8; + ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9; + ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9; + ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9; + ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9; + + /* Init inverse transform fn ptr */ + ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8; + ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9; + ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9; + ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9; + ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9; + ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9; + ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9; + ps_codec->pf_interleave_copy = ih264_interleave_copy_a9; + + /* Init fn ptr luma core 
coding */ + ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16; + ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4; + ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16; + + /* Init fn ptr chroma core coding */ + ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8; + ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8; + + /* Init fn ptr luma deblocking */ + ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9; + ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9; + ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9; + ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9; + + /* Init fn ptr chroma deblocking */ + ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9; + ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9; + ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9; + ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9; + + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = ih264e_write_pslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac; + + /* Padding Functions */ + ps_codec->pf_pad_top = ih264_pad_top_a9q; + ps_codec->pf_pad_bottom = ih264_pad_bottom; + ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q; + ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q; + ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q; + ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q; + + /* Inter pred leaf level functions */ + ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q; + ps_codec->pf_inter_pred_luma_horz = 
ih264_inter_pred_luma_horz_a9q; + ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q; + ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q; + ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q; + + /* sad me level functions */ + ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; + ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + + /* memor handling operations */ + ps_codec->pf_mem_cpy = ih264_memcpy_a9q; + ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q; + ps_codec->pf_mem_set = ih264_memset_a9q; + ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q; + + /* sad me level functions */ + for (i = 0; i < (MAX_PROCESS_CTXT); i++) + { + ps_proc = &ps_codec->as_process[i]; + ps_me_ctxt = &ps_proc->s_me_ctxt; + ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q; + ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q; + ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q; + ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q; + ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q; + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q; } + /* intra mode eval -encoder level function */ + ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q; + ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q; + + /* csc */ + ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q; + ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q; + + /* Halp pel generation function - encoder 
level */ + ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q; + ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q; + +} + diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c index 173c2d5..1679af3 100644 --- a/encoder/arm/ih264e_function_selector_av8.c +++ b/encoder/arm/ih264e_function_selector_av8.c @@ -62,8 +62,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -72,23 +72,18 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_platform_macros.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_trans_quant_itrans_iquant.h" -#include "ih264e_defs.h" -#include "ih264e_structs.h" -#include "ih264_deblk_edge_filters.h" +#include "ih264e_cabac.h" #include "ih264e_core_coding.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" -#include "ih264_padding.h" #include "ih264e_intra_modes_eval.h" -#include "ih264_mem_fns.h" #include "ih264e_fmt_conv.h" #include "ih264e_half_pel.h" @@ -197,8 +192,12 @@ void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec) ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8; /* write mb syntax layer */ - ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; - ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; + /* write mb syntax layer */ + ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = 
ih264e_write_pslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = ih264e_write_bslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac; /* Padding Functions */ ps_codec->pf_pad_top = ih264_pad_top_av8; diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c index 8a478bb..9d6c9ef 100644 --- a/encoder/ih264e_api.c +++ b/encoder/ih264e_api.c @@ -93,6 +93,7 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264_macros.h" #include "ih264e_defs.h" #include "ih264e_globals.h" @@ -109,10 +110,10 @@ #include "ime_defs.h" #include "ime_distortion_metrics.h" #include "ime_structs.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_utils.h" #include "ih264e_core_coding.h" -#include "ih264_buf_mgr.h" #include "ih264_platform_macros.h" #include "ih264e_platform_macros.h" #include "ih264_list.h" @@ -399,7 +400,8 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, return (IV_FAIL); } - if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1) + if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_PIC_CNT || + ps_ip->s_ive_ip.u4_max_ref_cnt < MIN_REF_PIC_CNT) { ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED; @@ -482,7 +484,15 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, return (IV_FAIL); } - if (ps_ip->s_ive_ip.u4_max_num_bframes != 0) + if (ps_ip->s_ive_ip.u4_num_bframes > MAX_NUM_BFRAMES) + { + ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; + ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED; + return (IV_FAIL); + } + + if (ps_ip->s_ive_ip.u4_num_bframes + && (ps_ip->s_ive_ip.u4_max_ref_cnt < 2)) { ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM; ps_op->s_ive_op.u4_error_code |= 
IH264E_BFRAMES_NOT_SUPPORTED; @@ -1472,15 +1482,6 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle, return IV_FAIL; } - if (ps_ip->s_ive_ip.u4_num_b_frames != 0) - { - ps_op->s_ive_op.u4_error_code |= 1 - << IVE_UNSUPPORTEDPARAM; - ps_op->s_ive_op.u4_error_code |= - IH264E_BFRAMES_NOT_SUPPORTED; - return IV_FAIL; - } - break; } @@ -2091,7 +2092,6 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval; - ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames; } else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS) { @@ -2188,6 +2188,7 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS) { ps_codec->s_cfg.e_profile = ps_cfg->e_profile; + ps_codec->s_cfg.u4_entropy_coding_mode = ps_cfg->u4_entropy_coding_mode; } else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES) { @@ -2259,8 +2260,9 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, ps_codec->s_cfg.u4_target_bitrate, ps_codec->s_cfg.u4_max_bitrate, ps_codec->s_cfg.u4_vbv_buffer_delay, - ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp, - H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp, + ps_codec->s_cfg.u4_i_frm_interval, + ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp, + ps_codec->s_cfg.u4_num_bframes + 2, au1_min_max_qp, ps_codec->s_cfg.u4_max_level); } @@ -2302,7 +2304,7 @@ static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg) ps_cfg->e_rc_mode = DEFAULT_RC; ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE; ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE; - ps_cfg->u4_max_num_bframes = 0; + ps_cfg->u4_num_bframes = DEFAULT_MAX_NUM_BFRAMES; ps_cfg->e_content_type = IV_PROGRESSIVE; ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; @@ -2350,7 +2352,6 @@ static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg) ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y; ps_cfg->u4_i_frm_interval = 
DEFAULT_I_INTERVAL; ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL; - ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES; ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL; ps_cfg->e_profile = DEFAULT_PROFILE; ps_cfg->u4_timestamp_low = 0; @@ -2396,7 +2397,7 @@ static WORD32 ih264e_init(codec_t *ps_codec) WORD32 i; /* coded pic count */ - ps_codec->i4_coded_pic_cnt = 0; + ps_codec->i4_poc = 0; /* Number of API calls to encode are made */ ps_codec->i4_encode_api_call_cnt = -1; @@ -2422,7 +2423,7 @@ static WORD32 ih264e_init(codec_t *ps_codec) ps_codec->i4_disable_deblk_pic_cnt = 0; /* frame num */ - ps_codec->i4_frame_num = -1; + ps_codec->i4_frame_num = 0; /* set the current frame type to I frame, since we are going to start encoding*/ ps_codec->force_curr_frame_type = IV_NA_FRAME; @@ -2738,11 +2739,31 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) } DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size); + /************************************************************************ + * Request memory for CABAC context * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC]; + { + ps_mem_rec->u4_mem_size = sizeof(cabac_ctxt_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CABAC, ps_mem_rec->u4_mem_size); + + /************************************************************************ + * Request memory for CABAC MB info * + ***********************************************************************/ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC_MB_INFO]; + { + ps_mem_rec->u4_mem_size = ((max_mb_cols + 1) + 1) + * sizeof(mb_info_ctxt_t); + } + DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CABAC_MB_INFO, ps_mem_rec->u4_mem_size); + + /************************************************************************ * Request memory for entropy context * * In multi core encoding, each row is assumed to be launched on a * * thread. 
The rows below can only start after its neighbors are coded * - * The status of an mb coded/uncoded is signaled via entropy map. * + * The status of an mb coded/uncoded is signaled via entropy map. * * 1. One word32 to store skip run cnt * * 2. mb entropy map (mb status entropy coded/uncoded). The size* * of the entropy map is max mb cols. Further allocate one * @@ -3177,6 +3198,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH]; { WORD32 total_size = 0; + WORD32 i4_tmp_size; /* size to hold prediction buffer */ total_size += sizeof(UWORD8) * 16 * 16; @@ -3215,14 +3237,8 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op) total_size = ALIGN64(total_size); /* Buffers for holding half_x , half_y and half_xy planes */ - total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); - total_size = ALIGN64(total_size); - - total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); - total_size = ALIGN64(total_size); - - total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); - total_size = ALIGN64(total_size); + i4_tmp_size = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT); + total_size += (ALIGN64(i4_tmp_size) * SUBPEL_BUFF_CNT); /* Allocate for each process thread */ total_size *= MAX_PROCESS_CTXT; @@ -3517,6 +3533,9 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, /* codec variables */ codec_t * ps_codec; + cabac_ctxt_t *ps_cabac; + mb_info_ctxt_t *ps_mb_map_ctxt_inc; + cfg_params_t *ps_cfg; /* frame dimensions */ @@ -3524,7 +3543,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, WORD32 max_mb_rows, max_mb_cols, max_mb_cnt; /* temp var */ - WORD32 i; + WORD32 i, j; WORD32 status = IV_SUCCESS; /* frame dimensions */ @@ -3543,11 +3562,23 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base; ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle); } + /* Init mem records_cabac ctxt */ + ps_mem_rec 
= &ps_mem_rec_base[MEM_REC_CABAC]; + { + ps_cabac = (cabac_ctxt_t *)(ps_mem_rec->pv_base); + } + + /* Init mem records mb info array for CABAC */ + ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC_MB_INFO]; + { + ps_mb_map_ctxt_inc = (mb_info_ctxt_t *)(ps_mem_rec->pv_base); + } /* Note this memset can not be done in init() call, since init will called during reset as well. And calling this during reset will mean all pointers need to reinitialized */ memset(ps_codec, 0, sizeof(codec_t)); + memset(ps_cabac, 0, sizeof(cabac_ctxt_t)); /* Set default Config Params */ ps_cfg = &ps_codec->s_cfg; @@ -3565,7 +3596,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt; ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate; ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate; - ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes; + ps_cfg->u4_num_bframes = ps_ip->s_ive_ip.u4_num_bframes; ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type; ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x; ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y; @@ -3652,6 +3683,8 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, size += (max_mb_cols * 4 * sizeof(UWORD8)); size = ALIGN128(size); offset = size; + /* cabac Context */ + ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac; } else { @@ -3693,8 +3726,12 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, (void *) (pu1_buf + size); size += (max_mb_cols * 4 * sizeof(UWORD8)); size = ALIGN128(size); + /* cabac Context */ + ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac; } } + ps_codec->as_process[0].s_entropy.ps_cabac->ps_mb_map_ctxt_inc_base = + ps_mb_map_ctxt_inc; } ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA]; @@ -4102,18 +4139,11 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj, size += size_inv; size = ALIGN64(size); - /* Buffers for holding half_x , half_y and half_xy 
values */ - ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size); - size += size_hp; - size = ALIGN64(size); - - ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size); - size += size_hp; - size = ALIGN64(size); - - ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size); - size += size_hp; - size = ALIGN64(size); + for (j = 0; j < SUBPEL_BUFF_CNT; j++) + { + ps_codec->as_process[i].apu1_subpel_buffs[j] = (pu1_buf + size); + size += ALIGN64(size_hp); + } } } @@ -5073,7 +5103,6 @@ static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip, ps_cfg->u4_i_frm_interval = ps_ip->s_ive_ip.u4_i_frm_interval; ps_cfg->u4_idr_frm_interval = ps_ip->s_ive_ip.u4_idr_frm_interval; - ps_cfg->u4_num_b_frames = ps_ip->s_ive_ip.u4_num_b_frames; ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; @@ -5117,6 +5146,8 @@ static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip, ps_cfg->e_profile = ps_ip->s_ive_ip.e_profile; + ps_cfg->u4_entropy_coding_mode = ps_ip->s_ive_ip.u4_entropy_coding_mode; + ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high; ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low; @@ -5297,7 +5328,7 @@ static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj, IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd; /* error status */ - IV_STATUS_T ret = 0; + IV_STATUS_T ret = IV_SUCCESS; /* temp var */ WORD32 i; diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c index e5bfbe4..bfe8f9e 100644 --- a/encoder/ih264e_bitstream.c +++ b/encoder/ih264e_bitstream.c @@ -97,7 +97,7 @@ IH264E_ERROR_T ih264e_bitstrm_init(bitstrm_t *ps_bitstrm, UWORD32 u4_max_bitstrm_size) { ps_bitstrm->pu1_strm_buffer = pu1_bitstrm_buf; - ps_bitstrm->u4_max_strm_size = u4_max_bitstrm_size; + ps_bitstrm->u4_max_strm_size = MAX(u4_max_bitstrm_size, MIN_STREAM_SIZE); /* Default init values for other members of bitstream context */ 
ps_bitstrm->u4_strm_buf_offset = 0; @@ -151,7 +151,6 @@ IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm, if(code_len < WORD_SIZE) ASSERT((u4_code_val >> code_len) == 0); - /* sanity check on the bitstream engine state */ ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE); diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h index 21360cc..d5c8d89 100644 --- a/encoder/ih264e_bitstream.h +++ b/encoder/ih264e_bitstream.h @@ -65,6 +65,14 @@ #define EPB_BYTE 0x03 +/** +****************************************************************************** + * @brief Stream buffer allocated per frame should be atleast MIN_STREAM_SIZE +****************************************************************************** + */ +#define MIN_STREAM_SIZE 0x20000 + + /*****************************************************************************/ /* Function Macros */ /*****************************************************************************/ @@ -106,12 +114,12 @@ * @brief returns bits required to code a value ****************************************************************************** */ -#define UE_LENGTH(bits,x) \ -{ \ - UWORD32 r_bit; \ - GETRANGE(r_bit,x+1) \ - bits =(((r_bit - 1) << 1)+1); \ -} \ +#define UE_LENGTH(bits,x) \ +{ \ + UWORD32 r_bit; \ + GETRANGE(r_bit,x+1) \ + bits =(((r_bit - 1) << 1)+1);\ +} \ /** ****************************************************************************** @@ -140,6 +148,51 @@ */ #define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm) +/** +****************************************************************************** + * @brief Gets number of bits coded +****************************************************************************** + */ + +#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) \ + + 32 - ps_bitstream->i4_bits_left_in_cw); + + + +/** +****************************************************************************** + * @macro Align bitstream to byte - Remainig 
bits are filled with '1' +****************************************************************************** +*/ +#define BITSTREAM_BYTE_ALIGN(ps_bitstrm) \ + if (ps_bitstrm->i4_bits_left_in_cw & 0x07) \ + { \ + const WORD32 len = (WORD32)((ps_bitstrm->i4_bits_left_in_cw) & 0x07);\ + ih264e_put_bits(ps_bitstrm, (UWORD32)((1 << len) - 1), len); \ + } + + +/** +****************************************************************************** +* flush the bits in cur word byte by byte and copy to stream * +* (current word is assumed to be byte aligned) * +****************************************************************************** +*/ +#define BITSTREAM_FLUSH(ps_bitstrm) \ +{ \ + WORD32 i; \ + for (i = WORD_SIZE; i > ps_bitstrm->i4_bits_left_in_cw; i -= 8) \ + { \ + UWORD8 u1_next_byte = (ps_bitstrm->u4_cur_word >> (i - 8)) & 0xFF; \ + PUTBYTE_EPB(ps_bitstrm->pu1_strm_buffer, ps_bitstrm->u4_strm_buf_offset,\ + u1_next_byte, ps_bitstrm->i4_zero_bytes_run); \ + } \ + ps_bitstrm->u4_cur_word = 0; \ + ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE; \ +} \ + + + /*****************************************************************************/ /* Structures */ diff --git a/encoder/ih264e_cabac.c b/encoder/ih264e_cabac.c new file mode 100644 index 0000000..64ff7cd --- /dev/null +++ b/encoder/ih264e_cabac.c @@ -0,0 +1,819 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_cabac.c +* +* @brief +* Contains all leaf level functions for CABAC entropy coding. +* +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264_macros.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" +#include "ih264e_structs.h" +#include "ih264e_cabac.h" +#include "ih264e_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" + + 
+/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + + +/** + ******************************************************************************* + * + * @brief + * k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated + * unary/ k-th order Exp-Golomb (UEGk) binarization process, + * where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402 + * + * @param[in] i2_sufs + * Suffix bit string + * + * @param[out] pi1_bins_len + * Pointer to length of the string; receives the number of bins produced + * + * @returns Binarized value, bit-reversed by REV and right-aligned to *pi1_bins_len bits + * + * @remarks + * None + * + ******************************************************************************* + */ +UWORD32 ih264e_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len) +{ + UWORD32 u4_bins; + WORD32 i4_len; + WORD16 x, y; + + x = i2_sufs + 1; + i4_len = CLZ(x); + i4_len = 31 - i4_len; + y = 1 << i4_len; + y = y - 1; + i2_sufs = i2_sufs - y; + u4_bins = y << 1; + u4_bins = u4_bins << i4_len; + u4_bins = u4_bins + i2_sufs; + + REV(u4_bins, u4_bins); + u4_bins = u4_bins >> (31 - 2 * i4_len); + (*pi1_bins_len) = 2 * i4_len + 1; + + return (u4_bins); +} + + +/** + ******************************************************************************* + * + * @brief + * Get cabac context for the MB :calculates the pointers to Top and left + * cabac neighbor context depending upon neighbor availability.
+ * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u4_mb_type + * Type of MB + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_get_cabac_context(entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type) +{ + + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + mb_info_ctxt_t *ps_ctx_inc_mb_map; + cab_csbp_t *ps_lft_csbp; + + WORD32 i4_lft_avail, i4_top_avail, i4_is_intra; + WORD32 i4_mb_x, i4_mb_y; + UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx; + + i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8) + || (u4_mb_type == I4x4)); + + /* derive neighbor availability */ + i4_mb_x = ps_ent_ctxt->i4_mb_x; + i4_mb_y = ps_ent_ctxt->i4_mb_y; + pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs); + /* left macroblock availability */ + i4_lft_avail = (i4_mb_x == 0 + || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ? + 0 : 1; + /* top macroblock availability */ + i4_top_avail = (i4_mb_y == 0 + || (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs] + != pu1_slice_idx[i4_mb_x])) ? 
0 : 1; + i4_mb_x = ps_ent_ctxt->i4_mb_x; + ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc; + ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x; + ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; + ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; + ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp; + ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb; + ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb; + ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb; + ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc = + &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0]; + ps_cabac_ctxt->pu1_left_mv_ctxt_inc = + ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0]; + + if (i4_lft_avail) + ps_cabac_ctxt->ps_left_ctxt_mb_info = + ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1; + if (i4_top_avail) + ps_cabac_ctxt->ps_top_ctxt_mb_info = + ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + if (!i4_lft_avail) + { + UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp; + *((UWORD32 *) ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc) = 0; + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + } + if (!i4_top_avail) + { + UWORD8 u1_def_csbp = i4_is_intra ? 
0xff : 0; + ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp; + ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] = + ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0; + memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16); + } + +} + + + +/** + ******************************************************************************* + * @brief + * flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402). + * + * @param[in] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @returns IH264E_SUCCESS, or IH264E_BITSTREAM_BUFFER_OVERFLOW when the stream buffer overflow check fails + * + * @remarks + * None + * + ******************************************************************************* + */ +WORD32 ih264e_cabac_flush(cabac_ctxt_t *ps_cabac_ctxt) +{ + + /* bit stream ptr */ + bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm; + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env); + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen; + UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer; + UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset; + WORD32 zero_run = ps_stream->i4_zero_bytes_run; + UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes; + + /************************************************************************/ + /* Insert the carry (propagated in previous byte) along with */ + /* outstanding bytes (if any) and flush remaining bits */ + /************************************************************************/ + { + /* carry = 1 => putbit(1); carry propagated due to L renorm */ + WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1; + WORD32 last_byte; + WORD32 bits_left; + WORD32 rem_bits; + + /*********************************************************************/ + /* Bitstream overflow check */ + /*
NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */ + /*********************************************************************/ + if ((u4_strm_buf_offset + u4_out_standing_bytes + 1) + >= ps_stream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + + if (carry) + { + /* CORNER CASE: if the previous data is 0x000003, then EPB will be inserted + and the data will become 0x00000303 and if the carry is present, it will + be added with the last byte and it will become 0x00000304 which is not correct + as per standard */ + /* so check for previous four bytes and if it is equal to 0x00000303 + then subtract u4_strm_buf_offset by 1. NOTE(review): this reads up to 4 bytes before the write offset; assumes u4_strm_buf_offset >= 4 at this point - confirm */ + if (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03 + && pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03 + && pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00 + && pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00) + { + u4_strm_buf_offset -= 1; + } + /* previous byte carry add will not result in overflow to */ + /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */ + pu1_strm_buf[u4_strm_buf_offset - 1] += carry; + zero_run = 0; + } + + /* Insert outstanding bytes (if any) */ + while (u4_out_standing_bytes) + { + UWORD8 u1_0_or_ff = carry ?
0 : 0xFF; + + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run); + u4_out_standing_bytes--; + } + + /* clear the carry in low */ + u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1); + + /* extract the remaining bits; */ + /* includes additional msb bit of low as per Figure 9-12 */ + bits_left = u4_bits_gen + 1; + rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left)); + + if (bits_left >= 8) + { + last_byte = (rem_bits >> (bits_left - 8)) & 0xFF; + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run); + bits_left -= 8; + } + + /* insert last byte along with rbsp stop bit(1) and 0's in the end */ /* NOTE(review): when bits_left == 7 the shift count (7 - bits_left - 1) below is -1, which is undefined behaviour in C - confirm bits_left cannot be 7 here */ + last_byte = (rem_bits << (8 - bits_left)) + | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1))); + last_byte &= 0xFF; + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run); + + /* update the state variables and return success */ + ps_stream->u4_strm_buf_offset = u4_strm_buf_offset; + ps_stream->i4_zero_bytes_run = 0; + /* Default init values for scratch variables of bitstream context */ + ps_stream->u4_cur_word = 0; + ps_stream->i4_bits_left_in_cw = WORD_SIZE; + + return (IH264E_SUCCESS); + } +} + +/** + ****************************************************************************** + * + * @brief Puts new byte (and outstanding bytes) into bitstream after cabac + * renormalization + * + * @par Description + * 1. Extract the leading byte of low(L) + * 2. If leading byte=0xff increment outstanding bytes and return + * (as the actual bits depend on carry propogation later) + * 3. If leading byte is not 0xff check for any carry propogation + * 4.
Insert the carry (propogated in previous byte) along with outstanding + * bytes (if any) and leading byte + * + * + * @param[in] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @return + * + ****************************************************************************** + */ +void ih264e_cabac_put_byte(cabac_ctxt_t *ps_cabac_ctxt) +{ + + /* bit stream ptr */ + bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm; + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env); + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen; + WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8); + + /* Sanity checks */ + ASSERT((ps_cab_enc_env->u4_code_int_range >= 256) + && (ps_cab_enc_env->u4_code_int_range < 512)); + ASSERT((u4_bits_gen >= 8)); + + /* update bits generated and low after extracting leading byte */ + u4_bits_gen -= 8; + ps_cab_enc_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1); + ps_cab_enc_env->u4_bits_gen = u4_bits_gen; + + /************************************************************************/ + /* 1. Extract the leading byte of low(L) */ + /* 2. If leading byte=0xff increment outstanding bytes and return */ + /* (as the actual bits depend on carry propogation later) */ + /* 3. If leading byte is not 0xff check for any carry propogation */ + /* 4. 
Insert the carry (propogated in previous byte) along with */ + /* outstanding bytes (if any) and leading byte */ + /************************************************************************/ + if (lead_byte == 0xff) + { + /* actual bits depend on carry propogration */ + ps_cab_enc_env->u4_out_standing_bytes++; + return ; + } + else + { + /* carry = 1 => putbit(1); carry propogated due to L renorm */ + WORD32 carry = (lead_byte >> 8) & 0x1; + UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer; + UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset; + WORD32 zero_run = ps_stream->i4_zero_bytes_run; + UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes; + + + /*********************************************************************/ + /* Insert the carry propogated in previous byte */ + /* */ + /* Note : Do not worry about corruption into slice header align byte */ + /* This is because the first bin cannot result in overflow */ + /*********************************************************************/ + if (carry) + { + /* CORNER CASE: if the previous data is 0x000003, then EPB will be inserted + and the data will become 0x00000303 and if the carry is present, it will + be added with the last byte and it will become 0x00000304 which is not correct + as per standard */ + /* so check for previous four bytes and if it is equal to 0x00000303 + then subtract u4_strm_buf_offset by 1 */ + if (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03 + && pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03 + && pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00 + && pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00) + { + u4_strm_buf_offset -= 1; + } + /* previous byte carry add will not result in overflow to */ + /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */ + pu1_strm_buf[u4_strm_buf_offset - 1] += carry; + zero_run = 0; + } + + /* Insert outstanding bytes (if any) */ + while (u4_out_standing_bytes) + { + UWORD8 u1_0_or_ff = carry ? 
0 : 0xFF; + + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run); + + u4_out_standing_bytes--; + } + ps_cab_enc_env->u4_out_standing_bytes = 0; + + /* Insert the leading byte */ + lead_byte &= 0xFF; + PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run); + + /* update the state variables and return success */ + ps_stream->u4_strm_buf_offset = u4_strm_buf_offset; + ps_stream->i4_zero_bytes_run = zero_run; + + } +} + + + + + /** + ****************************************************************************** + * + * @brief Codes a bin based on probablilty and mps packed context model + * + * @par Description + * 1. Apart from encoding bin, context model is updated as per state transition + * 2. Range and Low renormalization is done based on bin and original state + * 3. After renorm bistream is updated (if required) + * + * @param[in] ps_cabac + * pointer to cabac context (handle) + * + * @param[in] bin + * bin(boolean) to be encoded + * + * @param[in] pu1_bin_ctxts + * index of cabac context model containing pState[bits 5-0] | MPS[bit6] + * + * @return + * + ****************************************************************************** + */ +void ih264e_cabac_encode_bin(cabac_ctxt_t *ps_cabac, WORD32 bin, + bin_ctxt_model *pu1_bin_ctxts) +{ + + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_rlps; + UWORD8 state_mps = (*pu1_bin_ctxts) & 0x3F; + UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40)); + WORD32 shift; + UWORD32 u4_table_val; + /* Sanity checks */ + ASSERT((bin == 0) || (bin == 1)); + ASSERT((u4_range >= 256) && (u4_range < 512)); + + /* Get the lps range from LUT based on quantized range and state */ + u4_table_val= gau4_ih264_cabac_table[state_mps][(u4_range >> 6) & 0x3]; + u4_rlps = u4_table_val & 0xFF; + u4_range -= u4_rlps; + + /* check if bin is mps or lps */ + if (u1_mps ^ bin) + { + 
/* lps path; L= L + R; R = RLPS */ + u4_low += u4_range; + u4_range = u4_rlps; + if (state_mps == 0) + { + /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */ + u1_mps = 1 - u1_mps; + } /* update the context model from state transition LUT */ + + state_mps = (u4_table_val >> 15) & 0x3F; + } + else + { /* update the context model from state transition LUT */ + state_mps = (u4_table_val >> 8) & 0x3F; + } + + (*pu1_bin_ctxts) = (u1_mps << 6) | state_mps; + + /*****************************************************************/ + /* Renormalization; calculate bits generated based on range(R) */ + /* Note : 6 <= R < 512; R is 2 only for terminating encode */ + /*****************************************************************/ + GETRANGE(shift, u4_range); + shift = 9 - shift; + u4_low <<= shift; + u4_range <<= shift; + + /* bits to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen += shift; + ps_cab_enc_env->u4_code_int_range = u4_range; + ps_cab_enc_env->u4_code_int_low = u4_low; + + /* generate stream when a byte is ready */ + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + ih264e_cabac_put_byte(ps_cabac); + } + +} + + + + + /** + ******************************************************************************* + * + * @brief + * Encoding process for a binary decision :implements encoding process of a decision + * as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol. Implements + * flowchart Figure 9-7( ITU_T_H264-201402) + * + * @param[in] u4_bins + * array of bin values + * + * @param[in] i1_bins_len + * Length of bins, maximum 32 + * + * @param[in] u4_ctx_inc + * CtxInc, byte0- bin0, byte1-bin1 .. 
+ * + * @param[in] i1_valid_len + * valid length of bins, after that CtxInc is constant + * + * @param[in] pu1_bin_ctxt_type + * Pointer to binary contexts + + * @param[in] ps_cabac + * Pointer to cabac_context_structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, + UWORD32 u4_ctx_inc, WORD8 i1_valid_len, + bin_ctxt_model *pu1_bin_ctxt_type, + cabac_ctxt_t *ps_cabac) +{ + WORD8 i; + UWORD8 u1_ctx_inc, u1_bin; + + for (i = 0; i < i1_bins_len; i++) + { + u1_bin = (u4_bins & 0x01); /* bins are consumed LSB first */ + u4_bins = u4_bins >> 1; + u1_ctx_inc = u4_ctx_inc & 0x0f; /* low nibble is the CtxInc of the current bin */ + if (i < i1_valid_len) + u4_ctx_inc = u4_ctx_inc >> 4; /* move to the next nibble; no further shifts once i reaches i1_valid_len */ + /* Encode the bin */ + ih264e_cabac_encode_bin(ps_cabac, u1_bin, + pu1_bin_ctxt_type + u1_ctx_inc); + } + +} + + + + + + +/** + ******************************************************************************* + * @brief + * Encoding process for a binary decision before termination:Encoding process + * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
+ * + * @param[in] ps_cabac + * Pointer to cabac structure + * + * @param[in] term_bin + * Symbol value, end of slice or not, term_bin is binary + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_cabac_encode_terminate(cabac_ctxt_t *ps_cabac, WORD32 term_bin) +{ + + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + UWORD32 u4_rlps; + WORD32 shift; + + /* Sanity checks */ + ASSERT((u4_range >= 256) && (u4_range < 512)); + ASSERT((term_bin == 0) || (term_bin == 1)); + + /* term_bin = 1 has lps range = 2 */ + u4_rlps = 2; + u4_range -= u4_rlps; + + /* if terminate L is incremented by curR and R=2 */ + if (term_bin) + { + /* lps path; L= L + R; R = RLPS */ + u4_low += u4_range; + u4_range = u4_rlps; + } + + /*****************************************************************/ + /* Renormalization; calculate bits generated based on range(R) */ + /* Note : 6 <= R < 512; R is 2 only for terminating encode */ + /*****************************************************************/ + GETRANGE(shift, u4_range); + shift = 9 - shift; + u4_low <<= shift; + u4_range <<= shift; + + /* bits to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen += shift; + ps_cab_enc_env->u4_code_int_range = u4_range; + ps_cab_enc_env->u4_code_int_low = u4_low; + + /* generate stream when a byte is ready */ + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + ih264e_cabac_put_byte(ps_cabac); + } + + if (term_bin) + { + ih264e_cabac_flush(ps_cabac); /* NOTE(review): the WORD32 status returned by ih264e_cabac_flush() is discarded here */ + } + +} + + +/** + ******************************************************************************* + * @brief + * Bypass encoding process for binary decisions: Explained (9.3.4.4 :ITU_T_H264-201402) + * , flowchart 9-10.
+ * + * @param[inout] ps_cabac : pointer to cabac context (handle) + * + * @param[in] bin : bypass bin(0/1) to be encoded + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264e_cabac_encode_bypass_bin(cabac_ctxt_t *ps_cabac, WORD32 bin) +{ + + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low; + + /* Sanity checks */ + ASSERT((u4_range >= 256) && (u4_range < 512)); + ASSERT((bin == 0) || (bin == 1)); + + u4_low <<= 1; + /* add range if bin is 1 */ + if (bin) + { + u4_low += u4_range; + } + + /* 1 bit to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen++; + ps_cab_enc_env->u4_code_int_low = u4_low; + + /* generate stream when a byte is ready */ + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + ih264e_cabac_put_byte(ps_cabac); + } + +} + + + /** + ****************************************************************************** + * + * @brief Encodes a series of bypass bins (FLC bypass bins) + * + * @par Description + * This function is more optimal than calling ih264e_cabac_encode_bypass_bin() + * in a loop as cabac low, renorm and generating the stream (8bins at a time) + * can be done in one operation + * + * @param[inout]ps_cabac + * pointer to cabac context (handle) + * + * @param[in] u4_bins + * syntax element to be coded (as FLC bins) + * + * @param[in] num_bins + * This is the FLC length for u4_bins + * + * @return + * + ****************************************************************************** + */ + +void ih264e_cabac_encode_bypass_bins(cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, + WORD32 num_bins) +{ + + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env); + + UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range; + WORD32 next_byte; + UWORD32 rev_next_byte; + + /* Sanity checks */ + ASSERT((num_bins < 33) &&
(num_bins > 0)); + ASSERT((u4_range >= 256) && (u4_range < 512)); + + /* Compute bit always to populate the trace */ + /* increment bits generated by num_bins */ + + /* Encode 8bins at a time and put in the bit-stream */ + while (num_bins > 8) + { + num_bins -= 8; + + /* extract the leading 8 bins */ + next_byte = (u4_bins) & 0xff; + u4_bins >>= 8; + REV_NBITS(next_byte, 8, rev_next_byte); + + /* L = (L << 8) + (R * next_byte) */ + ps_cab_enc_env->u4_code_int_low <<= 8; + ps_cab_enc_env->u4_code_int_low += (rev_next_byte * u4_range); + ps_cab_enc_env->u4_bits_gen += 8; + + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + /* insert the leading byte of low into stream */ + ih264e_cabac_put_byte(ps_cabac); + } + } + + /* Update low with remaining bins and return */ + next_byte = (u4_bins & ((1 << num_bins) - 1)); + + REV_NBITS(next_byte, num_bins, rev_next_byte); + + ps_cab_enc_env->u4_code_int_low <<= num_bins; + ps_cab_enc_env->u4_code_int_low += (rev_next_byte * u4_range); + ps_cab_enc_env->u4_bits_gen += num_bins; + + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + /* insert the leading byte of low into stream */ + ih264e_cabac_put_byte(ps_cabac); + } + +} + + + + + + + diff --git a/encoder/ih264e_cabac.h b/encoder/ih264e_cabac.h new file mode 100644 index 0000000..e781783 --- /dev/null +++ b/encoder/ih264e_cabac.h @@ -0,0 +1,452 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ + +/** + ******************************************************************************* + * @file + * ih264e_cabac.h + * + * @brief + * This file contains cabac related macros, enums, tables and function declarations. + * + * @author + * Doney Alex + * + * @remarks + * none + * + ******************************************************************************* + */ + +#ifndef IH264E_CABAC_H_ +#define IH264E_CABAC_H_ + + + +/******************************************************************************* +@brief Bit precision of cabac engine; +******************************************************************************* +*/ +#define CABAC_BITS 9 + + + + +/** +****************************************************************************** + * @macro Reverse the order of the lowest (size) bits of word into rev_word +****************************************************************************** +*/ +#define REV_NBITS(word, size, rev_word) \ +{ \ + WORD32 i; \ + rev_word = 0; \ + for (i = 0; i < (size); i++) \ + { \ + UWORD32 bit = ((word) >> i) & 1; \ + rev_word += (1 << ((size) - i - 1)) * bit; \ + } \ +} \ + +/** +****************************************************************************** + * @macro Reverse all 32 bits of an unsigned integer +****************************************************************************** +*/ +#define REV(u4_input, u4_output) \ +{ \ + UWORD32 u4_temp = (u4_input); \ + WORD8 i; \ + u4_output = 0; \ + for (i = 0; i < 32; i++) \ + { \ + u4_output = (u4_output << 1) + \ + ((u4_temp >> i) & 0x01); \ + } \ +} + +/** +****************************************************************************** +*!
Bit manipulation macros +****************************************************************************** +*/ +#define SETBIT(a, i) ((a) |= (1 << (i))) +#define CLEARBIT(a, i) ((a) &= ~(1 << (i))) + + +/** +****************************************************************************** +*! Cabac module expect atlesat MIN_STREAM_SIZE_MB bytes left in stream buffer +*! for encoding an MB +****************************************************************************** +*/ +#define MIN_STREAM_SIZE_MB 1024 + + + +/*****************************************************************************/ +/* Function Declarations */ +/*****************************************************************************/ + + +/** + ******************************************************************************* + * + * @brief + * Initialize default context values and pointers. + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_init_cabac_table(entropy_ctxt_t *ps_ent_ctxt); + + +/** + ******************************************************************************* + * + * @brief + * Initialize cabac context: Intitalize all contest with init values given in the spec. + * Called at the beginning of entropy coding of each slice for CABAC encoding. 
+ * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_init_cabac_ctxt(entropy_ctxt_t *ps_ent_ctxt); + + + +/** + ******************************************************************************* + * + * @brief + * k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated + * unary/ k-th order Exp-Golomb (UEGk) binarization process, + * where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402 + * + * @param[in] i2_sufs + * Suffix bit string + * + * @param[in] pi1_bins_len + * Pointer to length of the string + * + * @returns Binarized value + * + * @remarks + * None + * + ******************************************************************************* + */ +UWORD32 ih264e_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len); + + +/** + ******************************************************************************* + * + * @brief + * Get cabac context for the MB :calculates the pointers to Top and left + * cabac neighbor context depending upon neighbor availability. + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u4_mb_type + * Type of MB + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_get_cabac_context(entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type); + + +/** + ******************************************************************************* + * @brief + * flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402). 
+ * + * @param[in] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @returns success or failure error code + * + * @remarks + * None + * + ******************************************************************************* + */ +WORD32 ih264e_cabac_flush(cabac_ctxt_t *ps_cabac_ctxt); + + +/** + ****************************************************************************** + * + * @brief Puts new byte (and outstanding bytes) into bitstream after cabac + * renormalization + * + * @par Description + * 1. Extract the leading byte of low(L) + * 2. If leading byte=0xff increment outstanding bytes and return + * (as the actual bits depend on carry propagation later) + * 3. If leading byte is not 0xff check for any carry propagation + * 4. Insert the carry (propagated in previous byte) along with outstanding + * bytes (if any) and leading byte + * + * + * @param[inout] ps_cabac_ctxt + * pointer to cabac context (handle) + * + * @return + * + ****************************************************************************** + */ +void ih264e_cabac_put_byte(cabac_ctxt_t *ps_cabac_ctxt); + + +/** + ****************************************************************************** + * + * @brief Codes a bin based on probability and mps packed context model + * + * @par Description + * 1. Apart from encoding bin, context model is updated as per state transition + * 2. Range and Low renormalization is done based on bin and original state + * 3.
After renorm bitstream is updated (if required) + * + * @param[inout] ps_cabac + * pointer to cabac context (handle) + * + * @param[in] bin + * bin(boolean) to be encoded + * + * @param[in] pu1_bin_ctxts + * pointer to cabac context model containing pState[bits 5-0] | MPS[bit6] + * + * @return + * + ****************************************************************************** + */ +void ih264e_cabac_encode_bin(cabac_ctxt_t *ps_cabac, WORD32 bin, + bin_ctxt_model *pu1_bin_ctxts); + + + +/** + ******************************************************************************* + * + * @brief + * Encoding process for a binary decision :implements encoding process of a decision + * as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol. Implements + * flowchart Figure 9-7( ITU_T_H264-201402) + * + * @param[in] u4_bins + * array of bin values + * + * @param[in] i1_bins_len + * Length of bins, maximum 32 + * + * @param[in] u4_ctx_inc + * CtxInc, packed one 4-bit nibble per bin: nibble0 - bin0, nibble1 - bin1 .. + * + * @param[in] i1_valid_len + * valid length of bins, after that CtxInc is constant + * + * @param[in] pu1_bin_ctxt_type + * Pointer to binary contexts + + * @param[in] ps_cabac + * Pointer to cabac_context_structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len, + UWORD32 u4_ctx_inc, WORD8 i1_valid_len, + bin_ctxt_model *pu1_bin_ctxt_type, + cabac_ctxt_t *ps_cabac); + +/** + ******************************************************************************* + * @brief + * Encoding process for a binary decision before termination:Encoding process + * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
+ * + * @param[in] ps_cabac + * Pointer to cabac structure + * + * @param[in] term_bin + * Symbol value, end of slice or not, term_bin is binary + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +void ih264e_cabac_encode_terminate(cabac_ctxt_t *ps_cabac, WORD32 term_bin); + + +/** + ******************************************************************************* + * @brief + * Bypass encoding process for binary decisions: Explained (9.3.4.4 :ITU_T_H264-201402) + * , flowchart 9-10. + * + * @param[in] ps_cabac : pointer to cabac context (handle) + * + * @param[in] bin : bypass bin(0/1) to be encoded + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +void ih264e_cabac_encode_bypass_bin(cabac_ctxt_t *ps_cabac, WORD32 bin); + + + +/** + ****************************************************************************** + * + * @brief Encodes a series of bypass bins (FLC bypass bins) + * + * @par Description + * This function is more optimal than calling ih264e_cabac_encode_bypass_bin() + * in a loop as cabac low, renorm and generating the stream (8bins at a time) + * can be done in one operation + * + * @param[inout]ps_cabac + * pointer to cabac context (handle) + * + * @param[in] u4_bins + * syntax element to be coded (as FLC bins) + * + * @param[in] num_bins + * This is the FLC length for u4_sym + * + * @return + * + ****************************************************************************** + */ + +void ih264e_cabac_encode_bypass_bins(cabac_ctxt_t *ps_cabac, UWORD32 u4_bins, + WORD32 num_bins); + + + + + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for an Intra Slice. 
+ * + * @description + * The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes + * (if present), mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification. + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt); + + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for Inter slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes + * (if present), mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt); + + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for B slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, + * mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. 
These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt); + + +#endif /* IH264E_CABAC_H_ */ diff --git a/encoder/ih264e_cabac_encode.c b/encoder/ih264e_cabac_encode.c new file mode 100644 index 0000000..ebcd418 --- /dev/null +++ b/encoder/ih264e_cabac_encode.c @@ -0,0 +1,2391 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_cabac_encode.c +* +* @brief +* Contains all functions to encode in CABAC entropy mode +* +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include + +/* User include files */ +#include "ih264e_config.h" +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_debug.h" +#include "ih264_defs.h" +#include "ih264e_defs.h" +#include "ih264_macros.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_platform_macros.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" +#include "ih264e_structs.h" +#include "ih264e_cabac.h" +#include "ih264e_encode_header.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_cavlc.h" +#include "ih264e_statistics.h" +#include "ih264e_trace.h" + +/*****************************************************************************/ +/* Function Definitions */ +/*****************************************************************************/ + + + + +/** + ******************************************************************************* + * + * @brief +
* Encodes mb_skip_flag using CABAC entropy coding mode. + * + * @param[in] u1_mb_skip_flag + * mb_skip_flag + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] u4_ctxidx_offset + * ctxIdxOffset for mb_skip_flag context + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_mb_skip(UWORD8 u1_mb_skip_flag, + cabac_ctxt_t *ps_cabac_ctxt, + UWORD32 u4_ctxidx_offset) +{ + + UWORD8 u4_ctx_inc; + WORD8 a, b; + a = ((ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK) ? + 0 : 1); + b = ((ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK) ? + 0 : 1); + + u4_ctx_inc = a + b; + /* Encode the bin */ + ih264e_cabac_encode_bin(ps_cabac_ctxt, + (UWORD32) u1_mb_skip_flag, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset + + u4_ctx_inc); + +} + + +/* ! < Table 9-36 – Binarization for macroblock types in I slices in ITU_T_H264-201402 + * Bits 0-7 : binarised value + * Bits 8-15: length of binary sequence + */ +static const UWORD32 u4_mb_type_intra[26] = + { 0x0100, 0x0620, 0x0621, 0x0622, 0x0623, 0x0748, 0x0749, 0x074a, 0x074b, + 0x074c, 0x074d, 0x074e, 0x074f, 0x0628, 0x0629, 0x062a, 0x062b, 0x0758, + 0x0759, 0x075a, 0x075b, 0x075c, 0x075d, 0x075e, 0x075f, 0x0203 }; + + +/* CtxInc for mb types */ +static const UWORD32 u4_mb_ctxinc[2][26] = +{ + /* Intra CtxInc's */ + { 0x00, + 0x03467, 0x03467, 0x03467, 0x03467, 0x034567, 0x034567, 0x034567, + 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, 0x03467, 0x03467, + 0x03467, 0x03467, 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, + 0x034567, 0x034567, 0x034567, 0x00}, + /* Inter CtxInc's */ + { 0x00, + 0x001233, 0x001233, 0x001233, 0x001233, 0x0012233, 0x0012233, 0x0012233, + 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x001233, 0x001233, + 0x001233, 0x001233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, + 0x0012233, 0x0012233, 
0x0012233, 0x00} +}; + + +/** + ******************************************************************************* + * + * @brief + * Encodes mb_type for an intra MB. + * + * @param[in] u4_slice_type + * slice type + * + * @param[in] u4_intra_mb_type + * MB type (Table 7-11) + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + ** @param[in] u4_ctxidx_offset + * ctxIdxOffset for mb_type context + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ + +static void ih264e_cabac_enc_intra_mb_type(UWORD32 u4_slice_type, + UWORD32 u4_intra_mb_type, + cabac_ctxt_t *ps_cabac_ctxt, + UWORD32 u4_ctx_idx_offset) +{ + + encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env); + bin_ctxt_model *pu1_mb_bin_ctxt, *pu1_bin_ctxt; + UWORD8 u1_bin; + mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_bins; + UWORD32 u4_ctx_inc; + WORD8 i1_bins_len; + UWORD32 u4_code_int_range; + UWORD32 u4_code_int_low; + UWORD16 u2_quant_code_int_range; + UWORD16 u4_code_int_range_lps; + WORD8 i; + UWORD8 u1_ctx_inc; + UWORD32 u4_table_val; + + pu1_mb_bin_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset; + + u4_bins = u4_mb_type_intra[u4_intra_mb_type]; + i1_bins_len = (WORD8) ((u4_bins >> 8) & 0x0f); + u4_ctx_inc = u4_mb_ctxinc[(u4_slice_type != ISLICE)][u4_intra_mb_type]; + u1_ctx_inc = 0; + if (u4_slice_type == ISLICE) + { + if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u1_ctx_inc += ((ps_left_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0); + if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u1_ctx_inc += ((ps_top_ctxt->u1_mb_type != CAB_I4x4) ? 
1 : 0); + + u4_ctx_inc = (u4_ctx_inc | (u1_ctx_inc << ((i1_bins_len - 1) << 2))); + } + else + { + pu1_mb_bin_ctxt += 3; + if (u4_slice_type == BSLICE) + pu1_mb_bin_ctxt += 2; + + } + + u4_code_int_range = ps_cab_enc_env->u4_code_int_range; + u4_code_int_low = ps_cab_enc_env->u4_code_int_low; + + for (i = (i1_bins_len - 1); i >= 0; i--) + { + WORD32 shift; + + u1_ctx_inc = ((u4_ctx_inc >> (i << 2)) & 0x0f); + u1_bin = ((u4_bins >> i) & 0x01); + /* Encode the bin */ + pu1_bin_ctxt = pu1_mb_bin_ctxt + u1_ctx_inc; + if (i != (i1_bins_len - 2)) + { + WORD8 i1_mps = !!((*pu1_bin_ctxt) & (0x40)); + WORD8 i1_state = (*pu1_bin_ctxt) & 0x3F; + + u2_quant_code_int_range = ((u4_code_int_range >> 6) & 0x03); + u4_table_val = + gau4_ih264_cabac_table[i1_state][u2_quant_code_int_range]; + u4_code_int_range_lps = u4_table_val & 0xFF; + + u4_code_int_range -= u4_code_int_range_lps; + if (u1_bin != i1_mps) + { + u4_code_int_low += u4_code_int_range; + u4_code_int_range = u4_code_int_range_lps; + if (i1_state == 0) + { + /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */ + i1_mps = 1 - i1_mps; + } + + i1_state = (u4_table_val >> 15) & 0x3F; + } + else + { + i1_state = (u4_table_val >> 8) & 0x3F; + + } + + (*pu1_bin_ctxt) = (i1_mps << 6) | i1_state; + } + else + { + u4_code_int_range -= 2; + } + + /* Renormalize */ + /*****************************************************************/ + /* Renormalization; calculate bits generated based on range(R) */ + /* Note : 6 <= R < 512; R is 2 only for terminating encode */ + /*****************************************************************/ + GETRANGE(shift, u4_code_int_range); + shift = 9 - shift; + u4_code_int_low <<= shift; + u4_code_int_range <<= shift; + + /* bits to be inserted in the bitstream */ + ps_cab_enc_env->u4_bits_gen += shift; + ps_cab_enc_env->u4_code_int_range = u4_code_int_range; + ps_cab_enc_env->u4_code_int_low = u4_code_int_low; + + /* generate stream when a byte is ready */ + if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS) + { + 
ih264e_cabac_put_byte(ps_cabac_ctxt); + u4_code_int_range = ps_cab_enc_env->u4_code_int_range; + u4_code_int_low = ps_cab_enc_env->u4_code_int_low; + + } + } +} + + + +/** + ******************************************************************************* + * + * @brief + * Encodes prev_intra4x4_pred_mode_flag and + * rem_intra4x4_pred_mode using CABAC entropy coding mode + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pu1_intra_4x4_modes + * Pointer to array containing prev_intra4x4_pred_mode_flag and + * rem_intra4x4_pred_mode + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_4x4mb_modes(cabac_ctxt_t *ps_cabac_ctxt, + UWORD8 *pu1_intra_4x4_modes) +{ + WORD32 i; + WORD8 byte; + for (i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = *pu1_intra_4x4_modes++; + if (byte & 0x1) + { + ih264e_cabac_encode_bin(ps_cabac_ctxt, + 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + + PREV_INTRA4X4_PRED_MODE_FLAG); + } + else + { + /* Binarization is FL and Cmax=7 */ + ih264e_encode_decision_bins(byte & 0xF, + 4, + 0x05554, + 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + + REM_INTRA4X4_PRED_MODE - 5, + ps_cabac_ctxt); + } + /* sub blk idx 2 */ + byte >>= 4; + if (byte & 0x1) + { + ih264e_cabac_encode_bin(ps_cabac_ctxt, + 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + + PREV_INTRA4X4_PRED_MODE_FLAG); + } + else + { + ih264e_encode_decision_bins(byte & 0xF, + 4, + 0x05554, + 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + + REM_INTRA4X4_PRED_MODE - 5, + ps_cabac_ctxt); + } + } +} + + + +/** + ******************************************************************************* + * + * @brief + * Encodes chroma intrapred mode for the MB. 
+ * + * @param[in] u1_chroma_pred_mode + * Chroma intr prediction mode + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_chroma_predmode(UWORD8 u1_chroma_pred_mode, + cabac_ctxt_t *ps_cabac_ctxt) +{ + + WORD8 i1_temp; + mb_info_ctxt_t *ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_bins = 0; + WORD8 i1_bins_len = 1; + UWORD32 u4_ctx_inc = 0; + UWORD8 a, b; + a = ((ps_left_ctxt->u1_intrapred_chroma_mode != 0) ? 1 : 0); + b = ((ps_top_ctxt->u1_intrapred_chroma_mode != 0) ? 1 : 0); + + /* Binarization is TU and Cmax=3 */ + ps_curr_ctxt->u1_intrapred_chroma_mode = u1_chroma_pred_mode; + + u4_ctx_inc = a + b; + u4_ctx_inc = (u4_ctx_inc | 0x330); + if (u1_chroma_pred_mode) + { + u4_bins = 1; + i1_temp = u1_chroma_pred_mode; + i1_temp--; + /* Put a stream of 1's of length Chromaps_pred_mode_ctxt value */ + while (i1_temp) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + i1_temp--; + } + /* If Chromaps_pred_mode_ctxt < Cmax i.e 3. Terminate put a zero */ + if (u1_chroma_pred_mode < 3) + { + i1_bins_len++; + } + } + + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + + INTRA_CHROMA_PRED_MODE, + ps_cabac_ctxt); + +} + + +/** + ******************************************************************************* + * + * @brief + * Encodes CBP for the MB. 
+ * + * @param[in] u1_cbp + * CBP for the MB + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_cbp(UWORD32 u4_cbp, cabac_ctxt_t *ps_cabac_ctxt) +{ + mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + WORD8 i2_cbp_chroma, i, j; + UWORD8 u1_ctxt_inc, u1_bin; + UWORD8 a, b; + UWORD32 u4_ctx_inc; + UWORD32 u4_bins; + WORD8 i1_bins_len; + + /* CBP Luma, FL, Cmax = 15, L = 4 */ + u4_ctx_inc = 0; + u4_bins = 0; + i1_bins_len = 5; + for (i = 0; i < 4; i++) + { + /* calulate ctxtInc, depending on neighbour availability */ + /* u1_ctxt_inc = CondTerm(A) + 2 * CondTerm(B); + A: Left block and B: Top block */ + + /* Check for Top availability */ + if (i >> 1) + { + j = i - 2; + /* Top is available always and it's current MB */ + b = (((u4_cbp >> j) & 0x01) != 0 ? 0 : 1); + } + else + { + /* for blocks whose top reference is in another MB */ + { + j = i + 2; + b = ((ps_top_ctxt->u1_cbp >> j) & 0x01) ? 0 : 1; + } + } + + /* Check for Left availability */ + if (i & 0x01) + { + /* Left is available always and it's current MB */ + j = i - 1; + a = (((u4_cbp >> j) & 0x01) != 0 ? 0 : 1); + } + else + { + { + j = i + 1; + a = ((ps_left_ctxt->u1_cbp >> j) & 0x01) ? 0 : 1; + } + } + u1_ctxt_inc = a + 2 * b; + u1_bin = ((u4_cbp >> i) & 0x01); + u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i << 2))); + u4_bins = (u4_bins | (u1_bin << i)); + } + + /* CBP Chroma, TU, Cmax = 2 */ + i2_cbp_chroma = u4_cbp >> 4; + /* calulate ctxtInc, depending on neighbour availability */ + a = (ps_left_ctxt->u1_cbp > 15) ? 1 : 0; + b = (ps_top_ctxt->u1_cbp > 15) ? 
1 : 0; + + u1_ctxt_inc = a + 2 * b; + if (i2_cbp_chroma) + { + u4_ctx_inc = u4_ctx_inc | ((4 + u1_ctxt_inc) << 16); + u4_bins = (u4_bins | 0x10); + /* calulate ctxtInc, depending on neighbour availability */ + a = (ps_left_ctxt->u1_cbp > 31) ? 1 : 0; + b = (ps_top_ctxt->u1_cbp > 31) ? 1 : 0; + u1_ctxt_inc = a + 2 * b; + u4_ctx_inc = u4_ctx_inc | ((8 + u1_ctxt_inc) << 20); + u4_bins = (u4_bins | (((i2_cbp_chroma >> 1) & 0x01) << i1_bins_len)); + i1_bins_len++; + } + else + { + u4_ctx_inc = (u4_ctx_inc | ((4 + u1_ctxt_inc) << 16)); + } + ih264e_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 8, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBP_LUMA, + ps_cabac_ctxt); +} + + +/** + ******************************************************************************* + * + * @brief + * Encodes mb_qp_delta for the MB. + * + * @param[in] i1_mb_qp_delta + * mb_qp_delta + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_mb_qp_delta(WORD8 i1_mb_qp_delta, + cabac_ctxt_t *ps_cabac_ctxt) +{ + UWORD8 u1_code_num; + UWORD8 u1_ctxt_inc; + + UWORD32 u4_ctx_inc; + UWORD32 u4_bins; + WORD8 i1_bins_len; + UWORD8 u1_ctx_inc, u1_bin; + /* Range of ps_mb_qp_delta_ctxt= -26 to +25 inclusive */ + ASSERT((i1_mb_qp_delta < 26) && (i1_mb_qp_delta > -27)); + /* if ps_mb_qp_delta_ctxt=0, then codeNum=0 */ + u1_code_num = 0; + if (i1_mb_qp_delta > 0) + u1_code_num = (i1_mb_qp_delta << 1) - 1; + else if (i1_mb_qp_delta < 0) + u1_code_num = (ABS(i1_mb_qp_delta)) << 1; + + u4_ctx_inc = 0; + u4_bins = 0; + i1_bins_len = 1; + /* calculate ctxtInc, depending on neighbour availability */ + u1_ctxt_inc = (!(!(ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt))); + ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = i1_mb_qp_delta; + + if (u1_code_num == 0) + { + /* b0 */ + u1_bin = (UWORD8) (u4_bins); + u1_ctx_inc = u1_ctxt_inc & 0x0f; + /* 
Encode the bin */ + ih264e_cabac_encode_bin(ps_cabac_ctxt, + u1_bin, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA + + u1_ctx_inc); + + } + else + { + /* b0 */ + u4_ctx_inc = u1_ctxt_inc; + u4_bins = 1; + u1_code_num--; + if (u1_code_num == 0) + { + /* b1 */ + u4_ctx_inc = (u4_ctx_inc | 0x20); + i1_bins_len++; + ih264e_encode_decision_bins(u4_bins, i1_bins_len, u4_ctx_inc, 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA, + ps_cabac_ctxt); + } + else + { + /* b1 */ + u4_ctx_inc = (u4_ctx_inc | 0x20); + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + /* BinIdx from b2 onwards */ + if (u1_code_num < 30) + { /* maximum i1_bins_len = 31 */ + while (u1_code_num) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + }; + u4_ctx_inc = (u4_ctx_inc | 0x300); + i1_bins_len++; + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 2, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_QP_DELTA, + ps_cabac_ctxt); + } + else + { + /* maximum i1_bins_len = 53 */ + u4_bins = 0xffffffff; + i1_bins_len = 32; + u4_ctx_inc = (u4_ctx_inc | 0x300); + u1_code_num -= 30; + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 2, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_QP_DELTA, + ps_cabac_ctxt); + u4_bins = 0; + i1_bins_len = 0; + u4_ctx_inc = 0x033; + while (u1_code_num) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + u1_code_num--; + }; + + u4_ctx_inc = (u4_ctx_inc | 0x300); + i1_bins_len++; + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_QP_DELTA, + ps_cabac_ctxt); + } + } + } +} + + + + +/** + ******************************************************************************* + * @brief + * Encodes 4residual_block_cabac as defined in 7.3.5.3.3. 
+ * + * @param[in] pi2_res_block + * pointer to the array of residues + * + * @param[in] u1_nnz + * Number of non zero coeffs in the block + * + * @param[in] u1_max_num_coeffs + * Max number of coeffs that can be there in the block + * + * @param[in] u2_sig_coeff_map + * Significant coeff map + * + * @param[in] u4_ctx_cat_offset + * ctxIdxOffset for absolute value contexts + * + * @param[in] pu1_ctxt_sig_coeff + * Pointer to residual state variables + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_write_coeff4x4(WORD16 *pi2_res_block, UWORD8 u1_nnz, + UWORD8 u1_max_num_coeffs, + UWORD16 u2_sig_coeff_map, + UWORD32 u4_ctx_cat_offset, + bin_ctxt_model *pu1_ctxt_sig_coeff, + cabac_ctxt_t *ps_cabac_ctxt) +{ + + WORD8 i; + WORD16 *pi16_coeffs; + UWORD32 u4_sig_coeff, u4_bins; + UWORD32 u4_ctx_inc; + UWORD8 u1_last_sig_coef_index = (31 - CLZ(u2_sig_coeff_map)); + + /* Always put Coded Block Flag as 1 */ + + pi16_coeffs = pi2_res_block; + { + bin_ctxt_model *pu1_bin_ctxt; + UWORD8 u1_bin, uc_last; + + i = 0; + pu1_bin_ctxt = pu1_ctxt_sig_coeff; + u4_sig_coeff = 0; + u1_bin = 1; + if ((u1_last_sig_coef_index)) + { + u1_bin = !!(u2_sig_coeff_map & 01); + } + uc_last = 1; + + do + { + /* Encode Decision */ + ih264e_cabac_encode_bin(ps_cabac_ctxt, u1_bin, pu1_bin_ctxt); + + if (u1_bin & uc_last) + { + u4_sig_coeff = (u4_sig_coeff | (1 << i)); + pu1_bin_ctxt = pu1_ctxt_sig_coeff + i + + LAST_SIGNIFICANT_COEFF_FLAG_FRAME + - SIGNIFICANT_COEFF_FLAG_FRAME; + u1_bin = (i == u1_last_sig_coef_index); + uc_last = 0; + } + else + { + i = i + 1; + pu1_bin_ctxt = pu1_ctxt_sig_coeff + i; + u1_bin = (i == u1_last_sig_coef_index); + uc_last = 1; + if ((i != u1_last_sig_coef_index)) + { + u1_bin = !!((u2_sig_coeff_map >> i) & 01); + } + } + }while (!((i > u1_last_sig_coef_index) + || (i > (u1_max_num_coeffs - 
1)))); + } + + /* Encode coeff_abs_level_minus1 and coeff_sign_flag */ + { + UWORD8 u1_sign; + UWORD16 u2_abs_level; + UWORD8 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0; + UWORD8 u1_ctx_inc; + UWORD8 u1_coff; + WORD16 i2_sufs; + WORD8 i1_bins_len; + i = u1_last_sig_coef_index; + pi16_coeffs = pi2_res_block + u1_nnz - 1; + do + { + { + u4_sig_coeff = u4_sig_coeff & ((1 << i) - 1); + u4_bins = 0; + u4_ctx_inc = 0; + i1_bins_len = 1; + /* Encode the AbsLevelMinus1 */ + u2_abs_level = ABS(*(pi16_coeffs)) - 1; + /* CtxInc for bin0 */ + u4_ctx_inc = MIN(u1_abs_level_equal1, 4); + /* CtxInc for remaining */ + u1_ctx_inc = 5 + MIN(u1_abs_level_gt1, 4); + u4_ctx_inc = u4_ctx_inc + (u1_ctx_inc << 4); + if (u2_abs_level) + { + u1_abs_level_gt1++; + u1_abs_level_equal1 = 0; + } + if (!u1_abs_level_gt1) + u1_abs_level_equal1++; + + u1_coff = 14; + if (u2_abs_level >= u1_coff) + { + /* Prefix TU i.e string of 14 1's */ + u4_bins = 0x3fff; + i1_bins_len = 14; + ih264e_encode_decision_bins(u4_bins, i1_bins_len, + u4_ctx_inc, 1, ps_cabac_ctxt->au1_cabac_ctxt_table + + u4_ctx_cat_offset, + ps_cabac_ctxt); + + /* Suffix, uses EncodeBypass */ + i2_sufs = u2_abs_level - u1_coff; + + u4_bins = ih264e_cabac_UEGk0_binarization(i2_sufs, + &i1_bins_len); + + ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins, + i1_bins_len); + + } + else + { + /* Prefix only */ + u4_bins = (1 << u2_abs_level) - 1; + i1_bins_len = u2_abs_level + 1; + /* Encode Terminating bit */ + ih264e_encode_decision_bins(u4_bins, i1_bins_len, + u4_ctx_inc, 1, ps_cabac_ctxt->au1_cabac_ctxt_table + + u4_ctx_cat_offset, + ps_cabac_ctxt); + } + } + /* encode coeff_sign_flag[i] */ + u1_sign = ((*pi16_coeffs) < 0) ? 
1 : 0; + ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u1_sign, 1); + i = CLZ(u4_sig_coeff); + i = 31 - i; + pi16_coeffs--; + }while (u4_sig_coeff); + } + +} + + +/** + ******************************************************************************* + * @brief + * Write DC coeffs for intra predicted luma block + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_encode_residue_luma_dc(entropy_ctxt_t *ps_ent_ctxt) +{ + + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + tu_sblk_coeff_data_t *ps_mb_coeff_data; + + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + WORD16 *pi2_res_block; + UWORD8 u1_nnz; + UWORD8 u1_cbf; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + mb_info_ctxt_t *p_CurCtxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz, + u2_sig_coeff_map, pi2_res_block); + + u1_cbf = !!(u1_nnz); + + { + UWORD32 u4_ctx_inc; + UWORD8 u1_a, u1_b; + + u1_a = ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] & 0x1; + u1_b = ps_top_ctxt->u1_yuv_dc_csbp & 0x1; + u4_ctx_inc = u1_a + (u1_b << 1); + + ih264e_cabac_encode_bin(ps_cabac_ctxt, + u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (LUMA_DC_CTXCAT << 2) + u4_ctx_inc); + } + + /* Write coded_block_flag */ + if (u1_cbf) + { + ih264e_cabac_write_coeff4x4(pi2_res_block, + u1_nnz, + 15, + u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + + SIGNIFICANT_COEFF_FLAG_FRAME + + SIG_COEFF_CTXT_CAT_0_OFFSET, + ps_cabac_ctxt); + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= 0x1; + p_CurCtxt->u1_yuv_dc_csbp |= 0x1; + } + else + { + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + p_CurCtxt->u1_yuv_dc_csbp &= 0x6; + } + + 
ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; +} + + + + +/** + ******************************************************************************* + * @brief + * Write chroma residues to the bitstream + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure + * + * @param[in] u1_chroma_cbp + * coded block pattern, chroma + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* + */ +static void ih264e_cabac_write_chroma_residue(entropy_ctxt_t *ps_ent_ctxt, + UWORD8 u1_chroma_cbp) +{ + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + tu_sblk_coeff_data_t *ps_mb_coeff_data; + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + UWORD8 u1_nnz; + mb_info_ctxt_t *ps_top_ctxt_mb_info, *ps_curr_ctxt; + + ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_top_ctxt_mb_info; + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /********************/ + /* Write Chroma DC */ + /********************/ + { + WORD16 *pi2_res_block; + UWORD8 u1_left_dc_csbp, u1_top_dc_csbp, u1_uv, u1_cbf; + + u1_left_dc_csbp = (ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0]) >> 1; + u1_top_dc_csbp = (ps_top_ctxt_mb_info->u1_yuv_dc_csbp) >> 1; + + for (u1_uv = 0; u1_uv < 2; u1_uv++) + { + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, + u1_nnz, u2_sig_coeff_map, pi2_res_block); + u1_cbf = !!(u1_nnz); + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + u1_a = (u1_left_dc_csbp >> u1_uv) & 0x01; + u1_b = (u1_top_dc_csbp >> u1_uv) & 0x01; + u4_ctx_inc = (u1_a + (u1_b << 1)); + + ih264e_cabac_encode_bin(ps_cabac_ctxt, + u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (CHROMA_DC_CTXCAT << 2) + + u4_ctx_inc); + } + + if (u1_cbf) + { + ih264e_cabac_write_coeff4x4(pi2_res_block, + u1_nnz, + 3, + u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + + COEFF_ABS_LEVEL_CAT_3_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + + 
SIGNIFICANT_COEFF_FLAG_FRAME + + SIG_COEFF_CTXT_CAT_3_OFFSET, + ps_cabac_ctxt); + + SETBIT(u1_top_dc_csbp, u1_uv); + SETBIT(u1_left_dc_csbp, u1_uv); + } + else + { + CLEARBIT(u1_top_dc_csbp, u1_uv); + CLEARBIT(u1_left_dc_csbp, u1_uv); + } + } + /*************************************************************/ + /* Update the DC csbp */ + /*************************************************************/ + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1); + ps_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1); + } + /*******************/ + /* Write Chroma AC */ + /*******************/ + { + if (u1_chroma_cbp == 2) + { + UWORD8 u1_uv_blkno, u1_left_ac_csbp, u1_top_ac_csbp; + WORD16 *pi2_res_block; + u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_uv_ac_csbp[0]; + u1_top_ac_csbp = ps_top_ctxt_mb_info->u1_yuv_ac_csbp >> 4; + + for (u1_uv_blkno = 0; u1_uv_blkno < 8; u1_uv_blkno++) + { + UWORD8 u1_cbf; + UWORD8 u1_b2b0, u1_b2b1; + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, + u1_nnz, u2_sig_coeff_map, + pi2_res_block); + + u1_cbf = !!(u1_nnz); + u1_b2b0 = ((u1_uv_blkno & 0x4) >> 1) | (u1_uv_blkno & 0x1); + u1_b2b1 = ((u1_uv_blkno & 0x4) >> 1) + | ((u1_uv_blkno & 0x2) >> 1); + + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + /* write coded_block_flag */ + u1_a = (u1_left_ac_csbp >> u1_b2b1) & 0x1; + u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x1; + u4_ctx_inc = u1_a + (u1_b << 1); + + ih264e_cabac_encode_bin(ps_cabac_ctxt, + u1_cbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (CHROMA_AC_CTXCAT << 2) + + u4_ctx_inc); + + } + if (u1_cbf) + { + ih264e_cabac_write_coeff4x4(pi2_res_block, + u1_nnz, + 14, + u2_sig_coeff_map, + COEFF_ABS_LEVEL_MINUS1 + + COEFF_ABS_LEVEL_CAT_4_OFFSET, + ps_cabac_ctxt->au1_cabac_ctxt_table + + +SIGNIFICANT_COEFF_FLAG_FRAME + + SIG_COEFF_CTXT_CAT_4_OFFSET, + ps_cabac_ctxt); + + SETBIT(u1_left_ac_csbp, u1_b2b1); + SETBIT(u1_top_ac_csbp, 
u1_b2b0); + } + else + { + CLEARBIT(u1_left_ac_csbp, u1_b2b1); + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + + } + } + /*************************************************************/ + /* Update the chroma AC csbp: left flags as is, top flags in the upper nibble */ + /*************************************************************/ + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = u1_left_ac_csbp; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0x0f; + ps_curr_ctxt->u1_yuv_ac_csbp |= (u1_top_ac_csbp << 4); + } + else + { + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf; + } + } + ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; +} + + + + +/** + ******************************************************************************* + * @brief + * Encodes the luma 4x4 residue blocks of an MB and then its chroma residue, + * as defined in 7.3.5.3 + * + * @param[in] ps_ent_ctxt + * Pointer to entropy context structure; pv_mb_coeff_data is read from it + * and written back to it + * + * @param[in] u4_cbp + * coded block pattern: bits 0-3 flag the four luma 8x8 blocks, the bits + * from bit 4 upwards hold the chroma coded block pattern + * + * @param[in] u1_ctx_cat + * Context category, LUMA_AC_CTXCAT or LUMA_4X4_CTXCAT + * + * @returns + * None + * + * @remarks + * Updates the left and current-MB coded sub-block flags (csbp) that are + * read as coded_block_flag context + * + ******************************************************************************* + */ +static void ih264e_cabac_encode_residue(entropy_ctxt_t *ps_ent_ctxt, + UWORD32 u4_cbp, UWORD8 u1_ctx_cat) +{ + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + + tu_sblk_coeff_data_t *ps_mb_coeff_data; + /* packed residue */ + void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data; + UWORD16 u2_sig_coeff_map; + UWORD8 u1_nnz; + mb_info_ctxt_t *ps_curr_ctxt; + mb_info_ctxt_t *ps_top_ctxt; + UWORD8 u1_left_ac_csbp; + UWORD8 u1_top_ac_csbp; + UWORD32 u4_ctx_idx_offset_sig_coef, u4_ctx_idx_offset_abs_lvl; + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_y_ac_csbp[0]; + u1_top_ac_csbp = ps_top_ctxt->u1_yuv_ac_csbp; + + if (u4_cbp & 0xf) + { + /* Write luma residue (only when at least one luma 8x8 block is coded) */ + UWORD8 u1_offset; + WORD16 *pi2_res_block; + UWORD8 u1_subblk_num; + if (u1_ctx_cat == LUMA_AC_CTXCAT) + {
+ u1_offset = 1; + u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_1_OFFSET; + u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1 + + COEFF_ABS_LEVEL_CAT_1_OFFSET; + } + else + { + u1_offset = 0; + u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_2_OFFSET; + u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1 + + COEFF_ABS_LEVEL_CAT_2_OFFSET; + } + + for (u1_subblk_num = 0; u1_subblk_num < 16; u1_subblk_num++) + { + UWORD8 u1_b0, u1_b1, u1_b2, u1_b3, u1_b2b0, u1_b3b1, u1_b3b2; + u1_b0 = (u1_subblk_num & 0x1); + u1_b1 = (u1_subblk_num & 0x2) >> 1; + u1_b2 = (u1_subblk_num & 0x4) >> 2; + u1_b3 = (u1_subblk_num & 0x8) >> 3; + u1_b2b0 = (u1_b2 << 1) | (u1_b0); + u1_b3b1 = (u1_b3 << 1) | (u1_b1); + u1_b3b2 = (u1_b3 << 1) | (u1_b2); + + if (!((u4_cbp >> u1_b3b2) & 0x1)) + { + /* ---------------------------------------------------------- */ + /* The current block is not coded so skip all the sub block */ + /* and set the pointer of scan level, csbp accrodingly */ + /* ---------------------------------------------------------- */ + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + CLEARBIT(u1_top_ac_csbp, (u1_b2b0 + 1)); + CLEARBIT(u1_left_ac_csbp, u1_b3b1); + CLEARBIT(u1_left_ac_csbp, (u1_b3b1 + 1)); + + u1_subblk_num += 3; + } + else + { + PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, + u1_nnz, u2_sig_coeff_map, + pi2_res_block); + + UWORD8 u1_csbf = !!(u1_nnz); + { + UWORD8 u1_a, u1_b; + UWORD32 u4_ctx_inc; + u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x01; + u1_a = (u1_left_ac_csbp >> u1_b3b1) & 0x01; + u4_ctx_inc = u1_a + (u1_b << 1); + + /* Encode the bin */ + ih264e_cabac_encode_bin(ps_cabac_ctxt, + u1_csbf, + ps_cabac_ctxt->au1_cabac_ctxt_table + CBF + + (u1_ctx_cat << 2) + u4_ctx_inc); + + } + /**************************/ + /* Write coded_block_flag */ + /**************************/ + if (u1_csbf) + { + ih264e_cabac_write_coeff4x4(pi2_res_block, + u1_nnz, + (UWORD8) (15 - u1_offset), + u2_sig_coeff_map, + u4_ctx_idx_offset_abs_lvl, + 
ps_cabac_ctxt->au1_cabac_ctxt_table + + SIGNIFICANT_COEFF_FLAG_FRAME + + u4_ctx_idx_offset_sig_coef, + ps_cabac_ctxt); + + SETBIT(u1_top_ac_csbp, u1_b2b0); + SETBIT(u1_left_ac_csbp, u1_b3b1); + } + else + { + CLEARBIT(u1_top_ac_csbp, u1_b2b0); + CLEARBIT(u1_left_ac_csbp, u1_b3b1); + } + } + } + /**************************************************************************/ + /* Update the luma AC csbp: low nibble of the left and current-MB flags */ + /**************************************************************************/ + ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = u1_left_ac_csbp & 0xf; + u1_top_ac_csbp &= 0x0f; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0; + ps_curr_ctxt->u1_yuv_ac_csbp |= u1_top_ac_csbp; + } + else + { + ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0; + } + + /* Write chroma residue, or clear the chroma csbp flags when no chroma is coded */ + + ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data; + { + UWORD8 u1_cbp_chroma; + u1_cbp_chroma = u4_cbp >> 4; + if (u1_cbp_chroma) + { + ih264e_cabac_write_chroma_residue(ps_ent_ctxt, u1_cbp_chroma); + } + else + { + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1; + ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0; + ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf; + } + } +} + +/** + ******************************************************************************* + * @brief + * Encodes one motion vector difference component (9.3.3.1.1.7) + * + * @param[in] u1_mvd + * Signed motion vector difference component to be encoded (a WORD16, + * despite the u1_ prefix) + * + * @param[in] u4_ctx_idx_offset + * ctxIdxOffset of the context set to use; the callers in this file pass + * MVD_X or MVD_Y + * + * @param[in] ui2_abs_mvd + * Sum of the absolute values of the corresponding neighbouring motion + * vector differences; selects the ctxIdxInc of the first bin + * (0 when below 3, 2 when above 32, 1 otherwise) + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @returns + * None + * + * @remarks + * A zero value is coded as a single 0 bin; any other value is coded as a + * prefix, an optional bypass suffix when the magnitude is 9 or more, and + * a bypass sign bin + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_ctx_mvd(WORD16 u1_mvd, UWORD32 u4_ctx_idx_offset, + UWORD16 ui2_abs_mvd, + cabac_ctxt_t *ps_cabac_ctxt) +{ + + UWORD8 u1_bin, u1_ctxt_inc; + WORD8 k = 3, u1_coff = 9; + WORD16 i2_abs_mvd,
i2_sufs; + UWORD32 u4_ctx_inc; + UWORD32 u4_bins; + WORD8 i1_bins_len; + + /* if mvd < u1_coff + only Prefix + else + Prefix + Suffix + + encode sign bit + + Prefix TU encoding Cmax =u1_coff and Suffix 3rd order Exp-Golomb + */ + + if (ui2_abs_mvd < 3) + u4_ctx_inc = 0; + else if (ui2_abs_mvd > 32) + u4_ctx_inc = 2; + else + u4_ctx_inc = 1; + + u4_bins = 0; + i1_bins_len = 1; + + if (u1_mvd == 0) + { + ih264e_cabac_encode_bin(ps_cabac_ctxt, + 0, + ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset + + u4_ctx_inc); + } + else + { + i2_abs_mvd = ABS(u1_mvd); + if (i2_abs_mvd >= u1_coff) + { + /* Prefix TU i.e string of 9 1's */ + u4_bins = 0x1ff; + i1_bins_len = 9; + u4_ctx_inc = (u4_ctx_inc | 0x065430); + + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + + u4_ctx_idx_offset, + ps_cabac_ctxt); + + /* Suffix, uses EncodeBypass */ + u4_bins = 0; + i1_bins_len = 0; + i2_sufs = i2_abs_mvd - u1_coff; + while (1) + { + if (i2_sufs >= (1 << k)) + { + u4_bins = (u4_bins | (1 << i1_bins_len)); + i1_bins_len++; + i2_sufs = i2_sufs - (1 << k); + k++; + } + else + { + i1_bins_len++; + while (k--) + { + u1_bin = ((i2_sufs >> k) & 0x01); + u4_bins = (u4_bins | (u1_bin << i1_bins_len)); + i1_bins_len++; + } + break; + } + } + ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins, + i1_bins_len); + } + else + { + /* Prefix only */ + /* b0 */ + u4_bins = 1; + i2_abs_mvd--; + u1_ctxt_inc = 3; + while (i2_abs_mvd) + { + i2_abs_mvd--; + u4_bins = (u4_bins | (1 << i1_bins_len)); + if (u1_ctxt_inc <= 6) + { + u4_ctx_inc = (u4_ctx_inc + | (u1_ctxt_inc << (i1_bins_len << 2))); + u1_ctxt_inc++; + } + i1_bins_len++; + } + /* Encode Terminating bit */ + if (i1_bins_len <= 4) + u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i1_bins_len << 2))); + i1_bins_len++; + ih264e_encode_decision_bins(u4_bins, + i1_bins_len, + u4_ctx_inc, + 4, + ps_cabac_ctxt->au1_cabac_ctxt_table + + u4_ctx_idx_offset, + ps_cabac_ctxt); + } + /* sign bit, 
uses EncodeBypass */ + if (u1_mvd > 0) + ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, 0, 1); + else + ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, 1, 1); + } +} + +/** + ******************************************************************************* + * @brief + * Encodes the motion vector differences (x, then y) of a P16x16 MB + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pi2_mv_ptr + * Pointer to two consecutive WORD16 values, the x and then the y component + * + * @returns + * None + * + * @remarks + * The absolute value of each component, clipped to 127, is stored in + * bytes 0 and 1 of both the current-MB (top) and the left mvd context + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_mvds_p16x16(cabac_ctxt_t *ps_cabac_ctxt, + WORD16 *pi2_mv_ptr) +{ + + + /* Encode the differential component of the motion vectors */ + + { + UWORD8 u1_abs_mvd_x, u1_abs_mvd_y; + UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt; + WORD16 u2_mv; + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0]; + pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0]; + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, + u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0]; + u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1]; + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X, + (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y, + (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + /***************************************************************/ + /* Store the clipped abs mvd values in the top and left contexts */ + /***************************************************************/ + pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x; + pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y; + } +} + + +/** +
******************************************************************************* + * @brief + * Encodes all motion vector differences for a B MB (assumes that the mb type + * is B_L0_16x16, B_L1_16x16 or B_Bi_16x16) + * + * @param[in] ps_cabac_ctxt + * Pointer to cabac context structure + * + * @param[in] pi2_mv_ptr + * Pointer to the WORD16 components to encode: x and y of the first coded + * list, followed by x and y of the second list when both are coded + * + * @param[in] i4_mb_part_pred_mode + * Prediction mode of the MB: the first pair is coded unless the mode is + * PRED_L1, the second pair is coded unless the mode is PRED_L0 + * + * @returns + * None + * + * @remarks + * Context bytes 0-1 receive the clipped abs values of the first pair and + * bytes 2-3 those of the second pair; a pair that is not coded stores zeros + * + ******************************************************************************* + */ +static void ih264e_cabac_enc_mvds_b16x16(cabac_ctxt_t *ps_cabac_ctxt, + WORD16 *pi2_mv_ptr, + WORD32 i4_mb_part_pred_mode ) +{ + + /* Encode the differential component of the motion vectors */ + + { + UWORD8 u1_abs_mvd_x, u1_abs_mvd_y; + UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt; + WORD16 u2_mv; + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0]; + pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0]; + if (i4_mb_part_pred_mode != PRED_L1)/* i.e. PRED_L0 or PRED_BI */ + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, + u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0]; + u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1]; + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X, + (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y, + (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + /***************************************************************/ + /* Store the first pair's abs mvd values (zero when not coded) */ + /***************************************************************/ + pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x; + pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y; + + u1_abs_mvd_x = 0; + u1_abs_mvd_y = 0; + if (i4_mb_part_pred_mode !=
PRED_L0)/* i.e. PRED_L1 or PRED_BI */ + { + UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a, + u2_abs_mvd_y_b; + u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[2]; + u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[3]; + u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[2]; + u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[3]; + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X, + (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b), + ps_cabac_ctxt); + + u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); + u2_mv = *(pi2_mv_ptr++); + + ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y, + (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b), + ps_cabac_ctxt); + + u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv)); + } + /***************************************************************/ + /* Store the second pair's abs mvd values (zero when not coded) */ + /***************************************************************/ + pu1_top_mv_ctxt[2] = pu1_lft_mv_ctxt[2] = u1_abs_mvd_x; + pu1_top_mv_ctxt[3] = pu1_lft_mv_ctxt[3] = u1_abs_mvd_y; + } +} + + + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for an Intra Slice. + * + * @description + * The mb syntax layer for intra slices constitutes luma mb mode, mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification.
+ * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt) +{ + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + /* packed header data */ + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + mb_info_ctxt_t *ps_curr_ctxt; + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + UWORD32 u4_cbp_l, u4_cbp_c; + WORD32 byte_count = 0; + WORD32 bitstream_start_offset, bitstream_end_offset; + + if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) + >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + /* mb header info */ + mb_tpm = *pu1_byte++; + byte_count++; + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + /* mb type */ + mb_type = mb_tpm & 0xF; + + ih264e_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + if (mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2) + + (u4_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + + chroma_intra_mode = (mb_tpm >> 6); + + /* Encode Intra pred mode, Luma */ + ih264e_cabac_enc_intra_mb_type(ISLICE, luma_intra_mode, ps_cabac_ctxt, + MB_TYPE_I_SLICE); + + if (mb_type == I4x4) + { /* Encode 4x4 MB modes */ + ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte); + byte_count += 8; + } + /* Encode chroma mode */ + ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + + if (mb_type != 
I16x16) + { /* Encode MB cbp (not coded separately for I16x16) */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + } + + if ((cbp > 0) || (mb_type == I16x16)) + { + /* Encode mb_qp_delta */ + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + ps_curr_ctxt->u1_cbp = cbp; + ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt); + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT); + } + else + { + ps_curr_ctxt->u1_cbp = cbp; + ps_curr_ctxt->u1_mb_type = I4x4; /* NOTE(review): dead store, overwritten by the next statement */ + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT); + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; /* clear bit 0, the luma DC flag */ + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + } + /* Ending bitstream offset for residue in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset + - bitstream_start_offset; + } + else + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0; + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + + /* The bits used for encoding the MB syntax have been added to u4_header_bits[0] above */ + } + memset(ps_curr_ctxt->u1_mv, 0, 16); /* intra MB: reset the mvd context bytes */ + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = cbp; + ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count; + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + +
} + else + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + + } + return IH264E_SUCCESS; +} + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for Inter slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue. These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt) +{ + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + + mb_info_ctxt_t *ps_curr_ctxt; + + WORD32 bitstream_start_offset, bitstream_end_offset; + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + UWORD32 u4_cbp_l, u4_cbp_c; + WORD32 byte_count = 0; + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) + >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + /* mb header info */ + mb_tpm = *pu1_byte++; + byte_count++; + + /* mb type */ + mb_type = mb_tpm & 0xF; + /* CABAC contexts for the MB */ + ih264e_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /* if Intra MB */ + if (mb_type == I16x16 || mb_type == I4x4) + { + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + + /* Encode mb_skip_flag */ + 
ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE); + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + if (mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2) + + (u4_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + /* Encode intra mb type */ + { + ih264e_cabac_encode_bin(ps_cabac_ctxt, + 1, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_TYPE_P_SLICE); + + ih264e_cabac_enc_intra_mb_type(PSLICE, (UWORD8) luma_intra_mode, + ps_cabac_ctxt, MB_TYPE_P_SLICE); + } + + if (mb_type == I4x4) + { /* Intra 4x4 modes */ + ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte); + byte_count += 8; + } + chroma_intra_mode = (mb_tpm >> 6); + + ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + + if (mb_type != I16x16) + { + /* encode CBP */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + } + + if ((cbp > 0) || (mb_type == I16x16)) + { + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + + /* Encoding Residue */ + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + ps_curr_ctxt->u1_cbp = (UWORD8) cbp; + ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt); + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT); + } + else + { + ps_curr_ctxt->u1_cbp = (UWORD8) cbp; + ps_curr_ctxt->u1_mb_type = I4x4; + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT); + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + } + + /* Ending bitstream offset for reside in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset + - 
bitstream_start_offset; + } + else + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0; + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + } + + memset(ps_curr_ctxt->u1_mv, 0, 16); /* intra MB: reset the mvd context bytes */ + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = (UWORD8) cbp; + + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + } + else + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + } + + ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count; + + return IH264E_SUCCESS; + } + else /* Inter MB */ + { + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + /* Every inter MB other than PSKIP is encoded as P16x16 */ + if (mb_type != PSKIP) + { + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + + /* Encoding mb_skip */ + ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE); + + /* Encoding mb_type as P16x16: three 0 bins whose ctxIdxInc 0, 1, 2 are packed one per hex digit (0x210) */ + { + UWORD32 u4_ctx_inc_p; + u4_ctx_inc_p = (0x010 + ((2) << 8)); + + ih264e_encode_decision_bins(0, 3, u4_ctx_inc_p, 3, + &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_P_SLICE]), + ps_cabac_ctxt); + } + ps_curr_ctxt->u1_mb_type = CAB_P; + { + WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte; /* NOTE(review): pu1_byte is 3 bytes past the start of the MB header here, so this WORD16 view may be misaligned - confirm the header buffer layout */ + byte_count += 4; + ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type + | CAB_NON_BD16x16); + /* Encoding motion vector for P16x16 */ + ih264e_cabac_enc_mvds_p16x16(ps_cabac_ctxt, pi2_mv_ptr); + } + /* Encode CBP */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + + if (cbp) + { + /* encode mb_qp_delta */ + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset =
GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + + } + else/* MB = PSKIP */ + { + ih264e_cabac_enc_mb_skip(1, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE); + + ps_curr_ctxt->u1_mb_type = CAB_P_SKIP; + (*ps_ent_ctxt->pi4_mb_skip_run)++; + + memset(ps_curr_ctxt->u1_mv, 0, 16); + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + cbp = 0; + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + + } + + if (cbp > 0) + { + /* Encode residue */ + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT); + /* Ending bitstream offset for reside in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[1] += bitstream_end_offset + - bitstream_start_offset; + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x6; + } + else + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0; + } + ps_curr_ctxt->u1_intrapred_chroma_mode = 0; + ps_curr_ctxt->u1_cbp = cbp; + ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count; + return IH264E_SUCCESS; + } +} + + +/* ! 
< Table 9-37 – Binarization for macroblock types in B slices in ITU_T_H264-201402 + * Bits 0-7 : binarised value + * Bits 8-15: length of binary sequence */ + + +static const UWORD32 u4_b_mb_type[27] = { 0x0100, 0x0301, 0x0305, 0x0603, + 0x0623, 0x0613, 0x0633, 0x060b, + 0x062b, 0x061b, 0x063b, 0x061f, + 0x0707, 0x0747, 0x0727, 0x0767, + 0x0717, 0x0757, 0x0737, 0x0777, + 0x070f, 0x074f, 0x063f }; +/* CtxInc values for the mb types in B slices, one entry per entry of u4_b_mb_type. Both tables are declared with 27 slots but list only 23 initialisers, so the last four entries are zero. NOTE(review): each entry appears to pack one ctxIdxInc per hex digit with bin 0 in the lowest digit, the same layout ih264e_cabac_enc_ctx_mvd builds for ih264e_encode_decision_bins - confirm at the use site */ +static const UWORD32 ui_b_mb_type_ctx_inc[27] = { 0x00, 0x0530, 0x0530, + 0x0555430, 0x0555430, + 0x0555430, 0x0555430, + 0x0555430, 0x0555430, + 0x0555430, 0x0555430, + 0x0555430, 0x05555430, + 0x05555430, 0x05555430, + 0x05555430, 0x05555430, + 0x05555430, 0x05555430, + 0x05555430, 0x05555430, + 0x05555430, 0x0555430 }; + +/** + ******************************************************************************* + * + * @brief + * This function generates CABAC coded bit stream for B slices + * + * @description + * The mb syntax layer for inter slices constitutes luma mb mode, + * mb qp delta, coded block pattern, chroma mb mode and + * luma/chroma residue.
These syntax elements are written as directed by table + * 7.3.5 of h264 specification + * + * @param[in] ps_ent_ctxt + * pointer to entropy context + * + * @returns error code + * + * @remarks none + * + ******************************************************************************* + */ +IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt) +{ + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + /* CABAC context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac; + + mb_info_ctxt_t *ps_curr_ctxt; + + WORD32 bitstream_start_offset, bitstream_end_offset; + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + UWORD32 u4_cbp_l, u4_cbp_c; + WORD32 byte_count = 0; + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB) + >= ps_bitstream->u4_max_strm_size) + { + /* return without corrupting the buffer beyond its size */ + return (IH264E_BITSTREAM_BUFFER_OVERFLOW); + } + /* mb header info */ + mb_tpm = *pu1_byte++; + byte_count++; + + /* mb type */ + mb_type = mb_tpm & 0xF; + /* CABAC contexts for the MB */ + ih264e_get_cabac_context(ps_ent_ctxt, mb_type); + ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info; + + /* if Intra MB */ + if (mb_type == I16x16 || mb_type == I4x4) + { + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + + /* Encode mb_skip_flag */ + ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE); + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + if (mb_type == I16x16) + { + luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2) + + (u4_cbp_l == 15) * 12; + } + else + { + luma_intra_mode = 0; + } + /* Encode intra mb type */ + { + mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = 
ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_ctx_inc = 0; + + if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 1 : 0; + if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 1 : 0; + + /* Intra Prefix Only "111101" */ + u4_ctx_inc = (u4_ctx_inc | 0x05555430); + ih264e_encode_decision_bins(0x2f, + 6, + u4_ctx_inc, + 3, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_TYPE_B_SLICE, + ps_cabac_ctxt); + + ih264e_cabac_enc_intra_mb_type(BSLICE, (UWORD8) luma_intra_mode, + ps_cabac_ctxt, MB_TYPE_B_SLICE); + + } + + if (mb_type == I4x4) + { /* Intra 4x4 modes */ + ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte); + byte_count += 8; + } + chroma_intra_mode = (mb_tpm >> 6); + + ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt); + + if (mb_type != I16x16) + { + /* encode CBP */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + } + + if ((cbp > 0) || (mb_type == I16x16)) + { + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + + /* Encoding Residue */ + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + ps_curr_ctxt->u1_cbp = (UWORD8) cbp; + ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt); + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT); + } + else + { + ps_curr_ctxt->u1_cbp = (UWORD8) cbp; + ps_curr_ctxt->u1_mb_type = I4x4; + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT); + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6; + } + + /* Ending bitstream 
offset for reside in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset + - bitstream_start_offset; + } + else + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0; + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset + - bitstream_start_offset; + } + + memset(ps_curr_ctxt->u1_mv, 0, 16); + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = (UWORD8) cbp; + + if (mb_type == I16x16) + { + ps_curr_ctxt->u1_mb_type = CAB_I16x16; + } + else + { + ps_curr_ctxt->u1_mb_type = CAB_I4x4; + } + + ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count; + + return IH264E_SUCCESS; + } + + else /* Inter MB */ + { + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + /* Encoding B_Direct_16x16 */ + if (mb_type == BDIRECT) + { + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + + /* Encoding mb_skip */ + ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE); + + /* Encoding mb_type as B_Direct_16x16 */ + { + + mb_info_ctxt_t *ps_left_ctxt = + ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_ctx_inc = 0; + + if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 1 : 0; + if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 
1 : 0; + /* Encode the bin */ + ih264e_cabac_encode_bin( + ps_cabac_ctxt, + 0, + ps_cabac_ctxt->au1_cabac_ctxt_table + + MB_TYPE_B_SLICE + u4_ctx_inc); + + } + ps_curr_ctxt->u1_mb_type = CAB_BD16x16; + memset(ps_curr_ctxt->u1_mv, 0, 16); + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + + /* Encode CBP */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + + if (cbp) + { + /* encode mb_qp_delta */ + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + /* Starting bitstream offset for residue */ + + } + + else if (mb_type == BSKIP)/* MB = BSKIP */ + { + ih264e_cabac_enc_mb_skip(1, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE); + + ps_curr_ctxt->u1_mb_type = CAB_B_SKIP; + + memset(ps_curr_ctxt->u1_mv, 0, 16); + memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16); + cbp = 0; + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + + } + + else /* mbype is B_L0_16x16, B_L1_16x16 or B_Bi_16x16 */ + { + WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4); + UWORD32 u4_mb_type = mb_type - B16x16 + B_L0_16x16 + + i4_mb_part_pred_mode; + cbp = *pu1_byte++; + byte_count++; + mb_qp_delta = *pu1_byte++; + byte_count++; + + /* Encoding mb_skip */ + ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE); + + /* Encoding mb_type as B16x16 */ + { + mb_info_ctxt_t *ps_left_ctxt = + ps_cabac_ctxt->ps_left_ctxt_mb_info; + mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info; + UWORD32 u4_ctx_inc = 0; + + UWORD32 u4_mb_type_bins = u4_b_mb_type[u4_mb_type]; + UWORD32 u4_bin_len = (u4_mb_type_bins >> 8) & 0x0F; + 
u4_mb_type_bins = u4_mb_type_bins & 0xFF; + + if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 1 : 0; + if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info) + u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) + != CAB_BD16x16) ? 1 : 0; + + u4_ctx_inc = u4_ctx_inc | ui_b_mb_type_ctx_inc[u4_mb_type]; + + ih264e_encode_decision_bins(u4_mb_type_bins, + u4_bin_len, + u4_ctx_inc, + u4_bin_len, + &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_B_SLICE]), + ps_cabac_ctxt); + } + + ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16; + { + WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte; + /* Get the pred modes */ + + byte_count += 4 * (1 + (i4_mb_part_pred_mode == PRED_BI)); + + ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type + | CAB_NON_BD16x16); + /* Encoding motion vector for B16x16 */ + ih264e_cabac_enc_mvds_b16x16(ps_cabac_ctxt, pi2_mv_ptr, + i4_mb_part_pred_mode); + } + /* Encode CBP */ + ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt); + + if (cbp) + { + /* encode mb_qp_delta */ + ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt); + } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset + - bitstream_start_offset; + /* Starting bitstream offset for residue */ + bitstream_start_offset = bitstream_end_offset; + } + + if (cbp > 0) + { + /* Encode residue */ + ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT); + /* Ending bitstream offset for reside in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + ps_ent_ctxt->u4_residue_bits[1] += bitstream_end_offset + - bitstream_start_offset; + + ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6; + ps_curr_ctxt->u1_yuv_dc_csbp &= 0x6; + } + else + { + ps_curr_ctxt->u1_yuv_ac_csbp = 0; + ps_curr_ctxt->u1_yuv_dc_csbp = 0; + *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0; + *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0; + 
*(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0; + } + ps_curr_ctxt->u1_intrapred_chroma_mode = 0; + ps_curr_ctxt->u1_cbp = cbp; + ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count; + return IH264E_SUCCESS; + } +} diff --git a/encoder/ih264e_cabac_init.c b/encoder/ih264e_cabac_init.c new file mode 100644 index 0000000..347842c --- /dev/null +++ b/encoder/ih264e_cabac_init.c @@ -0,0 +1,226 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ + +/** +******************************************************************************* +* @file +* ih264e_cabac_init.c +* +* @brief +* Contains all initialization functions for cabac contexts +* +* @author +* Doney Alex +* +* @par List of Functions: +* +* +* @remarks +* None +* +******************************************************************************* +*/ + +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ + +/* System include files */ +#include +#include +#include +#include +#include +#include + +/* User include files */ +#include "ih264_typedefs.h" +#include "iv2.h" +#include "ive2.h" +#include "ih264_defs.h" +#include "ih264_debug.h" +#include "ime_distortion_metrics.h" +#include "ime_defs.h" +#include "ime_structs.h" +#include "ih264_error.h" +#include "ih264_structs.h" +#include "ih264_trans_quant_itrans_iquant.h" +#include "ih264_inter_pred_filters.h" +#include "ih264_mem_fns.h" +#include "ih264_padding.h" +#include "ih264_intra_pred_filters.h" +#include "ih264_deblk_edge_filters.h" +#include "ih264_platform_macros.h" +#include "ih264_macros.h" +#include "ih264_buf_mgr.h" +#include "ih264e_error.h" +#include "ih264e_bitstream.h" +#include "ih264_common_tables.h" +#include "ih264_cabac_tables.h" +#include "ih264_list.h" +#include "ih264e_defs.h" +#include "irc_cntrl_param.h" +#include "irc_frame_info_collector.h" +#include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" +#include "ih264e_structs.h" +#include "ih264e_cabac.h" +#include "ih264e_process.h" +#include "ithread.h" +#include "ih264e_intra_modes_eval.h" +#include "ih264e_encode_header.h" +#include "ih264e_globals.h" +#include "ih264e_config.h" +#include "ih264e_trace.h" +#include "ih264e_statistics.h" +#include "ih264_cavlc_tables.h" +#include "ih264e_deblk.h" +#include "ih264e_me.h" +#include "ih264e_debug.h" +#include 
"ih264e_master.h" +#include "ih264e_utils.h" +#include "irc_mem_req_and_acq.h" +#include "irc_rate_control_api.h" +#include "ih264e_platform_macros.h" +#include "ime_statistics.h" + + + +/*****************************************************************************/ +/* Function definitions . */ +/*****************************************************************************/ + +/** + ******************************************************************************* + * + * @brief + * Initialize cabac encoding environment + * + * @param[in] ps_cab_enc_env + * Pointer to encoding_envirnoment_t structure + * + * @returns + * + * @remarks + * None + * + ******************************************************************************* +*/ +static void ih264e_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env) +{ + ps_cab_enc_env->u4_code_int_low = 0; + ps_cab_enc_env->u4_code_int_range = 0x1fe; + ps_cab_enc_env->u4_out_standing_bytes = 0; + ps_cab_enc_env->u4_bits_gen = 0; +} + + +/** + ******************************************************************************* + * + * @brief + * Initialize default context values and pointers (Called once at the beginning of encoding). 
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure. ps_ent_ctxt->ps_cabac and its
+ *  ps_mb_map_ctxt_inc_base must already be valid: both are dereferenced here.
+ *
+ * @returns
+ *  none
+ *
+ * @remarks
+ *  Entry 0 of the map at ps_mb_map_ctxt_inc_base is reserved as the default
+ *  context used when a neighbouring MB is not available; ps_mb_map_ctxt_inc is
+ *  therefore set one entry past the base. Only u1_mb_type, u1_cbp,
+ *  u1_intrapred_chroma_mode, i1_ref_idx and u1_mv of the default entry are
+ *  written.
+ *
+ *******************************************************************************
+*/
+void ih264e_init_cabac_table(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* CABAC context */
+    cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+    ps_cabac_ctxt->ps_mb_map_ctxt_inc = ps_cabac_ctxt->ps_mb_map_ctxt_inc_base + 1;
+    ps_cabac_ctxt->ps_lft_csbp = &ps_cabac_ctxt->s_lft_csbp;
+    ps_cabac_ctxt->ps_bitstrm = ps_ent_ctxt->ps_bitstrm;
+
+    {
+        /* 0th entry of mb_map_ctxt_inc will always contain default values */
+        /* for CABAC context representing MB not available                 */
+        mb_info_ctxt_t *ps_def_ctxt = ps_cabac_ctxt->ps_mb_map_ctxt_inc - 1;
+
+        ps_def_ctxt->u1_mb_type = CAB_SKIP;
+        ps_def_ctxt->u1_cbp = 0x0f;
+        ps_def_ctxt->u1_intrapred_chroma_mode = 0;
+
+        /* mb_info_ctxt_t consists of byte-sized members only, so these      */
+        /* arrays carry no 4-byte alignment guarantee. Clear them bytewise   */
+        /* instead of storing through UWORD32 pointers (misaligned access /  */
+        /* aliasing violation).                                              */
+        memset(ps_def_ctxt->i1_ref_idx, 0, sizeof(ps_def_ctxt->i1_ref_idx));
+        memset(ps_def_ctxt->u1_mv, 0, sizeof(ps_def_ctxt->u1_mv));
+
+        ps_cabac_ctxt->ps_def_ctxt_mb_info = ps_def_ctxt;
+    }
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize cabac context: Initialize all contexts with init values given in the spec.
+ *  Called at the beginning of entropy coding of each slice for CABAC encoding.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure; its ps_cabac and ps_slice_hdr_base
+ *  members are dereferenced
+ *
+ * @returns
+ *  none
+ *
+ * @remarks
+ *  Resets the encoding environment and the previous mb_qp_delta context, then
+ *  copies NUM_CABAC_CTXTS context models from
+ *  gau1_ih264_cabac_ctxt_init_table[idc][qp]. idc is 3 for I slices and the
+ *  slice header's i1_cabac_init_idc otherwise; qp is the slice header's
+ *  i1_slice_qp.
+ *
+ *******************************************************************************
+ */
+void ih264e_init_cabac_ctxt(entropy_ctxt_t *ps_ent_ctxt)
+{
+    cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;
+    slice_header_t *ps_hdr = ps_ent_ctxt->ps_slice_hdr_base;
+
+    /* values read from the slice header */
+    const UWORD8 u1_slice_type = ps_hdr->u1_slice_type;
+    UWORD8 u1_slice_qp = ps_hdr->i1_slice_qp;
+
+    /* first index of the init table: 3 for I slices, cabac_init_idc otherwise */
+    WORD8 i1_init_row = (ISLICE == u1_slice_type) ? 3 : ps_hdr->i1_cabac_init_idc;
+
+    /* per-slice state reset */
+    ps_cabac->i1_prevps_mb_qp_delta_ctxt = 0;
+    ih264e_init_cabac_enc_envirnoment(&ps_cabac->s_cab_enc_env);
+
+    /* load the context models for this (init row, qp) pair */
+    memcpy(ps_cabac->au1_cabac_ctxt_table,
+           gau1_ih264_cabac_ctxt_init_table[i1_init_row][u1_slice_qp],
+           NUM_CABAC_CTXTS * sizeof(bin_ctxt_model));
+}
diff --git a/encoder/ih264e_cabac_structs.h b/encoder/ih264e_cabac_structs.h
new file mode 100644
index 0000000..82938ca
--- /dev/null
+++ b/encoder/ih264e_cabac_structs.h
@@ -0,0 +1,221 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_cabac_structs.h
+ *
+ * @brief
+ *  This file contains cabac related structure definitions.
+ *
+ * @author
+ *  Doney Alex
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_CABAC_STRUCTS_H_
+#define IH264E_CABAC_STRUCTS_H_
+
+
+
+#define CABAC_INIT_IDC 2
+
+
+/**
+ ******************************************************************************
+ *  @brief      typedef for context model
+ ******************************************************************************
+ */
+
+/* bits 0 to 5 :state
+   bit 6 :mps */
+typedef UWORD8 bin_ctxt_model;
+
+/**
+ ******************************************************************************
+ *  @brief      MB info for cabac
+ *
+ *  One entry holds the per-macroblock values that the CABAC writers consult
+ *  for the left/top neighbours when deriving context increments.
+ *
+ *  NOTE(review): every member is a UWORD8/WORD8. Assuming these are 1-byte
+ *  typedefs, the structure has no 4-byte alignment guarantee (i1_ref_idx
+ *  would sit at offset 5 and u1_mv at offset 9), so the arrays should be
+ *  cleared with memset() rather than through UWORD32 pointers.
+ ******************************************************************************
+ */
+typedef struct
+{
+    /* Neighbour availability Variables needed to get CtxtInc, for CABAC */
+    UWORD8 u1_mb_type; /* !< macroblock type: I/P/B/SI/SP; the encoder stores
+                          CAB_* codes here (e.g. CAB_I4x4, CAB_I16x16,
+                          CAB_BD16x16, CAB_SKIP) */
+
+    UWORD8 u1_cbp; /* !< Coded Block Pattern */
+    UWORD8 u1_intrapred_chroma_mode;
+
+    /*************************************************************************/
+    /* Arrangement of AC CSBP                                                */
+    /* bits: b7 b6 b5 b4 b3 b2 b1 b0                                         */
+    /* CSBP: V1 V0 U1 U0 Y3 Y2 Y1 Y0                                         */
+    /*************************************************************************/
+    UWORD8 u1_yuv_ac_csbp;
+    /*************************************************************************/
+    /* Arrangement of DC CSBP                                                */
+    /* bits: b7 b6 b5 b4 b3 b2 b1 b0                                         */
+    /* CSBP: x x x x x Vdc Udc Ydc                                           */
+    /*************************************************************************/
+    UWORD8 u1_yuv_dc_csbp;
+
+    WORD8 i1_ref_idx[4]; /* reference index context values; zeroed in the
+                            default entry by ih264e_init_cabac_table() */
+    UWORD8 u1_mv[4][4];  /* motion vector context values, 16 bytes in total;
+                            reset to 0 for intra, skip and direct MBs */
+} mb_info_ctxt_t;
+
+
+/**
+
******************************************************************************
+ *  @brief      CSBP info for CABAC
+ *
+ *  Storage for the left-MB coded sub-block patterns, kept as separate
+ *  "top_mb" and "bot_mb" bytes for luma AC, chroma AC and DC.
+ *
+ *  NOTE(review): the "Points either to ..." remarks below appear to describe
+ *  the pu1_left_y_ac_csbp / pu1_left_uv_ac_csbp / pu1_left_yuv_dc_csbp
+ *  pointers kept in cabac_ctxt_t, not the fields declared here - confirm.
+ ******************************************************************************
+ */
+typedef struct
+{
+    /*************************************************************************/
+    /* Arrangement of Luma AC CSBP for leftMb                                */
+    /* bits: b7 b6 b5 b4 b3 b2 b1 b0                                         */
+    /* CSBP: X X X X Y3 Y2 Y1 Y0                                             */
+    /*************************************************************************/
+    /*************************************************************************/
+    /* Points either to u1_y_ac_csbp_top_mb or u1_y_ac_csbp_bot_mb           */
+    /*************************************************************************/
+    UWORD8 u1_y_ac_csbp_top_mb;
+    UWORD8 u1_y_ac_csbp_bot_mb;
+
+    /*************************************************************************/
+    /* Arrangement of Chroma AC CSBP for leftMb                              */
+    /* bits: b7 b6 b5 b4 b3 b2 b1 b0                                         */
+    /* CSBP: X X X X V1 V0 U1 U0                                             */
+    /*************************************************************************/
+    /*************************************************************************/
+    /* Points either to u1_uv_ac_csbp_top_mb or u1_uv_ac_csbp_bot_mb         */
+    /*************************************************************************/
+    UWORD8 u1_uv_ac_csbp_top_mb;
+    UWORD8 u1_uv_ac_csbp_bot_mb;
+
+    /*************************************************************************/
+    /* Arrangement of DC CSBP                                                */
+    /* bits: b7 b6 b5 b4 b3 b2 b1 b0                                         */
+    /* CSBP: x x x x x Vdc Udc Ydc                                           */
+    /*************************************************************************/
+    /*************************************************************************/
+    /* Points either to u1_yuv_dc_csbp_top_mb or u1_yuv_dc_csbp_bot_mb       */
+    /*************************************************************************/
+    UWORD8 u1_yuv_dc_csbp_top_mb;
+    UWORD8 u1_yuv_dc_csbp_bot_mb;
+} cab_csbp_t;
+
+/**
+ ******************************************************************************
+ *  @brief      CABAC Encoding Environment
+ *
+ *  State of the arithmetic encoder. ih264e_init_cabac_enc_envirnoment()
+ *  resets it to low = 0, range = 0x1fe, outstanding bytes = 0, bits = 0.
+ ******************************************************************************
+ */
+
+typedef struct
+{
+    /** cabac interval start L */
+    UWORD32 u4_code_int_low;
+
+    /** cabac interval range R */
+    UWORD32 u4_code_int_range;
+
+    /** bytes outstanding; number of 0xFF bytes that occur during renorm
+     *  (the original comment said "bits" - TODO confirm).
+     *  These will be accumulated till the carry bit is known
+     */
+    UWORD32 u4_out_standing_bytes;
+
+    /** bits generated during renormalization
+     * A byte is put to stream/u4_out_standing_bytes from u4_low(L) when
+     * u4_bits_gen exceeds 8
+     */
+    UWORD32 u4_bits_gen;
+} encoding_envirnoment_t;
+
+
+/**
+ ******************************************************************************
+ *  @brief      CABAC Context structure : Variables to handle Cabac
+ *
+ *  Holds the context-model table, the encoder state and the pointers to the
+ *  current/left/top/default per-MB context entries used while writing a
+ *  slice.
+ ******************************************************************************
+ */
+typedef struct
+{
+
+    /* Table of all the cabac context models (NUM_CABAC_CTXTS entries, held
+       by value); reloaded from the init table by ih264e_init_cabac_ctxt() */
+    bin_ctxt_model au1_cabac_ctxt_table[NUM_CABAC_CTXTS];
+
+
+    cab_csbp_t s_lft_csbp; /* left csbp storage; ps_lft_csbp is pointed at
+                              this member by ih264e_init_cabac_table() */
+
+    /**
+     * pointer to Bitstream structure
+     */
+    bitstrm_t *ps_bitstrm;
+
+    /* Pointer to mb_info_ctxt_t map_base; entry 0 is the default
+       ("MB not available") entry */
+    mb_info_ctxt_t *ps_mb_map_ctxt_inc_base;
+
+    /* CABAC encoding environment (held by value, not a pointer) */
+    encoding_envirnoment_t s_cab_enc_env;
+
+    /* These things need to be updated at each MbLevel */
+
+    /* Prev ps_mb_qp_delta_ctxt; reset to 0 by ih264e_init_cabac_ctxt() */
+    WORD8 i1_prevps_mb_qp_delta_ctxt;
+
+    /* Pointer to mb_info_ctxt_t map; set to ps_mb_map_ctxt_inc_base + 1 by
+       ih264e_init_cabac_table() */
+    mb_info_ctxt_t *ps_mb_map_ctxt_inc;
+
+    /* Pointer to default mb_info_ctxt_t; left/top pointers are compared
+       against it to detect an unavailable neighbour */
+    mb_info_ctxt_t *ps_def_ctxt_mb_info;
+
+    /* Pointer to current mb_info_ctxt_t */
+    mb_info_ctxt_t *ps_curr_ctxt_mb_info;
+
+    /* Pointer to left mb_info_ctxt_t */
+    mb_info_ctxt_t *ps_left_ctxt_mb_info;
+
+    /* Pointer to top mb_info_ctxt_t */
+    mb_info_ctxt_t *ps_top_ctxt_mb_info;
+
+    /* Pointer to left csbp structure */
+    cab_csbp_t *ps_lft_csbp;
+    UWORD8 *pu1_left_y_ac_csbp;
+    UWORD8 *pu1_left_uv_ac_csbp;
+    UWORD8 *pu1_left_yuv_dc_csbp;
+
+    /***************************************************************************/
+    /* Ref_idx contexts are stored in the following way                        */
+    /* Array Idx 0,1 for reference indices in Forward direction                */
+    /* Array Idx 2,3 for reference indices in backward direction               */
+    /***************************************************************************/
+    /* Dimensions for i1_left_ref_idx_ctx_inc_arr is [2][4] for Mbaff:Top and Bot */
+    WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
+    WORD8 *pi1_left_ref_idx_ctxt_inc;
+
+    /* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
+    UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
+    UWORD8 (*pu1_left_mv_ctxt_inc)[4];
+
+} cabac_ctxt_t;
+
+#endif /* IH264E_CABAC_STRUCTS_H_ */
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c index 1f98b6a..5d819d9 100644 --- a/encoder/ih264e_cavlc.c +++ b/encoder/ih264e_cavlc.c @@ -35,8 +35,8 @@ * - ih264e_write_coeff4x4_cavlc() * - ih264e_write_coeff8x8_cavlc() * - ih264e_encode_residue() -* - ih264e_write_islice_mb() -* - ih264e_write_pslice_mb() +* - ih264e_write_islice_mb_cavlc() +* - ih264e_write_pslice_mb_cavlc() * * @remarks * None @@ -65,8 +65,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -75,9 +75,11 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_encode_header.h" #include "ih264_cavlc_tables.h" @@ -712,8 +714,8 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt, /* temp var */ UWORD32 u4_nC, u4_ngbr_avlb; UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz,
*pu1_left_nnz; - UWORD16 au2_sig_coeff_map[4]; - WORD16 *pi2_res_block[4]; + UWORD16 au2_sig_coeff_map[4] = {0}; + WORD16 *pi2_res_block[4] = {NULL}; UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx; tu_sblk_coeff_data_t *ps_mb_coeff_data; ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4; @@ -925,7 +927,6 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt, return error_status; } -#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + 32 - ps_bitstream->i4_bits_left_in_cw) /** ******************************************************************************* @@ -948,7 +949,7 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt, * ******************************************************************************* */ -IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt) +IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt) { /* error status */ IH264E_ERROR_T error_status = IH264E_SUCCESS; @@ -1170,7 +1171,7 @@ IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt) * ******************************************************************************* */ -IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt) +IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt) { /* error status */ IH264E_ERROR_T error_status = IH264E_SUCCESS; @@ -1406,7 +1407,6 @@ IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt) for (i = 0; i < (WORD32)u4_part_cnt; i++) { PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x"); - PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y"); } @@ -1425,6 +1425,323 @@ IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt) PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); } + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - 
bitstream_start_offset; + + /* start bitstream offset for residue in bits */ + bitstream_start_offset = bitstream_end_offset; + + /* residual */ + error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp); + + /* Ending bitstream offset for residue in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + return error_status; +} + + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for B slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt) +{ + /* error status */ + IH264E_ERROR_T error_status = IH264E_SUCCESS; + + /* bit stream ptr */ + bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm; + + /* packed header data */ + UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data; + + /* mb header info */ + /* + * mb_tpm : mb type plus mode + * mb_type : luma mb type and chroma mb type are packed + * cbp : coded block pattern + * mb_qp_delta : mb qp delta + * chroma_intra_mode : chroma intra mode + * luma_intra_mode : luma intra mode + * ps_pu : Pointer to the array of structures having motion vectors, size + * and position of sub partitions + */ + WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode; + WORD8 mb_qp_delta; + + /* temp var */ + WORD32 i, mb_type_stream, 
cbptable = 1; + + WORD32 is_inter = 0; + + WORD32 bitstream_start_offset, bitstream_end_offset; + + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstream); + + /********************************************************************/ + /* BEGIN HEADER GENERATION */ + /********************************************************************/ + + mb_tpm = *pu1_byte++; + + /* mb type */ + mb_type = mb_tpm & 0xF; + + /* check for skip */ + if (mb_type == BSKIP) + { + UWORD32 *nnz; + + is_inter = 1; + + /* increment skip counter */ + (*ps_ent_ctxt->pi4_mb_skip_run)++; + + /* store the index of the next mb syntax layer */ + ps_ent_ctxt->pv_mb_header_data = pu1_byte; + + /* set nnz to zero */ + ps_ent_ctxt->u4_left_nnz_luma = 0; + nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + ps_ent_ctxt->u4_left_nnz_cbcr = 0; + nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x]; + *nnz = 0; + + /* residual */ + error_status = ih264e_encode_residue(ps_ent_ctxt, B16x16, 0); + + bitstream_end_offset = GET_NUM_BITS(ps_bitstream); + + ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset + - bitstream_start_offset; + + return error_status; + } + + + /* remaining mb header info */ + cbp = *pu1_byte++; + mb_qp_delta = *pu1_byte++; + + /* mb skip run */ + PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run"); + + /* reset skip counter */ + *ps_ent_ctxt->pi4_mb_skip_run = 0; + + /* is intra ? 
*/ + if (mb_type == I16x16) + { + UWORD32 u4_cbp_l, u4_cbp_c; + + is_inter = 0; + + u4_cbp_c = (cbp >> 4); + u4_cbp_l = (cbp & 0xF); + luma_intra_mode = (mb_tpm >> 4) & 3; + chroma_intra_mode = (mb_tpm >> 6); + + mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12; + + mb_type_stream += 23; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if (mb_type == I4x4) + { + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type"); + + for (i = 0; i < 16; i += 2) + { + /* sub blk idx 1 */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if (mb_type == I8x8) + { + /* transform 8x8 flag */ + UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag; + + /* mb sub blk modes */ + WORD32 intra_pred_mode_flag, 
rem_intra_mode; + WORD32 byte; + + is_inter = 0; + + chroma_intra_mode = (mb_tpm >> 6); + cbptable = 0; + + ASSERT(0); + + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type"); + + /* u4_transform_size_8x8_flag */ + PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag"); + + /* write sub block modes */ + for (i = 0; i < 4; i++) + { + /* sub blk idx 1 */ + byte = *pu1_byte++; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + + /* sub blk idx 2 */ + byte >>= 4; + + intra_pred_mode_flag = byte & 0x1; + + /* prev_intra4x4_pred_mode_flag */ + PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag"); + + /* rem_intra4x4_pred_mode */ + if (!intra_pred_mode_flag) + { + rem_intra_mode = (byte & 0xF) >> 1; + PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode"); + } + } + + /* intra_chroma_pred_mode */ + PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode"); + } + else if(mb_type == BDIRECT) + { + is_inter = 1; + /* write mb type */ + PUT_BITS_UEV(ps_bitstream, B_DIRECT_16x16, error_status, "mb type"); + } + else /* if mb_type == B16x16 */ + { + /* inter macro block partition cnt for 16x16 16x8 8x16 8x8 */ + const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 }; + + /* mv ptr */ + WORD16 *pi2_mvd_ptr = (WORD16 *)pu1_byte; + + /* number of partitions for the current mb */ + UWORD32 u4_part_cnt = au1_part_cnt[mb_type - B16x16]; + + /* Get the pred modes */ + WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4); + + is_inter = 1; + + mb_type_stream = mb_type - B16x16 + B_L0_16x16 + i4_mb_part_pred_mode; + + /* write mb 
type */ + PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type"); + + for (i = 0; i < (WORD32)u4_part_cnt; i++) + { + if (i4_mb_part_pred_mode != PRED_L1)/* || PRED_BI */ + { + PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l0 x"); + PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l0 y"); + } + if (i4_mb_part_pred_mode != PRED_L0)/* || PRED_BI */ + { + PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l1 x"); + PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l1 y"); + } + } + + pu1_byte = (UWORD8 *)pi2_mvd_ptr; + } + + /* coded_block_pattern */ + if (mb_type != I16x16) + { + PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern"); + } + + if (cbp || mb_type == I16x16) + { + /* mb_qp_delta */ + PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta"); + } /* Ending bitstream offset for header in bits */ bitstream_end_offset = GET_NUM_BITS(ps_bitstream); diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h index acd0def..8da2cea 100644 --- a/encoder/ih264e_cavlc.h +++ b/encoder/ih264e_cavlc.h @@ -42,23 +42,6 @@ /* Function macro definitions */ /*****************************************************************************/ -#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \ -{\ - ps_mb_coeff_data = pv_mb_coeff_data; \ - u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; \ - if (u4_nnz)\ - {\ - u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; \ - pi2_res_block = ps_mb_coeff_data->ai2_residue; \ - pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz); \ - }\ - else\ - {\ - pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue;\ - }\ -} - - /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ @@ -84,7 +67,7 @@ * 
******************************************************************************* */ -IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt); +IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt); /** ******************************************************************************* @@ -107,6 +90,29 @@ IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt); * ******************************************************************************* */ -IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt); +IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt); + +/** +******************************************************************************* +* +* @brief +* This function generates CAVLC coded bit stream for Inter(B) slices +* +* @description +* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes +* (if present), mb qp delta, coded block pattern, chroma mb mode and +* luma/chroma residue. These syntax elements are written as directed by table +* 7.3.5 of h264 specification +* +* @param[in] ps_ent_ctxt +* pointer to entropy context +* +* @returns error code +* +* @remarks none +* +******************************************************************************* +*/ +IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt); #endif /* IH264E_CAVLC_H_ */ diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c index 89243a5..05a891d 100644 --- a/encoder/ih264e_core_coding.c +++ b/encoder/ih264e_core_coding.c @@ -65,6 +65,7 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -73,9 +74,11 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include 
"irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_globals.h" #include "ih264e_core_coding.h" @@ -1249,7 +1252,7 @@ void ih264e_pack_c_mb(WORD16 *pi2_res_mb, * For that there are two paths we need to look for * One is the path to bitstream , these variables should have the proper input * configured UV or VU - * For the other path the inverse transform variables should have ehat ever 0ordering the + * For the other path the inverse transform variables should have what ever ordering the * input had */ diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c index 8a11bdb..db176ac 100644 --- a/encoder/ih264e_deblk.c +++ b/encoder/ih264e_deblk.c @@ -63,6 +63,7 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -71,12 +72,13 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264_trans_data.h" -#include "ih264_deblk_edge_filters.h" #include "ih264_deblk_tables.h" #include "ih264e_deblk.h" @@ -150,20 +152,15 @@ static const UWORD16 ih264e_gu2_4x4_v2h_reorder[16] = * @param[in] u4_left_mb_csbp * coded sub block pattern of top mb * -* @param[in] ps_leftMvPred -* MV of left mb -* -* @param[in] ps_topMvPred -* MV of top mb +* @param[in] ps_left_pu +* PU for left MB * -* @param[in] ps_curMvPred -* MV of curr mb +* @param[in] ps_top_pu +* PU for top MB * -* @param[in] u1_left_intra -* is left intra +* @param[in] ps_curr_pu +* PU for current MB * -* @param[in] u1_top_intra -* is top intra * * @returns none * @@ -176,64 +173,65 @@ static void 
ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs, UWORD32 u4_left_mb_csbp, UWORD32 u4_top_mb_csbp, UWORD32 u4_cur_mb_csbp, - mv_t *ps_leftMvPred, - mv_t *ps_topMvPred, - mv_t *ps_curMvPred, - UWORD8 u1_left_intra, - UWORD8 u1_top_intra) + enc_pu_t *ps_left_pu, + enc_pu_t *ps_top_pu, + enc_pu_t *ps_curr_pu) { /* motion vectors of blks p & q */ - WORD16 i16_qMv0, i16_qMv1, i16_pMv0, i16_pMv1; + WORD16 i16_qMvl0_x, i16_qMvl0_y, i16_pMvl0_x, i16_pMvl0_y; + WORD16 i16_qMvl1_x, i16_qMvl1_y, i16_pMvl1_x, i16_pMvl1_y; /* temp var */ - UWORD32 u4_lft_flag, u4_top_flag; - const UWORD32 *bs_map; - UWORD32 u4_reordered_vert_bs_enc, u4_temp; + UWORD32 u4_left_flag, u4_top_flag; + const UWORD32 *bs_map; + UWORD32 u4_reordered_vert_bs_enc, u4_temp; /* Coded Pattern for Horizontal Edge */ /*-----------------------------------------------------------------------*/ /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */ /*-----------------------------------------------------------------------*/ - UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12); - UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp; + UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12); + UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp; /* Coded Pattern for Vertical Edge */ /*-----------------------------------------------------------------------*/ /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */ /*-----------------------------------------------------------------------*/ - UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK; + UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK; /*-----------------------------------------------------------------------*/ /*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */ /*-----------------------------------------------------------------------*/ - UWORD32 u4_cur_mb_masked_csbp 
=(u4_cur_mb_csbp<<1)&(~CSBP_LEFT_BLOCK_MASK); + UWORD32 u4_cur_mb_masked_csbp = (u4_cur_mb_csbp << 1) + & (~CSBP_LEFT_BLOCK_MASK); /*-----------------------------------------------------------------------*/ /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */ /*-----------------------------------------------------------------------*/ - UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3); - UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp; + UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) + | (u4_left_mb_masked_csbp >> 3); + UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp; /* BS Calculation for MB Boundary Edges */ /* BS calculation for 1 2 3 horizontal boundary */ - bs_map = gu4_bs_table[0]; + bs_map = gu4_bs_table[0]; pu4_horz_bs[1] = bs_map[(u4_horz_bs_enc >> 4) & 0xF]; pu4_horz_bs[2] = bs_map[(u4_horz_bs_enc >> 8) & 0xF]; pu4_horz_bs[3] = bs_map[(u4_horz_bs_enc >> 12) & 0xF]; /* BS calculation for 5 6 7 vertical boundary */ /* Do 4x4 tranpose of u4_vert_bs_enc by using look up table for reorder */ - u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF]; + u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF]; - u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF]; - u4_reordered_vert_bs_enc |= (u4_temp << 1); + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 1); - u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF]; - u4_reordered_vert_bs_enc |= (u4_temp << 2); + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 2); - u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF]; - u4_reordered_vert_bs_enc |= (u4_temp << 3); + u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF]; + u4_reordered_vert_bs_enc |= (u4_temp << 3); pu4_vert_bs[1] = bs_map[(u4_reordered_vert_bs_enc >> 4) 
& 0xF]; pu4_vert_bs[2] = bs_map[(u4_reordered_vert_bs_enc >> 8) & 0xF]; @@ -241,39 +239,96 @@ static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs, /* BS Calculation for MB Boundary Edges */ - i16_qMv0 = ps_curMvPred->i2_mvx; - i16_qMv1 = ps_curMvPred->i2_mvy; - - if (u1_top_intra) + if (ps_top_pu->b1_intra_flag) { pu4_horz_bs[0] = 0x04040404; } else { - i16_pMv0 = ps_topMvPred->i2_mvx; - i16_pMv1 = ps_topMvPred->i2_mvy; + if (ps_curr_pu->b2_pred_mode != ps_top_pu->b2_pred_mode) + { + u4_top_flag = 1; + } + else if(ps_curr_pu->b2_pred_mode != 2) + { + i16_pMvl0_x = ps_top_pu->s_me_info[ps_top_pu->b2_pred_mode].s_mv.i2_mvx; + i16_pMvl0_y = ps_top_pu->s_me_info[ps_top_pu->b2_pred_mode].s_mv.i2_mvy; + + i16_qMvl0_x = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvx; + i16_qMvl0_y = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvy; - u4_top_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) | - (ABS((i16_pMv1 - i16_qMv1)) >= 4); - bs_map = gu4_bs_table[!!u4_top_flag]; + u4_top_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4) + | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4); + } + else + { + + i16_pMvl0_x = ps_top_pu->s_me_info[PRED_L0].s_mv.i2_mvx; + i16_pMvl0_y = ps_top_pu->s_me_info[PRED_L0].s_mv.i2_mvy; + i16_pMvl1_x = ps_top_pu->s_me_info[PRED_L1].s_mv.i2_mvx; + i16_pMvl1_y = ps_top_pu->s_me_info[PRED_L1].s_mv.i2_mvy; + + i16_qMvl0_x = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvx; + i16_qMvl0_y = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvy; + i16_qMvl1_x = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvx; + i16_qMvl1_y = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvy; + + + u4_top_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4) + | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4) + | (ABS((i16_pMvl1_x - i16_qMvl1_x)) >= 4) + | (ABS((i16_pMvl1_y - i16_qMvl1_y)) >= 4); + } + + bs_map = gu4_bs_table[!!u4_top_flag]; pu4_horz_bs[0] = bs_map[u4_horz_bs_enc & 0xF]; } - if (u1_left_intra) + + if (ps_left_pu->b1_intra_flag) { pu4_vert_bs[0] = 0x04040404; } else { - i16_pMv0 = 
ps_leftMvPred->i2_mvx; - i16_pMv1 = ps_leftMvPred->i2_mvy; + if (ps_curr_pu->b2_pred_mode != ps_left_pu->b2_pred_mode) + { + u4_left_flag = 1; + } + else if(ps_curr_pu->b2_pred_mode != 2)/* Not bipred */ + { + i16_pMvl0_x = ps_left_pu->s_me_info[ps_left_pu->b2_pred_mode].s_mv.i2_mvx; + i16_pMvl0_y = ps_left_pu->s_me_info[ps_left_pu->b2_pred_mode].s_mv.i2_mvy; + + i16_qMvl0_x = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvx; + i16_qMvl0_y = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvy; - u4_lft_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) | - (ABS((i16_pMv1 - i16_qMv1)) >= 4); + u4_left_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4) + | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4); + } + else + { + + i16_pMvl0_x = ps_left_pu->s_me_info[PRED_L0].s_mv.i2_mvx; + i16_pMvl0_y = ps_left_pu->s_me_info[PRED_L0].s_mv.i2_mvy; + i16_pMvl1_x = ps_left_pu->s_me_info[PRED_L1].s_mv.i2_mvx; + i16_pMvl1_y = ps_left_pu->s_me_info[PRED_L1].s_mv.i2_mvy; + + i16_qMvl0_x = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvx; + i16_qMvl0_y = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvy; + i16_qMvl1_x = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvx; + i16_qMvl1_y = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvy; + + + u4_left_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4) + | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4) + | (ABS((i16_pMvl1_x - i16_qMvl1_x)) >= 4) + | (ABS((i16_pMvl1_y - i16_qMvl1_y)) >= 4); + } - bs_map = gu4_bs_table[!!u4_lft_flag]; + bs_map = gu4_bs_table[!!u4_left_flag]; pu4_vert_bs[0] = bs_map[u4_reordered_vert_bs_enc & 0xF]; } } @@ -331,8 +386,7 @@ static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc) * * @returns none * -* @remarks In this module it is assumed that their is only single reference -* frame and is always the most recently used anchor frame +* @remarks * ******************************************************************************* */ @@ -394,14 +448,18 @@ void ih264e_compute_bs(process_ctxt_t * ps_proc) if (i4_mb_x == 0) { 
ps_left_mb_syntax_ele->u4_csbp = 0; - ps_left_mb_syntax_ele->u2_is_intra = 0; - ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv; + ps_proc->s_left_mb_pu.b1_intra_flag = 0; + ps_proc->s_left_mb_pu.b2_pred_mode = ps_proc->ps_pu->b2_pred_mode; + ps_proc->s_left_mb_pu.s_me_info[0].s_mv = ps_proc->ps_pu->s_me_info[0].s_mv; + ps_proc->s_left_mb_pu.s_me_info[1].s_mv = ps_proc->ps_pu->s_me_info[1].s_mv; } if (i4_mb_y == 0) { ps_top_mb_syntax_ele->u4_csbp = 0; - ps_top_mb_syntax_ele->u2_is_intra = 0; - ps_top_row_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv; + ps_top_row_pu->b1_intra_flag = 0; + ps_top_row_pu->b2_pred_mode = ps_proc->ps_pu->b2_pred_mode; + ps_top_row_pu->s_me_info[0].s_mv = ps_proc->ps_pu->s_me_info[0].s_mv; + ps_top_row_pu->s_me_info[1].s_mv = ps_proc->ps_pu->s_me_info[1].s_mv; } ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_pic_horz_bs, @@ -409,11 +467,9 @@ void ih264e_compute_bs(process_ctxt_t * ps_proc) ps_left_mb_syntax_ele->u4_csbp, ps_top_mb_syntax_ele->u4_csbp, ps_proc->u4_csbp, - &ps_proc->s_left_mb_pu.s_l0_mv, - &ps_top_row_pu->s_l0_mv, - &ps_proc->ps_pu->s_l0_mv, - ps_left_mb_syntax_ele->u2_is_intra, - ps_top_mb_syntax_ele->u2_is_intra); + &ps_proc->s_left_mb_pu, + ps_top_row_pu, + ps_proc->ps_pu); } return ; diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h index 76929ef..c7e2a87 100644 --- a/encoder/ih264e_defs.h +++ b/encoder/ih264e_defs.h @@ -38,6 +38,22 @@ #define IH264E_DEFS_H_ +#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \ +{ \ + ps_mb_coeff_data = pv_mb_coeff_data; \ + u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; \ + if (u4_nnz) \ + { \ + u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; \ + pi2_res_block = ps_mb_coeff_data->ai2_residue; \ + pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz); \ + } \ + else \ + { \ + pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue; \ + } \ +} + 
/*****************************************************************************/ /* Width and height restrictions */ /*****************************************************************************/ @@ -103,6 +119,21 @@ /*****************************************************************************/ /* Number of frame restrictions */ /*****************************************************************************/ +/** + * Maximum number of reference pictures + */ +#define MAX_REF_PIC_CNT 2 + +/** + * Minimum number of reference pictures + */ +#define MIN_REF_PIC_CNT 1 + +/** + * Maximum number of B pictures between two I/P pictures + */ +#define MAX_NUM_BFRAMES 10 + /** * Maximum number of reference buffers in DPB manager */ @@ -165,6 +196,7 @@ #define DEFAULT_RC IVE_RC_STORAGE #define DEFAULT_MAX_FRAMERATE 120000 #define DEFAULT_MAX_BITRATE 20000000 +#define DEFAULT_MAX_NUM_BFRAMES 0 #define DEFAULT_MAX_SRCH_RANGE_X 256 #define DEFAULT_MAX_SRCH_RANGE_Y 256 #define DEFAULT_SLICE_PARAM 256 @@ -206,6 +238,7 @@ #define DEFAULT_ENC_SPEED_PRESET IVE_USER_DEFINED #define DEFAULT_PRE_ENC_ME 0 #define DEFAULT_PRE_ENC_IPE 0 +#define DEFAULT_ENTROPY_CODING_MODE 0 /** Maximum number of entries in input buffer list */ #define MAX_INP_BUF_LIST_ENTRIES 32 @@ -217,7 +250,10 @@ #define MAX_REC_LIST_ENTRIES 16 /** Number of buffers created to hold half-pel planes for every reference buffer */ - #define HPEL_PLANES_CNT 1 +#define HPEL_PLANES_CNT 1 + +/** Number of buffers Needed for SUBPEL and BIPRED computation */ +#define SUBPEL_BUFF_CNT 4 /** ***************************************************************************** @@ -261,6 +297,16 @@ enum */ MEM_REC_CODEC, + /** + * Cabac context + */ + MEM_REC_CABAC, + + /** + * Cabac context_mb_info + */ + MEM_REC_CABAC_MB_INFO, + /** * entropy context */ @@ -525,9 +571,9 @@ enum /* [0 - 00 - 00110] */ #define NAL_SEI_FIRST_BYTE 0x06 -#define H264_ALLOC_INTER_FRM_INTV 1 +#define H264_ALLOC_INTER_FRM_INTV 2 -#define H264_MPEG_QP_MAP 191 
+#define H264_MPEG_QP_MAP 255 #define MPEG2_QP_ELEM (H264_MPEG_QP_MAP + 1) #define H264_QP_ELEM (MAX_H264_QP + 1) diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c index ffc6fb7..f131eb2 100644 --- a/encoder/ih264e_encode.c +++ b/encoder/ih264e_encode.c @@ -48,7 +48,7 @@ #include #include #include - +#include /* User Include files */ #include "ih264e_config.h" #include "ih264_typedefs.h" @@ -63,26 +63,25 @@ #include "ih264_platform_macros.h" #include "ih264_error.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" -#include "ih264_error.h" -#include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264_list.h" #include "ih264e_error.h" #include "ih264e_defs.h" -#include "ih264_padding.h" #include "ih264e_bitstream.h" #include "irc_mem_req_and_acq.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" #include "ih264e_time_stamp.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_master.h" #include "ih264e_process.h" @@ -90,7 +89,6 @@ #include "ih264_dpb_mgr.h" #include "ih264e_utils.h" #include "ih264e_fmt_conv.h" -#include "ih264e_config.h" #include "ih264e_statistics.h" #include "ih264e_trace.h" #include "ih264e_debug.h" @@ -217,7 +215,7 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) out_buf_t s_out_buf; /* temp var */ - WORD32 ctxt_sel = 0, i; + WORD32 ctxt_sel = 0, i, i4_rc_pre_enc_skip; /********************************************************************/ /* BEGIN INIT */ @@ -228,28 +226,15 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) ps_video_encode_op->s_ive_op.dump_recon = 0; 
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; - /* copy input info. to internal structure */ - s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf; - s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low; - s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high; - s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last; - s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info; - s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type; - s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info; - s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type; - /* copy output info. to internal structure */ s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf; - s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last; + s_out_buf.u4_is_last = 0; s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low; s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high; /* api call cnt */ ps_codec->i4_encode_api_call_cnt += 1; - /* curr pic cnt */ - ps_codec->i4_pic_cnt += 1; - /* codec context selector */ ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1; @@ -274,8 +259,8 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) if (1 == ps_cfg->u4_is_valid) { - if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) && - (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) || + if ( ((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) && + (ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) || ((WORD32)ps_cfg->u4_timestamp_high == -1) || ((WORD32)ps_cfg->u4_timestamp_low == -1) ) { @@ -355,9 +340,6 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) /* api call cnt */ ps_codec->i4_encode_api_call_cnt --; - /* curr pic cnt */ - ps_codec->i4_pic_cnt --; - /* header mode tag is not sticky 
*/ ps_codec->i4_header_mode = 0; @@ -381,8 +363,18 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) return IV_SUCCESS; } + /* curr pic cnt */ + ps_codec->i4_pic_cnt += 1; + + i4_rc_pre_enc_skip = 0; + i4_rc_pre_enc_skip = ih264e_input_queue_update( + ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf); + + s_out_buf.u4_is_last = s_inp_buf.u4_is_last; + ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.u4_is_last; - if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL) + /* Only encode if the current frame is not pre-encode skip */ + if (!i4_rc_pre_enc_skip && s_inp_buf.s_raw_buf.apv_bufs[0]) { /* array giving pic cnt that is being processed in curr context set */ ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt; @@ -394,172 +386,282 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); - if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0) - { - /* proc ctxt base idx */ - WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; + /* proc ctxt base idx */ + WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; - /* proc ctxt */ - process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; + /* proc ctxt */ + process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; - WORD32 ret = 0; + WORD32 ret = 0; - /* number of addl. threads to be created */ - WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1; + /* number of addl. 
threads to be created */ + WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1; - for (i = 0; i < num_thread_cnt; i++) + for (i = 0; i < num_thread_cnt; i++) + { + ret = ithread_create(ps_codec->apv_proc_thread_handle[i], + NULL, + (void *)ih264e_process_thread, + &ps_codec->as_process[i + 1]); + if (ret != 0) { - ret = ithread_create(ps_codec->apv_proc_thread_handle[i], - NULL, - (void*)ih264e_process_thread, - &ps_codec->as_process[i + 1]); - if (ret != 0) - { - printf("pthread Create Failed"); - assert(0); - } + printf("pthread Create Failed"); + assert(0); + } - ps_codec->ai4_process_thread_created[i] = 1; + ps_codec->ai4_process_thread_created[i] = 1; - ps_codec->i4_proc_thread_cnt++; - } + ps_codec->i4_proc_thread_cnt++; + } - /* launch job */ - ih264e_process_thread(ps_proc); + /* launch job */ + ih264e_process_thread(ps_proc); - /* Join threads at the end of encoding a frame */ - ih264e_join_threads(ps_codec); + /* Join threads at the end of encoding a frame */ + ih264e_join_threads(ps_codec); - ih264_list_reset(ps_codec->pv_proc_jobq); + ih264_list_reset(ps_codec->pv_proc_jobq); - ih264_list_reset(ps_codec->pv_entropy_jobq); - } + ih264_list_reset(ps_codec->pv_entropy_jobq); } - if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel]) - { - /* proc ctxt base idx */ - WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; - /* proc ctxt */ - process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; + /**************************************************************************** + * RECON + * Since we have forward dependent frames, we cannot return recon in encoding + * order. It must be in poc order, or input pic order. To achieve this we + * introduce a delay of 1 to the recon wrt encode. Now since we have that + * delay, at any point minimum of pic_cnt in our ref buffer will be the + * correct frame. For ex let our GOP be IBBP [1 2 3 4] . 
The encode order + * will be [1 4 2 3] .Now since we have a delay of 1, when we are done with + * encoding 4, the min in the list will be 1. After encoding 2, it will be + * 2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note + * that the 1 delay is critical. Hence if we have post enc skip, we must + * skip here too. Note that since post enc skip already frees the recon + * buffer we need not do any thing here + * + * We need to return a recon when ever we consume an input buffer. This + * comsumption include a pre or post enc skip. Thus dump recon is set for + * all cases except when + * 1) We are waiting -> ps_codec->i4_frame_num > 1 + * 2) When the input buffer is null [ ie we are not consuming any inp] + * An exception need to be made for the case when we have the last buffer + * since we need to flush out the on remainig recon. + ****************************************************************************/ - /* receive output back from codec */ - s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + ps_video_encode_op->s_ive_op.dump_recon = 0; - /* send the output to app */ - ps_video_encode_op->s_ive_op.output_present = 1; - ps_video_encode_op->s_ive_op.dump_recon = 1; - ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf; - ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + if (ps_codec->s_cfg.u4_enable_recon && (ps_codec->i4_frame_num > 1) + && (s_inp_buf.s_raw_buf.apv_bufs[0] || s_inp_buf.u4_is_last)) + { + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; + pic_buf_t *ps_pic_buf = NULL; + WORD32 i4_buf_status, i4_curr_poc = 32768; - /* receive input back from codec */ - s_inp_buf = ps_proc->s_inp_buf; + /* In case of skips we return recon, but indicate that buffer is zero size */ + if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] + || i4_rc_pre_enc_skip) + { - /* send the input to app */ - ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf; + ps_video_encode_op->s_ive_op.dump_recon = 1; + 
ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0; + ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0; - if (ps_codec->s_cfg.u4_enable_recon && - ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0) + } + else { - /* error status */ - IH264_ERROR_T ret = IH264_SUCCESS; - - /* recon buffer */ - rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel]; - - ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf; - - /* copy/convert the recon buffer and return */ - ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf, - ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0], - ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1], - ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2], - ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0], - ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], - 0, - ps_codec->s_cfg.u4_disp_ht); - - ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO); - if (IH264_SUCCESS != ret) + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) { - SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, - IVE_FATALERROR, + if (ps_codec->as_ref_set[i].i4_pic_cnt == -1) + continue; + + i4_buf_status = ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if ((i4_buf_status & BUF_MGR_IO) + && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc)) + { + ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf; + i4_curr_poc = ps_codec->as_ref_set[i].i4_poc; + } + } + + ps_video_encode_op->s_ive_op.s_recon_buf = + ps_video_encode_ip->s_ive_ip.s_recon_buf; + + /* + * If we get a valid buffer. output and free recon. + * + * we may get an invalid buffer if num_b_frames is 0. This is because + * We assume that there will be a ref frame in ref list after encoding + * the last frame. With B frames this is correct since its forward ref + * pic will be in the ref list. 
But if num_b_frames is 0, we will not + * have a forward ref pic + */ + + if (ps_pic_buf) + { + /* copy/convert the recon buffer and return */ + ih264e_fmt_conv(ps_codec, + ps_pic_buf, + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1], + ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0], + ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1], + 0, ps_codec->s_cfg.u4_disp_ht); + + ps_video_encode_op->s_ive_op.dump_recon = 1; + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, + ps_pic_buf->i4_buf_id, BUF_MGR_IO); + + if (IH264_SUCCESS != ret) + { + SET_ERROR_ON_RETURN( + (IH264E_ERROR_T)ret, IVE_FATALERROR, ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); + } } } + } - /* release buffers from ref list */ - if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) - { - /* pic info */ - pic_buf_t *ps_cur_pic; - /* mv info */ - mv_buf_t *ps_cur_mv_buf; + /*************************************************************************** + * Free reference buffers: + * In case of a post enc skip, we have to ensure that those pics will not + * be used as reference anymore. 
In all other cases we will not even mark + * the ref buffers + ***************************************************************************/ + if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) + { + /* pic info */ + pic_buf_t *ps_cur_pic; - /* error status */ - IH264_ERROR_T ret = IH264_SUCCESS; + /* mv info */ + mv_buf_t *ps_cur_mv_buf; - /* Decrement coded pic count */ - ps_codec->i4_coded_pic_cnt--; + /* error status */ + IH264_ERROR_T ret = IH264_SUCCESS; - /* loop through to get the min pic cnt among the list of pics stored in ref list */ - /* since the skipped frame may not be on reference list, we may not have an MV bank - * hence free only if we have allocated */ - for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + /* Decrement coded pic count */ + ps_codec->i4_poc--; + + /* loop through to get the min pic cnt among the list of pics stored in ref list */ + /* since the skipped frame may not be on reference list, we may not have an MV bank + * hence free only if we have allocated */ + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + { + if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt) { - if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt) - { - ps_codec->as_ref_set[i].i4_pic_cnt = -1; - ps_codec->as_ref_set[i].i4_poc = -1; - - ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf; - - ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf; - - /* release this frame from reference list */ - ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF); - SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, - IVE_FATALERROR, - ps_video_encode_op->s_ive_op.u4_error_code, - IV_FAIL); - - ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF); - SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, - IVE_FATALERROR, - ps_video_encode_op->s_ive_op.u4_error_code, - IV_FAIL); - break; - } + + ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf; + + ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf; + + /* 
release this frame from reference list and recon list */ + ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF); + ret |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_IO); + SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + + ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF); + ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_IO); + SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret, + IVE_FATALERROR, + ps_video_encode_op->s_ive_op.u4_error_code, + IV_FAIL); + break; } } + } - if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) || - (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1)) + /* + * Since recon is not in sync with output, ie there can be frame to be + * given back as recon even after last output. Hence we need to mark that + * the output is not the last. 
+ * Hence search through reflist and mark appropriately + */ + if (ps_codec->s_cfg.u4_enable_recon) + { + WORD32 i4_buf_status = 0; + + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) { - ps_video_encode_op->s_ive_op.dump_recon = 0; + if (ps_codec->as_ref_set[i].i4_pic_cnt == -1) + continue; + + i4_buf_status |= ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); } - else + + if (i4_buf_status & BUF_MGR_IO) { - /* set output pic type */ - if (ps_codec->i4_slice_type == PSLICE) - { - ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME; - } - else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1) - { + s_out_buf.u4_is_last = 0; + ps_video_encode_op->s_ive_op.u4_is_last = 0; + } + } + + + /************************************************************************** + * Signaling to APP + * 1) If we valid a valid output mark it so + * 2) Set the codec output ps_video_encode_op + * 3) Set the error status + * 4) Set the return Pic type + * Note that we already has marked recon properly + * 5)Send the consumed input back to app so that it can free it if possible + * + * We will have to return the output and input buffers unconditionally + * so that app can release them + **************************************************************************/ + if (!i4_rc_pre_enc_skip + && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel] + && s_inp_buf.s_raw_buf.apv_bufs[0]) + { + + /* receive output back from codec */ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + /* send the output to app */ + ps_video_encode_op->s_ive_op.output_present = 1; + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + + /* Set the time stamps of the encodec input */ + ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.u4_timestamp_low; + ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.u4_timestamp_high; + + + switch (ps_codec->pic_type) + { + case PIC_IDR: + 
ps_video_encode_op->s_ive_op.u4_encoded_frame_type =IV_IDR_FRAME; + break; + + case PIC_I: ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME; - } - else - { - ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME; - } + break; + + case PIC_P: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME; + break; + + case PIC_B: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME; + break; + + default: + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + break; } - /* loop through to get the error status */ for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) { error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code; @@ -569,6 +671,36 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op) ps_video_encode_op->s_ive_op.u4_error_code, IV_FAIL); } + else + { + /* proc ctxt base idx */ + WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS; + + /* proc ctxt */ + process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select]; + + /* receive output back from codec */ + s_out_buf = ps_codec->as_out_buf[ctxt_sel]; + + ps_video_encode_op->s_ive_op.output_present = 0; + ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS; + + /* Set the time stamps of the encodec input */ + ps_video_encode_op->s_ive_op.u4_timestamp_low = 0; + ps_video_encode_op->s_ive_op.u4_timestamp_high = 0; + + /* receive input back from codec and send it to app */ + s_inp_buf = ps_proc->s_inp_buf; + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf; + + ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME; + + } + + /* Send the input to encoder so that it can free it if possible */ + ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf; + ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf; + if (1 == s_inp_buf.u4_is_last) { diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c index 67e5409..2a895b1 100644 --- 
a/encoder/ih264e_encode_header.c +++ b/encoder/ih264e_encode_header.c @@ -65,14 +65,13 @@ #include "ithread.h" #include "ih264e_config.h" #include "ih264e_trace.h" -#include "ih264_typedefs.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ih264_debug.h" #include "ih264_defs.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -81,10 +80,12 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_encode_header.h" #include "ih264_common_tables.h" @@ -523,11 +524,12 @@ WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm, { /* num_ref_idx_l0_active_minus1 */ PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1"); - } - if (ps_slice_hdr->u1_slice_type == BSLICE) - { - /* num_ref_idx_l1_active_minus1 */ - PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1"); + + if (ps_slice_hdr->u1_slice_type == BSLICE) + { + /* num_ref_idx_l1_active_minus1 */ + PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1"); + } } } @@ -544,9 +546,20 @@ WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm, } } + if (ps_slice_hdr->u1_slice_type == BSLICE) + { + /* ref_pic_list_reordering_flag_l1 */ + PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l1, 1, return_status, "ref_pic_list_reordering_flag_l1"); + + if (ps_slice_hdr->u1_ref_idx_reordering_flag_l1) + { + + } + } + if ((ps_pps->i1_weighted_pred_flag && 
(ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) || - (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE)) + (ps_pps->i1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE)) { /* TODO_LATER: Currently there is no support for weighted prediction. This needs to be updated when the support is added */ @@ -662,8 +675,8 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps) * To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames, * support for weighted prediction and introduce CABAC entropy coding then we have Main Profile. */ - if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) || - (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction)) + if ((ps_cfg->u4_num_bframes) || (ps_cfg->e_content_type != IV_PROGRESSIVE) || + (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction)) { ps_sps->u1_profile_idc = IH264_PROFILE_MAIN; } @@ -748,8 +761,10 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps) /* pic_order_cnt_type */ ps_sps->i1_pic_order_cnt_type = 2; - if(ps_cfg->u4_enable_alt_ref) + if (ps_codec->i4_non_ref_frames_in_stream) + { ps_sps->i1_pic_order_cnt_type = 0; + } /* log2_max_pic_order_cnt_lsb_minus4 */ ps_sps->i1_log2_max_pic_order_cnt_lsb = 8; @@ -765,8 +780,15 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps) } /* num_ref_frames */ - /* FIXME : Fix this hard coding */ - ps_sps->u1_max_num_ref_frames = 1; + /* TODO : Should we have a flexible num ref frames */ + if (ps_codec->s_cfg.u4_num_bframes > 0) + { + ps_sps->u1_max_num_ref_frames = 2; + } + else + { + ps_sps->u1_max_num_ref_frames = 1; + } /* gaps_in_frame_num_value_allowed_flag */ ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0; @@ -852,7 +874,7 @@ IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps) /* entropy_coding_mode */ 
ps_pps->u1_entropy_coding_mode_flag = ps_cfg->u4_entropy_coding_mode; - /* pic_order_present_flag is unset for POC type 2 */ + /* pic_order_present_flag is unset if we don't have fields */ ps_pps->u1_pic_order_present_flag = 0; /* Currently number of slice groups supported are 1 */ @@ -980,18 +1002,17 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc, if (ps_sps->i1_pic_order_cnt_type == 0) { - WORD32 val; - val = ps_codec->i4_coded_pic_cnt; - val %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb); - ps_slice_hdr->i4_pic_order_cnt_lsb = val; + WORD32 i4_poc; + i4_poc = ps_codec->i4_poc; + i4_poc %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb); + ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc; } + /* TODO add support for poc type 1 */ else if (ps_sps->i1_pic_order_cnt_type == 1) { } - if(0 == ps_slice_hdr->u2_first_mb_in_slice) - ps_codec->i4_coded_pic_cnt++; /* * redundant slices are not currently supported. @@ -1005,7 +1026,7 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc, /* direct spatial mv pred flag */ if (ps_proc->i4_slice_type == BSLICE) { - + ps_slice_hdr->u1_direct_spatial_mv_pred_flag = 1; } if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE) @@ -1036,11 +1057,23 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc, { } + + /* ref_pic_list_reordering_flag_l1 */ + ps_slice_hdr->u1_ref_idx_reordering_flag_l1 = 0; + + if (ps_slice_hdr->u1_ref_idx_reordering_flag_l1) + { + + } } + + /* Currently we do not support weighted pred */ + /* ps_slice_hdr->u1_weighted_bipred_idc = 0; */ + if ((ps_pps->i1_weighted_pred_flag && (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE)) || - (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_proc->i4_slice_type == BSLICE)) + (ps_pps->i1_weighted_bipred_idc == 1 && ps_proc->i4_slice_type == BSLICE)) { /* TODO_LATER: Currently there is no support for weighted prediction.
This needs to be updated when the support is added */ @@ -1114,6 +1147,8 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc, * If this is not the case, we have to add Slice group map type to the bit stream */ } + ps_slice_hdr->i1_cabac_init_idc = CABAC_INIT_IDC; + return IH264E_SUCCESS; } diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c index 393d6ca..e06aea1 100644 --- a/encoder/ih264e_fmt_conv.c +++ b/encoder/ih264e_fmt_conv.c @@ -65,8 +65,8 @@ #include "ih264_defs.h" #include "ih264_debug.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -75,9 +75,9 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264_macros.h" #include "ih264_platform_macros.h" -#include "ih264_error.h" #include "ih264_buf_mgr.h" #include "ih264e_defs.h" #include "ih264e_error.h" @@ -85,6 +85,7 @@ #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_fmt_conv.h" diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c index 65f943a..8305fd2 100644 --- a/encoder/ih264e_function_selector_generic.c +++ b/encoder/ih264e_function_selector_generic.c @@ -60,8 +60,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -70,24 +70,21 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" 
#include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_platform_macros.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_trans_quant_itrans_iquant.h" -#include "ih264e_defs.h" -#include "ih264e_structs.h" -#include "ih264_deblk_edge_filters.h" +#include "ih264e_cabac.h" #include "ih264e_core_coding.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" -#include "ih264_padding.h" #include "ih264e_intra_modes_eval.h" -#include "ih264_mem_fns.h" #include "ih264e_fmt_conv.h" #include "ih264e_half_pel.h" +#include "ih264e_me.h" /*****************************************************************************/ @@ -197,8 +194,12 @@ void ih264e_init_function_ptr_generic(codec_t *ps_codec) ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4; /* write mb syntax layer */ - ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb; - ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb; + ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = ih264e_write_pslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = ih264e_write_bslice_mb_cavlc; + ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac; + ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = ih264e_write_bslice_mb_cabac; /* Padding Functions */ ps_codec->pf_pad_top = ih264_pad_top; @@ -255,5 +256,14 @@ void ih264e_init_function_ptr_generic(codec_t *ps_codec) ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz; ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert; + /* ME compute */ + ps_codec->apf_compute_me[PSLICE] = &ih264e_compute_me_single_reflist; + ps_codec->apf_compute_me[BSLICE] = &ih264e_compute_me_multi_reflist; + + /* skip decision */ + 
ps_codec->apf_find_skip_params_me[PSLICE] = &ih264e_find_pskip_params_me; + ps_codec->apf_find_skip_params_me[BSLICE] = &ih264e_find_bskip_params_me; + + return; } diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c index e2b46a4..28d3faf 100644 --- a/encoder/ih264e_globals.c +++ b/encoder/ih264e_globals.c @@ -196,66 +196,143 @@ const WORD8 gi1_mv_pred_condition[8] = -1, 0, 1, -1, 2, -1, -1, -1 }; -/** -****************************************************************************** -* @brief maps the h264 quantizer to the mpeg2 quantizer scale -* input : h264 qp -* output : equivalent mpeg 2 qp -* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1] -****************************************************************************** -*/ + +/******************************************************************************* + * Translation of MPEG QP to H264 QP + ******************************************************************************/ +/* + * Note : RC library models QP and bits assuming the QP to be MPEG2. + * Since MPEG qp varies linearly, when the relationship is computed, + * it learns that delta(qp) => delta(bits). Now what we are doing by the + * transation of qp is that + * QPrc = a + b*2^(QPen) + * By not considering the weight matrix in both MPEG and H264 we in effect + * only changing the relation to + * QPrc = c + d*2^(QPen) + * This will only entatil changin the RC model parameters, and this will + * not affect rc relation at all + * + * + * We have MPEG qp which varies from 0-228. The quantization factor has a linear + * relation ship with the size of quantized values + * + * We also have H264 Qp, which varies such that for a change in QP of 6 , we + * double the corresponding scaling factor. Hence the scaling is linear in terms + * of 2^(QPh/6) + * + * Now we want to have translation between QPm and QPh. 
Hence we can write + * + * QPm = a + b*2^(QPh/6) + * + * Applying the boundary conditions that + * 1) QPm = 1 if QPh = 0 + * 2) QPm = 228 if QPh = 51, + * + * we will have + * a = -0.372, b = 0.628 + * + * Hence the relationship is + * QPm = a + b*2^(QPh/6) + * QPh = 6*log2((QPm - a)/b) + * + * + * Unrounded values for gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] = + * + * 0.33291 0.41923 0.51613 0.62489 0.74697 0.88400 + * 1.03781 1.21046 1.40425 1.62178 1.86594 2.14000 + * 2.44762 2.79292 3.18050 3.61555 4.10388 4.65200 + * 5.26725 5.95784 6.73301 7.60310 8.57975 9.67600 + * 10.90650 12.28769 13.83802 15.57821 17.53150 19.72400 + * 22.18500 24.94737 28.04804 31.52841 35.43500 39.82000 + * 44.74199 50.26675 56.46807 63.42882 71.24200 80.01200 + * 89.85599 100.90549 113.30814 127.22965 142.85601 160.39600 + * 180.08398 202.18299 226.98829 + * + * Unrounded values for gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] + * + * -4.5328 6.7647 11.5036 14.5486 16.7967 18.5797 20.0575 + * 21.3193 22.4204 23.3971 24.2747 25.0715 25.8010 26.4738 + * 27.0981 27.6804 28.2259 28.7391 29.2236 29.6824 30.1181 + * 30.5329 30.9287 31.3072 31.6699 32.0180 32.3526 32.6748 + * 32.9854 33.2852 33.5750 33.8554 34.1270 34.3904 34.6460 + * 34.8942 35.1355 35.3703 35.5989 35.8216 36.0387 36.2505 + * 36.4572 36.6591 36.8564 37.0494 37.2381 37.4228 37.6036 + * 37.7807 37.9543 38.1244 38.2913 38.4550 38.6157 38.7735 + * 38.9284 39.0806 39.2302 39.3772 39.5218 39.6640 39.8039 + * 39.9416 40.0771 40.2106 40.3420 40.4714 40.5990 40.7247 + * 40.8486 40.9707 41.0911 41.2099 41.3271 41.4427 41.5568 + * 41.6694 41.7806 41.8903 41.9987 42.1057 42.2115 42.3159 + * 42.4191 42.5211 42.6219 42.7216 42.8201 42.9175 43.0138 + * 43.1091 43.2033 43.2965 43.3887 43.4799 43.5702 43.6596 + * 43.7480 43.8356 43.9223 44.0081 44.0930 44.1772 44.2605 + * 44.3431 44.4248 44.5058 44.5861 44.6656 44.7444 44.8224 + * 44.8998 44.9765 45.0525 45.1279 45.2026 45.2766 45.3501 + * 45.4229 45.4951 45.5667 45.6378 45.7082 45.7781 45.8474 + * 45.9162
45.9844 46.0521 46.1193 46.1859 46.2521 46.3177 + * 46.3829 46.4475 46.5117 46.5754 46.6386 46.7014 46.7638 + * 46.8256 46.8871 46.9481 47.0087 47.0689 47.1286 47.1880 + * 47.2469 47.3054 47.3636 47.4213 47.4787 47.5357 47.5923 + * 47.6486 47.7045 47.7600 47.8152 47.8700 47.9245 47.9787 + * 48.0325 48.0859 48.1391 48.1919 48.2444 48.2966 48.3485 + * 48.4000 48.4513 48.5022 48.5529 48.6033 48.6533 48.7031 + * 48.7526 48.8018 48.8508 48.8995 48.9478 48.9960 49.0438 + * 49.0914 49.1388 49.1858 49.2327 49.2792 49.3256 49.3716 + * 49.4175 49.4630 49.5084 49.5535 49.5984 49.6430 49.6875 + * 49.7317 49.7756 49.8194 49.8629 49.9062 49.9493 49.9922 + * 50.0348 50.0773 50.1196 50.1616 50.2034 50.2451 50.2865 + * 50.3278 50.3688 50.4097 50.4503 50.4908 50.5311 50.5712 + * 50.6111 50.6508 50.6904 50.7298 50.7690 50.8080 50.8468 + * 50.8855 50.9240 50.9623 51.0004 51.0384 + * + */ + const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] = { - 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 3, 4, - 4, 4, 5, 6, 6, 7, 8, 9, - 10, 11, 13, 14, 16, 18, 20, 23, - 25, 29, 32, 36, 40, 45, 51, 57, - 64, 72, 81, 91, 102, 114, 128, 144, - 161, 181, 203, 228, + 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, + 2, 3, 3, 4, 4, 5, + 5, 6, 7, 8, 9, 10, + 11, 12, 14, 16, 18, 20, + 22, 25, 28, 32, 35, 40, + 45, 50, 56, 63, 71, 80, + 90, 101, 113, 127, 143, 160, + 180, 202, 227 }; -/** -****************************************************************************** -* @brief maps the mpeg2 quantizer to the h264 quantizer scale -* input : mpeg2 qp -* output : equivalent h264qp -* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32 -* k = 0 (for intra) k = sign(QFij) -* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6) -* -* Excluding the portion of R(QP%6,i,j) that is due to -* the DCT scale factors, the 6 entries after dividing by 64 (2^6) -* correspond to dequant values of -* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375. 
-* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc) -* -* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2 -* (the actual mapping seems to be to MPEG2 qscale of 2.5), -* and the fact that the effective h264 quantizer changes by -* a factor of 2 for every 6 steps, the following mapping is -* obtained: -* h264qp = 6*(log2(mpeg2qscale/2)) + 12. -* -* Note that the quant matrix entry assumed for the above -* equality is 16. Hence when the mpeg2 quant matrix entries -* are all 16, this lookup can be used as is (which is the -* default inter quant matrix in mpeg-2). -****************************************************************************** -*/ const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] = { - 0, 4, 10, 14, 16, 18, 20, 21, 22, 23, 24, 25, 26, 26, 27, 27, - 28, 29, 29, 29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34, - 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, - 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 0, 7, 12, 15, 17, 19, 20, + 21, 22, 23, 24, 25, 26, 26, + 27, 28, 28, 29, 29, 30, 30, + 31, 31, 31, 32, 32, 32, 33, + 33, 33, 34, 34, 34, 34, 35, + 35, 35, 35, 36, 36, 36, 36, + 36, 37, 37, 37, 37, 37, 38, + 38, 38, 38, 38, 38, 39, 39, + 39, 39, 39, 39, 40, 40, 40, + 40, 40, 40, 40, 40, 41, 41, + 41, 41, 41, 41, 41, 41, 42, + 42, 42, 42, 42, 42, 42, 42, + 42, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 44, 44, + 44, 44, 44, 44, 44, 44, 44, + 44, 44, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 
45, 45, + 45, 45, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, + 46, 46, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 48, 48, + 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, + 48, 48, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, + 49, 49, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 51, 51, + 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51 }; diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c index cb475a1..4871f40 100644 --- a/encoder/ih264e_half_pel.c +++ b/encoder/ih264e_half_pel.c @@ -55,7 +55,6 @@ #include "ih264_defs.h" #include "ih264e_half_pel.h" #include "ih264_macros.h" -#include "ih264e_half_pel.h" #include "ih264e_debug.h" #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c index b41d717..74adbbc 100644 --- a/encoder/ih264e_intra_modes_eval.c +++ b/encoder/ih264e_intra_modes_eval.c @@ -74,15 +74,17 @@ #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" #include "ih264_padding.h" -#include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ime_distortion_metrics.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" +#include "ime_defs.h" #include "ime_structs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_intra_modes_eval.h" #include "ih264e_globals.h" @@ -372,9 +374,10 @@ void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps UWORD32 i, u4_enable_fast_sad = 0, offset = 0; /* init temp var */ - if (ps_proc->i4_slice_type == PSLICE) + if (ps_proc->i4_slice_type != ISLICE) { - offset = 5; + /* Offset for MBtype */ + offset = (ps_proc->i4_slice_type == PSLICE) ? 
5 : 23; u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; } diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c index 2dd0974..2b19dd1 100644 --- a/encoder/ih264e_mc.c +++ b/encoder/ih264e_mc.c @@ -19,25 +19,25 @@ */ /** -******************************************************************************* -* @file -* ih264e_mc.c -* -* @brief -* Contains definition of functions for motion compensation -* -* @author -* ittiam -* -* @par List of Functions: -* - ih264e_motion_comp_luma() -* - ih264e_motion_comp_chroma() -* -* @remarks -* None -* -******************************************************************************* -*/ + ******************************************************************************* + * @file + * ih264e_mc.c + * + * @brief + * Contains definition of functions for motion compensation + * + * @author + * ittiam + * + * @par List of Functions: + * - ih264e_motion_comp_luma() + * - ih264e_motion_comp_chroma() + * + * @remarks + * None + * + ******************************************************************************* + */ /*****************************************************************************/ /* File Includes */ @@ -52,6 +52,7 @@ #include "iv2.h" #include "ive2.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "ih264_structs.h" #include "ih264_inter_pred_filters.h" @@ -60,57 +61,52 @@ #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" #include "ih264_trans_quant_itrans_iquant.h" -#include "ih264_inter_pred_filters.h" -#include "ih264_mem_fns.h" -#include "ih264_padding.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264e_defs.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_mc.h" #include 
"ih264e_half_pel.h" - /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ /** -****************************************************************************** -* -* @brief -* performs motion compensation for a luma mb for the given mv. -* -* @par Description -* This routine performs motion compensation of an inter mb. When the inter -* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer -* to pred buffer. In this case the function returns pointer and stride of the -* ref. buffer and this info is used in place of pred buffer else where. -* In other cases, the pred buffer is populated via copy / filtering + copy -* (q pel cases) and returned. -* -* @param[in] ps_proc -* pointer to current proc ctxt -* -* @param[out] pu1_pseudo_pred -* pseudo prediction buffer -* -* @param[out] u4_pseudo_pred_strd -* pseudo pred buffer stride -* -* @return none -* -* @remarks Assumes half pel buffers for the entire frame are populated. -* -****************************************************************************** -*/ -void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, - UWORD8 **pu1_pseudo_pred, + ****************************************************************************** + * + * @brief + * performs motion compensation for a luma mb for the given mv. + * + * @par Description + * This routine performs motion compensation of an inter mb. When the inter + * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer + * to pred buffer. In this case the function returns pointer and stride of the + * ref. buffer and this info is used in place of pred buffer else where. + * In other cases, the pred buffer is populated via copy / filtering + copy + * (q pel cases) and returned. 
+ * + * @param[in] ps_proc + * pointer to current proc ctxt + * + * @param[out] pu1_pseudo_pred + * pseudo prediction buffer + * + * @param[out] u4_pseudo_pred_strd + * pseudo pred buffer stride + * + * @return none + * + * @remarks Assumes half pel buffers for the entire frame are populated. + * + ****************************************************************************** + */ +void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred, WORD32 *pi4_pseudo_pred_strd) { /* codec context */ @@ -152,51 +148,96 @@ void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, /* half / qpel coefficient */ UWORD32 u4_subpel_factor; + /* BIPRED Flag */ + WORD32 i4_bipred_flag; + /* temp var */ UWORD32 u4_lkup_idx1; /* Init */ i4_ref_strd[0] = ps_proc->i4_rec_strd; - i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_hp_buf_strd; + i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = + ps_me_ctxt->u4_subpel_buf_strd; - for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++) + for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; + u4_num_prtn++) { + mv_t *ps_curr_mv; + /* update ptr to curr partition */ ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; + /* Set no no bipred */ + i4_bipred_flag = 0; + + switch (ps_curr_pu->b2_pred_mode) + { + case PRED_L0: + ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; + pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; + break; + + case PRED_L1: + ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv; + pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1]; + break; + + case PRED_BI: + /* + * In case of PRED_BI, we only need to ensure that + * the reference buffer that gets selected is + * ps_proc->pu1_best_subpel_buf + */ + + /* Dummy */ + ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; + pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; + + i4_bipred_flag = 1; + break; + + default: + ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; + pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; + break; + + } /* get full pel mv's 
(full pel units) */ - u4_mv_x_full = ps_curr_pu->s_l0_mv.i2_mvx >> 2; - u4_mv_y_full = ps_curr_pu->s_l0_mv.i2_mvy >> 2; + u4_mv_x_full = ps_curr_mv->i2_mvx >> 2; + u4_mv_y_full = ps_curr_mv->i2_mvy >> 2; /* get half pel mv's */ - u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1; - u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1; + u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; + u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; /* get quarter pel mv's */ - u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1); - u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1); + u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); + u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); /* width and height of partition */ wd = (ps_curr_pu->b4_wd + 1) << 2; ht = (ps_curr_pu->b4_ht + 1) << 2; /* decision ? qpel/hpel, fpel */ - u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel); + u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel); - /* update ref buffer ptrs */ - pu1_ref[0] = ps_proc->pu1_ref_buf_luma + (u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full; + /* Move ref to position given by MV */ + pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full); - pu1_ref[1] = ps_proc->pu1_best_subpel_buf; + /* Sub pel ptrs/ Biperd pointers init */ + pu1_ref[1] = ps_proc->pu1_best_subpel_buf; i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd; - /* update pred buff ptr */ - pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 4 * ps_curr_pu->b4_pos_x; + pu1_pred = ps_proc->pu1_pred_mb + + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + + 4 * ps_curr_pu->b4_pos_x; - /*u4_lkup_idx1 will be non zero for half pel*/ - u4_lkup_idx1 = (u4_subpel_factor >> 2 ) != 0 ; + /* u4_lkup_idx1 will be non zero for half pel and bipred */ + u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag; { /********************************************************************/ @@ -218,7 +259,11 @@ void 
ih264e_motion_comp_luma(process_ctxt_t *ps_proc, */ else { - ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred, i4_ref_strd[u4_lkup_idx1], i4_pred_strd, ht, wd, NULL, 0); + ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], + pu1_pred, + i4_ref_strd[u4_lkup_idx1], + i4_pred_strd, ht, wd, NULL, + 0); } } @@ -226,24 +271,24 @@ void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, } /** -****************************************************************************** -* -* @brief -* performs motion compensation for chroma mb -* -* @par Description -* Copies a MB of data from the reference buffer (Full pel, half pel or q pel) -* according to the motion vectors given -* -* @param[in] ps_proc -* pointer to current proc ctxt -* -* @return none -* -* @remarks Assumes half pel and quarter pel buffers for the entire frame are -* populated. -****************************************************************************** -*/ + ****************************************************************************** + * + * @brief + * performs motion compensation for chroma mb + * + * @par Description + * Copies a MB of data from the reference buffer (Full pel, half pel or q pel) + * according to the motion vectors given + * + * @param[in] ps_proc + * pointer to current proc ctxt + * + * @return none + * + * @remarks Assumes half pel and quarter pel buffers for the entire frame are + * populated. 
+ ****************************************************************************** + */ void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc) { /* codec context */ @@ -283,38 +328,122 @@ void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc) WORD32 u4_mv_y; UWORD8 u1_dx, u1_dy; - for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++) + for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; + u4_num_prtn++) { - ps_curr_pu =ps_proc->ps_pu + u4_num_prtn; + mv_t *ps_curr_mv; - u4_mv_x = ps_curr_pu->s_l0_mv.i2_mvx >> 3; - u4_mv_y = ps_curr_pu->s_l0_mv.i2_mvy >> 3; + ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; - /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy =4*/ - u4_mv_x_full = (ps_curr_pu->s_l0_mv.i2_mvx & 0x4) >> 2; - u4_mv_y_full = (ps_curr_pu->s_l0_mv.i2_mvy & 0x4) >> 2; + if (ps_curr_pu->b2_pred_mode != PRED_BI) + { + ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv; + pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode]; - /* get half pel mv's */ - u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1; - u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1; + u4_mv_x = ps_curr_mv->i2_mvx >> 3; + u4_mv_y = ps_curr_mv->i2_mvy >> 3; - /* get quarter pel mv's */ - u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1); - u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1); + /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */ + u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; + u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; + + /* get half pel mv's */ + u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; + u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; + + /* get quarter pel mv's */ + u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); + u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); - /* width and height of sub macro block */ - wd = (ps_curr_pu->b4_wd + 1) << 1; - ht = (ps_curr_pu->b4_ht + 1) << 1; + /* 
width and height of sub macro block */ + wd = (ps_curr_pu->b4_wd + 1) << 1; + ht = (ps_curr_pu->b4_ht + 1) << 1; - /* move the pointers so that they point to the motion compensated locations */ - pu1_ref = ps_proc->pu1_ref_buf_chroma + (u4_mv_y * i4_ref_strd) + (u4_mv_x << 1); + /* move the pointers so that they point to the motion compensated locations */ + pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); - pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 2 * ps_curr_pu->b4_pos_x; + pu1_pred = ps_proc->pu1_pred_mb + + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + + 2 * ps_curr_pu->b4_pos_x; - u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel); - u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel); + u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel); + u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel); - ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd, - u1_dx, u1_dy, ht, wd); + /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with + * separate functions for better performance + * + * ih264_inter_pred_chroma_dx_zero_a9q + * and + * ih264_inter_pred_chroma_dy_zero_a9q + */ + + ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, + i4_pred_strd, u1_dx, u1_dy, ht, wd); + } + else /* If the pred mode is PRED_BI */ + { + /* + * We need to interpolate the L0 and L1 ref pics with the chorma MV + * then use them to average for bilinrar interpred + */ + WORD32 i4_predmode; + UWORD8 *pu1_ref_buf[2]; + + /* Temporary buffers to store the interpolated value from L0 and L1 */ + pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0]; + pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1]; + + + for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++) + { + ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv; + pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode]; + + u4_mv_x = ps_curr_mv->i2_mvx >> 3; + u4_mv_y = 
ps_curr_mv->i2_mvy >> 3; + + /* + * corresponds to full pel motion vector in luma, but in chroma + * corresponds to pel formed wiith dx, dy =4 + */ + u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; + u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; + + /* get half pel mv's */ + u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; + u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; + + /* get quarter pel mv's */ + u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); + u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); + + /* width and height of sub macro block */ + wd = (ps_curr_pu->b4_wd + 1) << 1; + ht = (ps_curr_pu->b4_ht + 1) << 1; + + /* move the pointers so that they point to the motion compensated locations */ + pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); + + pu1_pred = ps_proc->pu1_pred_mb + + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + + 2 * ps_curr_pu->b4_pos_x; + + u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + + (u4_mv_x_qpel); + u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + + (u4_mv_y_qpel); + + ps_codec->pf_inter_pred_chroma(pu1_ref, + pu1_ref_buf[i4_predmode], + i4_ref_strd, MB_SIZE, u1_dx, + u1_dy, ht, wd); + } + + ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0], + pu1_ref_buf[PRED_L1], pu1_pred, + MB_SIZE, MB_SIZE, + i4_pred_strd, MB_SIZE >> 1, + MB_SIZE); + } } } diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c index 9e8d7a3..6fef9d9 100644 --- a/encoder/ih264e_me.c +++ b/encoder/ih264e_me.c @@ -75,20 +75,20 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264e_defs.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_globals.h" #include "ih264_macros.h" #include "ih264e_me.h" #include "ime.h" -#include "ime_distortion_metrics.h" #include "ih264_debug.h" 
-#include "ithread.h" #include "ih264e_intra_modes_eval.h" #include "ih264e_core_coding.h" #include "ih264e_mc.h" @@ -164,6 +164,8 @@ void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt) } } + + /** ******************************************************************************* * @@ -204,37 +206,25 @@ void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt) * number of such MVs * * @remarks -* Assumptions : 1. Assumes Single reference frame -* 2. Assumes Only partition of size 16x16 +* Assumptions : 1. Assumes Only partition of size 16x16 * ******************************************************************************* */ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, - me_ctxt_t *ps_me_ctxt) + me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist) { /* curr mb indices */ WORD32 i4_mb_x = ps_proc->i4_mb_x; - /* left mb motion vector */ - mv_t *ps_left_mv; - - /* top left mb motion vector */ - mv_t *ps_top_mv; - - /* top left mb motion vector */ - mv_t *ps_top_left_mv; - - /* top left mb motion vector */ - mv_t *ps_top_right_mv; + /* Motion vector */ + mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv; - /* skip mv */ - mv_t *ps_skip_mv = ps_proc->ps_skip_mv; + /* Pred modes */ + WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode; /* mb part info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; - - /* num of candidate search candidates */ - UWORD32 u4_num_candidates = 0; + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; /* mvs */ WORD32 mvx, mvy; @@ -242,29 +232,36 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, /* ngbr availability */ block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + /* Current mode */ + WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? 
PRED_L1 : PRED_L0; + /* srch range*/ WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; - ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv; - ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv; - ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv; - ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv; + ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv; + ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv; + ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv; + ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv; + + i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode; + i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode; + i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode; + i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode; + + /* num of candidate search candidates */ + UWORD32 u4_num_candidates =0 ; - /************************************************************/ /* Taking the Zero motion vector as one of the candidates */ - /************************************************************/ - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0; u4_num_candidates++; - /************************************************************/ /* Taking the Left MV Predictor as one of the candidates */ - /************************************************************/ - if (ps_ngbr_avbl->u1_mb_a) + if (ps_ngbr_avbl->u1_mb_a && i4_left_mode) { 
mvx = (ps_left_mv->i2_mvx + 2) >> 2; mvy = (ps_left_mv->i2_mvy + 2) >> 2; @@ -272,21 +269,14 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; u4_num_candidates ++; } - /*else - { - ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvx = 0; - ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvy = 0; - }*/ - /************************************************************/ /* Taking the Top MV Predictor as one of the candidates */ - /************************************************************/ - if (ps_ngbr_avbl->u1_mb_b) + if (ps_ngbr_avbl->u1_mb_b && i4_top_mode) { mvx = (ps_top_mv->i2_mvx + 2) >> 2; mvy = (ps_top_mv->i2_mvy + 2) >> 2; @@ -294,15 +284,13 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; u4_num_candidates ++; - /************************************************************/ /* Taking the TopRt MV Predictor as one of the candidates */ - /************************************************************/ - if (ps_ngbr_avbl->u1_mb_c) + if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode) { mvx = (ps_top_right_mv->i2_mvx + 2) >> 2; mvy = (ps_top_right_mv->i2_mvy + 2)>> 2; @@ -310,15 +298,13 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, mvx = 
CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; u4_num_candidates ++; } - /************************************************************/ /* Taking the TopLt MV Predictor as one of the candidates */ - /************************************************************/ - else if (ps_ngbr_avbl->u1_mb_d) + else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode) { mvx = (ps_top_left_mv->i2_mvx + 2) >> 2; mvy = (ps_top_left_mv->i2_mvy + 2) >> 2; @@ -326,84 +312,84 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc, mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; u4_num_candidates ++; } - /*else - { - ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0; - ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0; - }*/ } - /*else - { - ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvx = 0; - ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvy = 0; - - ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0; - ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0; - }*/ /********************************************************************/ /* MV Prediction */ /********************************************************************/ - ih264e_mv_pred_me(ps_proc); + ih264e_mv_pred_me(ps_proc, i4_reflist); - ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx; - ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy; + 
ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx; + ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy; - /************************************************************/ /* Get the skip motion vector */ - /************************************************************/ - ih264e_find_skip_motion_vector(ps_proc, 1); + { + ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me + [ps_proc->i4_slice_type](ps_proc, i4_reflist); - /************************************************************/ - /* Taking the Skip motion vector as one of the candidates */ - /************************************************************/ - mvx = (ps_skip_mv->i2_mvx + 2) >> 2; - mvy = (ps_skip_mv->i2_mvy + 2) >> 2; + /* Taking the Skip motion vector as one of the candidates */ + mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2; + mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2; - mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); - mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx; - ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; + u4_num_candidates++; - u4_num_candidates++; + if (ps_proc->i4_slice_type == BSLICE) + { + /* Taking the temporal Skip motion vector as one of the candidates */ + mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2; + mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2; + + mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); + mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy); + + ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx; + 
ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy; + u4_num_candidates++; + } + } - ASSERT(u4_num_candidates <= 5); + ASSERT(u4_num_candidates <= 6); - ps_me_ctxt->u4_num_candidates = u4_num_candidates; + ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates; } /** ******************************************************************************* * -* @brief The function gives the skip motion vector +* @brief The function computes parameters for a PSKIP MB * * @par Description: -* The function gives the skip motion vector +* The function updates the skip motion vector and checks if the current +* MB can be a skip PSKIP mB or not * -* @param[in] ps_left_mb_pu -* pointer to left mb motion vector info +* @param[in] ps_proc +* Pointer to process context * -* @param[in] ps_top_row_pu -* pointer to top & top right mb motion vector info +* @param[in] u4_for_me +* Flag to indicate function is called for ME or not * -* @param[out] ps_pred_mv -* pointer to candidate predictors for the current block +* @param[out] i4_ref_list +* Current active refernce list * -* @returns The x & y components of the MV predictor. +* @returns Flag indicating if the current MB can be marked as skip * -* @remarks The code implements the logic as described in sec 8.4.1.1 in H264 +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 * specification. 
* ******************************************************************************* */ -void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me) +WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist) { /* left mb motion vector */ enc_pu_t *ps_left_mb_pu ; @@ -411,35 +397,116 @@ void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me) /* top mb motion vector */ enc_pu_t *ps_top_mb_pu ; - /* skip mv */ - mv_t *ps_skip_mv = ps_proc->ps_skip_mv; + /* Skip mv */ + mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv; + + UNUSED(i4_reflist); + + ps_left_mb_pu = &ps_proc->s_left_mb_pu ; + ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x; + + if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) || + (!ps_proc->ps_ngbr_avbl->u1_mb_b) || + ( + (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) && + (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) && + (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0) + ) || + ( + (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) && + (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) && + (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0) + ) + ) - if (u4_for_me == 1) { - ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME; - ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x; + ps_skip_mv->i2_mvx = 0; + ps_skip_mv->i2_mvy = 0; } else { - ps_left_mb_pu = &ps_proc->s_left_mb_pu ; - ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x; + ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx; + ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy; + } + + if ( (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx) + && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy)) + { + return 1; } - if ( (!ps_proc->ps_ngbr_avbl->u1_mb_a) || - (!ps_proc->ps_ngbr_avbl->u1_mb_b) || - ((ps_left_mb_pu->i1_l0_ref_idx | ps_left_mb_pu->s_l0_mv.i2_mvx | ps_left_mb_pu->s_l0_mv.i2_mvy) == 0) || - ((ps_top_mb_pu->i1_l0_ref_idx | 
ps_top_mb_pu->s_l0_mv.i2_mvx | ps_top_mb_pu->s_l0_mv.i2_mvy) == 0) ) + return 0; +} + +/** +******************************************************************************* +* +* @brief The function computes parameters for a PSKIP MB +* +* @par Description: +* The function updates the skip motion vector and checks if the current +* MB can be a skip PSKIP mB or not +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Flag to dincate fucntion is called for ME or not +* +* @param[out] i4_ref_list +* Current active refernce list +* +* @returns Flag indicating if the current MB can be marked as skip +* +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 +* specification. +* +******************************************************************************* +*/ +WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ + /* left mb motion vector */ + enc_pu_t *ps_left_mb_pu ; + + /* top mb motion vector */ + enc_pu_t *ps_top_mb_pu ; + + /* Skip mv */ + mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv; + + UNUSED(i4_reflist); + + ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME; + ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x; + + if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) || + (!ps_proc->ps_ngbr_avbl->u1_mb_b) || + ( + (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) && + (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) && + (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0) + ) || + ( + (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) && + (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) && + (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0) + ) + ) + { ps_skip_mv->i2_mvx = 0; ps_skip_mv->i2_mvy = 0; } else { - ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx; - ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy; + ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx; + ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy; } + + return PRED_L0; } + 
/** ******************************************************************************* * @@ -469,61 +536,64 @@ void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me) */ void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu, enc_pu_t *ps_top_row_pu, - mv_t *ps_pred_mv) + enc_pu_mv_t *ps_pred_mv, + WORD32 i4_ref_list) { - /* curr frame ref idx */ - /* we are assuming that we are operating on single reference frame - * hence the ref idx is insignificant during mv prediction. - */ - WORD32 u4_ref_idx = 0; - /* temp var */ - WORD32 pred_algo = 3, a, b, c; - - /* If only one of the candidate blocks has a reference frame equal to - * the current block then use the same block as the final predictor */ - a = (ps_left_mb_pu->i1_l0_ref_idx == u4_ref_idx)? 0:-1; - b = (ps_top_row_pu[0].i1_l0_ref_idx == u4_ref_idx)? 0:-1; - c = (ps_top_row_pu[1].i1_l0_ref_idx == u4_ref_idx)? 0:-1; - - if (a == 0 && b == -1 && c == -1) - pred_algo = 0; /* LEFT */ - else if (a == -1 && b == 0 && c == -1) - pred_algo = 1; /* TOP */ - else if (a == -1 && b == -1 && c == 0) - pred_algo = 2; /* TOP RIGHT */ - - switch (pred_algo) - { - case 0: - /* left */ - ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx; - ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy; - break; - case 1: - /* top */ - ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx; - ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy; - break; - case 2: - /* top right */ - ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx; - ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy; - break; - case 3: - /* median */ - MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx, - ps_top_row_pu[0].s_l0_mv.i2_mvx, - ps_top_row_pu[1].s_l0_mv.i2_mvx, - ps_pred_mv->i2_mvx); - MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy, - ps_top_row_pu[0].s_l0_mv.i2_mvy, - ps_top_row_pu[1].s_l0_mv.i2_mvy, - ps_pred_mv->i2_mvy); + /* Indicated the current ref */ + WORD8 i1_ref_idx; - break; - default: - break; + /* For pred L0 */ + i1_ref_idx = -1; + { + /* temp var 
*/ + WORD32 pred_algo = 3, a, b, c; + + /* If only one of the candidate blocks has a reference frame equal to + * the current block then use the same block as the final predictor */ + a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1; + b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1; + c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1; + + if (a == 0 && b == -1 && c == -1) + pred_algo = 0; /* LEFT */ + else if(a == -1 && b == 0 && c == -1) + pred_algo = 1; /* TOP */ + else if(a == -1 && b == -1 && c == 0) + pred_algo = 2; /* TOP RIGHT */ + + switch (pred_algo) + { + case 0: + /* left */ + ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy; + break; + case 1: + /* top */ + ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy; + break; + case 2: + /* top right */ + ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx; + ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy; + break; + case 3: + /* median */ + MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx, + ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx, + ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx, + ps_pred_mv->s_mv.i2_mvx); + MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy, + ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy, + ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy, + ps_pred_mv->s_mv.i2_mvy); + + break; + default: + break; + } } } @@ -545,31 +615,34 @@ void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu, * ******************************************************************************* */ -void ih264e_mv_pred(process_ctxt_t *ps_proc) +void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type) { /* left mb motion 
vector */ - enc_pu_t *ps_left_mb_pu ; + enc_pu_t *ps_left_mb_pu; /* top left mb motion vector */ - enc_pu_t *ps_top_left_mb_pu ; + enc_pu_t *ps_top_left_mb_pu; /* top row motion vector info */ enc_pu_t *ps_top_row_pu; /* predicted motion vector */ - mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv; /* zero mv */ - mv_t zero_mv = {0, 0}; + mv_t zero_mv = { 0, 0 }; /* mb neighbor availability */ block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; /* mb syntax elements of neighbors */ - mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; - mb_info_t *ps_top_left_syn; - UWORD32 u4_left_is_intra; + mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; + mb_info_t *ps_top_left_syn; + UWORD32 u4_left_is_intra; + + /* Temp var */ + WORD32 i4_reflist, max_reflist, i4_cmpl_predmode; ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele); u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra; @@ -577,44 +650,58 @@ void ih264e_mv_pred(process_ctxt_t *ps_proc) ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x); - /* Before performing mv prediction prepare the ngbr information and - * reset motion vectors basing on their availability */ - if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) ) - { - /* left mv */ - ps_left_mb_pu->i1_l0_ref_idx = -1; - ps_left_mb_pu->s_l0_mv = zero_mv; - } - if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra) - { - /* top mv */ - ps_top_row_pu[0].i1_l0_ref_idx = -1; - ps_top_row_pu[0].s_l0_mv = zero_mv; - } - if (!ps_ngbr_avbl->u1_mb_c) + /* Number of ref lists to process */ + max_reflist = (i4_slice_type == PSLICE) ? 
1 : 2; + + for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++) { - /* top right mv - When top right partition is not available for - * prediction if top left is available use it for prediction else - * set the mv information to -1 and (0, 0) - * */ - if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra) + i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0; + + /* Before performing mv prediction prepare the ngbr information and + * reset motion vectors basing on their availability */ + if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) + || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode)) { - ps_top_row_pu[1].i1_l0_ref_idx = -1; - ps_top_row_pu[1].s_l0_mv = zero_mv; + /* left mv */ + ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0; + ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv; } - else + if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra + || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode)) { - ps_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx; - ps_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv; + /* top mv */ + ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0; + ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv; } - } - else if (ps_top_syn[1].u2_is_intra) - { - ps_top_row_pu[1].i1_l0_ref_idx = -1; - ps_top_row_pu[1].s_l0_mv = zero_mv; + + if (!ps_ngbr_avbl->u1_mb_c) + { + /* top right mv - When top right partition is not available for + * prediction if top left is available use it for prediction else + * set the mv information to -1 and (0, 0) + * */ + if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra + || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode)) + { + ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0; + ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv; + } + else + { + ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx; + ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = 
ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv; + } + } + else if(ps_top_syn[1].u2_is_intra + || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)) + { + ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0; + ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv; + } + + ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist); } - ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, ps_pred_mv); } /** @@ -635,7 +722,7 @@ void ih264e_mv_pred(process_ctxt_t *ps_proc) * ******************************************************************************* */ -void ih264e_mv_pred_me(process_ctxt_t *ps_proc) +void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list) { /* left mb motion vector */ enc_pu_t *ps_left_mb_pu ; @@ -649,11 +736,14 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc) enc_pu_t s_top_row_pu[2]; /* predicted motion vector */ - mv_t *ps_pred_mv = ps_proc->ps_pred_mv; + enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv; /* zero mv */ mv_t zero_mv = {0, 0}; + /* Complementary pred mode */ + WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? 
PRED_L1 : PRED_L0; + /* mb neighbor availability */ block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; @@ -664,19 +754,23 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc) s_top_row_pu[0] = ps_top_row_pu[0]; s_top_row_pu[1] = ps_top_row_pu[1]; - /* Before performing mv prediction prepare the ngbr information and - * reset motion vectors basing on their availability */ - if (!ps_ngbr_avbl->u1_mb_a ) + /* + * Before performing mv prediction prepare the ngbr information and + * reset motion vectors basing on their availability + */ + + if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode)) { /* left mv */ - ps_left_mb_pu->i1_l0_ref_idx = -1; - ps_left_mb_pu->s_l0_mv = zero_mv; + ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0; + ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv; } - if (!ps_ngbr_avbl->u1_mb_b ) + if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode)) { /* top mv */ - s_top_row_pu[0].i1_l0_ref_idx = -1; - s_top_row_pu[0].s_l0_mv = zero_mv; + s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0; + s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv; + } if (!ps_ngbr_avbl->u1_mb_c) { @@ -684,19 +778,28 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc) * prediction if top left is available use it for prediction else * set the mv information to -1 and (0, 0) * */ - if (!ps_ngbr_avbl->u1_mb_d) + if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode)) { - s_top_row_pu[1].i1_l0_ref_idx = -1; - s_top_row_pu[1].s_l0_mv = zero_mv; + s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0; + s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv; + + s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0; + s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv; } else { - s_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx; - s_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv; + s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 
ps_top_left_mb_pu->s_me_info[0].i1_ref_idx; + s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv; } } + else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode) + { + ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0; + ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv; + } - ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]), ps_pred_mv); + ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]), + &ps_pred_mv[i4_ref_list], i4_ref_list); } /** @@ -722,20 +825,38 @@ void ih264e_init_me(process_ctxt_t *ps_proc) /* me ctxt */ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; + /* codec context */ + codec_t *ps_codec = ps_proc->ps_codec; + + ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B; + + if (ps_codec->s_cfg.u4_num_bframes == 0) + { + ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P; + } + else + { + ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P; + } + /* src ptr */ ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma; - /* ref ptr */ - ps_me_ctxt->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma; + /* ref ptrs and corresponding lagrange params */ + ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0]; + ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1]; - /* lagrange param */ ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp]; + + } + /** ******************************************************************************* * -* @brief This function performs motion estimation for the current mb +* @brief This function performs motion estimation for the current mb using +* single reference list * * @par Description: * The current mb is compared with a list of mb's in the reference frame for @@ -753,7 +874,7 @@ void ih264e_init_me(process_ctxt_t *ps_proc) * ******************************************************************************* */ -void ih264e_compute_me(process_ctxt_t *ps_proc) +void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc) { /* me ctxt */ me_ctxt_t 
*ps_me_ctxt = &ps_proc->s_me_ctxt; @@ -761,20 +882,6 @@ void ih264e_compute_me(process_ctxt_t *ps_proc) /* codec context */ codec_t *ps_codec = ps_proc->ps_codec; -// /* mb syntax elements of neighbors */ -// mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; -// mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME); - - /* mb part info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; - mb_part_ctxt skip_mb_part_info; - - /* temp var */ - WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad; - - /* Motion vectors in full-pel units */ - WORD16 mv_x, mv_y; - /* recon stride */ WORD32 i4_rec_strd = ps_proc->i4_rec_strd; @@ -787,118 +894,104 @@ void ih264e_compute_me(process_ctxt_t *ps_proc) /* Sad therholds */ ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh; - /*Best half pel buffer*/ - UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf; - UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd; + /* Mb part ctxts for SKIP */ + mb_part_ctxt s_skip_mbpart; - /* During evaluation for motion vectors do not search through padded regions */ - /* Obtain number of rows and columns that are effective for computing for me evaluation */ - rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE; - rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE; - columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE; - columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE; - - /* init srch range */ - /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2 - * on all sides. 
- */ -// ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]); -// ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]); -// ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]); -// ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]); - - ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1); - ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1); - ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1); - ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1); - - /* this is to facilitate fast sub pel computation with minimal loads */ - if (ps_me_ctxt->u4_enable_hpel) { + WORD32 rows_above, rows_below, columns_left, columns_right; + + /* During evaluation for motion vectors do not search through padded regions */ + /* Obtain number of rows and columns that are effective for computing for me evaluation */ + rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE; + rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE; + columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE; + columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE; + + /* init srch range */ + /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2 + * on all sides. 
+ */ + ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1); + ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1); + + /* this is to facilitate fast sub pel computation with minimal loads */ ps_me_ctxt->i4_srch_range_w += 1; ps_me_ctxt->i4_srch_range_e -= 1; ps_me_ctxt->i4_srch_range_n += 1; ps_me_ctxt->i4_srch_range_s -= 1; } - /*Initialize the min sad option*/ - ps_me_ctxt->u4_min_sad_reached = 0; /*Not yet found min sad*/ - ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad; + /* Compute ME and store the MVs */ - /************************************************************/ - /* Get the seed motion vector candidates */ - /************************************************************/ - ih264e_get_search_candidates(ps_proc, ps_me_ctxt); - - /************************************************************/ - /* Init the MB part ctxt structure */ - /************************************************************/ - ps_mb_part->s_mv_curr.i2_mvx = 0; - ps_mb_part->s_mv_curr.i2_mvy = 0; - ps_mb_part->i4_mb_cost = INT_MAX; - ps_mb_part->i4_mb_distortion = INT_MAX; - - /* With NMB changes this logic will not work as we cannot exit NME in between*/ - /********************************************************************/ - /* Analyse skip */ - /********************************************************************/ -// if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0 -// && u4_frame_level_me == 0) -// { -// if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) || -// (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) || -// (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) ) -// { -// if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) ) -// { -// return; -// } -// } -// } - - 
/********************************************************************/ - /* compute skip cost */ - /********************************************************************/ - /* See if we need to use modified sad */ - u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1); + /*********************************************************************** + * Compute ME for list L0 + ***********************************************************************/ - /* init the cost of skip MB */ - skip_mb_part_info.i4_mb_cost = INT_MAX; - ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad); + /* Init SATQD for the current list */ + ps_me_ctxt->u4_min_sad_reached = 0; + ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad; - - if (ps_me_ctxt->u4_min_sad_reached == 0) + /* Get the seed motion vector candidates */ + ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0); + + /* **************************************************************** + *Evaluate the SKIP for current list + * ****************************************************************/ + s_skip_mbpart.s_mv_curr.i2_mvx = 0; + s_skip_mbpart.s_mv_curr.i2_mvy = 0; + s_skip_mbpart.i4_mb_cost = INT_MAX; + s_skip_mbpart.i4_mb_distortion = INT_MAX; + + ime_compute_skip_cost( ps_me_ctxt, + (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv), + &s_skip_mbpart, + ps_proc->ps_codec->s_cfg.u4_enable_satqd, + PRED_L0, + 0 /* Not a Bslice */ ); + + s_skip_mbpart.s_mv_curr.i2_mvx <<= 2; + s_skip_mbpart.s_mv_curr.i2_mvy <<= 2; + + /****************************************************************** + * Evaluate ME For current list + *****************************************************************/ + ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0; + ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0; + ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX; + ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX; + + /* Init Hpel */ + 
ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL; + + /* In case we found out the minimum SAD, exit the ME eval */ + if (!ps_me_ctxt->u4_min_sad_reached) { - /************************************************************/ - /* Evaluate search candidates for initial mv pt. */ - /************************************************************/ - ime_evaluate_init_srchposn_16x16(ps_me_ctxt); + /* Evaluate search candidates for initial mv pt */ + ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0); /********************************************************************/ /* full pel motion estimation */ /********************************************************************/ - ime_full_pel_motion_estimation_16x16(ps_me_ctxt); + ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0); - DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2), - (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2)); + /* Scale the MV to qpel resolution */ + ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2; + ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2; - DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1); - /********************************************************************/ - /* sub pel motion estimation */ - /********************************************************************/ if (ps_me_ctxt->u4_enable_hpel) { - /* motion vectors in terms of full pel values */ - mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; - mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; - /* moving src pointer to the converged motion vector location*/ - pu1_hpel_src = ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd); + pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2) + + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2)* i4_rec_strd; + + ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0]; + ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1]; + 
ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2]; - ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x; - ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y; - ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy; - ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD; + ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD; /* half pel search is done for both sides of full pel, * hence half_x of width x height = 17x16 is created @@ -907,9 +1000,9 @@ void ih264e_compute_me(process_ctxt_t *ps_proc) /* computing half_x */ ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, - ps_proc->pu1_half_x, + ps_me_ctxt->apu1_subpel_buffs[0], i4_rec_strd, - ps_me_ctxt->u4_hp_buf_strd); + ps_me_ctxt->u4_subpel_buf_strd); /* * Halfpel search is done for both sides of full pel, @@ -918,61 +1011,57 @@ void ih264e_compute_me(process_ctxt_t *ps_proc) * for half_xy top_left is required * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1 */ - pu1_hpel_src -= i4_rec_strd; /* computing half_y , and half_xy*/ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert( - pu1_hpel_src, ps_proc->pu1_half_y, - ps_proc->pu1_half_xy, i4_rec_strd, - ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3, - ps_me_ctxt->u4_hp_buf_strd); + pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], + ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd, + ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3, + ps_me_ctxt->u4_subpel_buf_strd); - ime_sub_pel_motion_estimation_16x16(ps_me_ctxt); + ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0); } } - { - /* if skip gives a better cost than other search, copy the cost accordingly*/ - if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost) - { - ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost; - ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion; - ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx; - ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy; - } - else - { - /* - * If the current MB has a sub pel component, - 
* we need to copy that to the best subpel buffer - */ - if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf) - { - ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf, - pu1_best_subpel_buf, - ps_me_ctxt->u4_hp_buf_strd, - u4_bst_spel_strd, MB_SIZE, - MB_SIZE, NULL, 0); - } - } + /*********************************************************************** + * If a particular skiip Mv is giving better sad, copy to the corresponding + * MBPART + * In B slices this loop should go only to PREDL1: If we found min sad + * we will go to the skip ref list only + * Have to find a way to make it without too much change or new vars + **********************************************************************/ + if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost) + { + ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost; + ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion; + ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr; } - - DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0); - - /* update the type of the mb if necessary */ - if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost) + else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf) { - /* mb cost */ - ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost; + /* Now we have to copy the buffers */ + ps_codec->pf_inter_pred_luma_copy( + ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf, + ps_proc->pu1_best_subpel_buf, + ps_me_ctxt->u4_subpel_buf_strd, + ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, + NULL, 0); + } - /* mb distortion */ - ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion; + /********************************************************************** + * Now get the minimum of MB part sads by searching over all ref lists + **********************************************************************/ + 
ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx; + ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy; + ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost; + ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion; + ps_proc->ps_cur_mb->u4_mb_type = P16x16; + ps_proc->ps_pu->b2_pred_mode = PRED_L0 ; - /* mb type */ - ps_proc->ps_cur_mb->u4_mb_type = P16x16; - } + /* Mark the reflists */ + ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1; + ps_proc->ps_pu->s_me_info[1].i1_ref_idx = 0; /* number of partitions */ ps_proc->u4_num_sub_partitions = 1; @@ -986,19 +1075,13 @@ void ih264e_compute_me(process_ctxt_t *ps_proc) ps_proc->ps_pu->b4_wd = 3; ps_proc->ps_pu->b4_ht = 3; - /* ref idx */ - ps_proc->ps_pu->i1_l0_ref_idx = 0; - - /* motion vector L0 */ - ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx; - ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy; - /* Update min sad conditions */ if (ps_me_ctxt->u4_min_sad_reached == 1) { ps_proc->ps_cur_mb->u4_min_sad_reached = 1; ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad; } + } /** @@ -1054,9 +1137,9 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) } } - ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv); + ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]); ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl); - ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv); + ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]); ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]); @@ -1080,7 +1163,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) /* init me */ ih264e_init_me(ps_proc); - ih264e_compute_me(ps_proc); + /* Compute ME according to slice type */ + 
ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc); /* update top and left structs */ { @@ -1119,7 +1203,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) /* update buffers pointers */ ps_proc->pu1_src_buf_luma += MB_SIZE; ps_proc->pu1_rec_buf_luma += MB_SIZE; - ps_proc->pu1_ref_buf_luma += MB_SIZE; + ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; + ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; /* * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, @@ -1127,7 +1212,9 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) */ ps_proc->pu1_src_buf_chroma += MB_SIZE; ps_proc->pu1_rec_buf_chroma += MB_SIZE; - ps_proc->pu1_ref_buf_chroma += MB_SIZE; + ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; + ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; + ps_proc->pu4_mb_pu_cnt += 1; } @@ -1139,7 +1226,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) /* update buffers pointers */ ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count; ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count; - ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count; + ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count; + ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count; /* * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, @@ -1147,7 +1235,892 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count) */ ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count; ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count; - ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count; + ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count; + ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count; + ps_proc->pu4_mb_pu_cnt -= u4_nmb_count; } + + +/** +******************************************************************************* +* +* @brief The function computes parameters for a BSKIP MB +* +* @par Description: +* The function updates the skip motion 
vector for B Mb, check if the Mb can be +* marked as skip and returns it +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Dummy +* +* @param[in] i4_reflist +* Dummy +* +* @returns Flag indicating if the current Mb can be skip or not +* +* @remarks +* The code implements the logic as described in sec 8.4.1.2.2 +* It also computes co-located MB parmas according to sec 8.4.1.2.1 +* +* Need to add condition for this fucntion to be used in ME +* +*******************************************************************************/ +WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ + /* Colzero for co-located MB */ + WORD32 i4_colzeroflag; + + /* motion vectors for neighbouring MBs */ + enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu; + + /* Variables to check if a particular mB is available */ + WORD32 i4_a, i4_b, i4_c, i4_c_avail;; + + /* Mode availability, init to no modes available */ + WORD32 i4_mode_avail; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* Temp var */ + WORD32 i, i4_cmpl_mode, i4_skip_type = -1; + + /* + * Colocated motion vector + */ + mv_t s_mvcol; + + /* + * Colocated picture idx + */ + WORD32 i4_refidxcol; + + UNUSED(i4_reflist); + + /************************************************************************** + *Find co-located MB parameters + * See sec 8.4.1.2.1 for reference + **************************************************************************/ + { + /* + * Find the co-located Mb and update the skip and pred appropriately + * 1) Default colpic is forward ref : Table 8-6 + * 2) Default mb col is current MB : Table 8-8 + */ + + if (ps_proc->ps_colpu->b1_intra_flag) + { + s_mvcol.i2_mvx = 0; + s_mvcol.i2_mvy = 0; + i4_refidxcol = -1; + } + else + { + if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1) + { + s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv; + i4_refidxcol = 0; + } + else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0) + 
{ + s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv; + i4_refidxcol = 0; + } + } + + /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */ + i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) + && (ABS(s_mvcol.i2_mvy) <= 1)); + + } + + /*************************************************************************** + * Evaluating skip params : Spatial Skip + **************************************************************************/ + { + /* Get the neighbouring MBS according to Section 8.4.1.2.2 */ + ps_a_pu = &ps_proc->s_left_mb_pu_ME; + ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x); + + i4_c_avail = 0; + if (ps_ngbr_avbl->u1_mb_c) + { + ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]); + i4_c_avail = 1; + } + else + { + ps_c_pu = &ps_proc->s_top_left_mb_pu_ME; + i4_c_avail = ps_ngbr_avbl->u1_mb_d; + } + + i4_a = ps_ngbr_avbl->u1_mb_a; + i4_b = ps_ngbr_avbl->u1_mb_b; + i4_c = i4_c_avail; + + /* Init to no mode avail */ + i4_mode_avail = 0; + for (i = 0; i < 2; i++) + { + i4_cmpl_mode = (i == 0) ? 
PRED_L1 : PRED_L0; + + i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0; + } + + /* Update skip MV for L1 */ + if ((i4_mode_avail & 0x2) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0; + } + + } + + /*************************************************************************** + * Evaluating skip params : Temporal skip + **************************************************************************/ + { + pic_buf_t * ps_ref_pic[MAX_REF_PIC_CNT]; + WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor; + enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2]; + + ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0]; + ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1]; + + i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc; + i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc; + + i4_tb = CLIP3(-128, 127, i4_tb); + i4_td = CLIP3(-128, 127, i4_td); + + i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ; + i4_dist_scale_factor = CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 ); + + /* Motion vectors taken in full pel resolution , hence -> (& 0xfffc) operation */ + ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc; + ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc; + + 
ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc; + ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc; + + } + + return i4_skip_type; +} + +/** +******************************************************************************* +* +* @brief The function computes the skip motion vectoe for B mb +* +* @par Description: +* The function gives the skip motion vector for B Mb, check if the Mb can be +* marked as skip +* +* @param[in] ps_proc +* Pointer to process context +* +* @param[in] u4_for_me +* Dummy +* +* @param[in] u4_for_me +* Dummy +* +* @returns Flag indicating if the current Mb can be skip or not +* +* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264 +* specification. It also computes co-located MB parmas according to sec 8.4.1.2.1 +* +*******************************************************************************/ +WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist) +{ + WORD32 i4_colzeroflag; + + /* motion vectors */ + enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu; + + /* Syntax elem */ + mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn; + + /* Variables to check if a particular mB is available */ + WORD32 i4_a, i4_b, i4_c, i4_c_avail; + + /* Mode availability, init to no modes available */ + WORD32 i4_mode_avail; + + /* mb neighbor availability */ + block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; + + /* Temp var */ + WORD32 i, i4_cmpl_mode; + + UNUSED(i4_reflist); + + /************************************************************************** + *Find co-locates parameters + * See sec 8.4.1.2.1 for reference + **************************************************************************/ + { + /* + * Find the co-located Mb and update the skip and pred appropriately + * 1) Default colpic is forward ref : Table 8-6 + * 2) Default mb col is current MB : Table 8-8 + */ + + mv_t s_mvcol; + WORD32 i4_refidxcol; + + if 
(ps_proc->ps_colpu->b1_intra_flag) + { + s_mvcol.i2_mvx = 0; + s_mvcol.i2_mvy = 0; + i4_refidxcol = -1; + } + else + { + if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1) + { + s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv; + i4_refidxcol = 0; + } + else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0) + { + s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv; + i4_refidxcol = 0; + } + } + + /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */ + i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) + && (ABS(s_mvcol.i2_mvy) <= 1)); + + } + + /*************************************************************************** + * Evaluating skip params + **************************************************************************/ + /* Section 8.4.1.2.2 */ + ps_a_syn = &ps_proc->s_left_mb_syntax_ele; + ps_a_pu = &ps_proc->s_left_mb_pu; + + ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; + ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x); + + i4_c_avail = 0; + if (ps_ngbr_avbl->u1_mb_c) + { + ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]); + ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]); + i4_c_avail = 1; + } + else + { + ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele); + ps_c_pu = &ps_proc->s_top_left_mb_pu; + i4_c_avail = ps_ngbr_avbl->u1_mb_d; + } + + + i4_a = ps_ngbr_avbl->u1_mb_a; + i4_a &= !ps_a_syn->u2_is_intra; + + i4_b = ps_ngbr_avbl->u1_mb_b; + i4_b &= !ps_b_syn->u2_is_intra; + + i4_c = i4_c_avail; + i4_c &= !ps_c_syn->u2_is_intra; + + /* Init to no mode avail */ + i4_mode_avail = 0; + for (i = 0; i < 2; i++) + { + i4_cmpl_mode = (i == 0) ? 
PRED_L1 : PRED_L0; + + i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0; + } + + /* Update skip MV for L1 */ + if ((i4_mode_avail & 0x2) && (!i4_colzeroflag)) + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy; + } + else + { + ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0; + ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0; + } + + /* Now see if the ME information matches the SKIP information */ + switch (ps_proc->ps_pu->b2_pred_mode) + { + case PRED_BI: + if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx) + && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy) + && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx) + && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy) + && (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)) + { + return 1; + } + break; + + case PRED_L0: + if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx) + && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy) + && (i4_mode_avail == 0x1)) + { + return 1; + } + break; + + case PRED_L1: + if ( (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx) + && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy) + && (i4_mode_avail == 0x2)) + { + return 1; + } + break; + } + + return 0; +} + + +/** 
+******************************************************************************* +* +* @brief This function computes the best motion vector among the tentative mv +* candidates chosen. +* +* @par Description: +* This function determines the position in the search window at which the motion +* estimation should begin in order to minimise the number of search iterations. +* +* @param[in] ps_mb_part +* pointer to current mb partition ctxt with respect to ME +* +* @param[in] u4_lambda_motion +* lambda motion +* +* @param[in] u4_fast_flag +* enable/disable fast sad computation +* +* @returns mv pair & corresponding distortion and cost +* +* @remarks Currently onyl 4 search candiates are supported +* +******************************************************************************* +*/ +void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt, + process_ctxt_t *ps_proc, + mb_part_ctxt *ps_mb_ctxt_bi) +{ + + UWORD32 i, u4_fast_sad; + + WORD32 i4_dest_buff; + + mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv; + + UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1; + + UWORD8 *pu1_dst_buf; + + WORD32 i4_ref_l0_stride, i4_ref_l1_stride; + + WORD32 i4_mb_distortion, i4_mb_cost; + + u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad; + + i4_dest_buff = 0; + for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2) + { + pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff]; + + s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2; + s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2; + s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2; + s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2; + + ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv; + ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv; + + if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)|| + (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3)) + { + pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf; + 
i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd; + } + else + { + pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd); + i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd; + } + + + if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) || + (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3)) + { + pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf; + i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd; + } + else + { + pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd); + i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd; + } + + ps_proc->ps_codec->pf_inter_pred_luma_bilinear( + pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf, + i4_ref_l0_stride, i4_ref_l1_stride, + ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE); + + ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad]( + ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf, + ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd, + ps_mb_ctxt_bi->i4_mb_distortion, &i4_mb_distortion); + + /* compute cost */ + i4_mb_cost = ps_me_ctxt->pu1_mv_bits[( s_l0_mv.i2_mvy << 2 ) - ps_l0_pred_mv->i2_mvx]; + i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l0_mv.i2_mvy << 2 ) - ps_l0_pred_mv->i2_mvy]; + i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l1_mv.i2_mvx << 2 ) - ps_l1_pred_mv->i2_mvx]; + i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l1_mv.i2_mvy << 2 ) - ps_l1_pred_mv->i2_mvy]; + + i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0); + + + i4_mb_cost *= ps_me_ctxt->u4_lambda_motion; + i4_mb_cost += i4_mb_distortion; + + if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost) + { + ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1); + ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost; + ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion; + ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf; + i4_dest_buff = (i4_dest_buff + 1) % 2; + } + } + +} + +/** 
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The mb that offers least cost is chosen as predicted mb and the
+* displacement of the predicted mb from index location of the current mb is
+* signaled as mv. The list of the mb's that are chosen in the reference frame
+* are dependent on the speed of the ME configured.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns motion vector of the pred mb, sad, cost.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
+{   /* Flow: clamp the search window to the picture, run skip and full pel
+     * (and, if enabled, half pel) ME for each reference list, fold the skip
+     * results into the per-list results, evaluate bi-pred when both lists
+     * were searched, pick the cheapest mode and publish the mvs and PU info
+     * through ps_proc. Nothing is returned; all results are written to
+     * ps_proc->s_me_ctxt, ps_proc->ps_cur_mb and ps_proc->ps_pu. */
+    /* me ctxt */
+    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* Temp variables for looping over ref lists */
+    WORD32 i4_reflist, i4_max_reflist;
+
+    /* recon stride */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* source buffer for half pel generation functions */
+    UWORD8 *pu1_hpel_src;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* Sad thresholds
+     * NOTE(review): this assignment precedes the declaration that follows,
+     * so the function relies on mixed declarations and code (C99). */
+    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
+
+    /* Mb part ctxts for SKIP, one per reference list (indexed by PRED_L0 /
+     * PRED_L1) */
+    mb_part_ctxt as_skip_mbpart[2];
+
+    {
+        WORD32 rows_above, rows_below, columns_left, columns_right;
+
+        /* During evaluation for motion vectors do not search through padded regions */
+        /* Obtain number of rows and columns that are effective for computing for me evaluation.
+         * The above/left counts include one extra MB_SIZE beyond the mb
+         * position - presumably one mb of padding is allowed; TODO confirm */
+        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+        /* init srch range (west/north are stored as negative offsets) */
+        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+         * on all sides.
+         */
+        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+        /* this is to facilitate fast sub pel computation with minimal loads:
+         * the window is shrunk by one pel on every side */
+        if (ps_me_ctxt->u4_enable_hpel)
+        {
+            ps_me_ctxt->i4_srch_range_w += 1;
+            ps_me_ctxt->i4_srch_range_e -= 1;
+            ps_me_ctxt->i4_srch_range_n += 1;
+            ps_me_ctxt->i4_srch_range_s -= 1;
+        }
+    }
+
+    /* Compute ME and store the MVs */
+    {
+        /***********************************************************************
+         * Compute ME for lists L0 and L1
+         * For L0 -> L0 skip + L0
+         * for L1 -> L0 skip + L0 + L1 skip + L1
+         *
+         * P slices search PRED_L0 only; every other slice type searches
+         * PRED_L0 and PRED_L1.
+         ***********************************************************************/
+        i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
+
+        /* Init SATQD for the current list: clear the early-exit flag and seed
+         * the running minimum sad from the current mb */
+        ps_me_ctxt->u4_min_sad_reached = 0;
+        ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+        for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
+        {
+
+            /* Get the seed motion vector candidates */
+            ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
+
+            /* ****************************************************************
+             *Evaluate the SKIP for current list
+             * The skip cost is computed only for the list that matches
+             * i4_skip_type; for any other list the entry keeps mv (0,0) and
+             * cost/distortion INT_MAX, so it can never win later on.
+             * ****************************************************************/
+            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
+            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
+            as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
+            as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
+
+            if (ps_me_ctxt->i4_skip_type == i4_reflist)
+            {
+                ime_compute_skip_cost( ps_me_ctxt,
+                                       (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
+                                       &as_skip_mbpart[i4_reflist],
+                                       ps_proc->ps_codec->s_cfg.u4_enable_satqd,
+                                       i4_reflist,
+                                       (ps_proc->i4_slice_type == BSLICE) );
+            }
+
+            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;   /* assumes the skip mv is in full pel units here - TODO confirm */
+            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
+
+            /******************************************************************
+             * Evaluate ME For current list
+             * (results are reset first so a list that is left early still
+             * holds mv (0,0), cost INT_MAX and no half pel buffer)
+             *****************************************************************/
+            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
+            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
+            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
+            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
+
+            /* Init Hpel */
+            ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
+
+            /* In case we found out the minimum SAD, exit the ME eval.
+             * i4_max_reflist is lowered to the current list so the loops
+             * below only visit lists whose entries were initialised. */
+            if (ps_me_ctxt->u4_min_sad_reached)
+            {
+                i4_max_reflist = i4_reflist;
+                break;
+            }
+
+
+            /* Evaluate search candidates for initial mv pt */
+            ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
+
+            /********************************************************************/
+            /* full pel motion estimation */
+            /********************************************************************/
+            ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
+
+            DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
+                                   (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+
+            DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);   /* NOTE(review): both debug macros name s_mb_part while the rest of this function uses as_mb_part[i4_reflist]; presumably the macros expand to nothing in normal builds - confirm */
+
+            /* Scale the MV to qpel resolution */
+            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
+            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
+
+            if (ps_me_ctxt->u4_enable_hpel)
+            {
+                /* moving src pointer to the converged motion vector location */
+                pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
+                                + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
+                                + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
+
+                ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
+                ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
+                ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
+
+                /* Init the search position to an invalid number (3 is one
+                 * past the three planes, slots 0..2, filled below) */
+                ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
+
+                /* Incase a buffer is still in use by L0, replace it with spare buff.
+                 * While L0 itself is processed its index was just set to 3,
+                 * so this only writes slot 3. While L1 is processed the slot
+                 * L0 selected is redirected to the spare buffer.
+                 * Assumes apu1_subpel_buffs has at least 4 entries and that
+                 * the sub pel search leaves i4_srch_pos_idx in 0..3 - TODO
+                 * confirm */
+                ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
+                                ps_proc->apu1_subpel_buffs[3];
+
+
+                ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
+
+                /* half pel search is done for both sides of full pel,
+                 * hence half_x of width x height = 17x16 is created
+                 * starting from left half_x of converged full pel */
+                pu1_hpel_src -= 1;
+
+                /* computing half_x (written to slot 0) */
+                ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
+                                                      ps_me_ctxt->apu1_subpel_buffs[0],
+                                                      i4_rec_strd,
+                                                      ps_me_ctxt->u4_subpel_buf_strd);
+
+                /*
+                 * Halfpel search is done for both sides of full pel,
+                 * hence half_y of width x height = 16x17 is created
+                 * starting from top half_y of converged full pel
+                 * for half_xy top_left is required
+                 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
+                 */
+                pu1_hpel_src -= i4_rec_strd;
+
+                /* computing half_y and half_xy (slots 1 and 2 are passed as
+                 * the two outputs - presumably in that order; TODO confirm) */
+                ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+                                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
+                                ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
+                                ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
+                                ps_me_ctxt->u4_subpel_buf_strd);
+
+                ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
+
+            }
+        }
+
+        /***********************************************************************
+         * If a particular skip Mv is giving better sad, copy to the corresponding
+         * MBPART
+         * In B slices this loop should go only to PREDL1: If we found min sad
+         * we will go to the skip ref list only
+         * Have to find a way to make it without too much change or new vars
+         *
+         * Only cost, distortion and mv are copied; pu1_best_hpel_buf of the
+         * list is left as the ME stage set it.
+         **********************************************************************/
+        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
+        {
+            if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
+            {
+                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
+                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
+                ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
+            }
+        }
+
+        /***********************************************************************
+         * Compute ME for BI
+         * In case of BI we do ME for two candidates
+         * 1) The best L0 and L1 Mvs
+         * 2) Skip L0 and L1 MVs
+         *
+         * TODO
+         * one of the search candidates is skip. Hence it may be duplicated
+         *
+         * As coded below, three L0/L1 pairs are queued in
+         * as_mv_init_search[PRED_BI]: entries 0,1 and 2,3 come from the last
+         * two init-search candidates of each list (scaled to qpel), entries
+         * 4,5 are the best mvs after ME. Runs only when both lists were
+         * searched and the min sad early exit did not trigger.
+         ***********************************************************************/
+        if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
+        {
+            WORD32 i, j = 0;
+            WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
+            WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
+
+            /* Get the free buffers.
+             * NOTE(review): in this function i4_srch_pos_idx is assigned only
+             * inside the u4_enable_hpel branch; with half pel disabled these
+             * read whatever value was left earlier - confirm */
+            l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
+            l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
+
+            /* Search for the two free buffers in subpel list: every buffer
+             * not selected by L0 or L1 is packed to the front of the me ctxt
+             * list */
+            for (i = 0; i < SUBPEL_BUFF_CNT; i++)
+            {
+                if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
+                {
+                    ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
+                    j++;
+                }
+            }
+            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
+
+            /* Copy the spatial SKIP MV of each list (second to last init
+             * search candidate) */
+            i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
+            i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
+
+            /* Copy the SKIP MV temporal of each list (last init search
+             * candidate) */
+            i4_l0_skip_mv_idx++;
+            i4_l1_skip_mv_idx++;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
+
+            /* Copy the best MV after ME (already in qpel units) */
+            ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
+            ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
+
+            ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
+
+            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
+            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
+
+            ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
+                                   &ps_me_ctxt->as_mb_part[PRED_BI]);
+
+            i4_max_reflist = PRED_BI;   /* let the mode decision below consider the bi-pred result as well */
+        }
+
+        /**********************************************************************
+         * Now get the minimum of MB part sads by searching over all ref lists
+         *
+         * b2_pred_mode starts at 0x3 and is overwritten only by a mode whose
+         * cost beats the cost already stored in ps_cur_mb.
+         * NOTE(review): if no mode wins, 0x3 is later used to index
+         * as_mb_part - confirm that index is within the array.
+         **********************************************************************/
+        ps_proc->ps_pu->b2_pred_mode = 0x3;
+
+        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
+        {
+            if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+            {
+                ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
+                ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
+                ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
+                ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
+            }
+        }
+
+        /**********************************************************************
+         * In case we have a BI MB, we have to copy the buffers and set proper MV's
+         * 1)In case its BI, we need to get the best MVs given by BI and update
+         * to their corresponding MB part
+         * 2)We also need to copy the buffer in which bipred buff is populated
+         *
+         * Not that if we have
+         *
+         * For BI the winning pair index is mapped back to its two entries in
+         * as_mv_init_search[PRED_BI]. For a single-list mode the buffer is
+         * copied only when that list has a non-NULL pu1_best_hpel_buf.
+         **********************************************************************/
+        if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
+        {
+            WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
+            UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
+
+            ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
+            ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
+
+            /* Now we have to copy the buffers */
+            ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
+                                              ps_proc->pu1_best_subpel_buf,
+                                              ps_me_ctxt->u4_subpel_buf_strd,
+                                              ps_proc->u4_bst_spel_buf_strd,
+                                              MB_SIZE, MB_SIZE, NULL, 0);
+
+        }
+        else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
+        {
+            /* Now we have to copy the buffers */
+            ps_codec->pf_inter_pred_luma_copy(
+                            ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
+                            ps_proc->pu1_best_subpel_buf,
+                            ps_me_ctxt->u4_subpel_buf_strd,
+                            ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
+                            NULL, 0);
+        }
+    }
+
+    /**************************************************************************
+     *Now copy the MVs to the current PU with qpel scaling
+     * (the values are copied as they are; s_mv_curr was already scaled to
+     * qpel above)
+     ***************************************************************************/
+    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
+    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
+    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
+    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
+
+
+    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
+    ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;   /* NOTE(review): as written, entry 0 gets ref idx 0 only when the mode is PRED_L1 and entry 1 only when it is PRED_L0; every other case yields -1 - confirm this polarity against the consumers of i1_ref_idx */
+
+    /* number of partitions */
+    ps_proc->u4_num_sub_partitions = 1;
+    *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+    /* position in-terms of PU */
+    ps_proc->ps_pu->b4_pos_x = 0;
+    ps_proc->ps_pu->b4_pos_y = 0;
+
+    /* PU size (3 presumably encodes a 16 pel extent as 16 / 4 - 1; TODO
+     * confirm) */
+    ps_proc->ps_pu->b4_wd = 3;
+    ps_proc->ps_pu->b4_ht = 3;
+
+    /* Update min sad conditions: propagate the early-exit flag and the
+     * minimum sad to the current mb */
+    if (ps_me_ctxt->u4_min_sad_reached == 1)
+    {
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+    }
+}
+
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
index c4834a1..bd88a01 100644
--- a/encoder/ih264e_me.h
+++ b/encoder/ih264e_me.h
@@ -42,10 +42,10 @@
 /*****************************************************************************/
 /**
-******************************************************************************
+ ******************************************************************************
 * @brief compute median of 3 elements (a, b, c) and store the output
 * in to result.
This is used for mv prediction -****************************************************************************** + ****************************************************************************** */ #define MEDIAN(a, b, c, result) if (a > b){\ @@ -69,210 +69,285 @@ }\ } - - /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ /** -******************************************************************************* -* -* @brief -* This function populates the length of the codewords for motion vectors in the -* range (-search range, search range) in pixels -* -* @param[in] ps_me -* Pointer to me ctxt -* -* @param[out] pu1_mv_bits -* length of the codeword for all mv's -* -* @remarks The length of the code words are derived from signed exponential -* goloumb codes. -* -******************************************************************************* -*/ -void ih264e_init_mv_bits - ( - me_ctxt_t *ps_me - ); + ******************************************************************************* + * + * @brief + * This function populates the length of the codewords for motion vectors in the + * range (-search range, search range) in pixels + * + * @param[in] ps_me + * Pointer to me ctxt + * + * @param[out] pu1_mv_bits + * length of the codeword for all mv's + * + * @remarks The length of the code words are derived from signed exponential + * goloumb codes. 
+ * + ******************************************************************************* + */ +void ih264e_init_mv_bits(me_ctxt_t *ps_me); /** -******************************************************************************* -* -* @brief The function gives the skip motion vector -* -* @par Description: -* The function gives the skip motion vector -* -* @param[in] ps_left_mb_pu -* pointer to left mb motion vector info -* -* @param[in] ps_top_row_pu -* pointer to top & top right mb motion vector info -* -* @param[out] ps_pred_mv -* pointer to candidate predictors for the current block -* -* @returns The x & y components of the MV predictor. -* -* @remarks The code implements the logic as described in sec 8.4.1.1 in H264 -* specification. -* -******************************************************************************* + ******************************************************************************* + * + * @brief The function computes the parameters for a P skip MB + * + * @par Description: + * The function computes the parameters for a P skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active refernce list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The code implements the logic as described in sec 8.4.1.1 in H264 + * specification. + * + ******************************************************************************* */ -void ih264e_find_skip_motion_vector - ( - process_ctxt_t *ps_proc, - UWORD32 u4_for_me - ); +ih264e_skip_params_ft ih264e_find_pskip_params; /** -******************************************************************************* -* -* @brief motion vector predictor -* -* @par Description: -* The routine calculates the motion vector predictor for a given block, -* given the candidate MV predictors. 
-* -* @param[in] ps_left_mb_pu -* pointer to left mb motion vector info -* -* @param[in] ps_top_row_pu -* pointer to top & top right mb motion vector info -* -* @param[out] ps_pred_mv -* pointer to candidate predictors for the current block -* -* @returns The x & y components of the MV predictor. -* -* @remarks The code implements the logic as described in sec 8.4.1.3 in H264 -* specification. -* Assumptions : 1. Assumes Single reference frame -* 2. Assumes Only partition of size 16x16 -* -******************************************************************************* + ******************************************************************************* + * + * @brief The function computes the parameters for a P skip MB + * + * @par Description: + * The function computes the parameters for a P skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active refernce list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The code implements the logic as described in sec 8.4.1.1 in H264 + * specification. 
+ * + ******************************************************************************* */ -void ih264e_get_mv_predictor - ( - enc_pu_t *ps_left_mb_pu, - enc_pu_t *ps_top_row_pu, - mv_t *ps_pred_mv - ); +ih264e_skip_params_ft ih264e_find_pskip_params_me; /** -******************************************************************************* -* -* @brief This function computes the best motion vector for the current mb -* -* @par Description: -* This function currently does nothing except set motion vectors from external -* source -* -* @param[in] ps_proc -* Process context corresponding to the job -* -* @returns none -* -* @remarks none -* -******************************************************************************* + ******************************************************************************* + * + * @brief The function computes the parameters for a B skip MB + * + * @par Description: + * The function computes the parameters for a B skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active refernce list + * + * @returns + * 1) Updates skip MV in proc + * 2) Returns if the current MB can be coded as skip or not + * + * @remarks The code implements the logic as described in sec 8.4.1.1 in H264 + * specification. + * + ******************************************************************************* */ -void ih264e_compute_me - ( - process_ctxt_t *ps_proc - ); +ih264e_skip_params_ft ih264e_find_bskip_params; /** -******************************************************************************* -* -* @brief This function initializes me ctxt -* -* @par Description: -* Before dispatching the current job to me thread, the me context associated -* with the job is initialized. 
-* -* @param[in] ps_proc -* Process context corresponding to the job -* -* @returns none -* -* @remarks none -* -******************************************************************************* + ******************************************************************************* + * + * @brief The function computes the parameters for a B skip MB + * + * @par Description: + * The function computes the parameters for a B skip MB + * + * @param[in] ps_proc + * Process context + * + * @param[in] u4_for_me + * Flag to indicate the purpose of computing skip + * + * @param[out] ps_pred_mv + * Flag to indicate the current active refernce list + * + * @returns + * 1) Updates skip MV in proc + * 2) The type of SKIP [L0/L1/BI] + * + * @remarks + ******************************************************************************* */ +ih264e_skip_params_ft ih264e_find_bskip_params_me; + +/** + ******************************************************************************* + * + * @brief motion vector predictor + * + * @par Description: + * The routine calculates the motion vector predictor for a given block, + * given the candidate MV predictors. + * + * @param[in] ps_left_mb_pu + * pointer to left mb motion vector info + * + * @param[in] ps_top_row_pu + * pointer to top & top right mb motion vector info + * + * @param[out] ps_pred_mv + * pointer to candidate predictors for the current block + * + * @returns The x & y components of the MV predictor. + * + * @remarks The code implements the logic as described in sec 8.4.1.3 in H264 + * specification. + * Assumptions : 1. 
Assumes Only partition of size 16x16 + * + ******************************************************************************* + */ +void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu, enc_pu_t *ps_top_row_pu, + enc_pu_mv_t *ps_pred_mv, WORD32 i4_ref_list); + +/** + ******************************************************************************* + * + * @brief This function evaluates ME for 2 reference lists + * + * @par Description: + * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +ih264e_compute_me_ft ih264e_compute_me_multi_reflist; + +/** + ******************************************************************************* + * + * @brief This function evaluates ME for single reflist [Pred L0] + * + * @par Description: + * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ +ih264e_compute_me_ft ih264e_compute_me_single_reflist; + +/** + ******************************************************************************* + * + * @brief This function initializes me ctxt + * + * @par Description: + * Before dispatching the current job to me thread, the me context associated + * with the job is initialized.
+ * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * + ******************************************************************************* + */ void ih264e_init_me(process_ctxt_t *ps_proc); /** -******************************************************************************* -* -* @brief This function performs motion estimation for the current NMB -* -* @par Description: -* Intializes input and output pointers required by the function ih264e_compute_me -* and calls the function ih264e_compute_me in a loop to process NMBs. -* -* @param[in] ps_proc -* Process context corresponding to the job -* -* @returns -* -* @remarks none -* -******************************************************************************* -*/ -void ih264e_compute_me_nmb - ( - process_ctxt_t *ps_proc, - UWORD32 u4_nmb_count - ); + ******************************************************************************* + * + * @brief This function performs motion estimation for the current NMB + * + * @par Description: + * Intializes input and output pointers required by the function ih264e_compute_me + * and calls the function ih264e_compute_me in a loop to process NMBs. 
+ * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns + * + * @remarks none + * + ******************************************************************************* + */ +void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count); /** -******************************************************************************* -* -* @brief This function performs MV prediction -* -* @par Description: -* -* @param[in] ps_proc -* Process context corresponding to the job -* -* @returns none -* -* @remarks none -* This function will update the MB availability since intra inter decision -* should be done before the call -* -******************************************************************************* -*/ -void ih264e_mv_pred - ( - process_ctxt_t *ps_proc - ); + ******************************************************************************* + * + * @brief This function performs MV prediction + * + * @par Description: + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * This function will update the MB availability since intra inter decision + * should be done before the call + * + ******************************************************************************* + */ +void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_reflist); /** -******************************************************************************* -* -* @brief This function approximates Pred. MV -* -* @par Description: -* -* @param[in] ps_proc -* Process context corresponding to the job -* -* @returns none -* -* @remarks none -* Motion estimation happens at nmb level. For cost calculations, mv is appro -* ximated using this function -* -******************************************************************************* -*/ -void ih264e_mv_pred_me - ( - process_ctxt_t *ps_proc - ); + ******************************************************************************* + * + * @brief This function approximates Pred. 
MV + * + * @par Description: + * + * @param[in] ps_proc + * Process context corresponding to the job + * + * @returns none + * + * @remarks none + * Motion estimation happens at nmb level. For cost calculations, mv is appro + * ximated using this function + * + ******************************************************************************* + */ +void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list); #endif /* IH264E_ME_H_ */ diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c index bc0e873..6afb727 100644 --- a/encoder/ih264e_modify_frm_rate.c +++ b/encoder/ih264e_modify_frm_rate.c @@ -57,14 +57,17 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ih264e_defs.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_rc_mem_interface.h" #include "ih264e_time_stamp.h" diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c index 670428e..aa84af6 100644 --- a/encoder/ih264e_process.c +++ b/encoder/ih264e_process.c @@ -68,8 +68,8 @@ #include "ih264_defs.h" #include "ih264_debug.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -78,20 +78,21 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264_platform_macros.h" #include "ih264_macros.h" -#include "ih264_error.h" #include "ih264_buf_mgr.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" -#include "ih264_structs.h" #include 
"ih264_common_tables.h" #include "ih264_list.h" #include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_process.h" #include "ithread.h" #include "ih264e_intra_modes_eval.h" @@ -105,15 +106,11 @@ #include "ih264e_deblk.h" #include "ih264e_me.h" #include "ih264e_debug.h" -#include "ih264e_process.h" #include "ih264e_master.h" #include "ih264e_utils.h" #include "irc_mem_req_and_acq.h" -#include "irc_cntrl_param.h" -#include "irc_frame_info_collector.h" #include "irc_rate_control_api.h" #include "ih264e_platform_macros.h" -#include "ih264_padding.h" #include "ime_statistics.h" @@ -274,7 +271,6 @@ IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) * ******************************************************************************* */ -#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + WORD_SIZE - ps_bitstream->i4_bits_left_in_cw) IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) { @@ -284,6 +280,9 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* entropy context */ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; + /* cabac context */ + cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; + /* sps */ sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); @@ -314,7 +313,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* temp var */ WORD32 i4_wd_mbs, i4_ht_mbs; UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; - + WORD32 bitstream_start_offset, bitstream_end_offset; /********************************************************************/ /* BEGIN INIT */ /********************************************************************/ @@ -391,6 +390,13 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* once start of frame / slice is done, you can reset it */ /* it is the responsibility of the caller to set this flag 
*/ ps_entropy->i4_sof = 0; + + if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + BITSTREAM_BYTE_ALIGN(ps_bitstrm); + BITSTREAM_FLUSH(ps_bitstrm); + ih264e_init_cabac_ctxt(ps_entropy); + } } /* begin entropy coding for the mb set */ @@ -399,7 +405,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* init ptrs/indices */ if (ps_entropy->i4_mb_x == i4_wd_mbs) { - ps_entropy->i4_mb_y ++; + ps_entropy->i4_mb_y++; ps_entropy->i4_mb_x = 0; /* packed mb coeff data */ @@ -411,7 +417,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; /* proc map */ - pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; + pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; /* entropy map */ pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; @@ -430,20 +436,31 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) volatile UWORD8 *pu1_buf1; WORD32 idx = ps_entropy->i4_mb_x; - pu1_buf1 = pu1_proc_map + idx; - if(*pu1_buf1) + pu1_buf1 = pu1_proc_map + idx; + if (*pu1_buf1) break; ithread_yield(); } + /* write mb layer */ - ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy); + ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy); + /* Starting bitstream offset for header in bits */ + bitstream_start_offset = GET_NUM_BITS(ps_bitstrm); /* set entropy map */ pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; - u4_mb_idx ++; - ps_entropy->i4_mb_x ++; + u4_mb_idx++; + ps_entropy->i4_mb_x++; + /* check for eof */ + if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + if (ps_entropy->i4_mb_x < i4_wd_mbs) + { + ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } if (ps_entropy->i4_mb_x == i4_wd_mbs) { @@ -459,39 +476,65 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* No need to open a slice at end of frame. 
The current slice can be closed at the time * of signaling eof flag. */ - if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx])) + if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx + != pu1_slice_idx[u4_mb_idx])) { - /* mb skip run */ - if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) - { - if (*ps_entropy->pi4_mb_skip_run) + if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) + { /* mb skip run */ + if ((i4_slice_type != ISLICE) + && *ps_entropy->pi4_mb_skip_run) { + if (*ps_entropy->pi4_mb_skip_run) + { PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); - *ps_entropy->pi4_mb_skip_run = 0; + *ps_entropy->pi4_mb_skip_run = 0; + } } + /* put rbsp trailing bits for the previous slice */ + ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + } + else + { + ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); } - - /* put rbsp trailing bits for the previous slice */ - ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); /* update slice header pointer */ i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; - ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); + ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); /* populate slice header */ ps_entropy->i4_mb_start_add = u4_mb_idx; - ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); + ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, + ps_sps); /* generate slice header */ - ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, - ps_pps, ps_sps); + ps_entropy->i4_error_code |= ih264e_generate_slice_header( + ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps); + if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) + { + BITSTREAM_BYTE_ALIGN(ps_bitstrm); + BITSTREAM_FLUSH(ps_bitstrm); + ih264e_init_cabac_ctxt(ps_entropy); + } + } + else + 
{ + if (CABAC == ps_entropy->u1_entropy_coding_mode_flag + && u4_mb_idx != u4_mb_cnt) + { + ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); + } } } - /* Dont execute any further instructions until store synchronization took place */ DATA_SYNC(); } + + /* Ending bitstream offset for header in bits */ + bitstream_end_offset = GET_NUM_BITS(ps_bitstrm); + ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += + bitstream_end_offset - bitstream_start_offset; } /* check for eof */ @@ -500,30 +543,47 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) /* set end of frame flag */ ps_entropy->i4_eof = 1; } + else + { + if (CABAC == ps_entropy->u1_entropy_coding_mode_flag + && ps_codec->s_cfg.e_slice_mode + != IVE_SLICE_MODE_BLOCKS) + { + ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); + } + } if (ps_entropy->i4_eof) { - /* mb skip run */ - if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) + if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) { - if (*ps_entropy->pi4_mb_skip_run) + /* mb skip run */ + if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) { - PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); - *ps_entropy->pi4_mb_skip_run = 0; + if (*ps_entropy->pi4_mb_skip_run) + { + PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, + ps_entropy->i4_error_code, "mb skip run"); + *ps_entropy->pi4_mb_skip_run = 0; + } } + /* put rbsp trailing bits */ + ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); + } + else + { + ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); } - - /* put rbsp trailing bits */ - ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); /* update current frame stats to rc library */ - if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode) { /* number of bytes to stuff */ WORD32 i4_stuff_bytes; /* update */ - i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt); + i4_stuff_bytes = ih264e_update_rc_post_enc( + 
ps_codec, ctxt_sel, + (ps_proc->ps_codec->i4_poc == 0)); /* cbr rc - house keeping */ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) @@ -537,10 +597,21 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) } } + /* + *Frame number is to be incremented only if the current frame is a + * reference frame. After each successful frame encode, we increment + * frame number by 1 + */ + if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] + && ps_codec->u4_is_curr_frm_ref) + { + ps_codec->i4_frame_num++; + } /********************************************************************/ /* signal the output */ /********************************************************************/ - ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset; + ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = + ps_entropy->ps_bitstrm->u4_strm_buf_offset; DEBUG("entropy status %x", ps_entropy->i4_error_code); } @@ -679,9 +750,9 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) i2_mv_ptr = (WORD16 *)pu1_ptr; - *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx; + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; - *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy; + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; /* end of mb layer */ ps_proc->pv_mb_header_data = i2_mv_ptr; @@ -697,6 +768,79 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) /* end of mb layer */ ps_proc->pv_mb_header_data = pu1_ptr; } + else if(u4_mb_type == B16x16) + { + + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + WORD16 *i2_mv_ptr; + + UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; + + /* mb type plus mode */ + *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; + + /* cbp */ + *pu1_ptr++ = ps_proc->u4_cbp; + + /* mb qp delta */ + *pu1_ptr++ = 
ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; + + /* l0 & l1 me data */ + i2_mv_ptr = (WORD16 *)pu1_ptr; + + if (u4_pred_mode != PRED_L1) + { + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx + - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; + + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy + - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; + } + if (u4_pred_mode != PRED_L0) + { + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx + - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; + + *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy + - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; + } + + /* end of mb layer */ + ps_proc->pv_mb_header_data = i2_mv_ptr; + + } + else if(u4_mb_type == BDIRECT) + { + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + /* mb type plus mode */ + *pu1_ptr++ = u4_mb_type; + + /* cbp */ + *pu1_ptr++ = ps_proc->u4_cbp; + + /* mb qp delta */ + *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; + + ps_proc->pv_mb_header_data = pu1_ptr; + + } + else if(u4_mb_type == BSKIP) + { + UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; + + /* pointer to mb header storage space */ + UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; + + /* mb type plus mode */ + *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; + + /* end of mb layer */ + ps_proc->pv_mb_header_data = pu1_ptr; + } return IH264E_SUCCESS; } @@ -788,12 +932,11 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) /* mb type, mb class, csbp */ *ps_top_left_syn = *ps_top_syn; - if (ps_proc->i4_slice_type == PSLICE) + if (ps_proc->i4_slice_type != ISLICE) { /*****************************************/ /* update top left with top info results */ /*****************************************/ - /* mv */ *ps_top_left_mb_pu = *ps_top_row_pu; } @@ -832,17 +975,13 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); } - if (ps_proc->i4_slice_type == PSLICE) + if ((ps_proc->i4_slice_type == 
PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) { /* mv */ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); - -// /* reset ngbr mv's */ -// ps_top_row_pu->i1_l0_ref_idx = -1; -// ps_top_row_pu->s_l0_mv = zero_mv; -// -// *ps_left_mb_pu = *ps_top_row_pu; } + + *ps_proc->pu4_mb_pu_cnt = 1; } else { @@ -929,7 +1068,8 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) /* update buffers pointers */ ps_proc->pu1_src_buf_luma += MB_SIZE; ps_proc->pu1_rec_buf_luma += MB_SIZE; - ps_proc->pu1_ref_buf_luma += MB_SIZE; + ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; + ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; /* * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, @@ -937,7 +1077,9 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) */ ps_proc->pu1_src_buf_chroma += MB_SIZE; ps_proc->pu1_rec_buf_chroma += MB_SIZE; - ps_proc->pu1_ref_buf_chroma += MB_SIZE; + ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; + ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; + /* Reset cost, distortion params */ @@ -948,6 +1090,10 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) ps_proc->pu4_mb_pu_cnt += 1; + /* Update colocated pu */ + if (ps_proc->i4_slice_type == BSLICE) + ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); + /* deblk ctxts */ if (ps_proc->u4_disable_deblock_level != 1) { @@ -1038,6 +1184,7 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs; ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? 
MAX_NMB : ps_proc->i4_wd_mbs; + /* init buffer pointers */ convert_uv_only = 1; if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)) { @@ -1045,12 +1192,10 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); convert_uv_only = 0; - } else ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); - /* init buffer pointers */ if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || @@ -1069,9 +1214,12 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); - ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); - ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); + /* Temporal back and forward reference buffer */ + ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); + ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); + ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); + ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); /* * Do color space conversion @@ -1208,6 +1356,9 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) /* init mv buffer
ptr */ ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); + /* Init co-located mv buffer */ + ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE)); + if (i4_mb_y == 0) { ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; @@ -1768,9 +1919,23 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; } - /* enable inter 16x16 */ + /* enable inter P16x16 */ u4_valid_modes |= (1 << P16x16); } + else if (ps_proc->i4_slice_type == BSLICE) + { + /* enable intra 16x16 */ + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; + + /* enable intra 4x4 */ + if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) + { + u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; + } + + /* enable inter B16x16 */ + u4_valid_modes |= (1 << B16x16); + } /* init entropy */ @@ -1806,7 +1971,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); /* evaluate inter 16x16 modes */ - if (u4_valid_modes & (1 << P16x16)) + if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) { /* compute nmb me */ if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) @@ -1823,9 +1988,9 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; - ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv); + ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); - ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv); + ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); ps_proc->i4_mb_distortion = 
ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; @@ -1889,7 +2054,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) { /* intra gating in inter slices */ /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ - if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate) + if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) { /* distortion of neighboring blocks */ WORD32 i4_distortion[4]; @@ -1906,6 +2071,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) } + /* If we are going to force intra we need to evaluate intra irrespective of gating */ if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) { @@ -1933,10 +2099,10 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) { ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); } - } - } + } } + } /* is intra */ if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) @@ -1955,13 +2121,14 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) is_intra = 0; } ps_proc->u4_is_intra = is_intra; + ps_proc->ps_pu->b1_intra_flag = is_intra; /* redo MV pred of neighbors in the case intra mb */ /* TODO : currently called unconditionally, needs to be called only in the case of intra * to modify neighbors */ if (ps_proc->i4_slice_type != ISLICE) { - ih264e_mv_pred(ps_proc); + ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); } /* Perform luma mb core coding */ @@ -1973,18 +2140,18 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc) /* coded block pattern */ ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; - /* mb skip */ - if (is_intra == 0) + if (!ps_proc->u4_is_intra) { - if (ps_proc->u4_cbp == 0) + if (ps_proc->i4_slice_type == BSLICE) { - /* get skip mv */ - UWORD32 u4_for_me = 0; - ih264e_find_skip_motion_vector(ps_proc,u4_for_me); - 
- /* skip ? */ - if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx && - ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy) + if (ih264e_find_bskip_params(ps_proc, PRED_L0)) + { + ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP; + } + } + else if(!ps_proc->u4_cbp) + { + if (ih264e_find_pskip_params(ps_proc, PRED_L0)) { ps_proc->u4_mb_type = PSKIP; } @@ -2086,106 +2253,6 @@ UPDATE_MB_INFO: return error_status; } -/** -******************************************************************************* -* -* @brief -* function to receive frame qp and pic type before encoding -* -* @par Description: -* Before encoding the frame, this function calls the rc library for frame qp -* and picture type -* -* @param[in] ps_codec -* Pointer to codec context -* -* @param[in] pic_cnt -* pic count -* -* @param[out] pi4_pic_type -* pic type - -* @returns skip_src -* if the source frame rate and target frame rate are not identical, the encoder -* skips few source frames. skip_src is set when the source need not be encoded. 
-* -* @remarks none -* -******************************************************************************* -*/ -WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type) -{ - /* rate control context */ - rate_control_ctxt_t *ps_rate_control = &ps_codec->s_rate_control; - - /* frame qp */ - UWORD8 u1_frame_qp; - - /* pic type */ - PIC_TYPE_T pic_type = PIC_NA; - - /* should src be skipped */ - WORD32 skip_src = 0; - - /* temp var */ - WORD32 delta_time_stamp = 1; - - /* see if the app requires any specific frame */ - if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME) - { - irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api); - } - - /* call rate control lib to get curr pic type and qp to be used */ - skip_src = ih264e_rc_pre_enc(ps_rate_control->pps_rate_control_api, - ps_rate_control->pps_pd_frm_rate, - ps_rate_control->pps_time_stamp, - ps_rate_control->pps_frame_time, - delta_time_stamp, - (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs), - &ps_rate_control->e_pic_type, - &u1_frame_qp); - - switch (ps_rate_control->e_pic_type) - { - case I_PIC: - pic_type = PIC_I; - break; - - case P_PIC: - pic_type = PIC_P; - break; - - case B_PIC: - pic_type = PIC_B; - break; - - default: - break; - } - - /* is idr? 
*/ - if ((0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) || - ps_codec->force_curr_frame_type == IV_IDR_FRAME) - { - pic_type = PIC_IDR; - } - - /* force frame tag is not sticky */ - if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME) - { - ps_codec->force_curr_frame_type = IV_NA_FRAME; - } - - /* qp */ - ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp]; - - /* pic type */ - *pi4_pic_type = pic_type; - - return skip_src; -} - /** ******************************************************************************* * @@ -2214,7 +2281,7 @@ WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *p * ******************************************************************************* */ -WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt) +WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) { /* proc set base idx */ WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? 
(MAX_PROCESS_CTXT / 2) : 0; @@ -2295,18 +2362,11 @@ WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_ ps_codec->s_rate_control.pps_frame_time, (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), &rc_pic_type, - pic_cnt, + i4_is_first_frm, &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], u1_frame_qp, &ps_codec->s_rate_control.num_intra_in_prev_frame, &ps_codec->s_rate_control.i4_avg_activity); - - /* in case the frame needs to be skipped, the frame num should not be incremented */ - if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) - { - ps_codec->i4_frame_num --; - } - return i4_stuffing_byte; } diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h index 9715434..9cfdac8 100644 --- a/encoder/ih264e_process.h +++ b/encoder/ih264e_process.h @@ -280,36 +280,6 @@ IH264E_ERROR_T ih264e_halfpel_generation */ WORD32 ih264e_process(process_ctxt_t *ps_proc); -/** -******************************************************************************* -* -* @brief -* function to receive frame qp and pic type before encoding -* -* @par Description: -* Before encoding the frame, this function calls the rc library for frame qp -* and picture type -* -* @param[in] ps_codec -* Pointer to codec context -* -* @param[in] pic_cnt -* pic count -* -* @param[out] pi4_pic_type -* pic type - -* @returns skip_src -* if the source frame rate and target frame rate are not identical, the encoder -* skips few source frames. skip_src is set when the source need not be encoded. 
-* -* @remarks none -* -******************************************************************************* -*/ -WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type); - - /** ******************************************************************************* * diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c index 1e2fe4f..1da2f03 100644 --- a/encoder/ih264e_rate_control.c +++ b/encoder/ih264e_rate_control.c @@ -63,6 +63,7 @@ #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" #include "ih264_common_tables.h" +#include "ih264_cabac_tables.h" #include "ih264e_defs.h" #include "ih264e_globals.h" #include "irc_mem_req_and_acq.h" @@ -75,7 +76,9 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_utils.h" #include "irc_trace_support.h" @@ -186,6 +189,7 @@ void ih264e_rc_init(void *pv_rc_api, UWORD32 u4_peak_bit_rate, UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, + WORD32 i4_inter_frm_int, UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp, @@ -230,6 +234,9 @@ void ih264e_rc_init(void *pv_rc_api, u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time); u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time); + /* Init max_inter_frame int */ + i4_max_inter_frm_int = (i4_inter_frm_int == 1) ? 
2 : (i4_inter_frm_int + 2); + /* Initialize the rate control */ irc_initialise_rate_control(pv_rc_api, /* RC handle */ e_rate_control_type, /* RC algo type */ @@ -240,6 +247,7 @@ void ih264e_rc_init(void *pv_rc_api, u4_src_frm_rate, /* Src frame_rate */ u4_max_delay, /* Max buffer delay */ u4_intra_frame_interval, /* Intra frm_interval */ + i4_inter_frm_int, /* Inter frame interval */ pu1_init_qp, /* Init QP array[3]:[I][P][B] */ u4_max_cpb_size, /* Max VBV/CPB Buffer Size */ i4_max_inter_frm_int, /* Max inter frm_interval */ @@ -268,13 +276,13 @@ void ih264e_rc_init(void *pv_rc_api, * ******************************************************************************* */ -picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api) +picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api, + WORD32 *pi4_pic_id, + WORD32 *pi4_pic_disp_order_no) { - WORD32 i4_pic_id = 0; - WORD32 i4_pic_disp_order_no = 0; picture_type_e e_rc_pic_type = P_PIC; - irc_get_picture_details(pv_rc_api, &i4_pic_id, &i4_pic_disp_order_no, + irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no, &e_rc_pic_type); return (e_rc_pic_type); @@ -286,8 +294,9 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api) * @brief Function to get rate control output before encoding * * @par Description -* This function is called before encoding the current frame and gets the qp -* for the current frame from rate control module +* This function is called before queuing the current frame. It decides if we should +*
It also updates RC about +* the acehivble frame rate * * @param[in] ps_rate_control_api * Handle to rate control api @@ -314,138 +323,58 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api) * QP for current frame * * @returns -* Skip or encode the current frame +* Skip or queue the current frame * * @remarks * ******************************************************************************* */ -WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api, - void * ps_pd_frm_rate, - void * ps_time_stamp, - void * ps_frame_time, - WORD32 i4_delta_time_stamp, - WORD32 i4_total_mb_in_frame, - picture_type_e *pe_vop_coding_type, - UWORD8 *pu1_frame_qp) +WORD32 ih264e_update_rc_framerates(void *ps_rate_control_api, + void *ps_pd_frm_rate, + void *ps_time_stamp, + void *ps_frame_time) { - WORD8 i4_skip_src = 0, i4_num_app_skips = 0; + WORD8 i4_skip_src = 0; UWORD32 u4_src_not_skipped_for_dts = 0; - /* Variables for the update_frm_level_info */ - WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE]; - WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0}; - WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0}; - WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0}; - WORD32 i4_total_frame_bits = 0; - WORD32 i4_total_hdr_bits = 0; - WORD32 i4_avg_mb_activity = 0; - WORD32 i4_intra_frm_cost = 0; - UWORD8 u1_is_scd = 0; - - /* Set all the MBs to Intra */ - ai4_tot_mb_in_type[0] = i4_total_mb_in_frame; - ai4_tot_mb_in_type[1] = 0; - - /* If delta time stamp is greater than 1, do rcupdate that many times */ - for (i4_num_app_skips = 0; (i4_num_app_skips < i4_delta_time_stamp - 1); i4_num_app_skips++) - { - /*update the missing frames frm_rate with 0 */ - ih264e_update_pd_frm_rate(ps_pd_frm_rate,0); - - /* Update the time stamp */ - ih264e_update_time_stamp(ps_time_stamp); - - /* Do a pre encode skip update */ - - irc_update_frame_level_info(ps_rate_control_api, - (*pe_vop_coding_type), - ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */ - i4_total_frame_bits, /* Total frame bits 
actually consumed */ - i4_total_hdr_bits, /*header bits for model updation*/ - ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */ - ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */ - ai4_tot_mb_in_type, /* total number of mbs in each mb type */ - i4_avg_mb_activity, /* Average mb activity in frame */ - u1_is_scd, /* Is a scene change detected at the current frame */ - 1, /* If it's a pre-encode skip */ - i4_intra_frm_cost, /* Sum of Intra cost for each frame */ - 0); /* Is pic handling [irc_update_pic_handling_state] done before update */ - } - /* Update the time stamp for the current frame */ ih264e_update_time_stamp(ps_time_stamp); /* Check if a src not needs to be skipped */ i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time, - i4_delta_time_stamp, + 1, &u4_src_not_skipped_for_dts); - /*********************************************************************** - Based on difference in source and target frame rate frames are skipped - ***********************************************************************/ if (i4_skip_src) { + /*********************************************************************** + *Based on difference in source and target frame rate frames are skipped + ***********************************************************************/ /*update the missing frames frm_rate with 0 */ - ih264e_update_pd_frm_rate(ps_pd_frm_rate,0); - - /* Do a pre encode skip update */ - irc_update_frame_level_info(ps_rate_control_api, - (*pe_vop_coding_type), - ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */ - i4_total_frame_bits, /* Total frame bits actually consumed */ - i4_total_hdr_bits, /*header bits for model updation*/ - ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */ - ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */ - ai4_tot_mb_in_type, /* total number of mbs in each mb type */ - i4_avg_mb_activity, /* 
Average mb activity in frame */ - u1_is_scd, /* Is a scene change detected at the current frame */ - 1, /* If it's a pre-encode skip */ - i4_intra_frm_cost, /* Sum of Intra cost for each frame */ - 0); /* Is pic handling [irc_update_pic_handling_state] done before update */ - - /* Set the current frame type to NA */ - *pe_vop_coding_type = BUF_PIC; + ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0); } else { -#define MAX_FRAME_BITS 0x7FFFFFFF -// WORD32 i4_pic_id; -// WORD32 i4_pic_disp_order_no; WORD32 i4_avg_frm_rate, i4_source_frame_rate; - i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time); + i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate( + ps_frame_time); /* Update the frame rate of the frame present with the tgt_frm_rate */ /* If the frm was not skipped due to delta_time_stamp, update the - frame_rate with double the tgt_frame_rate value, so that it makes - up for one of the frames skipped by the application */ - ih264e_update_pd_frm_rate(ps_pd_frm_rate, - i4_source_frame_rate); + frame_rate with double the tgt_frame_rate value, so that it makes + up for one of the frames skipped by the application */ + ih264e_update_pd_frm_rate(ps_pd_frm_rate, i4_source_frame_rate); /* Based on the update get the average frame rate */ i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate); /* Call the RC library function to change the frame_rate to the - actually achieved frm_rate */ + actually achieved frm_rate */ irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate); - - /* --------Rate control related things. Get pic type and frame Qp---------*/ - /* Add picture to the stack. For IPP encoder we push the variable - into the stack and get back the variables by requesting RC. 
- This interface is designed for IPB encoder */ - irc_add_picture_to_stack(ps_rate_control_api, 1); - - /* Query the picture_type */ - *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api); - - /* Get current frame Qp */ - pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api, - (picture_type_e)(pe_vop_coding_type[0]), - MAX_FRAME_BITS); } - return(i4_skip_src); + return (i4_skip_src); } /** @@ -678,8 +607,8 @@ WORD32 ih264e_rc_post_enc(void * ps_rate_control_api, &u1_enc_buf_overflow,&u1_enc_buf_underflow); /* We skip the frame if decoder buffer is underflowing. But we never skip first I frame */ - // if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1)) - if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0)) + if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1)) + // if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0)) { irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]); // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc); diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h index de9466a..cca9ad3 100644 --- a/encoder/ih264e_rate_control.h +++ b/encoder/ih264e_rate_control.h @@ -90,6 +90,9 @@ * @param[in] u4_intra_frame_interval * Intra frame interval * +* @param[in] i4_inter_frm_int +* Inter frame interval +* * @param[in] pu1_init_qp * Initial qp * @@ -120,6 +123,7 @@ void ih264e_rc_init(void *pv_rc_api, UWORD32 u4_peak_bit_rate, UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, + WORD32 i4_inter_frm_int, UWORD8 *pu1_init_qp, WORD32 i4_max_inter_frm_int, UWORD8 *pu1_min_max_qp, @@ -143,13 +147,15 @@ void ih264e_rc_init(void *pv_rc_api, * ******************************************************************************* */ -picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api); +picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api, + WORD32 *pi4_pic_id, + WORD32 *pi4_pic_disp_order_no); /** 
******************************************************************************* * -* @brief Function to get rate control output before encoding +* @brief Function to set frame rate inside RC. * * @par Description * This function is called before encoding the current frame and gets the qp @@ -167,18 +173,6 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api); * @param[in] ps_frame_time * Handle to frame time context * -* @param[in] i4_delta_time_stamp -* Time stamp difference between frames -* -* @param[in] i4_total_mb_in_frame -* Total Macro Blocks in frame -* -* @param[in/out] pe_vop_coding_type -* Picture coding type(I/P/B) -* -* @param[in/out] pu1_frame_qp -* QP for current frame -* * @returns * Skip or encode the current frame * @@ -186,14 +180,11 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api); * ******************************************************************************* */ -WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api, - void * ps_pd_frm_rate, - void * ps_time_stamp, - void * ps_frame_time, - WORD32 i4_delta_time_stamp, - WORD32 i4_total_mb_in_frame, - picture_type_e *pe_vop_coding_type, - UWORD8 *pu1_frame_qp); +WORD32 ih264e_update_rc_framerates(void *ps_rate_control_api, + void *ps_pd_frm_rate, + void *ps_time_stamp, + void *ps_frame_time + ); /** ******************************************************************************* diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c index e4d5781..39a5311 100644 --- a/encoder/ih264e_rc_mem_interface.c +++ b/encoder/ih264e_rc_mem_interface.c @@ -62,10 +62,10 @@ #include "iv2.h" #include "ive2.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "ih264e.h" #include "ithread.h" -#include "ih264e.h" #include "ih264_defs.h" #include "ih264_debug.h" #include "ih264_macros.h" @@ -80,12 +80,14 @@ #include "ih264_deblk_edge_filters.h" #include "ih264_common_tables.h" #include "ih264_list.h" +#include 
"ih264_cabac_tables.h" #include "ih264e_error.h" #include "ih264e_defs.h" #include "ih264e_bitstream.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_master.h" #include "ih264_buf_mgr.h" @@ -93,12 +95,8 @@ #include "ih264e_utils.h" #include "ih264e_platform_macros.h" #include "ih264_cavlc_tables.h" -#include "ih264e_config.h" #include "ih264e_statistics.h" #include "ih264e_trace.h" -#include "ih264e_statistics.h" -#include "ih264e_error.h" -#include "ih264e_utils.h" #include "ih264e_fmt_conv.h" #include "ih264e_cavlc.h" #include "ih264e_rc_mem_interface.h" diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h index 1043a53..80f2957 100644 --- a/encoder/ih264e_structs.h +++ b/encoder/ih264e_structs.h @@ -38,6 +38,15 @@ #ifndef IH264E_STRUCTS_H_ #define IH264E_STRUCTS_H_ +/*****************************************************************************/ +/* Structure definitions */ +/*****************************************************************************/ + +/* Early declaration of structs */ +typedef struct _codec_t codec_t; +typedef struct _proc_t process_ctxt_t; + + /*****************************************************************************/ /* Extern Function type definitions */ /*****************************************************************************/ @@ -154,6 +163,22 @@ typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf WORD32 u4_422i_stride); + +/** +****************************************************************************** + * @brief ME evaluation +****************************************************************************** + */ +typedef void ih264e_compute_me_ft(process_ctxt_t *); + +/** +****************************************************************************** + * @brief SKIP decision +****************************************************************************** 
+ */ +typedef WORD32 ih264e_skip_params_ft(process_ctxt_t *, WORD32); + + /*****************************************************************************/ /* Enums */ /*****************************************************************************/ @@ -196,11 +221,27 @@ typedef enum */ typedef struct { + /** + * Motion Vector + */ + mv_t s_mv; /** - * L0 Motion Vector + * Ref index */ - mv_t s_l0_mv; + WORD8 i1_ref_idx; + +} enc_pu_mv_t; + + +/* + * Total Pu info for an MB + */ +typedef struct +{ + + /* Array with ME info for all lists */ + enc_pu_mv_t s_me_info[2]; /** * PU X position in terms of min PU (4x4) units @@ -223,13 +264,18 @@ typedef struct UWORD32 b4_ht : 2; /** - * L0 Ref index + * Intra or Inter flag for each partition - 0 or 1 + */ + UWORD32 b1_intra_flag : 1; + + /** + * PRED_L0, PRED_L1, PRED_BI */ - WORD8 i1_l0_ref_idx; + UWORD32 b2_pred_mode : 2; + } enc_pu_t; -typedef struct _codec_t codec_t; typedef struct { @@ -336,7 +382,7 @@ typedef struct UWORD32 u4_max_bitrate; /** Maximum number of consecutive B frames */ - UWORD32 u4_max_num_bframes; + UWORD32 u4_num_bframes; /** Content type Interlaced/Progressive */ IV_CONTENT_TYPE_T e_content_type; @@ -473,9 +519,6 @@ typedef struct /** IDR frame interval */ UWORD32 u4_idr_frm_interval; - /** consecutive B frames */ - UWORD32 u4_num_b_frames; - /** Disable deblock level (0: Enable completely, 3: Disable completely */ UWORD32 u4_disable_deblock_level; @@ -859,6 +902,10 @@ typedef struct */ typedef struct { + /** + * Pointer to the cabac context + */ + cabac_ctxt_t *ps_cabac; /** * start of frame / start of slice flag @@ -1142,9 +1189,9 @@ typedef struct WORD32 i4_mb_cost; WORD32 i4_mb_distortion; + enc_pu_mv_t as_skip_mv[4]; - mv_t s_skip_mv; - mv_t s_pred_mv; + enc_pu_mv_t as_pred_mv[2]; block_neighbors_t s_ngbr_avbl; @@ -1165,7 +1212,7 @@ typedef struct * @brief Pixel processing thread context ****************************************************************************** */ -typedef struct +struct 
_proc_t { /** * entropy context @@ -1210,12 +1257,12 @@ typedef struct /** * Ref pointer to current MB luma */ - UWORD8 *pu1_ref_buf_luma; + UWORD8 *apu1_ref_buf_luma[MAX_REF_PIC_CNT]; /** * Ref pointer to current MB chroma */ - UWORD8 *pu1_ref_buf_chroma; + UWORD8 *apu1_ref_buf_chroma[MAX_REF_PIC_CNT]; /** * pointer to luma plane of input buffer (base :: mb (0,0)) @@ -1230,7 +1277,7 @@ typedef struct /** * pointer to luma plane of ref buffer (base :: mb (0,0)) */ - UWORD8 *pu1_ref_buf_luma_base; + UWORD8 *apu1_ref_buf_luma_base[MAX_REF_PIC_CNT]; /** * pointer to chroma plane of input buffer (base :: mb (0,0)) @@ -1256,7 +1303,7 @@ typedef struct /** * pointer to chroma plane of reconstructed buffer (base :: mb (0,0)) */ - UWORD8 *pu1_ref_buf_chroma_base; + UWORD8 *apu1_ref_buf_chroma_base[MAX_REF_PIC_CNT]; /** * Pointer to ME NMB info @@ -1503,10 +1550,20 @@ typedef struct */ enc_pu_t *ps_pu; + /** + * Pointer to the pu of current co-located MB in list 1 + */ + enc_pu_t *ps_colpu; + /** * predicted motion vector */ - mv_t *ps_pred_mv; + enc_pu_mv_t *ps_skip_mv; + + /** + * predicted motion vector + */ + enc_pu_mv_t *ps_pred_mv; /** * top row mb syntax information base @@ -1554,7 +1611,6 @@ typedef struct */ enc_pu_t s_top_left_mb_pu_ME; - /** * mb neighbor availability pointer */ @@ -1589,11 +1645,6 @@ typedef struct */ UWORD8 *pu1_top_mb_intra_modes; - /** - * skip motion vector info - */ - mv_t *ps_skip_mv; - /** * left mb motion vector */ @@ -1802,9 +1853,14 @@ typedef struct /** * Reference picture for the current picture - * TODO: Only 1 reference assumed currently + * TODO: Only 2 reference assumed currently + */ + pic_buf_t *aps_ref_pic[MAX_REF_PIC_CNT]; + + /** + * Reference MV buff for the current picture */ - pic_buf_t *ps_ref_pic; + mv_buf_t *aps_mv_buf[MAX_REF_PIC_CNT]; /** * frame info used by RC @@ -1834,27 +1890,10 @@ typedef struct */ UWORD32 u4_compute_recon; - /* - * Buffer for holding half_x (1/2,1 - interpolated) - * values when halfpel 
generation - * for the entire plane is not enabled - */ - UWORD8 *pu1_half_x; - - /* - * Buffer for holding half_x (1,1/2 - interpolated) - * values when halfpel generation - * for the entire plane is not enabled - */ - UWORD8 *pu1_half_y; - /* - * Buffer for holding half_x (1/2,1/2 - interpolated) - * values when halfpel generation - * for the entire plane is not enabled - * + * Temporary buffers to be used for subpel computation */ - UWORD8 *pu1_half_xy; + UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT]; /* * Buffer holding best sub pel values @@ -1866,7 +1905,7 @@ typedef struct */ UWORD32 u4_bst_spel_buf_strd; -} process_ctxt_t; +}; /** ****************************************************************************** @@ -1921,12 +1960,13 @@ typedef struct struct _codec_t { /** - * Number of coded pictures + * Id of current pic (input order) */ - WORD32 i4_coded_pic_cnt; + WORD32 i4_poc; /** * Number of encode frame API calls made + * This variable must only be used for context selection [Read only] */ WORD32 i4_encode_api_call_cnt; @@ -2305,6 +2345,7 @@ struct _codec_t */ ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS]; + /* * Air pic cnt * Contains the number of pictures that have been encoded with air @@ -2319,11 +2360,15 @@ struct _codec_t UWORD16 *pu2_intr_rfrsh_map; /* - * Alternate reference frames * Indicates if the current frame is used as a reference frame */ UWORD32 u4_is_curr_frm_ref; + /* + * Indicates if there can be non reference frames in the stream + */ + WORD32 i4_non_ref_frames_in_stream; + /* * Memory for color space conversion for luma plane */ @@ -2510,6 +2555,18 @@ struct _codec_t ime_compute_sad_ft *apf_compute_sad_16x16[2]; ime_compute_sad_ft *pf_compute_sad_16x8; + + /** + * Function pointer for computing ME + * 1 for PSLICE and 1 for BSLICE + */ + ih264e_compute_me_ft *apf_compute_me[2]; + + /** + * Function pointers for computing SKIP parameters + */ + ih264e_skip_params_ft *apf_find_skip_params_me[2]; + /** * fn ptrs for memory handling 
operations */ @@ -2545,8 +2602,7 @@ struct _codec_t /** * write mb layer for a given slice I, P, B */ - IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt ); - + IH264E_ERROR_T (*pf_write_mb_syntax_layer[2][3]) ( entropy_ctxt_t *ps_ent_ctxt ); /** * Output buffer @@ -2562,5 +2618,18 @@ struct _codec_t * rate control context */ rate_control_ctxt_t s_rate_control; + + /** + * input buffer queue + */ + inp_buf_t as_inp_list[MAX_NUM_BFRAMES]; + + + /* + *Flag to indicate if we have recived the last input frame + */ + WORD32 i4_last_inp_buff_received; + }; + #endif /* IH264E_STRUCTS_H_ */ diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c index a6a7f3c..8afa24d 100644 --- a/encoder/ih264e_time_stamp.c +++ b/encoder/ih264e_time_stamp.c @@ -67,6 +67,7 @@ #include "ih264_defs.h" #include "ih264e_defs.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" @@ -78,6 +79,8 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_rc_mem_interface.h" #include "ih264e_time_stamp.h" diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c index 3657f33..74fd001 100644 --- a/encoder/ih264e_utils.c +++ b/encoder/ih264e_utils.c @@ -68,8 +68,8 @@ #include "ih264_defs.h" #include "ih264_size_defs.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -78,6 +78,7 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" +#include "ih264_cabac_tables.h" #include "ih264_macros.h" #include "ih264_common_tables.h" #include "ih264_debug.h" @@ -91,7 +92,9 @@ #include "irc_cntrl_param.h" 
#include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_utils.h" #include "ih264e_config.h" #include "ih264e_statistics.h" @@ -99,9 +102,7 @@ #include "ih264_list.h" #include "ih264e_encode_header.h" #include "ih264e_me.h" -#include "ime_defs.h" #include "ime.h" -#include "ih264e_rate_control.h" #include "ih264e_core_coding.h" #include "ih264e_rc_mem_interface.h" #include "ih264e_time_stamp.h" @@ -115,6 +116,235 @@ /* Function Definitions */ /*****************************************************************************/ +/** + ******************************************************************************* + * + * @brief + * Queues the current buffer, gets back a another buffer for encoding with corrent + * picture type + * + * @par Description: + * This function performs 3 distinct but related functions. + * 1) Maintains an input queue [Note the the term queue donot imply a + * first-in first-out logic here] that queues input and dequeues them so + * that input frames can be encoded at any predetermined encoding order + * 2) Uses RC library to decide which frame must be encoded in current pass + * and which picture type it must be encoded to. + * 3) Uses RC library to decide the QP at which current frame has to be + * encoded + * 4) Determines if the current picture must be encoded or not based on + * PRE-ENC skip + * + * Input queue is used for storing input buffers till they are used for + * encoding. This queue is maintained at ps_codec->as_inp_list. Whenever a + * valid input comes, it is added to the end of queue. This same input is + * added to RC queue using the identifier as ps_codec->i4_pic_cnt. Hence any + * pic from RC can be located in the input queue easily. + * + * The dequeue operation does not start till we have ps_codec->s_cfg.u4_max_num_bframes + * frames in the queue. 
This is done in order to ensure that once output starts + * we will have a constant stream of output with no gaps. + * + * The output frame order is governed by the RC library. Whenever we dequeue a + * buffer from the RC library, it ensures that we will get them in encoding order. + * With the output of the RC library, we can use the picture id to dequeue the + * corresponding buffer from the input queue and encode it. + * + * Condition at the end of stream. + * ------------------------------- + * At the last valid buffer from the app, we will get ps_ive_ip->u4_is_last + * to be set. This will be given to the lib when the appropriate input buffer is + * given to encoding. + * + * Since the output is not in sync with the input, we will have frames to + * encode even after we receive the last valid input buffer. Hence we have to + * make sure that we do not queue any new buffers once we get the flag [It may + * mess up GOP ?]. This is achieved by setting ps_codec->i4_last_inp_buff_received + * to act as a permanent marker for the last frame received [This may not be needed, + * because in our current app, all buffers after the last are marked as last. + * But can we rely on that?]. Hence after this flag is set no new buffers are + * queued. + * + * @param[in] ps_codec + * Pointer to codec descriptor + * + * @param[in] ps_ive_ip + * Current input buffer to the encoder + * + * @param[out] ps_enc_buff + * Buffer to be encoded in the current pass + * + * @returns + * Flag indicating if we have a pre-enc skip or not + * + * @remarks + * TODO (bpic) + * The check for null and is last is redundant.
+ * Need to see if we can remove it + * + ******************************************************************************* + */ +WORD32 ih264e_input_queue_update(codec_t *ps_codec, + ive_video_encode_ip_t *ps_ive_ip, + inp_buf_t *ps_enc_buff) +{ + + inp_buf_t *ps_inp_buf; + picture_type_e e_pictype; + WORD32 i4_skip; + UWORD32 ctxt_sel, u4_pic_id, u4_pic_disp_id; + UWORD8 u1_frame_qp; + UWORD32 max_frame_bits = 0x7FFFFFFF; + + /* Mark that the last input frame has been received */ + if (ps_ive_ip->u4_is_last == 1) + { + ps_codec->i4_last_inp_buff_received = 1; + } + + if (ps_ive_ip->s_inp_buf.apv_bufs[0] == NULL + && !ps_codec->i4_last_inp_buff_received) + { + ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL; + return 0; + } + + /*************************************************************************** + * Check for pre enc skip + * When src and target frame rates donot match, we skip some frames to + * maintain the relation ship between them + **************************************************************************/ + { + WORD32 skip_src; + + skip_src = ih264e_update_rc_framerates( + ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->s_rate_control.pps_pd_frm_rate, + ps_codec->s_rate_control.pps_time_stamp, + ps_codec->s_rate_control.pps_frame_time); + + if (skip_src) return 1; + } + + /*************************************************************************** + *Queue the input to the queue + **************************************************************************/ + ps_inp_buf = &(ps_codec->as_inp_list[ps_codec->i4_pic_cnt + % MAX_NUM_BFRAMES]); + + /* copy input info. 
to internal structure */ + ps_inp_buf->s_raw_buf = ps_ive_ip->s_inp_buf; + ps_inp_buf->u4_timestamp_low = ps_ive_ip->u4_timestamp_low; + ps_inp_buf->u4_timestamp_high = ps_ive_ip->u4_timestamp_high; + ps_inp_buf->u4_is_last = ps_ive_ip->u4_is_last; + ps_inp_buf->pv_mb_info = ps_ive_ip->pv_mb_info; + ps_inp_buf->u4_mb_info_type = ps_ive_ip->u4_mb_info_type; + ps_inp_buf->pv_pic_info = ps_ive_ip->pv_pic_info; + ps_inp_buf->u4_pic_info_type = ps_ive_ip->u4_pic_info_type; + + /*************************************************************************** + * Now we should add the picture to RC stack here + **************************************************************************/ + irc_add_picture_to_stack(ps_codec->s_rate_control.pps_rate_control_api, + ps_codec->i4_pic_cnt); + + /* + * Rc has a problem with this delayed processing + */ + if (ps_codec->i4_encode_api_call_cnt + < (WORD32)(ps_codec->s_cfg.u4_num_bframes)) + { + ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL; + return 0; + } + + /*************************************************************************** + * Get a new pic to encode + **************************************************************************/ + + /* + * If a frame is forced, apply it + * We cannot force an I frame for first frame + */ + if ((ps_codec->i4_frame_num > 0)&& + ((ps_codec->force_curr_frame_type == IV_I_FRAME)|| + (ps_codec->force_curr_frame_type == IV_IDR_FRAME))) + { + irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api); + } + + /* Query the picture_type */ + e_pictype = ih264e_rc_get_picture_details( + ps_codec->s_rate_control.pps_rate_control_api, (WORD32 *)(&u4_pic_id), + (WORD32 *)(&u4_pic_disp_id)); + + switch (e_pictype) + { + case I_PIC: + ps_codec->pic_type = PIC_I; + break; + case P_PIC: + ps_codec->pic_type = PIC_P; + break; + case B_PIC: + ps_codec->pic_type = PIC_B; + break; + default: + ps_codec->pic_type = PIC_NA; + ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL; + return 0; + } + + + ps_codec->pic_type = ( 
(u4_pic_id % ps_codec->s_cfg.u4_idr_frm_interval) || + (ps_codec->force_curr_frame_type != IV_IDR_FRAME) ) ? + ps_codec->pic_type : PIC_IDR; + + ps_codec->force_curr_frame_type = IV_NA_FRAME; + + /* Get current frame Qp */ + u1_frame_qp = (UWORD8)irc_get_frame_level_qp( + ps_codec->s_rate_control.pps_rate_control_api, e_pictype, + max_frame_bits); + ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp]; + + /* + * copy the pic id to poc because the display order is assumed to be same + * as input order + */ + ps_codec->i4_poc = u4_pic_id; + + /*************************************************************************** + * Now retrieve the correct picture from the queue + **************************************************************************/ + + /* Mark the skip flag */ + i4_skip = 0; + ctxt_sel = ps_codec->i4_encode_api_call_cnt & 0x01; + ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip; + + /* Get a buffer to encode */ + ps_inp_buf = &(ps_codec->as_inp_list[u4_pic_id % MAX_NUM_BFRAMES]); + + /* copy dequeued input to output */ + ps_enc_buff->s_raw_buf = ps_inp_buf->s_raw_buf; + ps_enc_buff->u4_timestamp_low = ps_inp_buf->u4_timestamp_low; + ps_enc_buff->u4_timestamp_high = ps_inp_buf->u4_timestamp_high; + ps_enc_buff->u4_is_last = ps_inp_buf->u4_is_last; + ps_enc_buff->pv_mb_info = ps_inp_buf->pv_mb_info; + ps_enc_buff->u4_mb_info_type = ps_inp_buf->u4_mb_info_type; + ps_enc_buff->pv_pic_info = ps_inp_buf->pv_pic_info; + ps_enc_buff->u4_pic_info_type = ps_inp_buf->u4_pic_info_type; + + if (ps_enc_buff->u4_is_last) + { + ps_codec->pic_type = PIC_NA; + } + + /* Return the buffer status */ + return (0); +} + /** ******************************************************************************* * @@ -331,7 +561,7 @@ WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, WORD32 num_samples; WORD32 max_num_bufs; WORD32 pad = MAX(horz_pad, vert_pad); - UNUSED(pic_size); + /* * If num_ref_frames and num_reorder_frmaes is specified * Use minimum value 
@@ -343,6 +573,7 @@ WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, /* Maximum number of luma samples in a picture at given level */ num_luma_samples = gai4_ih264_max_luma_pic_size[lvl_idx]; + num_luma_samples = MAX(num_luma_samples, pic_size); /* Account for chroma */ num_samples = num_luma_samples * 3 / 2; @@ -1002,8 +1233,9 @@ IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec) ps_codec->s_cfg.u4_target_bitrate, ps_codec->s_cfg.u4_max_bitrate, ps_codec->s_cfg.u4_vbv_buffer_delay, - ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp, - H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp, + ps_codec->s_cfg.u4_i_frm_interval, + ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp, + ps_codec->s_cfg.u4_num_bframes + 2 , au1_min_max_qp, ps_codec->s_cfg.u4_max_level); } @@ -1020,6 +1252,11 @@ IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec) DEBUG_HISTOGRAM_INIT(); + + /* Init dependecy vars */ + ps_codec->i4_last_inp_buff_received = 0; + + return IH264E_SUCCESS; } @@ -1067,7 +1304,8 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma; /* ref buffer set */ - pic_buf_t *ps_ref_pic; + pic_buf_t *aps_ref_pic[MAX_REF_PIC_CNT] = {NULL, NULL}; + mv_buf_t *aps_mv_buf[MAX_REF_PIC_CNT] = {NULL, NULL}; WORD32 ref_set_id; /* pic time stamp */ @@ -1080,9 +1318,6 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) /* curr pic type */ PIC_TYPE_T *pic_type = &ps_codec->pic_type; - /* should src be skipped */ - WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel]; - /* Diamond search Iteration Max Cnt */ UWORD32 u4_num_layers = (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ? 
@@ -1094,62 +1329,52 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) /********************************************************************/ /* INITIALIZE CODEC CONTEXT */ /********************************************************************/ - - /* pre enc rc call */ - *skip_src = ih264e_set_rc_pic_params(ps_codec, - ps_codec->i4_encode_api_call_cnt, - (WORD32 *) pic_type); - if (*skip_src == 1) + /* slice_type */ + if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type)) { - ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf = - *ps_inp_buf; - - /* inform output bytes generated as zero */ - ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0; - - return error_status; + ps_codec->i4_slice_type = ISLICE; } - - /********************************************************************/ - /* Alternate reference frame */ - /********************************************************************/ - if (ps_codec->s_cfg.u4_enable_alt_ref) + else if (PIC_P == *pic_type) { - if (PIC_IDR == *pic_type || PIC_I == *pic_type) - { - ps_codec->u4_is_curr_frm_ref = 1; - } - else - { - ps_codec->u4_is_curr_frm_ref = 1; - if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1)) - ps_codec->u4_is_curr_frm_ref = 0; - } - - if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0)) - { - ps_codec->i4_frame_num++; - } + ps_codec->i4_slice_type = PSLICE; } - else + else if(PIC_B == *pic_type) { - ps_codec->u4_is_curr_frm_ref = 1; - - ps_codec->i4_frame_num++; + ps_codec->i4_slice_type = BSLICE; } - /* slice_type */ - ps_codec->i4_slice_type = PSLICE; - if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type)) + /*************************************************************************** + * Set up variables for sending frame number, poc and reference + * a) Set up alt ref too + **************************************************************************/ + + /* In case of alt ref and B pics we will have non reference frame in 
stream */ + if (ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes) { - ps_codec->i4_slice_type = ISLICE; + ps_codec->i4_non_ref_frames_in_stream = 1; } - else if (PIC_P == *pic_type) + + /* Check and set if the current frame is reference or not */ + ps_codec->u4_is_curr_frm_ref = 0; + + /* This frame is reference if its not a B pic, pending approval from alt ref */ + ps_codec->u4_is_curr_frm_ref = (*pic_type != PIC_B); + + /* In case if its a P pic, we will decide according to alt ref also */ + if (ps_codec->s_cfg.u4_enable_alt_ref && (*pic_type == PIC_P) + && (ps_codec->i4_pic_cnt + % (ps_codec->s_cfg.u4_enable_alt_ref + 1))) { - ps_codec->i4_slice_type = PSLICE; + ps_codec->u4_is_curr_frm_ref = 0; } + /* + * Override everything in case of IDR + * Note that in case of IDR, at this point ps_codec->u4_is_curr_frm_ref must + * be 1 + */ + /* is this an IDR pic */ ps_codec->u4_is_idr = 0; @@ -1165,6 +1390,10 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) ps_codec->i4_idr_pic_id++; } + /*************************************************************************** + * Set up Deblock + **************************************************************************/ + /* set deblock disable flags based on disable deblock level */ ps_codec->i4_disable_deblk_pic = 1; @@ -1235,93 +1464,132 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) ih264e_populate_pps(ps_codec, ps_pps); } - /* Reference and MV bank Buffer Manager */ + /*************************************************************************** + * Reference and MV bank Buffer Manager + * Here we will + * 1) Find the correct ref pics for the current frame + * 2) Free the ref pic that is not going to be used anywhere + * 3) Find a free buff from the list and assign it as the recon of + * current frame + * + * 1) Finding correct ref pic + * All pics needed for future are arranged in a picture list called + * ps_codec->as_ref_set. 
Each picture in this will have a pic buffer and + * MV buffer that is marked appropriately as BUF_MGR_REF, BUF_MGR_IO or + * BUF_MGR_CODEC. Also the pic_cnt and poc will also be present. + * Hence to find the ref pic we will loop through the list and find + * 2 pictures with maximum i4_pic_cnt . + * + * note that i4_pic_cnt == -1 is used to filter uninit ref pics. + * Now since we only have max two ref pics, we will always find max 2 + * ref pics. + + * + * 2) 3) Self explanatory + ***************************************************************************/ { - /* min pic cnt among the list of pics stored in ref list */ - WORD32 min_pic_cnt; + /* Search for buffs with maximum pic cnt */ - /* max pic cnt among the list of pics stored in ref list */ - WORD32 max_pic_cnt; + WORD32 max_pic_cnt[] = { -1, -1 }; - /* temp var */ - WORD32 i; + mv_buf_t *ps_mv_buf_to_free[] = { NULL, NULL }; - ps_ref_pic = NULL; + /* temp var */ + WORD32 i, buf_status; - /* get reference picture when necessary */ - /* Only nearest picture encoded (max pic cnt) is used as reference */ - if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I)) + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) { - max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt; + if (ps_codec->as_ref_set[i].i4_pic_cnt == -1) + continue; + + buf_status = ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + /* Ideally we should look for buffer status of MV BUFF also. But since + * the correponding MV buffs also will be at the same state. It dosent + * matter as of now. 
But the check will make the logic better */ + if ((max_pic_cnt[0] < ps_codec->as_ref_set[i].i4_pic_cnt) + && (buf_status & BUF_MGR_REF)) + { + if (max_pic_cnt[1] < ps_codec->as_ref_set[i].i4_pic_cnt) + { + max_pic_cnt[0] = max_pic_cnt[1]; + aps_ref_pic[0] = aps_ref_pic[1]; + aps_mv_buf[0] = aps_mv_buf[1]; - ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf; + ps_mv_buf_to_free[0] = ps_mv_buf_to_free[1]; - /* loop through to get the max pic cnt among the list of pics stored in ref list */ - for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++) - { - if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt) + max_pic_cnt[1] = ps_codec->as_ref_set[i].i4_pic_cnt; + aps_ref_pic[1] = ps_codec->as_ref_set[i].ps_pic_buf; + aps_mv_buf[1] = ps_codec->as_ref_set[i].ps_mv_buf; + ps_mv_buf_to_free[1] = ps_codec->as_ref_set[i].ps_mv_buf; + + } + else { - max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt; - ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf; + max_pic_cnt[0] = ps_codec->as_ref_set[i].i4_pic_cnt; + aps_ref_pic[0] = ps_codec->as_ref_set[i].ps_pic_buf; + aps_mv_buf[0] = ps_codec->as_ref_set[i].ps_mv_buf; + ps_mv_buf_to_free[0] = ps_codec->as_ref_set[i].ps_mv_buf; } } } - /* get a location at which the curr pic info can be stored for future reference */ - ref_set_id = -1; - - for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) + /* + * Now if the current picture is I or P, we discard the back ref pic and + * assign forward ref as backward ref + */ + if (*pic_type != PIC_B) { - if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt) + if (ps_mv_buf_to_free[0]) { - ref_set_id = i; - break; - } - } + /* release this frame from reference list */ + ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, + ps_mv_buf_to_free[0]->i4_buf_id, + BUF_MGR_REF); - /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */ - if (ref_set_id == -1) - { - /* pic info */ - pic_buf_t *ps_cur_pic; - - /* mv info */ - mv_buf_t *ps_cur_mv_buf; - - ref_set_id = 0; - min_pic_cnt = 
ps_codec->as_ref_set[0].i4_pic_cnt; - - /* loop through to get the min pic cnt among the list of pics stored in ref list */ - for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++) - { - if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt) - { - min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt; - ref_set_id = i; - } + ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, + aps_ref_pic[0]->i4_buf_id, BUF_MGR_REF); } - ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf; - - ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf; + max_pic_cnt[0] = max_pic_cnt[1]; + aps_ref_pic[0] = aps_ref_pic[1]; + aps_mv_buf[0] = aps_mv_buf[1]; - /* release this frame from reference list */ - ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, - ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF); - - ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, - ps_cur_pic->i4_buf_id, BUF_MGR_REF); + /* Dummy */ + max_pic_cnt[1] = -1; } - if (ps_codec->s_cfg.u4_enable_recon) + /* + * Mark all reference pic with unused buffers to be free + * We need this step since each one, ie ref, recon io etc only unset their + * respective flags. 
Hence we need to combine togather and mark the ref set + * accordingly + */ + ref_set_id = -1; + for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++) { - ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr); + if (ps_codec->as_ref_set[i].i4_pic_cnt == -1) + { + ref_set_id = i; + continue; + } - if (ret != IH264_SUCCESS) + buf_status = ih264_buf_mgr_get_status( + ps_codec->pv_ref_buf_mgr, + ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id); + + if ((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0) { - return IH264E_NO_FREE_RECONBUF; + ps_codec->as_ref_set[i].i4_pic_cnt = -1; + ps_codec->as_ref_set[i].i4_poc = 32768; + + ref_set_id = i; } } + /* An asssert failure here means we donot have any free buffs */ + ASSERT(ref_set_id >= 0); } { @@ -1353,7 +1621,6 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) * and getting a buffer id to free */ ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt; - ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id; } @@ -1375,7 +1642,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) } /* mark the buffer as needed for reference if the curr pic is available for ref */ - if (1 == ps_codec->u4_is_curr_frm_ref) + if (ps_codec->u4_is_curr_frm_ref) { ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id, BUF_MGR_REF); @@ -1392,7 +1659,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high; ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low; - ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt; + ps_cur_pic->i4_abs_poc = ps_codec->i4_poc; ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb; ps_cur_pic->i4_buf_id = cur_pic_buf_id; @@ -1401,18 +1668,17 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma; } - /* in case the current picture is used for reference then add it to the reference set */ - 
if (ps_codec->u4_is_curr_frm_ref - && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I) - || (*pic_type == PIC_P))) + /* + * Add the current picture to ref list independent of the fact that it is used + * as reference or not. This is because, now recon is not in sync with output + * hence we may need the current recon after some delay. By adding it to ref list + * we can retrieve the recon any time we want. The information that it is used + * for ref can still be found by checking the buffer status of pic buf. + */ { ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt; - - /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */ - ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt; - + ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_poc; ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf; - ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic; } @@ -1592,16 +1858,37 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) /* Pointer to current pictures mv buffers */ ps_proc->ps_cur_mv_buf = ps_mv_buf; - /* pointer to ref picture */ - ps_proc->ps_ref_pic = ps_ref_pic; + /* + * pointer to ref picture + * 0 : Temporal back reference + * 1 : Temporal forward reference + */ + ps_proc->aps_ref_pic[PRED_L0] = aps_ref_pic[PRED_L0]; + ps_proc->aps_ref_pic[PRED_L1] = aps_ref_pic[PRED_L1]; + if (ps_codec->pic_type == PIC_B) + { + ps_proc->aps_mv_buf[PRED_L0] = aps_mv_buf[PRED_L0]; + ps_proc->aps_mv_buf[PRED_L1] = aps_mv_buf[PRED_L1]; + } + else + { + /* + * Else is dummy since for non B pic we does not need this + * But an assignment here will help in not having a segfault + * when we calcualte colpic in P slices + */ + ps_proc->aps_mv_buf[PRED_L0] = ps_mv_buf; + ps_proc->aps_mv_buf[PRED_L1] = ps_mv_buf; + } if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I)) { - /* ref pointer luma */ - ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma; + /* temporal back an forward 
ref pointer luma and chroma */ + ps_proc->apu1_ref_buf_luma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_luma; + ps_proc->apu1_ref_buf_chroma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_chroma; - /* ref pointer chroma */ - ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma; + ps_proc->apu1_ref_buf_luma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_luma; + ps_proc->apu1_ref_buf_chroma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_chroma; } /* Structure for current input buffer */ @@ -1649,6 +1936,9 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) /* slice hdr base */ ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base; + /* Abs poc */ + ps_entropy->i4_abs_pic_order_cnt = ps_proc->ps_codec->i4_poc; + /* initialize entropy map */ if (i == j) { @@ -1656,6 +1946,9 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs); /* row 0 to ht in mbs */ memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs); + + /* intialize cabac tables */ + ih264e_init_cabac_table(ps_entropy); } /* wd in mbs */ @@ -1751,7 +2044,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf) /* qp */ ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp; - if ((i == 0) && (0 == ps_codec->i4_pic_cnt)) + if ((i == j) && (0 == ps_codec->i4_poc)) { /* init mv bits tables */ ih264e_init_mv_bits(ps_me_ctxt); diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h index 651dad9..912711f 100644 --- a/encoder/ih264e_utils.h +++ b/encoder/ih264e_utils.h @@ -30,6 +30,7 @@ * Harish * * @par List of Functions: +* -ih264e_input_queue_update() * -ih264e_get_min_level() * -ih264e_get_lvl_idx() * -ih264e_get_dpb_size() @@ -51,6 +52,35 @@ #ifndef IH264E_UTILS_H_ #define IH264E_UTILS_H_ +/** + ******************************************************************************* + * + * @brief + * Queues the current buffer, gets back a another buffer for 
encoding with correct + * picture type + * + * @par Description: + * + * @param[in] ps_codec + * Pointer to codec descriptor + * + * @param[in] ps_ive_ip + * Current input buffer to the encoder + * + * @param[out] ps_enc_buff + * Buffer to be encoded in the current pass + * + * @returns + * Flag indicating if we have a pre-enc skip or not + * + * @remarks + * + ******************************************************************************* + */ +WORD32 ih264e_input_queue_update(codec_t *ps_codec, + ive_video_encode_ip_t *ps_ive_ip, + inp_buf_t *ps_enc_buff); + /** ******************************************************************************* * diff --git a/encoder/ime.c b/encoder/ime.c index c89aaab..cfd6e81 100644 --- a/encoder/ime.c +++ b/encoder/ime.c @@ -50,10 +50,10 @@ /* User include files */ #include "ime_typedefs.h" #include "ime_distortion_metrics.h" -#include "ime_structs.h" #include "ime_defs.h" -#include "ime_macros.h" +#include "ime_structs.h" #include "ime.h" +#include "ime_macros.h" #include "ime_statistics.h" /** @@ -87,10 +87,10 @@ * ******************************************************************************* */ -void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt) +void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist) { /* MB partition info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; /* lagrange parameter */ UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; @@ -106,7 +106,7 @@ void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt) /* pointer to src macro block */ UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; - UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma; + UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; /* strides */ WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; @@ -271,22 +271,24 @@ void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt) * ******************************************************************************* */ +
void ime_evaluate_init_srchposn_16x16 ( - me_ctxt_t *ps_me_ctxt + me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist ) { UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; /* candidate mv cnt */ - UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates; + UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist]; /* list of candidate mvs */ - ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search; + ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist]; /* pointer to src macro block */ UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; - UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma; + UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; /* strides */ WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; @@ -302,46 +304,15 @@ void ime_evaluate_init_srchposn_16x16 WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX; /* mb partitions info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]); /* mv bits */ UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; /* temp var */ - UWORD32 i, j, u4_srch_pos_idx = 0; + UWORD32 i, j; + WORD32 i4_srch_pos_idx = 0; UWORD8 *pu1_ref = NULL; - WORD16 mv_x, mv_y; - - if (0) - { - /************************************************************/ - /* Compute SKIP Cost */ - /************************************************************/ - mv_x = ps_mv_list[SKIP_CAND].i2_mvx; - mv_y = ps_mv_list[SKIP_CAND].i2_mvy; - - /* adjust ref pointer */ - pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd); - - /* compute distortion */ - ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); - - /* for skip mode cost & distortion are identical - * But we shall add a bias to favor skip mode. - * Doc. JVT B118 Suggests SKIP_BIAS as 16. 
- * TODO : Empirical analysis of SKIP_BIAS is necessary */ - - i4_distortion_least = i4_mb_distortion; - - u4_srch_pos_idx = 0; - -#define SKIP_BIAS 8 - - i4_mb_cost_least = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS); - -#undef SKIP_BIAS - } - /* Carry out a search using each of the motion vector pairs identified above as predictors. */ /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */ @@ -366,6 +337,7 @@ void ime_evaluate_init_srchposn_16x16 /* compute distortion */ ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); + DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3); /* compute cost */ i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] @@ -377,22 +349,21 @@ void ime_evaluate_init_srchposn_16x16 i4_distortion_least = i4_mb_distortion; - u4_srch_pos_idx = i; + i4_srch_pos_idx = i; } } } if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) { - ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx; + ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; ps_mb_part->i4_mb_cost = i4_mb_cost_least; ps_mb_part->i4_mb_distortion = i4_distortion_least; - ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx; - ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy; + ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx; + ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy; } } - /** ******************************************************************************* * @@ -419,11 +390,12 @@ void ime_evaluate_init_srchposn_16x16 */ void ime_full_pel_motion_estimation_16x16 ( - me_ctxt_t *ps_me_ctxt + me_ctxt_t *ps_me_ctxt, + WORD32 i4_ref_list ) { /* mb part info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list]; /******************************************************************/ /* 
Modify Search range about initial candidate instead of zero mv */ @@ -448,19 +420,14 @@ void ime_full_pel_motion_estimation_16x16 switch (ps_me_ctxt->u4_me_speed_preset) { case DMND_SRCH: - ime_diamond_search_16x16(ps_me_ctxt); + ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list); break; default: assert(0); break; } - - ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx << 2; - ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy << 2; - } - /** ******************************************************************************* * @@ -487,13 +454,13 @@ void ime_full_pel_motion_estimation_16x16 */ void ime_sub_pel_motion_estimation_16x16 ( - me_ctxt_t *ps_me_ctxt + me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist ) { /* pointers to src & ref macro block */ UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; - /* pointers to ref. half pel planes */ UWORD8 *pu1_ref_mb_half_x; UWORD8 *pu1_ref_mb_half_y; @@ -507,10 +474,10 @@ void ime_sub_pel_motion_estimation_16x16 /* strides */ WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; - WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd; + WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd; /* mb partitions info */ - mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part; + mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; /* SAD(distortion metric) of an mb */ WORD32 i4_mb_distortion; @@ -523,7 +490,6 @@ void ime_sub_pel_motion_estimation_16x16 /*Best half pel buffer*/ UWORD8 *pu1_best_hpel_buf = NULL; - /* mv bits */ UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; @@ -550,6 +516,8 @@ void ime_sub_pel_motion_estimation_16x16 WORD32 i, j; WORD32 ai4_sad[8]; + WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx; + i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx; i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy; @@ -575,10 +543,9 @@ void ime_sub_pel_motion_estimation_16x16 /* Hence corresponding adjustments made here */ /**************************************************************/ - 
pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1; - pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd; - pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd; - + pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1; + pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd; + pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd; ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, pu1_ref_mb_half_y, @@ -611,8 +578,10 @@ void ime_sub_pel_motion_estimation_16x16 i2_mv_u_y = mv_y_tmp; #ifndef HP_PL /*choosing whether left or right half_x*/ - ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x_temp - i; + ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i; pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i; + + i4_srch_pos_idx = 0; #endif } @@ -643,8 +612,10 @@ void ime_sub_pel_motion_estimation_16x16 i2_mv_u_y = mv_y_tmp; #ifndef HP_PL/*choosing whether top or bottom half_y*/ - ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); + ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); + + i4_srch_pos_idx = 1; #endif } @@ -678,23 +649,27 @@ void ime_sub_pel_motion_estimation_16x16 i2_mv_u_y = mv_y_tmp; #ifndef HP_PL /*choosing between four half_xy */ - ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; + ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; + + i4_srch_pos_idx = 2; #endif } } } - ps_mb_part->i4_mb_cost = i4_mb_cost_least; - ps_mb_part->i4_mb_distortion = i4_distortion_least; - ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; - ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; - ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; 
- + if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) + { + ps_mb_part->i4_mb_cost = i4_mb_cost_least; + ps_mb_part->i4_mb_distortion = i4_distortion_least; + ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; + ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; + ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; + ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; + } } - /** ******************************************************************************* * @@ -705,132 +680,105 @@ void ime_sub_pel_motion_estimation_16x16 * @param[in] ps_me_ctxt * pointer to me ctxt * -* @param[in] ps_skip_mv -* pointer to skip mv * * @returns none * * @remarks * NOTE: while computing the skip cost, do not enable early exit from compute * sad function because, a negative bias gets added later +* Note tha the last ME candidate in me ctxt is taken as skip motion vector * ******************************************************************************* */ void ime_compute_skip_cost ( me_ctxt_t *ps_me_ctxt, - void *pv_skip_mv, + ime_mv_t *ps_skip_mv, mb_part_ctxt *ps_smb_part_info, - UWORD32 u4_use_stat_sad + UWORD32 u4_use_stat_sad, + WORD32 i4_reflist, + WORD32 i4_is_slice_type_b ) { - /* pointers to src & ref macro block */ - UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; - UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma; - - /* strides */ - WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; - WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; - - /* enabled fast sad computation */ - UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; - /* SAD(distortion metric) of an mb */ WORD32 i4_mb_distortion; /* cost = distortion + u4_lambda_motion * rate */ WORD32 i4_mb_cost; - /* Motion vectors in full-pel units */ - WORD16 mv_x, mv_y; - - /* lambda - lagrange constant */ - UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; - - /* skip mv */ - ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv; - /* temp var */ UWORD8 *pu1_ref = NULL; - UWORD32 u4_is_nonzero; - s_clip_skip_mv.i2_mvx = 
CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx); - s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy); + ime_mv_t s_skip_mv; - if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) || - (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy)) - { - /* skip motion vector not with in bounds */ - /* it is possible that mv is already evaluated */ - return ; - } + s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2; + s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2; - mv_x = (ps_skip_mv->i2_mvx + 2) >> 2; - mv_y = (ps_skip_mv->i2_mvy + 2) >> 2; - - if ((mv_x << 2) != ps_skip_mv->i2_mvx || (mv_y << 2) != ps_skip_mv->i2_mvy) + /* Check if the skip mv is out of bounds or subpel */ { + /* skip mv */ + ime_mv_t s_clip_skip_mv; + s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx); + s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy); - return ; + if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) || + (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || + (ps_skip_mv->i2_mvx & 0x3) || + (ps_skip_mv->i2_mvy & 0x3)) + { + return ; + } + } - } - else - { - /* adjust ref pointer */ - pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd); - } + /* adjust ref pointer */ + pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx + + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd); if(u4_use_stat_sad == 1) { - ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, - ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,&u4_is_nonzero); - - /* - *NOTE The check here is two fold - * One is checking if the sad has been reached, ie min sad, which a configurable parameter - * If that is reached,we need not do any mode evaluation - * Similary if we find a distortion of zero there is no point of doing any further mode evaluation - * as sad is a non negative quantity - * hence in this 
case too, no further evaluation is necessary - */ - /* - *NOTE in case we need to disable the zero check using satdq, - * we need only to set the u4_is_zero to a non zero value - */ - if(u4_is_nonzero==0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) + UWORD32 u4_is_nonzero; + + ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16( + ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, + ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh, + &i4_mb_distortion, &u4_is_nonzero); + + if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) { - ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/ - ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0)?0:i4_mb_distortion; + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ + ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion; } } else { - ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion); + ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad]( + ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, + ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion); if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) { ps_me_ctxt->i4_min_sad = i4_mb_distortion; - ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/ + ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ } } + /* for skip mode cost & distortion are identical * But we shall add a bias to favor skip mode. * Doc. JVT B118 Suggests SKIP_BIAS as 16. 
* TODO : Empirical analysis of SKIP_BIAS is necessary */ -#define SKIP_BIAS 8 - i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS); -#undef SKIP_BIAS + + i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b)); if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost) { ps_smb_part_info->i4_mb_cost = i4_mb_cost; ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; - ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx; - ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy; + ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx; + ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy; } } diff --git a/encoder/ime.h b/encoder/ime.h index 5c039e8..17912d4 100644 --- a/encoder/ime.h +++ b/encoder/ime.h @@ -47,6 +47,19 @@ */ #define NUM_LAYERS 16 +/** +****************************************************************************** + * @brief Skip Bias value for P slice +****************************************************************************** + */ +#define SKIP_BIAS_P 2 + +/** +****************************************************************************** + * @brief Skip Bias value for B slice +****************************************************************************** + */ +#define SKIP_BIAS_B 16 /*****************************************************************************/ /* Extern Function Declarations */ @@ -84,8 +97,8 @@ * computational feasibility. This is only for quality eval purposes. 
* ******************************************************************************* -*/ -extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt); + */ +extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist); /** @@ -113,10 +126,8 @@ extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt); * ******************************************************************************* */ -extern void ime_evaluate_init_srchposn_16x16 - ( - me_ctxt_t *ps_me_ctxt - ); +extern void ime_evaluate_init_srchposn_16x16(me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist); /** ******************************************************************************* @@ -142,10 +153,8 @@ extern void ime_evaluate_init_srchposn_16x16 * ******************************************************************************* */ -extern void ime_full_pel_motion_estimation_16x16 - ( - me_ctxt_t *ps_me_ctxt - ); +extern void ime_full_pel_motion_estimation_16x16(me_ctxt_t *ps_me_ctxt, + WORD32 i4_ref_list); /** ******************************************************************************* @@ -171,10 +180,8 @@ extern void ime_full_pel_motion_estimation_16x16 * ******************************************************************************* */ -extern void ime_sub_pel_motion_estimation_16x16 - ( - me_ctxt_t *ps_me_ctxt - ); +extern void ime_sub_pel_motion_estimation_16x16(me_ctxt_t *ps_me_ctxt, + WORD32 i4_reflist); /** ******************************************************************************* @@ -189,6 +196,9 @@ extern void ime_sub_pel_motion_estimation_16x16 * @param[in] ps_skip_mv * pointer to skip mv * + @param[in] is_slice_type_b +* Whether slice type is BSLICE or not + * @returns none * * @remarks @@ -197,13 +207,12 @@ extern void ime_sub_pel_motion_estimation_16x16 * ******************************************************************************* */ -extern void ime_compute_skip_cost - ( - me_ctxt_t *ps_me_ctxt, - void *pv_skip_mv, - mb_part_ctxt *ps_smb_part_info, - UWORD32 
u4_use_stat_sad - ); +extern void ime_compute_skip_cost(me_ctxt_t *ps_me_ctxt, + ime_mv_t *ps_skip_mv, + mb_part_ctxt *ps_smb_part_info, + UWORD32 u4_use_stat_sad, + WORD32 i4_reflist, + WORD32 is_slice_type_b); #endif /* IME_H_ */ diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h index 14d9c55..f82018d 100644 --- a/encoder/ime_defs.h +++ b/encoder/ime_defs.h @@ -55,5 +55,8 @@ #define NSTEP_SRCH 50 #define HEX_SRCH 75 +#define MAX_NUM_REFLIST 2 +#define SUBPEL_BUFF_CNT 4 + #endif /*_IME_DEFS_H_*/ diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c index 23a1fbc..f8c44df 100644 --- a/encoder/ime_distortion_metrics.c +++ b/encoder/ime_distortion_metrics.c @@ -1260,3 +1260,4 @@ void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src, else *sig_sad_dc = 1; } + diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h index a30e1fc..5056ba0 100644 --- a/encoder/ime_distortion_metrics.h +++ b/encoder/ime_distortion_metrics.h @@ -130,6 +130,7 @@ ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter; ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma; ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra; + /*SSE4.2 Declarations*/ ime_compute_sad_ft ime_compute_sad_16x16_sse42; ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42; @@ -164,7 +165,6 @@ ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8; ime_compute_sad_stat ime_compute_16x16_sad_stat_av8; ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8; - #endif /* IME_DISTORTION_METRICS_H_ */ diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h index 7819b91..9baacb3 100644 --- a/encoder/ime_structs.h +++ b/encoder/ime_structs.h @@ -90,7 +90,7 @@ typedef struct /** * Search position for least cost among the list of candidates */ - UWORD32 u4_srch_pos_idx; + WORD32 i4_srch_pos_idx; /** * Search position for least cost among the list of candidates @@ -116,9 +116,9 @@ typedef 
struct typedef struct { /** - * Ref pointer to current MB luma + * Ref pointer to current MB luma for each ref list */ - UWORD8 *pu1_ref_buf_luma; + UWORD8 *apu1_ref_buf_luma[MAX_NUM_REFLIST]; /** * Src pointer to current MB luma @@ -190,13 +190,13 @@ typedef struct /** * Number of valid candidates for the Initial search position */ - UWORD32 u4_num_candidates; + UWORD32 u4_num_candidates[MAX_NUM_REFLIST + 1]; /** - * Motion vector predictors derived from neighbouring + * Motion vector predictors derived from neighboring * blocks for each of the six block partitions */ - ime_mv_t as_mv_init_search[5]; + ime_mv_t as_mv_init_search[MAX_NUM_REFLIST + 1][6]; /** * mv bits @@ -247,10 +247,17 @@ typedef struct UWORD32 u4_left_is_skip; + /* skip_type can be PREDL0, PREDL1 or BIPRED */ + WORD32 i4_skip_type; + + /* Biasing given for skip prediction */ + WORD32 i4_skip_bias[2]; + /** * Structure to store the MB partition info + * We need 1(L0)+1(L1)+1(bi) */ - mb_part_ctxt s_mb_part; + mb_part_ctxt as_mb_part[MAX_NUM_REFLIST + 1]; /* * Threshold to compare the sad with */ @@ -277,27 +284,17 @@ typedef struct UWORD8 u1_mb_qp; /* - * Buffers for holding half_x , half_y and half_xy - * values when halfpel generation - * for the entire plane is not enabled + * Buffers for holding subpel and bipred temp buffers */ - UWORD8 *pu1_half_x; - UWORD8 *pu1_half_y; - UWORD8 *pu1_half_xy; + UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT]; + WORD32 u4_subpel_buf_strd; /* * Buffers to store the best halfpel plane* */ UWORD8 *pu1_hpel_buf; - /* - * Stride for hpel buffer - */ - UWORD32 u4_hpel_buf_strd; - - WORD32 u4_hp_buf_strd; - } me_ctxt_t; diff --git a/encoder/irc_common.h b/encoder/irc_common.h index c341de4..448fad3 100644 --- a/encoder/irc_common.h +++ b/encoder/irc_common.h @@ -97,7 +97,7 @@ typedef float number_t; /* The ratios between I to P and P to B Qp is specified here */ #define K_Q 4 #define I_TO_P_RATIO (19) /* In K_Q Q factor */ -#define P_TO_B_RATIO (21) /* In K_Q Q factor */ 
+#define P_TO_B_RATIO (32) /* In K_Q Q factor */ #define P_TO_I_RATIO (13) /* In K_Q Q factor */ #endif /* _RC_COMMON_H_ */ diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c index 186188c..8740d95 100644 --- a/encoder/irc_picture_type.c +++ b/encoder/irc_picture_type.c @@ -253,6 +253,7 @@ WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handli *****************************************************************************/ void irc_init_pic_handling(pic_handling_t *ps_pic_handling, WORD32 i4_intra_frm_int, + WORD32 i4_inter_frm_int, WORD32 i4_max_inter_frm_int, WORD32 i4_is_gop_closed) { @@ -262,7 +263,7 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling, /* Checks */ /* Codec Parameters */ ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int; - ps_pic_handling->i4_inter_frm_int = i4_max_inter_frm_int; + ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int; ps_pic_handling->i4_max_inter_frm_int = i4_max_inter_frm_int; ps_pic_handling->i4_is_gop_closed = i4_is_gop_closed; @@ -278,6 +279,10 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling, /* Indices to the pic_stack */ ps_pic_handling->i4_ref_pic_idx = 0; + /* + * B frame index should be ref_frame_num, + * which is 2 in out case + */ ps_pic_handling->i4_b_pic_idx = 2; ps_pic_handling->i4_prev_b_pic_idx = 2; @@ -302,7 +307,7 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling, /* Variables on which the bit allocation is dependent */ /* Get the pic distribution in the gop */ find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int, - i4_max_inter_frm_int, i4_is_gop_closed, + i4_inter_frm_int, i4_is_gop_closed, &ps_pic_handling->i4_b_in_incomp_subgop, &ps_pic_handling->i4_extra_p); @@ -528,8 +533,7 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id) * 3)The new inter-frm-interval won't cross the intra_frm_interval */ if((ps_pic_handling->i4_change_in_inter_frm_int == 1) - && ((i4_buf_pic_no % 
i4_inter_frm_int == 1) - || (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1))) + && ((i4_buf_pic_no % i4_inter_frm_int == 1)|| (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1))) { /* * Condition which checks if the new inter_frm_int will cross the @@ -540,10 +544,31 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id) if(i4_condn_for_change_in_inter_frm_int) { + /* + * If there is a change in inter frame interval. We should set the b + * frame IDX to the (num ref frame - num ref frame in buf)+ i4_ref_pic_idx + * Since our case we have a structure of I B P or I B...B P only + * we have three cases + * 1) current incoming frame is I. Then we have to leave space for + * current I and next P hence write b idx as to ref idx + 2 + * 2) Current incoming frame is B. In that case, we have I in buffer. + * Only one P needs space hence write b idx as ref idx +1 + * 3) Current incoming frame is P. In that case we are at the end of + * gop [sub gop?] and we have to leave space for next gops I and P. + * Thus b idx = ref idx + 2 + * + * In case of an closed Gop. The last frame has to be forced to be a P. + * Hence we may have problems in that case. + * + * Also this has the implicit assumption of only 2 ref frames + */ + WORD32 i4_is_curr_frm_b = (i4_buf_pic_no % i4_new_inter_frm_int)&& + !(i4_is_gop_closed && (i4_b_count_in_gop == i4_b_frms_in_prd)); + /*If the inter_frm_int = 1, then the b_pic_idx needs to be modified */ if(i4_inter_frm_int == 1) { - ps_pic_handling->i4_b_pic_idx = (1 + ps_pic_handling->i4_b_pic_idx = ((i4_is_curr_frm_b ? 1 : 2) + ps_pic_handling->i4_ref_pic_idx) % (i4_max_inter_frm_int + 1); } @@ -811,7 +836,42 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id) i4_pic_disp_order_no++; i4_buf_pic_no++; +#if 0 /* For any gop */ + /* BUG FIX + * This piece of code resets the gop upon I frame(?) 
+ * This introduces a problem of GOP getting reset not at I frames as it should be + * The reason AFAIK is that + * 1) This code uses i4_pic_disp_order_no to reset GOP. I assume it computes + * if we are at GOP boundary and does it, but not sure + * 2) The frames remaining in GOP is done in post enc as it should be. + * + * Also ps_pic_handling->i4_pic_disp_order_no is incremented when a pic is added + * to stack because the addition is in disp order while popping is in encode order + * + * Suppose there is a delay of 1 frame between queue and encode. + * Then the timing will be as follows. Assume a GOP of IPPIPP + * + * Input buff Input to qu Output buf/encode buff remaining pic in gop + * 1 I I NA reset to 1 2 + * 2 P P I 0 2 + * 3 P P P 0 1 + * 4 I I P reset to 1 2 + * 5 P P I 1 1 + * 6 P P P 1 0 + * 7 NA NA P + * + * Hence our gop gets reset at I(1) and I(4) in the RC. Thus the remaining pic in gop + * count will be as shown. We can clearly see that the GOP gets reset at I(4). Hence + * the corresponding QP for output buf p(4) will be that of an I frame. + * + * By hiding this I hope to fix this problem. But I am not sure exactly. + * This needs to be investigated further + * + * By hiding this most likely we are in effect disabling the dynamic + * update of gop params.
+ */ + if(ps_pic_handling->i4_pic_disp_order_no == (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed) * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))) @@ -831,6 +891,7 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id) - ps_pic_handling->i4_b_in_incomp_subgop_mix_gop; } } +#endif /* End of GOP updates */ if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1)) @@ -855,11 +916,12 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id) } /* Updating the vars which work on the encoded pics */ - /* For the first gop */ + /* For the first gop + * TODO (BPIC) this // || (i4_intra_frm_int == 1)) may cause problems for bpics */ if(((ps_pic_handling->i4_is_first_gop) && (ps_pic_handling->i4_pic_disp_order_no - == (i4_max_inter_frm_int - 1))) - || (i4_intra_frm_int == 1)) + == (i4_max_inter_frm_int - 2)))) + // || (i4_intra_frm_int == 1)) { ps_pic_handling->i4_coded_pic_no = 0; ps_pic_handling->i4_stack_count = 0; diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h index 1af5424..021ee33 100644 --- a/encoder/irc_picture_type.h +++ b/encoder/irc_picture_type.h @@ -34,6 +34,7 @@ WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_ha void irc_init_pic_handling(pic_handling_handle ps_pic_handling, WORD32 i4_intra_frm_int, + WORD32 i4_inter_frm_int, WORD32 i4_max_inter_frm_int, WORD32 i4_is_gop_closed); diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c index 6c6586e..f72597a 100644 --- a/encoder/irc_rate_control_api.c +++ b/encoder/irc_rate_control_api.c @@ -148,6 +148,7 @@ void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api, UWORD32 u4_frame_rate, UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, + WORD32 i4_inter_frm_int, UWORD8 *pu1_init_qp, UWORD32 u4_max_vbv_buff_size, WORD32 i4_max_inter_frm_int, @@ -172,7 +173,8 @@ void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api, /* Initialize the 
pic_handling module */ irc_init_pic_handling(ps_rate_control_api->ps_pic_handling, - (WORD32)u4_intra_frame_interval, i4_max_inter_frm_int, + (WORD32)u4_intra_frame_interval, + i4_inter_frm_int, i4_max_inter_frm_int, i4_is_gop_closed); /*** Initialize the rate control modules ***/ diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h index 0173037..4b24ece 100644 --- a/encoder/irc_rate_control_api.h +++ b/encoder/irc_rate_control_api.h @@ -42,6 +42,7 @@ void irc_initialise_rate_control(rate_control_handle ps_rate_control_api, UWORD32 u4_frame_rate, UWORD32 u4_max_delay, UWORD32 u4_intra_frame_interval, + WORD32 i4_inter_frm_int, UWORD8 *pu1_init_qp, UWORD32 u4_max_vbv_buff_size, WORD32 i4_max_inter_frm_int, diff --git a/encoder/ive2.h b/encoder/ive2.h index 8cb0fd1..7a543bb 100644 --- a/encoder/ive2.h +++ b/encoder/ive2.h @@ -293,7 +293,7 @@ typedef struct UWORD32 u4_max_bitrate; /** Maximum number of consecutive B frames */ - UWORD32 u4_max_num_bframes; + UWORD32 u4_num_bframes; /** Content type Interlaced/Progressive */ IV_CONTENT_TYPE_T e_content_type; @@ -394,6 +394,15 @@ typedef struct /* encoded frame type */ UWORD32 u4_encoded_frame_type; + /** Flag to indicate if this is the last output from the encoder */ + UWORD32 u4_is_last; + + /** Lower 32bits of input time stamp */ + UWORD32 u4_timestamp_low; + + /** Upper 32bits of input time stamp */ + UWORD32 u4_timestamp_high; + /** Descriptor for input raw buffer freed from codec */ iv_raw_buf_t s_inp_buf; @@ -1339,9 +1348,6 @@ typedef struct /** IDR frame interval */ UWORD32 u4_idr_frm_interval; - /** consecutive B frames */ - UWORD32 u4_num_b_frames; - /** Lower 32bits of time stamp corresponding to input buffer, * from which this command takes effect */ UWORD32 u4_timestamp_low; @@ -1428,6 +1434,9 @@ typedef struct * from which this command takes effect */ UWORD32 u4_timestamp_high; + /** Entropy coding mode flag: 0-CAVLC, 1-CABAC */ + UWORD32 u4_entropy_coding_mode; + 
}ive_ctl_set_profile_params_ip_t; /** Output structure : Set Profile Params */ diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c index 58ec4d0..7a3718b 100644 --- a/encoder/mips/ih264e_function_selector.c +++ b/encoder/mips/ih264e_function_selector.c @@ -58,8 +58,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -71,11 +71,12 @@ #include "ih264_macros.h" #include "ih264_platform_macros.h" -#include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_platform_macros.h" /** diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c index 429cdab..b0acb19 100644 --- a/encoder/x86/ih264e_function_selector.c +++ b/encoder/x86/ih264e_function_selector.c @@ -58,8 +58,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -68,14 +68,15 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "ih264_macros.h" #include "ih264_platform_macros.h" -#include "ih264e_defs.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_platform_macros.h" /** diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c index 
d953c76..6888e5d 100644 --- a/encoder/x86/ih264e_function_selector_sse42.c +++ b/encoder/x86/ih264e_function_selector_sse42.c @@ -59,8 +59,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -69,23 +69,18 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_platform_macros.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_trans_quant_itrans_iquant.h" -#include "ih264e_defs.h" -#include "ih264e_structs.h" -#include "ih264_deblk_edge_filters.h" #include "ih264e_core_coding.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" -#include "ih264_padding.h" #include "ih264e_intra_modes_eval.h" -#include "ih264_mem_fns.h" #include "ih264e_fmt_conv.h" #include "ih264e_half_pel.h" diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c index 4eb4c7b..4419112 100644 --- a/encoder/x86/ih264e_function_selector_ssse3.c +++ b/encoder/x86/ih264e_function_selector_ssse3.c @@ -59,8 +59,8 @@ #include "ih264e_error.h" #include "ih264e_bitstream.h" #include "ime_distortion_metrics.h" +#include "ime_defs.h" #include "ime_structs.h" -#include "ih264_defs.h" #include "ih264_error.h" #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" @@ -69,23 +69,18 @@ #include "ih264_padding.h" #include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" - +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include 
"ih264e_cabac_structs.h" #include "ih264e_structs.h" #include "ih264e_platform_macros.h" -#include "ih264_intra_pred_filters.h" -#include "ih264_trans_quant_itrans_iquant.h" -#include "ih264e_defs.h" -#include "ih264e_structs.h" -#include "ih264_deblk_edge_filters.h" +#include "ih264e_cabac.h" #include "ih264e_core_coding.h" #include "ih264_cavlc_tables.h" #include "ih264e_cavlc.h" -#include "ih264_padding.h" #include "ih264e_intra_modes_eval.h" -#include "ih264_mem_fns.h" #include "ih264e_fmt_conv.h" #include "ih264e_half_pel.h" diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c index 42580fa..8da73b7 100644 --- a/encoder/x86/ih264e_half_pel_ssse3.c +++ b/encoder/x86/ih264e_half_pel_ssse3.c @@ -55,7 +55,6 @@ #include "ih264_defs.h" #include "ih264e_half_pel.h" #include "ih264_macros.h" -#include "ih264e_half_pel.h" #include "ih264e_debug.h" #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c index 0f4a9ad..c11d7f2 100644 --- a/encoder/x86/ih264e_intra_modes_eval_ssse3.c +++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c @@ -67,18 +67,20 @@ #include "ih264_inter_pred_filters.h" #include "ih264_mem_fns.h" #include "ih264_padding.h" -#include "ih264_intra_pred_filters.h" #include "ih264_deblk_edge_filters.h" #include "ime_distortion_metrics.h" #include "ih264e_error.h" #include "ih264e_bitstream.h" +#include "ime_defs.h" #include "ime_structs.h" - +#include "ih264_cabac_tables.h" #include "irc_cntrl_param.h" #include "irc_frame_info_collector.h" #include "ih264e_rate_control.h" +#include "ih264e_cabac_structs.h" #include "ih264e_structs.h" +#include "ih264e_cabac.h" #include "ih264e_intra_modes_eval.h" #include "ih264e_globals.h" #include "ime_platform_macros.h" diff --git a/test/Android.mk b/test/Android.mk index adb14f0..0085832 100644 --- a/test/Android.mk +++ b/test/Android.mk @@ -6,4 +6,3 @@ include 
$(LOCAL_PATH)/encoder.mk # decoder include $(LOCAL_PATH)/decoder.mk - diff --git a/test/encoder/app.h b/test/encoder/app.h index 7c16fcd..7b00298 100644 --- a/test/encoder/app.h +++ b/test/encoder/app.h @@ -55,13 +55,13 @@ /* Constant Macros */ /*****************************************************************************/ -#define DEFAULT_NUM_INPUT_BUFS 1 +#define DEFAULT_NUM_INPUT_BUFS 32 #define DEFAULT_MAX_INPUT_BUFS 32 -#define DEFAULT_NUM_OUTPUT_BUFS 1 +#define DEFAULT_NUM_OUTPUT_BUFS 32 #define DEFAULT_MAX_OUTPUT_BUFS 32 -#define DEFAULT_NUM_RECON_BUFS 1 +#define DEFAULT_NUM_RECON_BUFS 32 #define DEFAULT_MAX_RECON_BUFS DEFAULT_NUM_RECON_BUFS @@ -69,11 +69,12 @@ #define MAX_VBV_BUFF_SIZE (120 * 16384) #define MAX_NUM_IO_BUFS 3 -#define DEFAULT_MAX_REF_FRM 1 +#define DEFAULT_MAX_REF_FRM 2 #define DEFAULT_MAX_REORDER_FRM 0 #define DEFAULT_QP_MIN 0 #define DEFAULT_QP_MAX 51 #define DEFAULT_MAX_BITRATE 20000000 +#define DEFAULT_NUM_BFRAMES 0 #define DEFAULT_MAX_SRCH_RANGE_X 256 #define DEFAULT_MAX_SRCH_RANGE_Y 256 #define DEFAULT_MAX_FRAMERATE 120000 @@ -94,7 +95,7 @@ #define DEFAULT_TGT_FRAME_RATE 30 #define DEFAULT_MAX_WD 1920 #define DEFAULT_MAX_HT 1920 -#define DEFAULT_MAX_LEVEL 50 +#define DEFAULT_MAX_LEVEL 40 #define DEFAULT_STRIDE 0 #define DEFAULT_WD 0 #define DEFAULT_HT 0 @@ -127,6 +128,8 @@ #define DEFAULT_EPROFILE IV_PROFILE_BASE #define DEFAULT_SLICE_MODE 0 #define DEFAULT_SLICE_PARAM 256 +#define DEFAULT_ENTROPY_CODING_MODE 0 + #define STRLENGTH 500 @@ -281,6 +284,7 @@ typedef struct UWORD32 u4_i_interval; UWORD32 u4_idr_interval; UWORD32 u4_b_frames; + UWORD32 u4_num_bframes; UWORD32 u4_disable_deblk_level; UWORD32 u4_hpel; UWORD32 u4_qpel; @@ -289,6 +293,7 @@ typedef struct UWORD32 u4_slice_mode; UWORD32 u4_slice_param; + UWORD32 u4_entropy_coding_mode; void *pv_input_thread_handle; void *pv_output_thread_handle; diff --git a/test/encoder/main.c b/test/encoder/main.c index 26420e2..9bbb5cf 100644 --- a/test/encoder/main.c +++ b/test/encoder/main.c 
@@ -91,6 +91,7 @@ typedef enum I_QP_MIN, P_QP_MIN, B_QP_MIN, + ENTROPY, AIR, AIR_REFRESH_PERIOD, ARCH, @@ -105,6 +106,7 @@ typedef enum I_INTERVAL, IDR_INTERVAL, B_FRMS, + NUM_B_FRMS, DISABLE_DBLK, PROFILE, FAST_SAD, @@ -153,7 +155,7 @@ static const argument_t argument_mapping[] = { "--", "--src_framerate", SRC_FRAMERATE, "Source frame rate \n" }, { "--", "--i_interval", I_INTERVAL, "Intra frame interval \n" }, { "--", "--idr_interval", IDR_INTERVAL, "IDR frame interval \n" }, - { "--", "--bframes", B_FRMS, "Consecutive B frames \n" }, + { "--", "--bframes", NUM_B_FRMS, "Maximum number of consecutive B frames \n" }, { "--", "--speed", ENC_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" }, { "--", "--me_speed", ME_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" }, { "--", "--fast_sad", FAST_SAD, " Flag for faster sad execution\n" }, @@ -193,6 +195,7 @@ static const argument_t argument_mapping[] = { "--", "--qp_i_min", I_QP_MIN, "Min QP for I frames\n"}, { "--", "--qp_p_min", P_QP_MIN, "Min QP for P frames\n"}, { "--", "--qp_b_min", B_QP_MIN, "Min QP for B frames\n"}, + { "--", "--entropy", ENTROPY, "Entropy coding mode(0: CAVLC or 1: CABAC)\n"}, { "--", "--vbv_delay", VBV_DELAY, "VBV buffer delay\n"}, { "--", "--vbv_size", VBV_SIZE, "VBV buffer size\n"}, { "-i4", "--intra_4x4_enable", INTRA_4x4_ENABLE, "Intra 4x4 enable \n" }, @@ -657,6 +660,10 @@ void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value) sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_min); break; + case ENTROPY: + sscanf(value, "%d", &ps_app_ctxt->u4_entropy_coding_mode); + break; + case AIR: sscanf(value, "%d", &ps_app_ctxt->u4_air); break; @@ -742,8 +749,8 @@ void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value) sscanf(value, "%d", &ps_app_ctxt->u4_idr_interval); break; - case B_FRMS: - sscanf(value, "%d", &ps_app_ctxt->u4_b_frames); + case NUM_B_FRMS: + sscanf(value, "%d", &ps_app_ctxt->u4_num_bframes); break; case DISABLE_DEBLOCK_LEVEL: 
@@ -886,7 +893,13 @@ void validate_params(app_ctxt_t *ps_app_ctxt) sprintf(ac_error, "Invalid number of frames to be encoded: %d", ps_app_ctxt->u4_max_num_frms); invalid_argument_exit(ac_error); } - + if ((0 != (WORD32)ps_app_ctxt->u4_entropy_coding_mode) + && (1 != (WORD32)ps_app_ctxt->u4_entropy_coding_mode)) + { + sprintf(ac_error, "Invalid entropy codeing mode: %d", + ps_app_ctxt->u4_entropy_coding_mode); + invalid_argument_exit(ac_error); + } return; } @@ -944,6 +957,7 @@ void init_default_params(app_ctxt_t *ps_app_ctxt) ps_app_ctxt->u4_enable_alt_ref = DEFAULT_ENABLE_ALT_REF; ps_app_ctxt->u4_rc = DEFAULT_RC; ps_app_ctxt->u4_max_bitrate = DEFAULT_MAX_BITRATE; + ps_app_ctxt->u4_num_bframes = DEFAULT_NUM_BFRAMES; ps_app_ctxt->u4_bitrate = DEFAULT_BITRATE; ps_app_ctxt->u4_i_qp = DEFAULT_I_QP; ps_app_ctxt->u4_p_qp = DEFAULT_P_QP; @@ -960,7 +974,6 @@ void init_default_params(app_ctxt_t *ps_app_ctxt) ps_app_ctxt->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y; ps_app_ctxt->u4_i_interval = DEFAULT_I_INTERVAL; ps_app_ctxt->u4_idr_interval = DEFAULT_IDR_INTERVAL; - ps_app_ctxt->u4_b_frames = DEFAULT_B_FRAMES; ps_app_ctxt->u4_disable_deblk_level = DEFAULT_DISABLE_DEBLK_LEVEL; ps_app_ctxt->u4_hpel = DEFAULT_HPEL; ps_app_ctxt->u4_qpel = DEFAULT_QPEL; @@ -979,6 +992,7 @@ void init_default_params(app_ctxt_t *ps_app_ctxt) ps_app_ctxt->u4_psnr_cnt = 0; ps_app_ctxt->pu1_psnr_buf = NULL; ps_app_ctxt->u4_psnr_buf_size = 0; + ps_app_ctxt->u4_entropy_coding_mode = DEFAULT_ENTROPY_CODING_MODE; return; } @@ -1334,7 +1348,6 @@ void set_gop_params(app_ctxt_t *ps_app_ctxt, s_gop_params_ip.s_ive_ip.u4_i_frm_interval = ps_app_ctxt->u4_i_interval; s_gop_params_ip.s_ive_ip.u4_idr_frm_interval = ps_app_ctxt->u4_idr_interval; - s_gop_params_ip.s_ive_ip.u4_num_b_frames = ps_app_ctxt->u4_b_frames; s_gop_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; s_gop_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; @@ -1368,6 +1381,8 @@ void set_profile_params(app_ctxt_t *ps_app_ctxt, 
s_profile_params_ip.s_ive_ip.e_profile = ps_app_ctxt->e_profile; + s_profile_params_ip.s_ive_ip.u4_entropy_coding_mode = ps_app_ctxt->u4_entropy_coding_mode; + s_profile_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high; s_profile_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low; @@ -1433,7 +1448,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) IV_STATUS_T status = IV_SUCCESS; - WORD32 i, read_failed = 0, is_last = 0, buff_size = 0, num_bytes = 0; + WORD32 i, is_last = 0, buff_size = 0, num_bytes = 0; UWORD32 u4_total_time = 0; UWORD8 *pu1_buf = NULL; UWORD32 u4_timestamp_low, u4_timestamp_high; @@ -1449,6 +1464,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) iv_raw_buf_t s_inp_buf, s_recon_buf; CHAR ac_error[STRLENGTH]; WORD32 end_of_frames=0; + WORD32 i4_inp_done =0; u4_timestamp_low = 0; u4_timestamp_high = 0; @@ -1498,18 +1514,6 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) } } -#if 0 //Input buffer dump - //if(1 == ps_app_ctxt->u4_psnr_enable) - { - ps_app_ctxt->fp_dump_op = fopen("D:\\dump\\inp.yuv", "wb"); - if(NULL == ps_app_ctxt->fp_dump_op) - { - sprintf(ac_error, "Unable to open output file for input dump: %s", "D:\\dump\\inp.yuv"); - invalid_argument_exit(ac_error); - } - } -#endif //Input buffer dump - /* If PSNR is enabled, open input file again and hold a different file pointer * This makes it easy to compute PSNR without adding dependency between input and recon threads */ @@ -1548,10 +1552,6 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) while(1) { - - - - /******************************************************************************/ /****************** Input Initialization **************************************/ /******************************************************************************/ @@ -1568,6 +1568,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) } } + if (i == DEFAULT_MAX_INPUT_BUFS) + { + printf("\n 
Unable to find a free input buffer!!"); + exit(0); + } + ps_video_encode_ip->u4_size = sizeof(ih264e_video_encode_ip_t); ps_video_encode_op->u4_size = sizeof(ih264e_video_encode_op_t); @@ -1637,11 +1643,19 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd *2; } + /* + * Here we read input and other associated buffers. Regardless of success + * we will proceed from here as we will need extra calls to flush out + * input queue in encoder. Note that this is not necessary. You can just + * send encode calls till with valid output and recon buffers till the + * queue is flushed. + */ while(1) { IV_STATUS_T mb_info_status = IV_SUCCESS, pic_info_status = IV_SUCCESS; - read_failed = 0; + status = read_input(ps_app_ctxt->fp_ip, ps_inp_raw_buf); + if (ps_app_ctxt->u4_mb_info_type != 0) { mb_info_status = read_mb_info(ps_app_ctxt, pv_mb_info); @@ -1656,15 +1670,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) if(0 == ps_app_ctxt->u4_loopback) { is_last = 1; - read_failed = 1; - break; } else fseek(ps_app_ctxt->fp_ip, 0, SEEK_SET); } - else - break; + break; } /******************************************************************************/ @@ -1716,13 +1727,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) ps_inp_raw_buf->apv_bufs[0] = NULL; ps_inp_raw_buf->apv_bufs[1] = NULL; ps_inp_raw_buf->apv_bufs[2] = NULL; - end_of_frames = 1; } ps_video_encode_ip->u4_is_last = is_last; ps_video_encode_ip->u4_mb_info_type = ps_app_ctxt->u4_mb_info_type; ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type;; - ps_video_encode_op->s_out_buf.pv_buf= 0; + ps_video_encode_op->s_out_buf.pv_buf= NULL; ps_video_encode_ip->u4_timestamp_high = u4_timestamp_high; ps_video_encode_ip->u4_timestamp_low = u4_timestamp_low; @@ -1766,11 +1776,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) /****************** Writing Output 
********************************************/ /******************************************************************************/ num_bytes = 0; - /* Break if all the encoded frames are taken from encoder */ - if(1 == end_of_frames && 0 == ps_video_encode_op->output_present) - { - break; - } + if(1 == ps_video_encode_op->output_present) { num_bytes = ps_video_encode_op->s_out_buf.u4_bytes; @@ -1783,7 +1789,11 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) printf("Error: Unable to write to output file\n"); break; } + } + /* free input bufer if codec returns a valid input buffer */ + if (ps_video_encode_op->s_inp_buf.apv_bufs[0]) + { /* Reuse of freed input buffer */ for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++) { @@ -1793,8 +1803,11 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) break; } } + } - /* Reuse of freed output buffer */ + /* free output buffer if codec returns a valid output buffer */ + // if(ps_video_encode_op->s_out_buf.pv_buf) + { for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++) { if(ps_app_ctxt->as_output_buf[i].pu1_buf == ps_video_encode_op->s_out_buf.pv_buf) @@ -1805,132 +1818,169 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt) } } - if (ps_video_encode_op->dump_recon == 1) + /********************************************************************** + * Print stats + **********************************************************************/ { - ps_app_ctxt->u4_pics_cnt++; + UWORD8 u1_pic_type[][5] = + { "IDR", "I", "P", "B", "NA" }; + WORD32 lookup_idx = 0; - ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt; - if (ps_app_ctxt->u4_psnr_enable == 0) + if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + == IV_IDR_FRAME) + { + lookup_idx = 0; + } + else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + == IV_I_FRAME) + { + lookup_idx = 1; + } + else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + == IV_P_FRAME) { - UWORD8 u1_pic_type[][5] = { "IDR", "I", 
"P","NA" }; - WORD32 lookup_idx = 0; + lookup_idx = 2; + } + else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + == IV_B_FRAME) + { + lookup_idx = 3; + } + else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + == IV_NA_FRAME) + { + lookup_idx = 4; + } - if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME) - { - lookup_idx = 0; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME) - { - lookup_idx = 1; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME) - { - lookup_idx = 2; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME) - { - lookup_idx = 3; - } + if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type + != IV_NA_FRAME) + { + ps_app_ctxt->u4_pics_cnt++; + ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt; + ps_app_ctxt->u4_total_bytes += num_bytes; + } - printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max); + if (ps_app_ctxt->u4_psnr_enable == 0) + { + printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", + u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, + num_bytes, timetaken, ps_app_ctxt->avg_time, + peak_avg_max); } + } + - ps_app_ctxt->u4_total_bytes += num_bytes; + /* For psnr computation, we need to read the correct input frame and + * compare with recon. The difficulty with doing it is that we only know + * that the frame number of recon is monotonically increasing. There + * may be gaps in the recon if any pre or post enc skip happens. There are + * 3 senarios + * 1) A frame is encoded -> returns the pic type + * 2) A frame is not encoded -> Encoder is waiting, the frame may get + * encoded later + * 3) A frame is not encoded -> A post enc or pre enc skip happend. 
The + * frame is not going to be encoded + * + * The 1st and 2nd scenarios are easy, since we just need to increment + * recon cnt whenever we get a valid recon. This cnt can be used to + * sync the recon and input + * 3rd scenario in conjunction with 2nd will pose problems. Even if + * the returning frame is NA, we do not know whether we should increment the + * recon cnt or not because it can be case 2 or case 3. + * + * Solutions: + * ------------------------- + * One way to overcome this will be to return more information as of + * the frame type. We can send if a frame was skipped as a part of the + * return frame type. + * This will not work. Since the output and recon are not in sync, we + * cannot use the current output frame type to determine if a recon + * is present currently or not. We need some other way to achieve this. + * + * Other way to do this which is cleaner and maintains the separation + * between recon and the output is to set the width [& height] of output recon + * buffer to be zero. Hence we will in effect be saying :"look there + * is a recon, but due to frame not being encoded it is having a width 0". + * To be more clear we need to make height also to be zero. + * + * But are we using these variables for allocating and deallocating + * the buffers somewhere? No we are not. The buffer gets re-init + * at every encode call + * + * Fixes + * ------------------------ + * Currently the recon buff width and height are set in the encoder. + * This will not work now because recon and input are not + * in sync. Hence a recon buff sent at time stamp x will get used to + * fill recon of input at time stamp y (x > y). If we reduced the + * frame dimensions in between, the recon buffer will not have enough + * space. Hence we need to set the width and height appropriately inside + * lib itself. 
+ */ - /******************************************************************************/ - /****************** Writing Recon ********************************************/ - /******************************************************************************/ - if(1 == ps_video_encode_op->output_present) + if (ps_app_ctxt->u4_recon_enable || ps_app_ctxt->u4_chksum_enable + || ps_app_ctxt->u4_psnr_enable) + { + if (ps_video_encode_op->dump_recon) { s_recon_buf = ps_video_encode_op->s_recon_buf; - /* Dump recon when enabled, and output bytes != 0*/ - if(ps_app_ctxt->u4_recon_enable) - { - status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf); - if(IV_SUCCESS != status) - { - printf("Error: Unable to write to recon file\n"); - break; - } - } - - - if(ps_app_ctxt->u4_psnr_enable) - { + /* Read input for psnr computuation */ + if (ps_app_ctxt->u4_psnr_enable) read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf); - compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf); - } - - if(ps_app_ctxt->u4_chksum_enable) + /* if we have a valid recon buffer do the assocated tasks */ + if (s_recon_buf.au4_wd[0]) { - WORD32 comp; - WORD32 num_comp; - num_comp = 2; - if(IV_YUV_420P == s_recon_buf.e_color_fmt) - num_comp = 3; + /* Dump recon when enabled, and output bytes != 0 */ + if (ps_app_ctxt->u4_recon_enable) + { + status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf); + if (IV_SUCCESS != status) + { + printf("Error: Unable to write to recon file\n"); + break; + } + } - for(comp = 0; comp < num_comp; comp++ ) + if (ps_app_ctxt->u4_psnr_enable) { - UWORD8 au1_chksum[16]; + compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf); + } - calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp], - s_recon_buf.au4_strd[comp], - s_recon_buf.au4_wd[comp], - s_recon_buf.au4_ht[comp], - au1_chksum); - fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum); + if (ps_app_ctxt->u4_chksum_enable) + { + WORD32 comp, num_comp = 2; + + if (IV_YUV_420P == s_recon_buf.e_color_fmt) + num_comp = 3; + + for 
(comp = 0; comp < num_comp; comp++) + { + UWORD8 au1_chksum[16]; + calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp], + s_recon_buf.au4_strd[comp], + s_recon_buf.au4_wd[comp], + s_recon_buf.au4_ht[comp], + au1_chksum); + fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum); + } } } - - } } - else - { - if (ps_app_ctxt->u4_psnr_enable == 0) - { - UWORD8 u1_pic_type[][5] = { "IDR", "I", "P", "NA" }; - WORD32 lookup_idx = 0; - if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME) - { - lookup_idx = 0; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME) - { - lookup_idx = 1; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME) - { - lookup_idx = 2; - } - else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME) - { - lookup_idx = 3; - } - - printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max); - } - else - { - read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf); - } - } -#if 0 //Input buffer dump - /*Dump input buffers to a file*/ - dump_input(ps_app_ctxt->fp_dump_op, ps_inp_raw_buf); -#endif //Input buffer dump + u4_timestamp_low++; - if(is_last) + /* Break if all the encoded frames are taken from encoder */ + if (1 == ps_video_encode_op->u4_is_last) + { break; - - u4_timestamp_low++; + } } - /* Pic count is 1 more than actual num frames encoded, beacause last call is to just get the output */ + /* Pic count is 1 more than actual num frames encoded, because last call is to just get the output */ ps_app_ctxt->u4_pics_cnt--; if(ps_app_ctxt->u4_psnr_enable) @@ -2008,9 +2058,10 @@ int main(int argc, char *argv[]) /* error status */ IV_STATUS_T status = IV_SUCCESS; - +#ifdef IOS /* temp var */ CHAR filename_with_path[STRLENGTH]; +#endif WORD32 num_mem_recs; iv_obj_t *ps_enc; WORD32 i; @@ -2247,7 
+2298,7 @@ int main(int argc, char *argv[]) s_init_ip.s_ive_ip.e_rc_mode = s_app_ctxt.u4_rc; s_init_ip.s_ive_ip.u4_max_framerate = s_app_ctxt.u4_max_frame_rate; s_init_ip.s_ive_ip.u4_max_bitrate = s_app_ctxt.u4_max_bitrate; - s_init_ip.s_ive_ip.u4_max_num_bframes = DEFAULT_B_FRAMES; + s_init_ip.s_ive_ip.u4_num_bframes = s_app_ctxt.u4_num_bframes; s_init_ip.s_ive_ip.e_content_type = IV_PROGRESSIVE; s_init_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X; s_init_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y; @@ -2443,10 +2494,13 @@ int main(int argc, char *argv[]) WORD32 achieved_bitrate; if(s_app_ctxt.u4_pics_cnt != 0) + { bytes_per_frame = (s_app_ctxt.u4_total_bytes) / (s_app_ctxt.u4_pics_cnt); + } else + { bytes_per_frame = 0; - + } bytes_per_second = (bytes_per_frame * s_app_ctxt.u4_tgt_frame_rate); achieved_bitrate = bytes_per_second * 8; diff --git a/test/encoder/recon.c b/test/encoder/recon.c index ed63aac..9cb3394 100644 --- a/test/encoder/recon.c +++ b/test/encoder/recon.c @@ -54,7 +54,7 @@ IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf) { WORD32 bytes; - WORD32 wd, ht, strd; + WORD32 wd, ht; UWORD8 *pu1_buf; WORD32 i; WORD32 comp; @@ -68,7 +68,6 @@ IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf) { wd = ps_raw_buf->au4_wd[comp]; ht = ps_raw_buf->au4_ht[comp]; - strd = ps_raw_buf->au4_strd[comp]; pu1_buf = ps_raw_buf->apv_bufs[comp]; for(i = 0; i < ht; i++) { @@ -163,7 +162,7 @@ void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD /* All the pointers and dimensions are initialized here * to support change in resolution from the application */ - luma_size = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht); + luma_size = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht); chroma_size = (luma_size) / 4; ps_raw_buf->apv_bufs[0] = pu1_buf; -- cgit v1.2.3