summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s16
-rw-r--r--common/arm/ih264_iquant_itrans_recon_a9.s3
-rw-r--r--common/arm/ih264_iquant_itrans_recon_dc_a9.s3
-rw-r--r--common/arm/ih264_itrans_recon_a9.s216
-rw-r--r--common/armv8/ih264_deblk_chroma_av8.s2
-rw-r--r--common/armv8/ih264_deblk_luma_av8.s8
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s24
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s28
-rw-r--r--common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s24
-rw-r--r--common/armv8/ih264_intra_pred_chroma_av8.s6
-rw-r--r--common/armv8/ih264_intra_pred_luma_16x16_av8.s2
-rw-r--r--common/armv8/ih264_intra_pred_luma_8x8_av8.s6
-rw-r--r--common/armv8/ih264_mem_fns_neon_av8.s6
-rw-r--r--common/armv8/ih264_resi_trans_quant_av8.s2
-rw-r--r--common/armv8/ih264_weighted_bi_pred_av8.s56
-rw-r--r--common/armv8/ih264_weighted_pred_av8.s28
-rw-r--r--common/ih264_chroma_intra_pred_filters.c2
-rw-r--r--common/ih264_defs.h3
-rw-r--r--common/ih264_itrans_recon.h71
-rw-r--r--common/ih264_structs.h10
-rw-r--r--common/ithread.c398
-rw-r--r--common/ithread.h3
-rw-r--r--common/x86/ih264_chroma_intra_pred_filters_ssse3.c36
-rw-r--r--common/x86/ih264_inter_pred_filters_ssse3.c158
-rw-r--r--common/x86/ih264_iquant_itrans_recon_dc_ssse3.c57
-rw-r--r--common/x86/ih264_iquant_itrans_recon_sse42.c31
-rw-r--r--common/x86/ih264_iquant_itrans_recon_ssse3.c4
-rw-r--r--common/x86/ih264_luma_intra_pred_filters_ssse3.c288
-rw-r--r--common/x86/ih264_padding_ssse3.c26
-rw-r--r--common/x86/ih264_weighted_pred_sse42.c84
-rw-r--r--decoder.arm.mk6
-rw-r--r--decoder.arm64.mk3
-rw-r--r--decoder.mk4
-rw-r--r--decoder.x86.mk2
-rw-r--r--decoder.x86_64.mk3
-rw-r--r--decoder/ih264d_api.c102
-rw-r--r--decoder/ih264d_error_handler.h6
-rw-r--r--decoder/ih264d_function_selector.h4
-rw-r--r--decoder/ih264d_parse_headers.c17
-rw-r--r--decoder/ih264d_parse_islice.c14
-rw-r--r--decoder/ih264d_parse_pslice.c40
-rw-r--r--decoder/ih264d_parse_slice.c154
-rw-r--r--decoder/ih264d_process_intra_mb.c20
-rw-r--r--decoder/ih264d_structs.h5
-rw-r--r--decoder/ih264d_thread_parse_decode.c2
-rw-r--r--decoder/ih264d_utils.c10
-rw-r--r--encoder.arm.mk10
-rw-r--r--encoder.arm64.mk6
-rw-r--r--encoder.mk7
-rw-r--r--encoder.x86.mk2
-rw-r--r--encoder.x86_64.mk2
-rw-r--r--encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s7
-rw-r--r--encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s3
-rw-r--r--encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s1
-rw-r--r--encoder/arm/ih264e_fmt_conv.s39
-rw-r--r--encoder/arm/ih264e_function_selector.c7
-rw-r--r--encoder/arm/ih264e_function_selector_a9q.c291
-rw-r--r--encoder/arm/ih264e_function_selector_av8.c21
-rw-r--r--encoder/arm/ih264e_half_pel.s1
-rw-r--r--encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s20
-rw-r--r--encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s19
-rw-r--r--encoder/armv8/ih264e_half_pel_av8.s245
-rw-r--r--encoder/ih264e_api.c190
-rw-r--r--encoder/ih264e_bitstream.c1
-rw-r--r--encoder/ih264e_bitstream.h65
-rw-r--r--encoder/ih264e_cabac.c819
-rw-r--r--encoder/ih264e_cabac.h452
-rw-r--r--encoder/ih264e_cabac_encode.c2391
-rw-r--r--encoder/ih264e_cabac_init.c226
-rw-r--r--encoder/ih264e_cabac_structs.h221
-rw-r--r--encoder/ih264e_cavlc.c335
-rw-r--r--encoder/ih264e_cavlc.h44
-rw-r--r--encoder/ih264e_core_coding.c9
-rw-r--r--encoder/ih264e_deblk.c178
-rw-r--r--encoder/ih264e_defs.h61
-rw-r--r--encoder/ih264e_encode.c453
-rw-r--r--encoder/ih264e_encode_header.c93
-rw-r--r--encoder/ih264e_error.h5
-rw-r--r--encoder/ih264e_fmt_conv.c5
-rw-r--r--encoder/ih264e_function_selector_generic.c30
-rw-r--r--encoder/ih264e_globals.c182
-rw-r--r--encoder/ih264e_half_pel.c1
-rw-r--r--encoder/ih264e_intra_modes_eval.c11
-rw-r--r--encoder/ih264e_mc.c355
-rw-r--r--encoder/ih264e_me.c1743
-rw-r--r--encoder/ih264e_me.h441
-rw-r--r--encoder/ih264e_modify_frm_rate.c5
-rw-r--r--encoder/ih264e_process.c545
-rw-r--r--encoder/ih264e_process.h30
-rw-r--r--encoder/ih264e_rate_control.c141
-rw-r--r--encoder/ih264e_rate_control.h35
-rw-r--r--encoder/ih264e_rc_mem_interface.c10
-rw-r--r--encoder/ih264e_structs.h187
-rw-r--r--encoder/ih264e_time_stamp.c7
-rw-r--r--encoder/ih264e_utils.c606
-rw-r--r--encoder/ih264e_utils.h39
-rw-r--r--encoder/ih264e_version.c19
-rw-r--r--encoder/ime.c238
-rw-r--r--encoder/ime.h51
-rw-r--r--encoder/ime_defs.h3
-rw-r--r--encoder/ime_distortion_metrics.c1
-rw-r--r--encoder/ime_distortion_metrics.h2
-rw-r--r--encoder/ime_structs.h37
-rw-r--r--encoder/irc_bit_allocation.c2
-rw-r--r--encoder/irc_cbr_buffer_control.c2
-rw-r--r--encoder/irc_common.h2
-rw-r--r--encoder/irc_est_sad.c2
-rw-r--r--encoder/irc_fixed_point_error_bits.c2
-rw-r--r--encoder/irc_mb_model_based.c2
-rw-r--r--encoder/irc_picture_type.c79
-rw-r--r--encoder/irc_picture_type.h1
-rw-r--r--encoder/irc_rate_control_api.c37
-rw-r--r--encoder/irc_rate_control_api.h1
-rw-r--r--encoder/irc_rd_model.c13
-rw-r--r--encoder/irc_vbr_storage_vbv.c2
-rw-r--r--encoder/ithread.h101
-rw-r--r--encoder/ive2.h17
-rw-r--r--encoder/mips/ih264e_function_selector.c7
-rw-r--r--encoder/x86/ih264e_function_selector.c7
-rw-r--r--encoder/x86/ih264e_function_selector_sse42.c13
-rw-r--r--encoder/x86/ih264e_function_selector_ssse3.c13
-rw-r--r--encoder/x86/ih264e_half_pel_ssse3.c1
-rw-r--r--encoder/x86/ih264e_intra_modes_eval_ssse3.c6
-rw-r--r--encoder/x86/ime_distortion_metrics_sse42.c4
-rw-r--r--test/Android.mk1
-rw-r--r--test/decoder/main.c145
-rw-r--r--test/encoder/app.h19
-rw-r--r--test/encoder/input.c1
-rw-r--r--test/encoder/main.c340
-rw-r--r--test/encoder/output.c1
-rw-r--r--test/encoder/psnr.c1
-rw-r--r--test/encoder/recon.c6
132 files changed, 9704 insertions, 4065 deletions
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
index 54183f0..2066a20 100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -177,7 +177,7 @@ loop_16:
vmlsl.s16 q15, d23, d1[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1)
vqrshrun.s32 d22, q1, #10
vqrshrun.s32 d23, q15, #10
- vqshrun.s16 d22, q11, #0
+ vqmovun.s16 d22, q11
vst1.u8 {d22}, [r1], r10 @//Store dest row0, column 1; (1/2,1/2)
vext.16 q11, q13, q14, #2 @//extract a[2] (column2)
vaddl.s16 q1, d20, d26 @// a0 + a5 (column2)
@@ -196,7 +196,7 @@ loop_16:
vqrshrun.s32 d20, q1, #10
vqrshrun.s32 d21, q15, #10
vld1.u32 {d2, d3, d4}, [r0], r2 @ Vector load from src[6_0]
- vqshrun.s16 d22, q10, #0
+ vqmovun.s16 d22, q10
vst1.u8 {d22}, [r1], r7 @//Store dest row0 ,column 2; (1/2,1/2)
@ vERTICAL FILTERING FOR ROW 1
@@ -236,7 +236,7 @@ loop_16:
vmlsl.s16 q15, d23, d1[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1)
vqrshrun.s32 d22, q3, #10
vqrshrun.s32 d23, q15, #10
- vqshrun.s16 d22, q11, #0
+ vqmovun.s16 d22, q11
vst1.u8 {d22}, [r1], r10 @//Store dest row1, column 1; (1/2,1/2)
vext.16 q11, q13, q14, #2 @//extract a[2] (column2)
vaddl.s16 q3, d20, d26 @// a0 + a5 (column2)
@@ -254,7 +254,7 @@ loop_16:
vmlsl.s16 q15, d21, d1[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2)
vqrshrun.s32 d20, q3, #10
vqrshrun.s32 d21, q15, #10
- vqshrun.s16 d22, q10, #0
+ vqmovun.s16 d22, q10
vst1.u8 {d22}, [r1], r7 @//Store dest row1 ,column 2; (1/2,1/2)
subs r8, r8, #2 @ 2 rows processed, decrement by 2
@@ -315,7 +315,7 @@ loop_8:
vaddl.u8 q15, d7, d13 @ temp2 = src[1_0] + src4_0]
vmla.u16 q13, q14, d0[0] @ temp += temp1 * 20
vmls.s16 q13, q15, d1[0] @ temp -= temp2 * 5
- vqshrun.s16 d2, q9, #0
+ vqmovun.s16 d2, q9
@ vERTICAL FILTERING FOR ROW 1
@Q12,Q13 HAVE VERTICAL FILTERED VALUES
@@ -338,7 +338,7 @@ loop_8:
vmlsl.s16 q15, d5, d1[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1)
vqrshrun.s32 d18, q14, #10
vqrshrun.s32 d19, q15, #10
- vqshrun.s16 d3, q9, #0
+ vqmovun.s16 d3, q9
vst1.u8 {d3}, [r1], r3 @//Store dest row1, column 1; (1/2,1/2)
subs r8, r8, #2 @ 2 rows processed, decrement by 2
@@ -398,7 +398,7 @@ loop_4:
vmla.u16 q12, q10, d0[0] @ temp += temp1 * 20
vmls.s16 q12, q11, d1[0] @ temp -= temp2 * 5
vaddl.u8 q15, d7, d13 @ temp2 = src[1_0] + src4_0]
- vqshrun.s16 d2, q9, #0
+ vqmovun.s16 d2, q9
vmla.u16 q13, q14, d0[0] @ temp += temp1 * 20
vmls.s16 q13, q15, d1[0] @ temp -= temp2 * 5
@@ -424,7 +424,7 @@ loop_4:
vmlsl.s16 q15, d5, d1[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1)
vqrshrun.s32 d18, q14, #10
vqrshrun.s32 d19, q15, #10
- vqshrun.s16 d4, q9, #0
+ vqmovun.s16 d4, q9
vst1.u32 {d4[0]}, [r1], r3 @//Store dest row1, column 1; (1/2,1/2)
subs r8, r8, #2 @ 2 rows processed, decrement by 2
diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s
index 4e49f6a..a6af1cb 100644
--- a/common/arm/ih264_iquant_itrans_recon_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_a9.s
@@ -106,6 +106,7 @@
@r8 => iq_start_idx
@r10=> pi2_dc_ld_addr
.text
+.syntax unified
.p2align 2
.global ih264_iquant_itrans_recon_4x4_a9
@@ -141,7 +142,7 @@ ih264_iquant_itrans_recon_4x4_a9:
vmul.s16 q11, q11, q14 @x[i]=(scale[i] * dequant[i]) where i = 8..15
subs r8, r8, #1 @ if r8 == 1 => intra case , so result of subtraction is zero and Z flag is set
- ldreqsh r9, [r10] @ Loads signed halfword pi2_dc_ld_addr[0], if r8==1
+ ldrsheq r9, [r10] @ Loads signed halfword pi2_dc_ld_addr[0], if r8==1
vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3
vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7
diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
index 97c4724..d12665f 100644
--- a/common/arm/ih264_iquant_itrans_recon_dc_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
@@ -108,6 +108,7 @@
@unused => pi2_dc_ld_addr
.text
+.syntax unified
.p2align 2
.global ih264_iquant_itrans_recon_4x4_dc_a9
@@ -136,7 +137,7 @@ ih264_iquant_itrans_recon_4x4_dc_a9:
asr r6, r6, #4 @q0 = (pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0] + rnd_fact)<<(u4_qp_div_6-4)
subs r9, r9, #1 @ if r8 == 1 => intra case , so result of subtraction is zero and Z flag is set
- ldreqsh r10, [r0] @ Loads signed halfword pi2_src[0], if r9==1
+ ldrsheq r10, [r0] @ Loads signed halfword pi2_src[0], if r9==1
moveq r6, r10 @ Restore dc value in case of intra, i.e. r9 == 1
add r6, r6, #32 @i_macro = q0 + 32
diff --git a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s
deleted file mode 100644
index 769d5d7..0000000
--- a/common/arm/ih264_itrans_recon_a9.s
+++ /dev/null
@@ -1,216 +0,0 @@
-@/******************************************************************************
-@ *
-@ * Copyright (C) 2015 The Android Open Source Project
-@ *
-@ * Licensed under the Apache License, Version 2.0 (the "License");
-@ * you may not use this file except in compliance with the License.
-@ * You may obtain a copy of the License at:
-@ *
-@ * http://www.apache.org/licenses/LICENSE-2.0
-@ *
-@ * Unless required by applicable law or agreed to in writing, software
-@ * distributed under the License is distributed on an "AS IS" BASIS,
-@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ * See the License for the specific language governing permissions and
-@ * limitations under the License.
-@ *
-@ *****************************************************************************
-@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-@*/
-@**
-@ *******************************************************************************
-@ * @file
-@ * ih264_itrans_recon_neon_a9.s
-@ *
-@ * @brief
-@ * Contains function definitions for single stage inverse transform
-@ *
-@ *
-@ * @par List of Functions:
-@ * - ih264_itrans_recon_4x4_a9()
-@ *
-@ * @remarks
-@ * None
-@ *
-@ *******************************************************************************
-@*
-@**
-@ *******************************************************************************
-@ *
-@ * @brief
-@ * This function performs Inverse transform type Ci4 for 4*4 block
-@ *
-@ * @par Description:
-@ * Performs inverse transform Ci4 and adds the residue to get the
-@ * reconstructed block
-@ *
-@ * @param[in] pi16_levelBlock
-@ * Input 4x4 coefficients
-@ *
-@ * @param[in] puc_predBuffer
-@ * Prediction 4x4 block
-@ *
-@ * @param[out] puc_reconPic
-@ * Output 4x4 block
-@ *
-@ * @param[in] ui16_picWidth
-@ * Input stride
-@ *
-@ * @param[in] pred_strd
-@ * Prediction stride
-@ *
-@ * @param[in] dst_strd
-@ * Output Stride
-@ *
-@ * @param[in] zero_cols
-@ * Zero columns in pi2_src
-@ *
-@ * @returns Void
-@ *
-@ * @remarks
-@ * None
-@ *
-@ *
-@ *******************************************************************************
-@ *
-@void ih264_itrans_recon_4x4(
-@ WORD16 *pi2_src,
-@ UWORD8 *pu1_pred,
-@ UWORD8 *pu1_recon,
-@ WORD32 src_strd,
-@ WORD32 pred_strd,
-@ WORD32 dst_strd,
-@ UWORD32 q_lev, //quantizer level
-@ WORD32 *pi4_tmp)
-@**************Variables Vs Registers*****************************************
-@r0 => *pi2_src
-@r1 => *pu1_pred
-@r2 => *pu1_recon
-@r3 => src_strd
-@r4 => pred_strd
-@r5 => dst_strd
-@r6 => q_lev
-@r7 => *pi4_tmp
-
-.text
-.p2align 2
-
-
- .global ih264_itrans_recon_4x4_a9
-
-ih264_itrans_recon_4x4_a9:
- stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments
- lsl r3, r3, #1
-
- vld1.16 d0, [r0], r3 @0th row pi2_src_tmp[0]
- ldr r4, [sp, #40] @Loads pred_strd
-
- vld1.16 d1, [r0], r3 @I row pi2_src_tmp[0]
- ldr r5, [sp, #44] @Loads *dst_strd
-
- vld1.16 d2, [r0], r3 @II row pi2_src_tmp[0]
-
- vld1.16 d3, [r0] @III row pi2_src_tmp[0]
- ldr r7, [sp, #52] @Loads *pi4_tmp
-
- vpush {d8-d15}
-
- vtrn.16 d0, d1 @Transpose to get all the 0th element in the single D register
- vtrn.16 d2, d3
- vtrn.32 d0, d2
- vtrn.32 d1, d3 @D0 --> pi2_src_tmp[0], D1 --> pi2_src_tmp[1]
- @D2 --> pi2_src_tmp[2], D3 --> pi2_src_tmp[3]
-
- vaddl.s16 q3, d0, d2 @x0 = (pi2_src_tmp[0] + pi2_src_tmp[2])
- vsubl.s16 q4, d0, d2 @x1 = (pi2_src_tmp[0] - pi2_src_tmp[2])
- vshr.s16 d4, d1, #1 @pi2_src_tmp[1] >> 1
- vshr.s16 d5, d3, #1 @pi2_src_tmp[3] >> 1
-
- vsubl.s16 q5, d4, d3 @x2 = D_SHIFT(pi2_src_tmp[1],1,shft) - pi2_src_tmp[3]
-
- vaddl.s16 q6, d1, d5 @x3 = pi2_src_tmp[1] + D_SHIFT(pi2_src_tmp[3],1,shft)
-
- vadd.s32 q8, q4, q5 @x1 + x2
- vsub.s32 q9, q4, q5 @x1 - x2
-
- vadd.s32 q7, q3, q6 @x0 + x3
- vsub.s32 q10, q3, q6 @x0 - x3
-
- vtrn.32 q7, q8 @Transpose the register to have the adjacent values
-
- vtrn.32 q9, q10
- vadd.s32 d6, d14, d15 @x0(0,1) = (pi4_tblk[0,1] + pi4_tblk[8,9])
-
- vsub.s32 d7, d14, d15 @x1(0,1) = (pi4_tblk[0,1] - pi4_tblk[8,9])
-
- vshr.s32 d4, d16, #1 @pi4_tblk[4,5] >> 1
- vshr.s32 d5, d17, #1 @pi4_tblk[12,13] >> 1
-
- vsub.s32 d8, d4, d17 @x2(0,1) = D_SHIFT(pi4_tblk[4,5],1,shft) - pi4_tblk[12,13]
- vadd.s32 d9, d16, d5 @x3(0,1) = pi4_tblk[4,5] + D_SHIFT(pi4_tblk[12,13],1,shft)
-
- vadd.s32 d10, d18, d19 @x0(2,3) = (pi4_tblk[2,3] + pi4_tblk[10,11])
- vsub.s32 d11, d18, d19 @x1(2,3) = (pi4_tblk[2,3] - pi4_tblk[10,11])
- vshr.s32 d4, d20, #1 @pi4_tblk[6,7] >> 1
- vshr.s32 d5, d21, #1 @pi4_tblk[14,15] >> 1
-
- vld1.32 d30[0], [r1], r4 @I row Load pu1_pred buffer
- vsub.s32 d12, d4, d21 @x2(2,3) = D_SHIFT(pi4_tblk[6,7],1,shft) - pi4_tblk[14,15]
-
- vmovl.u8 q15, d30 @I row Convert 8 bit pred buffer to 16 bit
- vadd.s32 d13, d20, d5 @x3(2,3) = pi4_tblk[6,7] + D_SHIFT(pi4_tblk[14,15],1,shft)
-
- vadd.s32 d16, d6, d9 @I row i_macro(0,1) = x0(0,1) + x3(0,1)
-
- vld1.32 d28[0], [r1], r4 @II row Load pu1_pred buffer
- vadd.s32 d17, d10, d13 @I row i_macro(2,3) = x0(2,3) + x3(2,3)
-
- vqrshrn.s32 d16, q8, #6 @I row i_macro = D_SHIFT(i_macro,6,shft)
-
- vmovl.u8 q14, d28 @II row Convert 8 bit pred buffer to 16 bit
- vadd.u16 d16, d16, d30 @I row i_macro += *pu1_pred_tmp
-
- vqmovun.s16 d16, q8 @I row CLIP_U8(i_macro)
- vadd.s32 d18, d7, d8 @II row i_macro(0,1) = x1(0,1) + x2(0,1)
-
- vld1.32 d26[0], [r1], r4 @III row Load pu1_pred buffer
- vadd.s32 d19, d11, d12 @II row i_macro(2,3) = x1(2,3) + x2(2,3)
-
- vqrshrn.s32 d18, q9, #6 @II row i_macro = D_SHIFT(i_macro,6,shft)
-
- vmovl.u8 q13, d26 @III row Convert 8 bit pred buffer to 16 bit
- vadd.u16 d18, d18, d28 @II row i_macro += *pu1_pred_tmp
-
- vst1.32 d16[0], [r2], r5 @I row store the value
- vsub.s32 d20, d7, d8 @III row i_macro(0,1) = x1(0,1) - x2(0,1)
-
- vqmovun.s16 d18, q9 @II row CLIP_U8(i_macro)
- vsub.s32 d21, d11, d12 @III row i_macro(2,3) = x1(2,3) - x2(2,3)
-
- vld1.32 d24[0], [r1], r4 @IV row Load pu1_pred buffer
- vqrshrn.s32 d20, q10, #6 @III row i_macro = D_SHIFT(i_macro,6,shft)
-
- vmovl.u8 q12, d24 @IV row Convert 8 bit pred buffer to 16 bit
- vadd.u16 d20, d20, d26 @III row i_macro += *pu1_pred_tmp
-
- vqmovun.s16 d20, q10 @III row CLIP_U8(i_macro)
- vsub.s32 d22, d6, d9 @IV row i_macro(0,1) = x0(0,1) - x3(0,1)
-
- vst1.32 d18[0], [r2], r5 @II row store the value
- vsub.s32 d23, d10, d13 @IV row i_macro(2,3) = x0(2,3) - x3(2,3)
-
- vqrshrn.s32 d22, q11, #6 @IV row i_macro = D_SHIFT(i_macro,6,shft)
-
- vst1.32 d20[0], [r2], r5 @III row store the value
- vadd.u16 d22, d22, d24 @IV row i_macro += *pu1_pred_tmp
-
- vqmovun.s16 d22, q11 @IV row CLIP_U8(i_macro)
- vst1.32 d22[0], [r2], r5 @IV row store the value
-
-
- vpop {d8-d15}
- ldmfd sp!, {r4-r12, r15} @Reload the registers from SP
-
-
-
-
diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s
index 3021556..a4dbd23 100644
--- a/common/armv8/ih264_deblk_chroma_av8.s
+++ b/common/armv8/ih264_deblk_chroma_av8.s
@@ -337,7 +337,7 @@ ih264_deblk_chroma_horz_bslt4_av8:
ldr x9, [sp, #80]
sub x0, x0, x1, lsl #1 //x0 = uc_edgePixelU pointing to p1 of chroma U
rev w7, w7 //
- mov v12.2s[0], w7 //D12[0] = ui_Bs
+ mov v12.s[0], w7 //D12[0] = ui_Bs
ld1 {v16.s}[0], [x8] //D16[0] contains cliptab_cb
ld1 {v17.s}[0], [x9] //D17[0] contains cliptab_cr
ld2 {v6.8b, v7.8b}, [x0], x1 //Q3=p1
diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s
index bcdb03f..1b3950d 100644
--- a/common/armv8/ih264_deblk_luma_av8.s
+++ b/common/armv8/ih264_deblk_luma_av8.s
@@ -97,7 +97,7 @@ ih264_deblk_luma_horz_bslt4_av8:
sub x0, x0, x1 //x0 pointer to p2
rev w4, w4 //
ld1 {v10.8b, v11.8b}, [x0], x1 //p2 values are loaded into q5
- mov v12.2s[0], w4 //d12[0] = ui_Bs
+ mov v12.s[0], w4 //d12[0] = ui_Bs
mov x6, x0 //keeping backup of pointer to p1
ld1 {v8.8b, v9.8b}, [x0], x1 //p1 values are loaded into q4
mov x7, x0 //keeping backup of pointer to p0
@@ -364,8 +364,8 @@ ih264_deblk_luma_horz_bs4_av8:
mov v26.d[1] , v27.d[0]
mov v2.d[1] , v3.d[0]
uaddl v16.8h, v31.8b, v25.8b //p2+p3 H
- mla v12.8h, v8.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 L
- mla v4.8h, v16.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 H
+ mla v12.8h, v8.8h , v1.h[0] //(p0+q0+p1)+3*p2+2*p3 L
+ mla v4.8h, v16.8h , v1.h[0] //(p0+q0+p1)+3*p2+2*p3 H
bic v16.16b, v20.16b , v18.16b //((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
mov v17.d[0] , v16.d[1] //&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
bit v2.16b, v28.16b , v20.16b //choosing between po' and p0"
@@ -443,7 +443,7 @@ ih264_deblk_luma_vert_bslt4_av8:
ld1 {v4.8b}, [x0], x1 //row3
rev w12, w12 //reversing ui_bs
ld1 {v6.8b}, [x0], x1 //row4
- mov v18.2s[0], w12 //d12[0] = ui_Bs
+ mov v18.s[0], w12 //d12[0] = ui_Bs
ld1 {v16.s}[0], [x14] //D16[0] contains cliptab
ld1 {v8.8b}, [x0], x1 //row5
uxtl v18.8h, v18.8b //q6 = uc_Bs in each 16 bt scalar
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
index 202c516..d2897b6 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -146,7 +146,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -174,7 +174,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
uaddl v22.8h, v4.8b, v10.8b
ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0]
@@ -228,7 +228,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -253,7 +253,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
uaddl v22.8h, v6.8b, v0.8b
@@ -306,7 +306,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -334,7 +334,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
@@ -387,7 +387,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -427,7 +427,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v25.8b, v25.8h
- mov v19.2s[1], v25.2s[0]
+ mov v19.s[1], v25.s[0]
@@ -501,7 +501,7 @@ loop_8:
ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
uqxtn v25.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v25.2s[1], v13.2s[0]
+ mov v25.s[1], v13.s[0]
uaddl v16.8h, v8.8b, v10.8b
@@ -535,7 +535,7 @@ loop_8:
uaddl v28.8h, v9.8b, v11.8b
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
uaddl v14.8h, v5.8b, v3.8b
@@ -576,7 +576,7 @@ loop_8:
mls v16.8h, v30.8h , v24.8h
uqxtn v27.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v27.2s[1], v13.2s[0]
+ mov v27.s[1], v13.s[0]
ext v22.16b, v28.16b , v16.16b , #10
@@ -616,7 +616,7 @@ loop_8:
subs x4, x4, #4
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
mov v0.16b, v8.16b
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
index 38f971b..546c807 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
@@ -275,7 +275,7 @@ loop_16_lowhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
@@ -313,7 +313,7 @@ loop_16_lowhalf:
uaddl v2.8h, v1.8b, v4.8b
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter
@@ -355,7 +355,7 @@ loop_16_lowhalf:
mls v28.8h, v2.8h , v24.8h
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
saddl2 v6.4s, v12.8h, v28.8h
@@ -384,7 +384,7 @@ loop_16_lowhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
mov v13.16b, v9.16b
@@ -523,7 +523,7 @@ loop_16_highhalf:
mls v20.8h, v2.8h , v24.8h
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x8], x2
urhadd v26.8b, v18.8b , v26.8b
@@ -558,7 +558,7 @@ loop_16_highhalf:
uaddl v2.8h, v1.8b, v4.8b
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
ld1 {v0.2s, v1.2s}, [x8], x2
@@ -598,7 +598,7 @@ loop_16_highhalf:
mls v28.8h, v2.8h , v24.8h
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
@@ -627,7 +627,7 @@ loop_16_highhalf:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
mov v13.16b, v9.16b
@@ -768,7 +768,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
@@ -812,7 +812,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
add v30.8h, v14.8h , v16.8h
mls v8.8h, v2.8h , v24.8h
@@ -855,7 +855,7 @@ loop_8:
uqxtn v27.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v27.2s[1], v19.2s[0]
+ mov v27.s[1], v19.s[0]
saddl v18.4s, v12.4h, v28.4h
saddl2 v6.4s, v12.8h, v28.8h
@@ -885,7 +885,7 @@ loop_8:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
mov v12.16b, v8.16b
@@ -1024,7 +1024,7 @@ loop_4:
sqrshrun v9.8b, v6.8h, #5
sqrshrun v7.8b, v7.8h, #5
- mov v9.2s[1], v7.2s[0]
+ mov v9.s[1], v7.s[0]
ext v20.8b, v18.8b , v19.8b , #2
@@ -1089,7 +1089,7 @@ loop_4:
sqrshrun v10.8b, v8.8h, #5
sqrshrun v9.8b, v9.8h, #5
- mov v10.2s[1], v9.2s[0]
+ mov v10.s[1], v9.s[0]
mov v12.8b, v28.8b
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
index b1e4866..3f3e297 100644
--- a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
@@ -209,7 +209,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -238,7 +238,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.2s}, [x1]
sqrshrun v20.8b, v20.8h, #5
@@ -297,7 +297,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
ext v26.16b, v20.16b , v22.16b , #6
@@ -323,7 +323,7 @@ loop_16:
ld1 {v22.4s}, [x6], x7
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.4s}, [x1]
sqrshrun v20.8b, v20.8h, #5
sqrshrun v21.8b, v22.8h, #5
@@ -380,7 +380,7 @@ loop_16:
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -409,7 +409,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v18.2s}, [x1]
sqrshrun v20.8b, v20.8h, #5
@@ -466,7 +466,7 @@ loop_16:
ld1 {v22.4s}, [x9], #16
uqxtn v18.8b, v18.8h
uqxtn v19.8b, v19.8h
- mov v18.2s[1], v19.2s[0]
+ mov v18.s[1], v19.s[0]
ext v24.16b, v20.16b , v22.16b , #4
@@ -506,7 +506,7 @@ loop_16:
uqxtn v19.8b, v19.8h
uqxtn v18.8b, v18.8h
- mov v19.2s[1], v18.2s[0]
+ mov v19.s[1], v18.s[0]
ld1 {v20.4s}, [x6], #16
ld1 {v22.4s}, [x6], x7
@@ -586,7 +586,7 @@ loop_8:
ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
uqxtn v25.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v25.2s[1], v13.2s[0]
+ mov v25.s[1], v13.s[0]
uaddl v16.8h, v8.8b, v10.8b
@@ -620,7 +620,7 @@ loop_8:
uaddl v28.8h, v9.8b, v11.8b
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
urhadd v12.16b, v12.16b , v14.16b
urhadd v13.16b, v13.16b , v15.16b
@@ -662,7 +662,7 @@ loop_8:
mls v16.8h, v30.8h , v24.8h
uqxtn v27.8b, v12.8h
uqxtn v13.8b, v13.8h
- mov v27.2s[1], v13.2s[0]
+ mov v27.s[1], v13.s[0]
sqrshrun v14.8b, v14.8h, #5
ext v22.16b, v28.16b , v16.16b , #10
@@ -702,7 +702,7 @@ loop_8:
subs x4, x4, #4
uqxtn v13.8b, v16.8h
uqxtn v17.8b, v17.8h
- mov v13.2s[1], v17.2s[0]
+ mov v13.s[1], v17.s[0]
urhadd v12.16b, v12.16b , v14.16b
urhadd v13.16b, v13.16b , v15.16b
diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s
index 2c5efb3..8f0f282 100644
--- a/common/armv8/ih264_intra_pred_chroma_av8.s
+++ b/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -501,7 +501,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
add v16.8h, v0.8h , v16.8h
dup v20.8h, v22.h[0]
mul v4.8h, v6.8h , v20.8h
- dup v30.8h, v22.4h[1]
+ dup v30.8h, v22.h[1]
mul v18.8h, v6.8h , v20.8h
mul v14.8h, v6.8h , v30.8h
mul v8.8h, v6.8h , v30.8h
@@ -511,7 +511,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
sqrshrun v28.8b, v24.8h, #5
add v26.8h, v16.8h , v8.8h
sqrshrun v29.8b, v0.8h, #5
- dup v20.8h, v22.4h[2]
+ dup v20.8h, v22.h[2]
st1 {v28.8b, v29.8b}, [x1], x3
sqrshrun v28.8b, v2.8h, #5
sqrshrun v29.8b, v26.8h, #5
@@ -520,7 +520,7 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
st1 {v28.8b, v29.8b}, [x1], x3
add v24.8h, v12.8h , v4.8h
add v0.8h, v16.8h , v18.8h
- dup v30.8h, v22.4h[3]
+ dup v30.8h, v22.h[3]
sqrshrun v28.8b, v24.8h, #5
sqrshrun v29.8b, v0.8h, #5
mul v14.8h, v6.8h , v30.8h
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
index a9eb165..c1847b5 100644
--- a/common/armv8/ih264_intra_pred_luma_16x16_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -467,7 +467,7 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
ldrb w5, [x7], #-1
sxtw x5, w5
add x8, x8, x8, lsl #1
- dup v4.8h, v0.4h[0]
+ dup v4.8h, v0.h[0]
add x12, x12, x8
ldrb w9, [x0], #1
sxtw x9, w9
diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
index 2b972ca..bf9a4c1 100644
--- a/common/armv8/ih264_intra_pred_luma_8x8_av8.s
+++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -337,7 +337,7 @@ ih264_intra_pred_luma_8x8_mode_dc_av8:
uaddlp v3.2s, v1.4h
uaddlp v2.1d, v3.2s
dup v10.8h, w5
- dup v8.8h, v2.4h[0]
+ dup v8.8h, v2.h[0]
add v12.8h, v8.8h , v10.8h
sqrshrun v31.8b, v12.8h, #4
st1 {v31.8b}, [x1], x3
@@ -360,7 +360,7 @@ top_available: // ONLT TOP AVAILABLE
uaddlp v13.2s, v14.4h
uaddlp v12.1d, v13.2s
rshrn v4.8b, v12.8h, #3
- dup v31.8b, v4.8b[0]
+ dup v31.8b, v4.b[0]
st1 {v31.8b}, [x1], x3
st1 {v31.8b}, [x1], x3
st1 {v31.8b}, [x1], x3
@@ -1059,7 +1059,7 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
mov v30.16b, v4.16b
mov v31.16b, v6.16b
tbl v12.8b, {v30.16b, v31.16b}, v10.8b
- dup v14.16b, v5.8b[7] //
+ dup v14.16b, v5.b[7] //
tbl v13.8b, {v30.16b, v31.16b}, v11.8b
mov v12.d[1], v13.d[0]
ext v16.16b, v12.16b , v14.16b , #2
diff --git a/common/armv8/ih264_mem_fns_neon_av8.s b/common/armv8/ih264_mem_fns_neon_av8.s
index f5c2e29..4e9020d 100644
--- a/common/armv8/ih264_mem_fns_neon_av8.s
+++ b/common/armv8/ih264_mem_fns_neon_av8.s
@@ -119,7 +119,7 @@ loop_neon_memcpy:
subs x2, x2, #8
bge loop_neon_memcpy
- cmp x2, #-8
+ cmn x2, #8
beq end_func1
arm_memcpy:
@@ -184,7 +184,7 @@ loop_neon_memset:
subs x2, x2, #8
bge loop_neon_memset
- cmp x2, #-8
+ cmn x2, #8
beq end_func2
arm_memset:
@@ -254,7 +254,7 @@ loop_neon_memset_16bit:
subs x2, x2, #8
bge loop_neon_memset_16bit
- cmp x2, #-8
+ cmn x2, #8
beq end_func3
arm_memset_16bit:
diff --git a/common/armv8/ih264_resi_trans_quant_av8.s b/common/armv8/ih264_resi_trans_quant_av8.s
index dc1c680..316c220 100644
--- a/common/armv8/ih264_resi_trans_quant_av8.s
+++ b/common/armv8/ih264_resi_trans_quant_av8.s
@@ -665,7 +665,7 @@ ih264_hadamard_quant_2x2_uv_av8:
ld2 {v0.4h-v1.4h}, [x0] //load src
ld1 {v30.h}[0], [x2] //load pu2_scale_matrix[0]
- dup v30.4h, v30.4h[0] //pu2_scale_matrix
+ dup v30.4h, v30.h[0] //pu2_scale_matrix
uxtl v30.4s, v30.4h //pu2_scale_matrix
neg w4, w4
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
index 96ef50a..b039fba 100644
--- a/common/armv8/ih264_weighted_bi_pred_av8.s
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -173,10 +173,10 @@ loop_4: //each iteration processes four rows
ld1 {v10.s}[1], [x1], x4 //load row 4 in source 2
uxtl v8.8h, v8.8b //converting rows 3,4 in source 1 to 16-bit
uxtl v10.8h, v10.8b //converting rows 3,4 in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for rows 1,2
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for rows 1,2
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for rows 3,4
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for rows 3,4
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for rows 1,2
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for rows 1,2
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for rows 3,4
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for rows 3,4
subs w11, w11, #4 //decrement ht by 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from rows 3,4
@@ -205,18 +205,18 @@ loop_8: //each iteration processes four rows
ld1 {v18.8b}, [x1], x4 //load row 4 in source 2
uxtl v8.8h, v8.8b //converting row 2 in source 1 to 16-bit
uxtl v10.8h, v10.8b //converting row 2 in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for row 1
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for row 1
uxtl v12.8h, v12.8b //converting row 3 in source 1 to 16-bit
uxtl v14.8h, v14.8b //converting row 3 in source 2 to 16-bit
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for row 2
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for row 2
uxtl v16.8h, v16.8b //converting row 4 in source 1 to 16-bit
uxtl v18.8h, v18.8b //converting row 4 in source 2 to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3
- mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3
- mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4
- mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4
+ mul v12.8h, v12.8h , v2.h[0] //weight 1 mult. for row 3
+ mla v12.8h, v14.8h , v2.h[2] //weight 2 mult. for row 3
+ mul v16.8h, v16.8h , v2.h[0] //weight 1 mult. for row 4
+ mla v16.8h, v18.8h , v2.h[2] //weight 2 mult. for row 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 2
srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 3
@@ -251,35 +251,35 @@ loop_16: //each iteration processes two rows
ld1 {v18.8b, v19.8b}, [x1], x4 //load row 4 in source 2
uxtl v4.8h, v5.8b //converting row 1H in source 1 to 16-bit
uxtl v6.8h, v7.8b //converting row 1H in source 2 to 16-bit
- mul v20.8h, v20.8h , v2.4h[0] //weight 1 mult. for row 1L
- mla v20.8h, v22.8h , v2.4h[2] //weight 2 mult. for row 1L
+ mul v20.8h, v20.8h , v2.h[0] //weight 1 mult. for row 1L
+ mla v20.8h, v22.8h , v2.h[2] //weight 2 mult. for row 1L
uxtl v24.8h, v8.8b //converting row 2L in source 1 to 16-bit
uxtl v26.8h, v10.8b //converting row 2L in source 2 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1H
- mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1H
+ mul v4.8h, v4.8h , v2.h[0] //weight 1 mult. for row 1H
+ mla v4.8h, v6.8h , v2.h[2] //weight 2 mult. for row 1H
uxtl v8.8h, v9.8b //converting row 2H in source 1 to 16-bit
uxtl v10.8h, v11.8b //converting row 2H in source 2 to 16-bit
- mul v24.8h, v24.8h , v2.4h[0] //weight 1 mult. for row 2L
- mla v24.8h, v26.8h , v2.4h[2] //weight 2 mult. for row 2L
+ mul v24.8h, v24.8h , v2.h[0] //weight 1 mult. for row 2L
+ mla v24.8h, v26.8h , v2.h[2] //weight 2 mult. for row 2L
uxtl v28.8h, v12.8b //converting row 3L in source 1 to 16-bit
uxtl v30.8h, v14.8b //converting row 3L in source 2 to 16-bit
- mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2H
- mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2H
+ mul v8.8h, v8.8h , v2.h[0] //weight 1 mult. for row 2H
+ mla v8.8h, v10.8h , v2.h[2] //weight 2 mult. for row 2H
uxtl v12.8h, v13.8b //converting row 3H in source 1 to 16-bit
uxtl v14.8h, v15.8b //converting row 3H in source 2 to 16-bit
- mul v28.8h, v28.8h , v2.4h[0] //weight 1 mult. for row 3L
- mla v28.8h, v30.8h , v2.4h[2] //weight 2 mult. for row 3L
+ mul v28.8h, v28.8h , v2.h[0] //weight 1 mult. for row 3L
+ mla v28.8h, v30.8h , v2.h[2] //weight 2 mult. for row 3L
uxtl v22.8h, v16.8b //converting row 4L in source 1 to 16-bit
uxtl v6.8h, v18.8b //converting row 4L in source 2 to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3H
- mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3H
+ mul v12.8h, v12.8h , v2.h[0] //weight 1 mult. for row 3H
+ mla v12.8h, v14.8h , v2.h[2] //weight 2 mult. for row 3H
uxtl v16.8h, v17.8b //converting row 4H in source 1 to 16-bit
uxtl v18.8h, v19.8b //converting row 4H in source 2 to 16-bit
- mul v22.8h, v22.8h , v2.4h[0] //weight 1 mult. for row 4L
- mla v22.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 4L
+ mul v22.8h, v22.8h , v2.h[0] //weight 1 mult. for row 4L
+ mla v22.8h, v6.8h , v2.h[2] //weight 2 mult. for row 4L
srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 1L
- mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4H
- mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4H
+ mul v16.8h, v16.8h , v2.h[0] //weight 1 mult. for row 4H
+ mla v16.8h, v18.8h , v2.h[2] //weight 2 mult. for row 4H
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1H
srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 2L
saddw v20.8h, v20.8h , v3.8b //adding offset for row 1L
diff --git a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
index ec5bb7a..69ed3b0 100644
--- a/common/armv8/ih264_weighted_pred_av8.s
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -143,8 +143,8 @@ loop_4: //each iteration processes four rows
uxtl v4.8h, v4.8b //converting rows 1,2 to 16-bit
uxtl v6.8h, v6.8b //converting rows 3,4 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight mult. for rows 1,2
- mul v6.8h, v6.8h , v2.4h[0] //weight mult. for rows 3,4
+ mul v4.8h, v4.8h , v2.h[0] //weight mult. for rows 1,2
+ mul v6.8h, v6.8h , v2.h[0] //weight mult. for rows 3,4
subs w7, w7, #4 //decrement ht by 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
@@ -175,11 +175,11 @@ loop_8: //each iteration processes four rows
uxtl v6.8h, v6.8b //converting row 2 to 16-bit
uxtl v8.8h, v8.8b //converting row 3 to 16-bit
- mul v4.8h, v4.8h , v2.4h[0] //weight mult. for row 1
+ mul v4.8h, v4.8h , v2.h[0] //weight mult. for row 1
uxtl v10.8h, v10.8b //converting row 4 to 16-bit
- mul v6.8h, v6.8h , v2.4h[0] //weight mult. for row 2
- mul v8.8h, v8.8h , v2.4h[0] //weight mult. for row 3
- mul v10.8h, v10.8h , v2.4h[0] //weight mult. for row 4
+ mul v6.8h, v6.8h , v2.h[0] //weight mult. for row 2
+ mul v8.8h, v8.8h , v2.h[0] //weight mult. for row 3
+ mul v10.8h, v10.8h , v2.h[0] //weight mult. for row 4
srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from row 2
@@ -214,20 +214,20 @@ loop_16: //each iteration processes two rows
uxtl v14.8h, v5.8b //converting row 1H to 16-bit
ld1 {v10.8b, v11.8b}, [x0], x2 //load row 4 in source
uxtl v16.8h, v6.8b //converting row 2L to 16-bit
- mul v12.8h, v12.8h , v2.4h[0] //weight mult. for row 1L
+ mul v12.8h, v12.8h , v2.h[0] //weight mult. for row 1L
uxtl v18.8h, v7.8b //converting row 2H to 16-bit
- mul v14.8h, v14.8h , v2.4h[0] //weight mult. for row 1H
+ mul v14.8h, v14.8h , v2.h[0] //weight mult. for row 1H
uxtl v20.8h, v8.8b //converting row 3L to 16-bit
- mul v16.8h, v16.8h , v2.4h[0] //weight mult. for row 2L
+ mul v16.8h, v16.8h , v2.h[0] //weight mult. for row 2L
uxtl v22.8h, v9.8b //converting row 3H to 16-bit
- mul v18.8h, v18.8h , v2.4h[0] //weight mult. for row 2H
+ mul v18.8h, v18.8h , v2.h[0] //weight mult. for row 2H
uxtl v24.8h, v10.8b //converting row 4L to 16-bit
- mul v20.8h, v20.8h , v2.4h[0] //weight mult. for row 3L
+ mul v20.8h, v20.8h , v2.h[0] //weight mult. for row 3L
uxtl v26.8h, v11.8b //converting row 4H to 16-bit
- mul v22.8h, v22.8h , v2.4h[0] //weight mult. for row 3H
- mul v24.8h, v24.8h , v2.4h[0] //weight mult. for row 4L
+ mul v22.8h, v22.8h , v2.h[0] //weight mult. for row 3H
+ mul v24.8h, v24.8h , v2.h[0] //weight mult. for row 4L
srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 1L
- mul v26.8h, v26.8h , v2.4h[0] //weight mult. for row 4H
+ mul v26.8h, v26.8h , v2.h[0] //weight mult. for row 4H
srshl v14.8h, v14.8h , v0.8h //rounds off the weighted samples from row 1H
srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 2L
saddw v12.8h, v12.8h , v3.8b //adding offset for row 1L
diff --git a/common/ih264_chroma_intra_pred_filters.c b/common/ih264_chroma_intra_pred_filters.c
index ee145e5..1894bfc 100644
--- a/common/ih264_chroma_intra_pred_filters.c
+++ b/common/ih264_chroma_intra_pred_filters.c
@@ -117,7 +117,6 @@ void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
WORD32 top_avail; /* availability of top predictors (only for DC) */
UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
- UNUSED(src_strd);
/* temporary variables to store accumulated first left half,second left half,
* first top half,second top half of U and V values*/
@@ -127,6 +126,7 @@ void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
WORD32 val_u1 = 0, val_u2 = 0, val_v1 = 0, val_v2 = 0;
WORD32 col, row; /*loop variables*/
+ UNUSED(src_strd);
left_avail = ngbr_avail & 0x11;
left_avail1 = ngbr_avail & 1;
diff --git a/common/ih264_defs.h b/common/ih264_defs.h
index 6bf74d1..b26a5a4 100644
--- a/common/ih264_defs.h
+++ b/common/ih264_defs.h
@@ -270,6 +270,9 @@ typedef enum
P8x8 = 6,
PSKIP = 7,
IPCM = 8,
+ B16x16 = 9,
+ BSKIP = 10,
+ BDIRECT = 11,
MAX_MBTYPES,
}MBTYPES_T;
diff --git a/common/ih264_itrans_recon.h b/common/ih264_itrans_recon.h
deleted file mode 100644
index fd1f239..0000000
--- a/common/ih264_itrans_recon.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/******************************************************************************
- *
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- *****************************************************************************
- * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-*/
-/**
-*******************************************************************************
-* @file
-* ih264_itrans_recon.h
-*
-* @brief
-* Contains function declarations for inverse transform and reconstruction of
-* the quantized macro blocks
-*
-* @author
-* Ittiam
-*
-* @par List of Functions:
-* - ih264_itrans_recon_ft
-* - ih264_itrans_recon_4x4
-* - ih264_itrans_recon_8x8
-* - ih264_itrans_recon_4x4_a9
-*
-*
-* @remarks
-* None
-*
-*******************************************************************************
-*/
-
-#ifndef IH264_ITRANS_RECON_H_
-#define IH264_ITRANS_RECON_H_
-
-/*****************************************************************************/
-/* Extern Function Declarations */
-/*****************************************************************************/
-
-typedef void ih264_itrans_recon_ft(WORD16 *pi2_src,
- UWORD8 *pu1_pred,
- UWORD8 *pu1_recon,
- WORD32 src_strd,
- WORD32 pred_strd,
- WORD32 dst_strd,
- UWORD32 q_lev,
- WORD32 *pi4_tmp);
-
-/*C declarations*/
-
-ih264_itrans_recon_ft ih264_itrans_recon_4x4;
-
-ih264_itrans_recon_ft ih264_itrans_recon_8x8;
-
-/*A9 declarations */
-
-ih264_itrans_recon_ft ih264_itrans_recon_4x4_a9;
-
-#endif /* IH264_ITRANS_RECON_H_ */
diff --git a/common/ih264_structs.h b/common/ih264_structs.h
index fa4e142..0a7c940 100644
--- a/common/ih264_structs.h
+++ b/common/ih264_structs.h
@@ -1353,6 +1353,11 @@ typedef struct
*/
UWORD8 u1_ref_idx_reordering_flag_l0;
+ /*
+ * ref_pic_list_reordering_flag_l1
+ */
+ UWORD8 u1_ref_idx_reordering_flag_l1;
+
/**
* Reference prediction list modification
*/
@@ -1369,11 +1374,6 @@ typedef struct
ref_list_t as_ref_pic_list1[MAX_DPB_SIZE];
/*
- * weighted_bipred_idc
- */
- WORD8 u1_weighted_bipred_idc;
-
- /*
* no_output_of_prior_pics_flag
*/
UWORD8 u1_no_output_of_prior_pics_flag;
diff --git a/common/ithread.c b/common/ithread.c
index f7335d9..d19bdec 100644
--- a/common/ithread.c
+++ b/common/ithread.c
@@ -38,12 +38,6 @@
#include <string.h>
#include "ih264_typedefs.h"
-/*
- * If the end target is bare metal, then there shall be no OS.
- * In this case, the functions ithread_* used inside the h264 encoder library to assist multicore
- * will not longer be functional. To resolve link issues, the functions are re-defined with no body.
- */
-#ifndef BAREMETAL
#include "ithread.h"
@@ -52,7 +46,6 @@
#define UNUSED(x) ((void)(x))
-#ifndef X86_MSVC
//#define PTHREAD_AFFINITY
//#define SYSCALL_AFFINITY
@@ -69,270 +62,6 @@
#include <sys/prctl.h>
#endif
-#endif
-
-#if defined(X86_MSVC) || defined (X86_MINGW)
-
-#include <windows.h>
-#define SEM_MAX_COUNT 100
-#define SEM_INCREMENT_COUNT 1
-
-UWORD32 ithread_get_handle_size(void)
-{
- return (sizeof(HANDLE));
-}
-
-UWORD32 ithread_get_mutex_lock_size(void)
-{
- return (sizeof(HANDLE));
-}
-
-WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
-{
- HANDLE *ppv_thread_handle;
- HANDLE thread_handle_value;
-
- UNUSED(attribute);
-
- if(0 == thread_handle)
- return -1;
-
- ppv_thread_handle = (HANDLE *)thread_handle;
- thread_handle_value = (void *)CreateThread
- (NULL, /* Attributes */
- 1024*128, /* Stack i4_size */
- (LPTHREAD_START_ROUTINE)strt, /* Thread function */
- argument, /* Parameters */
- 0, /* Creation flags */
- NULL); /* Thread ID */
- *ppv_thread_handle = (HANDLE)thread_handle_value;
-
- return 0;
-}
-
-WORD32 ithread_join(void *thread_handle, void ** val_ptr)
-{
- HANDLE *ppv_thread_handle;
- HANDLE thread_handle_value;
-
- UNUSED(val_ptr);
-
- if(0 == thread_handle)
- return -1;
-
- ppv_thread_handle = (HANDLE *)thread_handle;
- thread_handle_value = *ppv_thread_handle;
-
- if(WAIT_OBJECT_0 == WaitForSingleObject(thread_handle_value, INFINITE))
- {
- CloseHandle(thread_handle_value);
- }
-
- return 0;
-}
-
-void ithread_exit(void *thread_handle)
-{
- HANDLE *ppv_thread_handle;
- HANDLE thread_handle_value;
- DWORD thread_exit_code;
-
- if(0 == thread_handle)
- return;
-
- ppv_thread_handle = (HANDLE *)thread_handle;
- thread_handle_value = *ppv_thread_handle;
- /* Get exit code for thread. If the return value is 0, means thread is busy */
- if( 0 != GetExitCodeThread(thread_handle_value, &thread_exit_code))
- {
- TerminateThread(thread_handle_value, thread_exit_code);
- }
-
- return;
-}
-
-WORD32 ithread_get_mutex_struct_size(void)
-{
- return (sizeof(HANDLE));
-}
-
-WORD32 ithread_mutex_init(void *mutex)
-{
- HANDLE *ppv_mutex_handle;
- HANDLE mutex_handle_value;
-
- if(0 == mutex)
- return -1;
-
- ppv_mutex_handle = (HANDLE *)mutex;
- mutex_handle_value = CreateSemaphore(NULL, 1, 1, NULL);
- *ppv_mutex_handle = mutex_handle_value;
- return 0;
-}
-
-WORD32 ithread_mutex_destroy(void *mutex)
-{
- HANDLE *ppv_mutex_handle;
- HANDLE mutex_handle_value;
-
- if(0 == mutex)
- return -1;
-
- ppv_mutex_handle = (HANDLE *)mutex;
- mutex_handle_value = *ppv_mutex_handle;
- CloseHandle(mutex_handle_value);
- return 0;
-}
-
-WORD32 ithread_mutex_lock(void *mutex)
-{
- HANDLE *ppv_mutex_handle;
- HANDLE mutex_handle_value;
- DWORD result = 0;
-
- if(0 == mutex)
- return -1;
-
- ppv_mutex_handle = (HANDLE *)mutex;
- mutex_handle_value = *ppv_mutex_handle;
- result = WaitForSingleObject(mutex_handle_value, INFINITE);
-
- if(WAIT_OBJECT_0 == result)
- return 0;
-
- return 1;
-
-}
-
-WORD32 ithread_mutex_unlock(void *mutex)
-{
- HANDLE *ppv_mutex_handle;
- HANDLE mutex_handle_value;
- DWORD result = 0;
-
- if(0 == mutex)
- return -1;
-
- ppv_mutex_handle = (HANDLE *)mutex;
- mutex_handle_value = *ppv_mutex_handle;
- result = ReleaseSemaphore(mutex_handle_value, 1, NULL);
-
- if(0 == result)
- return -1;
-
- return 0;
-}
-
-void ithread_yield(void) { }
-
-void ithread_usleep(UWORD32 u4_time_us)
-{
- UWORD32 u4_time_ms = u4_time_us / 1000;
- Sleep(u4_time_ms);
-}
-
-void ithread_msleep(UWORD32 u4_time_ms)
-{
- Sleep(u4_time_ms);
-}
-
-void ithread_sleep(UWORD32 u4_time)
-{
- UWORD32 u4_time_ms = u4_time * 1000;
- Sleep(u4_time_ms);
-}
-
-UWORD32 ithread_get_sem_struct_size(void)
-{
- return (sizeof(HANDLE));
-}
-
-WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
-{
- HANDLE *sem_handle = (HANDLE *)sem;
- HANDLE sem_handle_value;
-
- if(0 == sem)
- return -1;
-
- sem_handle_value = CreateSemaphore(NULL, /* Security Attribute*/
- value, /* Initial count */
- SEM_MAX_COUNT,/* Max value */
- NULL); /* Name, not used */
- *sem_handle = sem_handle_value;
- return 0;
-}
-
-WORD32 ithread_sem_post(void *sem)
-{
- HANDLE *sem_handle = (HANDLE *)sem;
- HANDLE sem_handle_value;
-
- if(0 == sem)
- return -1;
-
- sem_handle_value = *sem_handle;
-
- /* Post on Semaphore by releasing the lock on mutex */
- if(ReleaseSemaphore(sem_handle_value, SEM_INCREMENT_COUNT, NULL))
- return 0;
-
- return -1;
-}
-
-WORD32 ithread_sem_wait(void *sem)
-{
- DWORD result = 0;
- HANDLE *sem_handle = (HANDLE *)sem;
- HANDLE sem_handle_value;
-
- if(0 == sem)
- return -1;
-
- sem_handle_value = *sem_handle;
-
- /* Wait on Semaphore object infinitly */
- result = WaitForSingleObject(sem_handle_value, INFINITE);
-
- /* If lock on semaphore is acquired, return SUCCESS */
- if(WAIT_OBJECT_0 == result)
- return 0;
-
- /* If call timeouts, return FAILURE */
- if(WAIT_TIMEOUT == result)
- return -1;
-
- return 0;
-}
-
-WORD32 ithread_sem_destroy(void *sem)
-{
- HANDLE *sem_handle = (HANDLE *)sem;
- HANDLE sem_handle_value;
-
- if(0 == sem)
- return -1;
-
- sem_handle_value = *sem_handle;
-
- if(FALSE == CloseHandle(sem_handle_value) )
- {
- return -1;
- }
- return 0;
-}
-
-WORD32 ithread_set_affinity(WORD32 core_id)
-{
- return 1;
-}
-
-void ithread_set_name(CHAR *pc_thread_name)
-{
- return;
-}
-
-#else
UWORD32 ithread_get_handle_size(void)
{
@@ -358,11 +87,6 @@ WORD32 ithread_join(void *thread_handle, void ** val_ptr)
return pthread_join(*pthread_handle, NULL);
}
-void ithread_exit(void *val_ptr)
-{
- return pthread_exit(val_ptr);
-}
-
WORD32 ithread_get_mutex_struct_size(void)
{
return(sizeof(pthread_mutex_t));
@@ -485,125 +209,3 @@ WORD32 ithread_set_affinity(WORD32 core_id)
return 1;
}
-#endif
-
-#else
-
-UWORD32 ithread_get_handle_size(void)
-{
- return sizeof(int);
-}
-
-UWORD32 ithread_get_mutex_lock_size(void)
-{
- return sizeof(int);
-}
-
-UWORD32 ithread_get_cond_size(void)
-{
- return(sizeof(int));
-}
-WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
-{
- return 0;
-}
-
-WORD32 ithread_join(void *thread_handle, void ** val_ptr)
-{
- return 0;
-}
-
-void ithread_exit(void *val_ptr)
-{
- return;
-}
-
-WORD32 ithread_mutex_init(void *mutex)
-{
- return 0;
-}
-
-WORD32 ithread_mutex_destroy(void *mutex)
-{
- return 0;
-}
-
-WORD32 ithread_mutex_lock(void *mutex)
-{
- return 0;
-}
-
-WORD32 ithread_mutex_unlock(void *mutex)
-{
- return 0;
-}
-
-void ithread_yield(void)
-{
- return;
-}
-
-void ithread_sleep(UWORD32 u4_time_in_us)
-{
- return;
-}
-
-void ithread_usleep(UWORD32 u4_time_us)
-{
- return;
-}
-
-UWORD32 ithread_get_sem_strcut_size(void)
-{
- return(sizeof(int));
-}
-
-
-WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
-{
- return 0;
-}
-
-WORD32 ithread_sem_post(void *sem)
-{
- return 0;
-}
-
-
-WORD32 ithread_sem_wait(void *sem)
-{
- return 0;
-}
-
-WORD32 ithread_sem_destroy(void *sem)
-{
- return 0;
-}
-
-void ithread_set_name(UWORD8 *pu1_thread_name)
-{
- return;
-}
-
-void ithread_condition_init(void *condition)
-{
- return;
-}
-
-void ithread_condition_signal(void * condition)
-{
- return;
-}
-
-
-
-void ithread_condition_wait(void *condition,void *mutex)
-{
- return;
-}
-
-WORD32 ithread_set_affinity(WORD32 core_id)
-{
- return 1;
-}
-#endif
diff --git a/common/ithread.h b/common/ithread.h
index f926f83..3e5aa9c 100644
--- a/common/ithread.h
+++ b/common/ithread.h
@@ -29,7 +29,6 @@
/* List of Functions : ithread_get_handle_size */
/* ithread_get_mutex_lock_size */
/* ithread_create */
-/* ithread_exit */
/* ithread_join */
/* ithread_get_mutex_struct_size */
/* ithread_mutex_init */
@@ -65,8 +64,6 @@ UWORD32 ithread_get_mutex_lock_size(void);
WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument);
-void ithread_exit(void *val_ptr);
-
WORD32 ithread_join(void *thread_id, void ** val_ptr);
WORD32 ithread_get_mutex_struct_size(void);
diff --git a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
index 45101a4..d43ce20 100644
--- a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
+++ b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
@@ -103,47 +103,35 @@ void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src,
UWORD8 *pu1_left; /* Pointer to start of top predictors */
WORD32 dst_strd2;
- __m128i left_16x8b, left_sh_16x8b;
__m128i row1_16x8b, row2_16x8b;
- __m128i const_14_15_16x8b;
UNUSED(src_strd);
UNUSED(ngbr_avail);
pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
- left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 14));
-
- const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
dst_strd2 = dst_strd << 1;
- left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
- row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);
- row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);
+ row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left)));
+ row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 2)));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
- left_16x8b = _mm_slli_si128(left_16x8b, 4);
- left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
pu1_dst += dst_strd2;
- row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);
- row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);
+ row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 4)));
+ row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 6)));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
- left_16x8b = _mm_slli_si128(left_16x8b, 4);
- left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
pu1_dst += dst_strd2;
- row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);
- row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);
+ row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 8)));
+ row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 10)));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
- left_16x8b = _mm_slli_si128(left_16x8b, 4);
- left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
pu1_dst += dst_strd2;
- row1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);
- row2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);
+ row1_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 12)));
+ row2_16x8b = _mm_set1_epi16(*((WORD16 *)(pu1_left - 14)));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
}
@@ -273,7 +261,6 @@ void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src,
//calculating a, b and c
{
WORD32 h_u, h_v, v_u, v_v;
- WORD32 temp1, temp2;
__m128i h_val1_16x8b, h_val2_16x8b;
__m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
@@ -302,13 +289,10 @@ void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src,
h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
- temp1 = _mm_extract_epi16(h_val1_16x8b, 3);
- temp2 = _mm_extract_epi16(v_val1_16x8b, 3);
-
hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
- a_u = ((temp1 & 0xff) + (temp2 & 0xff)) << 4;
- a_v = ((temp1 >> 8) + (temp2 >> 8)) << 4;
+ a_u = (pu1_left[7 * (-2)] + pu1_top[14]) << 4;
+ a_v = (pu1_left[7 * (-2) + 1] + pu1_top[15]) << 4;
h_u = _mm_extract_epi16(hv_val_4x32b, 0);
h_v = _mm_extract_epi16(hv_val_4x32b, 2);
diff --git a/common/x86/ih264_inter_pred_filters_ssse3.c b/common/x86/ih264_inter_pred_filters_ssse3.c
index 6d318c9..480a8c7 100644
--- a/common/x86/ih264_inter_pred_filters_ssse3.c
+++ b/common/x86/ih264_inter_pred_filters_ssse3.c
@@ -111,23 +111,12 @@ void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src,
if(wd == 4)
{
- __m128i mask_full_128b, mask_low_32b;
-
- mask_full_128b = _mm_set1_epi8(0xff);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
- // mask for first four bytes
-
do
{
- y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
- y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
- y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd2));
- y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd3));
-
- _mm_maskmoveu_si128(y_0_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y_1_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(y_2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(y_3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ *((WORD32 *)(pu1_dst)) = *((WORD32 *)(pu1_src));
+ *((WORD32 *)(pu1_dst + dst_strd)) = *((WORD32 *)(pu1_src + src_strd));
+ *((WORD32 *)(pu1_dst + dst_strd2)) = *((WORD32 *)(pu1_src + src_strd2));
+ *((WORD32 *)(pu1_dst + dst_strd3)) = *((WORD32 *)(pu1_src + src_strd3));
ht -= 4;
pu1_src += src_strd4;
@@ -255,11 +244,6 @@ void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src,
__m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
__m128i res_r0r1_16x8b;
- __m128i mask_full_16x8b, mask_low32b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_low32b = _mm_srli_si128(mask_full_16x8b, 12); // mask for first four bytes
-
//Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
//Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
@@ -307,9 +291,9 @@ void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src,
res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b);
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
ht -= 2;
pu1_src += src_strd << 1;
@@ -525,10 +509,6 @@ void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src,
if(wd == 4)
{
- __m128i mask_low32b;
-
- mask_low32b = _mm_set1_epi8(0xff);
-
//Epilogue: Load all the pred rows except sixth and seventh row
// for the first and second row processing.
src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
@@ -542,8 +522,6 @@ void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src,
src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
pu1_src += src_strd;
- mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes
-
src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
@@ -572,9 +550,9 @@ void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src,
res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 4);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
src_r0_16x8b = src_r2_16x8b;
src_r1_16x8b = src_r3_16x8b;
@@ -893,15 +871,12 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src,
__m128i res_8x16b, res_16x8b;
__m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
- __m128i const_val512_4x32b, mask_low32b;
-
- mask_low32b = _mm_set1_epi8(0xff);
+ __m128i const_val512_4x32b;
coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
- mask_low32b = _mm_srli_si128(mask_low32b, 12);
const_val512_4x32b = _mm_set1_epi32(512);
src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp));
@@ -947,9 +922,9 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src,
res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 4);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
src_r0_8x16b = src_r2_8x16b;
src_r1_8x16b = src_r3_8x16b;
@@ -1196,8 +1171,6 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src,
// Horizontal 6-tap filtering
{
- ht_tmp = ht + 5;
-
__m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
__m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
@@ -1206,6 +1179,8 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src,
__m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ ht_tmp = ht + 5;
+
coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
@@ -1551,11 +1526,6 @@ void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src,
__m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
__m128i res_r0r1_16x8b;
- __m128i mask_full_16x8b, mask_low32b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_low32b = _mm_srli_si128(mask_full_16x8b, 12); // mask for first four bytes
-
//Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
//Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
@@ -1607,9 +1577,9 @@ void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src,
res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b);
res_r0r1_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_r0r1_16x8b); //computing q-pel
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
ht -= 2;
pu1_src += src_strd << 1;
@@ -1849,10 +1819,6 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
if(wd == 4)
{
- __m128i mask_low32b;
-
- mask_low32b = _mm_set1_epi8(0xff);
-
//Epilogue: Load all the pred rows except sixth and seventh row
// for the first and second row processing.
src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
@@ -1866,8 +1832,6 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
pu1_src += src_strd;
- mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes
-
src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
@@ -1904,9 +1868,9 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 4);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
src_r0_16x8b = src_r2_16x8b;
src_r1_16x8b = src_r3_16x8b;
@@ -2157,6 +2121,9 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src,
UWORD8 *pu1_tmp1, *pu1_tmp2;
WORD32 x_offset, y_offset;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
pu1_tmp1 = pu1_tmp;
dydx &= 0xf;
@@ -2169,9 +2136,6 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src,
pu1_pred_horiz = pu1_src + (y_offset >> 1) * src_strd - 2;
//the filter input starts from x[-2] (till x[3])
- __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
- __m128i const_val16_8x16b;
-
coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
@@ -2257,11 +2221,6 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src,
__m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
__m128i res_r0r1_16x8b;
- __m128i mask_low32b;
-
- mask_low32b = _mm_set1_epi8(0xff);
- mask_low32b = _mm_srli_si128(mask_low32b, 12);
-
//Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
//Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
@@ -2313,9 +2272,9 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src,
res_r0r1_16x8b = _mm_avg_epu8(res_r0r1_16x8b,src_r0r1_vpel_16x8b);
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
- _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_r0r1_16x8b);
ht -= 2;
pu1_pred_horiz += src_strd << 1;
@@ -2852,16 +2811,11 @@ void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src,
__m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
__m128i const_val512_4x32b, const_val16_8x16b;
- __m128i mask_low32b;
-
- mask_low32b = _mm_set1_epi8(0xff);
coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
- mask_low32b = _mm_srli_si128(mask_low32b, 12);
-
const_val512_4x32b = _mm_set1_epi32(512);
const_val16_8x16b = _mm_set1_epi16(16);
@@ -2897,7 +2851,7 @@ void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src,
res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
ht--;
pi2_temp2 = pi2_temp2 + 4 + 5;
@@ -3424,12 +3378,9 @@ void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src,
__m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
__m128i const_val512_4x32b, const_val16_8x16b;
- __m128i mask_low32b;
- mask_low32b = _mm_set1_epi8(0xff);
const_val512_4x32b = _mm_set1_epi32(512);
const_val16_8x16b = _mm_set1_epi16(16);
- mask_low32b = _mm_srli_si128(mask_low32b, 12);
coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
@@ -3483,9 +3434,9 @@ void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src,
res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 4);
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(res_16x8b);
src_r0_8x16b = src_r2_8x16b;
src_r1_8x16b = src_r3_8x16b;
@@ -4106,65 +4057,6 @@ void ih264_inter_pred_chroma_ssse3(UWORD8 *pu1_src,
}
while(ht > 0);
- /*
- WORD32 AB, CD;
-
- __m128i src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
- __m128i src_r1r2_16x8b, src_r2r3_16x8b;
- __m128i res_AB_8x16b, res_CD_8x16b, res_8x16b, res_16x8b;
- __m128i mask_low32b;
-
- __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b;
- __m128i const_shuff_16x8b;
-
- AB = (B << 8) + A;
- CD = (D << 8) + C;
-
- coeffAB_16x8b = _mm_set1_epi16(AB);
- coeffCD_16x8b = _mm_set1_epi16(CD);
-
- round_add32_8x16b = _mm_set1_epi16(32);
-
- mask_low32b = _mm_set1_epi8(0xff);
- src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); //u1[0] v1[0] u1[1] v1[1] u1[2] v1[2] u1[3] v1[3]
- pu1_src += src_strd;
-
- const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x0b090a08, 0x0d0b0c0a);
- mask_low32b = _mm_srli_si128(mask_low32b, 12);
-
- do
- {
- src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src); //u2[0] v2[0] u2[1] v2[1] u1[2] v2[2] u2[3] v2[3]
- src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd)); //u3[0] v3[0] u3[1] v3[1] u3[2] v3[2] u3[3] v3[3]
-
- src_r1r2_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
- src_r2r3_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
-
- src_r1r2_16x8b = _mm_shuffle_epi8(src_r1r2_16x8b, const_shuff_16x8b); //u1[0] u1[1] v1[0] v1[1] u1[1] u1[2] v1[1] v1[2]
- //u2[0] u2[1] v2[0] v2[1] u2[1] u2[2] v2[1] v2[2]
- src_r2r3_16x8b = _mm_shuffle_epi8(src_r2r3_16x8b, const_shuff_16x8b); //u2[0] u2[1] v2[0] v2[1] u2[1] u2[2] v2[1] v2[2]
- //u3[0] u3[1] v3[0] v3[1] u3[1] u3[2] v3[1] v3[2]
- res_AB_8x16b = _mm_maddubs_epi16(src_r1r2_16x8b, coeffAB_16x8b);
- res_CD_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeffCD_16x8b);
-
- res_8x16b = _mm_add_epi16(res_AB_8x16b, round_add32_8x16b);
- res_8x16b = _mm_add_epi16(res_8x16b, res_CD_8x16b);
- res_8x16b = _mm_srai_epi16(res_8x16b, 6);
- res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
-
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)pu1_dst);
-
- ht -= 2;
- pu1_src += src_strd << 1;
- res_16x8b = _mm_srli_si128(res_16x8b, 4);
- src_r1_16x8b = src_r3_16x8b;
-
- _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst + dst_strd));
-
- pu1_dst += dst_strd << 1;
- }
- while(ht > 0);
- */
}
else if(wd == 4)
{
diff --git a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
index 565cc75..bcfe503 100644
--- a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
+++ b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
@@ -30,8 +30,8 @@
* Mohit [100664]
*
* @par List of Functions:
- * - ihevc_iquant_itrans_recon_4x4_dc_ssse3()
- * - ihevc_iquant_itrans_recon_8x8_dc_ssse3()
+ * - ih264_iquant_itrans_recon_4x4_dc_ssse3()
+ * - ih264_iquant_itrans_recon_8x8_dc_ssse3()
*
* @remarks
* None
@@ -113,6 +113,13 @@ void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src,
UWORD32 *pu4_out = (UWORD32 *)pu1_out;
WORD32 q0 = pi2_src[0];
WORD16 i_macro, rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+
+ __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3;
+ __m128i sign_reg;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i temp4, temp5, temp6, temp7;
+ __m128i value_add;
+
UNUSED (pi2_tmp);
INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
@@ -122,11 +129,7 @@ void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src,
i_macro = ((q0 + 32) >> 6);
- __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3;
- __m128i sign_reg;
- __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
- __m128i temp4, temp5, temp6, temp7;
- __m128i value_add = _mm_set1_epi16(i_macro);
+ value_add = _mm_set1_epi16(i_macro);
zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
//Load pred buffer
@@ -235,6 +238,13 @@ void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src,
{
WORD32 q0 = pi2_src[0];
WORD16 i_macro, rnd_fact = (qp_div < 6) ? 1 << (5 - qp_div) : 0;
+
+ __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3,pred_r4,pred_r5,pred_r6,pred_r7;
+ __m128i sign_reg;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i temp1,temp2,temp3,temp4, temp5, temp6, temp7,temp8;
+ __m128i value_add;
+
UNUSED (pi2_tmp);
UNUSED (iq_start_idx);
UNUSED (pi2_dc_ld_addr);
@@ -242,11 +252,7 @@ void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src,
INV_QUANT(q0, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
i_macro = ((q0 + 32) >> 6);
- __m128i predload_r,pred_r0, pred_r1, pred_r2, pred_r3,pred_r4,pred_r5,pred_r6,pred_r7;
- __m128i sign_reg;
- __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
- __m128i temp1,temp2,temp3,temp4, temp5, temp6, temp7,temp8;
- __m128i value_add = _mm_set1_epi16(i_macro);
+ value_add = _mm_set1_epi16(i_macro);
//Load pred buffer row 0
predload_r = _mm_loadl_epi64((__m128i *)(&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
@@ -397,6 +403,7 @@ void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src,
__m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
__m128i chroma_mask = _mm_set1_epi16 (0xFF);
__m128i value_add = _mm_set1_epi16(i_macro);
+ __m128i out_r0, out_r1, out_r2, out_r3;
UNUSED (pi2_src);
UNUSED (pu2_iscal_mat);
@@ -438,12 +445,26 @@ void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src,
pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- all 16 bits
pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- all 16 bits
- chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b); //1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 -- 8 bits
-
- _mm_maskmoveu_si128(pred_r0, chroma_mask, (char *)(&pu1_out[0]));
- _mm_maskmoveu_si128(pred_r1, chroma_mask, (char *)(&pu1_out[out_strd]));
- _mm_maskmoveu_si128(pred_r2, chroma_mask, (char *)(&pu1_out[2*out_strd]));
- _mm_maskmoveu_si128(pred_r3, chroma_mask, (char *)(&pu1_out[3*out_strd]));
+ chroma_mask = _mm_set1_epi16 (0xFF00);
+ out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
+ out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[out_strd]));
+ out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * out_strd]));
+ out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * out_strd]));
+
+ out_r0 = _mm_and_si128(out_r0, chroma_mask);
+ out_r1 = _mm_and_si128(out_r1, chroma_mask);
+ out_r2 = _mm_and_si128(out_r2, chroma_mask);
+ out_r3 = _mm_and_si128(out_r3, chroma_mask);
+
+ out_r0 = _mm_add_epi8(out_r0, pred_r0);
+ out_r1 = _mm_add_epi8(out_r1, pred_r1);
+ out_r2 = _mm_add_epi8(out_r2, pred_r2);
+ out_r3 = _mm_add_epi8(out_r3, pred_r3);
+
+ _mm_storel_epi64((__m128i *)(&pu1_out[0]), out_r0);
+ _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), out_r1);
+ _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), out_r2);
+ _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), out_r3);
}
diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c
index 6399b65..f27111f 100644
--- a/common/x86/ih264_iquant_itrans_recon_sse42.c
+++ b/common/x86/ih264_iquant_itrans_recon_sse42.c
@@ -30,8 +30,8 @@
* Mohit [100664]
*
* @par List of Functions:
- * - ihevc_iquant_itrans_recon_4x4_sse42()
- * - ihevc_iquant_itrans_recon_chroma_4x4_sse42()
+ * - ih264_iquant_itrans_recon_4x4_sse42()
+ * - ih264_iquant_itrans_recon_chroma_4x4_sse42()
*
* @remarks
* None
@@ -370,6 +370,7 @@ void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src,
__m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6)));
__m128i value_32 = _mm_set1_epi32(32);
__m128i chroma_mask = _mm_set1_epi16 (0xFF);
+ __m128i out_r0, out_r1, out_r2, out_r3;
UNUSED (pi2_tmp);
/*************************************************************/
@@ -548,10 +549,24 @@ void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src,
resq_r2 = _mm_cvtepu8_epi16(resq_r2); //p20 p21 p22 p23 -- all 16 bits
resq_r3 = _mm_cvtepu8_epi16(resq_r3); //p30 p31 p32 p33 -- all 16 bits
- chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b);
-
- _mm_maskmoveu_si128(resq_r0, chroma_mask, (char *)(&pu1_out[0]));
- _mm_maskmoveu_si128(resq_r1, chroma_mask, (char *)(&pu1_out[out_strd]));
- _mm_maskmoveu_si128(resq_r2, chroma_mask, (char *)(&pu1_out[2*out_strd]));
- _mm_maskmoveu_si128(resq_r3, chroma_mask, (char *)(&pu1_out[3*out_strd]));
+ chroma_mask = _mm_set1_epi16 (0xFF00);
+ out_r0 = _mm_loadl_epi64((__m128i *) (&pu1_out[0]));
+ out_r1 = _mm_loadl_epi64((__m128i *) (&pu1_out[out_strd]));
+ out_r2 = _mm_loadl_epi64((__m128i *) (&pu1_out[2 * out_strd]));
+ out_r3 = _mm_loadl_epi64((__m128i *) (&pu1_out[3 * out_strd]));
+
+ out_r0 = _mm_and_si128(out_r0, chroma_mask);
+ out_r1 = _mm_and_si128(out_r1, chroma_mask);
+ out_r2 = _mm_and_si128(out_r2, chroma_mask);
+ out_r3 = _mm_and_si128(out_r3, chroma_mask);
+
+ out_r0 = _mm_add_epi8(out_r0, resq_r0);
+ out_r1 = _mm_add_epi8(out_r1, resq_r1);
+ out_r2 = _mm_add_epi8(out_r2, resq_r2);
+ out_r3 = _mm_add_epi8(out_r3, resq_r3);
+
+ _mm_storel_epi64((__m128i *)(&pu1_out[0]), out_r0);
+ _mm_storel_epi64((__m128i *)(&pu1_out[out_strd]), out_r1);
+ _mm_storel_epi64((__m128i *)(&pu1_out[2 * out_strd]), out_r2);
+ _mm_storel_epi64((__m128i *)(&pu1_out[3 * out_strd]), out_r3);
}
diff --git a/common/x86/ih264_iquant_itrans_recon_ssse3.c b/common/x86/ih264_iquant_itrans_recon_ssse3.c
index 388cafe..30f7e59 100644
--- a/common/x86/ih264_iquant_itrans_recon_ssse3.c
+++ b/common/x86/ih264_iquant_itrans_recon_ssse3.c
@@ -30,8 +30,8 @@
* Mohit [100664]
*
* @par List of Functions:
- * - ihevc_iquant_itrans_recon_4x4_ssse3()
- * - ihevc_iquant_itrans_recon_8x8_ssse3()
+ * - ih264_iquant_itrans_recon_4x4_ssse3()
+ * - ih264_iquant_itrans_recon_8x8_ssse3()
*
* @remarks
* None
diff --git a/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
index 5a35372..a1721d5 100644
--- a/common/x86/ih264_luma_intra_pred_filters_ssse3.c
+++ b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
@@ -122,28 +122,22 @@ void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src,
{
UWORD8 *pu1_top;
WORD32 dst_strd2, dst_strd3;
-
- __m128i top_16x8b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 i4_top;
UNUSED(src_strd);
UNUSED(ngbr_avail);
- mask_full_128b = _mm_set1_epi8(0xff);
-
pu1_top = pu1_src + BLK_SIZE + 1;
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
- top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
+ i4_top = *((WORD32 *)pu1_top);
dst_strd2 = dst_strd << 1;
dst_strd3 = dst_strd + dst_strd2;
- _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(top_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ *((WORD32 *)(pu1_dst)) = i4_top;
+ *((WORD32 *)(pu1_dst + dst_strd)) = i4_top;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = i4_top;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = i4_top;
}
/**
@@ -185,39 +179,31 @@ void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src,
WORD32 dst_strd,
WORD32 ngbr_avail)
{
- UWORD8 *pu1_left;
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ WORD32 row1,row2,row3,row4;
+ UWORD8 val;
WORD32 dst_strd2, dst_strd3;
- WORD32 val1, val2;
-
- __m128i left_16x8b;
- __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
- __m128i mask_full_128b, mask_low_32b;
UNUSED(src_strd);
UNUSED(ngbr_avail);
-
- mask_full_128b = _mm_set1_epi8(0xff);
-
pu1_left = pu1_src + BLK_SIZE - 1;
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
- left_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
-
- val1 = _mm_extract_epi16(left_16x8b, 1);
- val2 = _mm_extract_epi16(left_16x8b, 0);
-
- row1_16x8b = _mm_set1_epi8(val1 >> 8);
- row2_16x8b = _mm_set1_epi8(val1 & 0xff);
- row3_16x8b = _mm_set1_epi8(val2 >> 8);
- row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+ val = *pu1_left;
+ row1 = val + (val << 8) + (val << 16) + (val << 24);
+ val = *(pu1_left - 1);
+ row2 = val + (val << 8) + (val << 16) + (val << 24);
+ val = *(pu1_left - 2);
+ row3 = val + (val << 8) + (val << 16) + (val << 24);
+ val = *(pu1_left - 3);
+ row4 = val + (val << 8) + (val << 16) + (val << 24);
dst_strd2 = dst_strd << 1;
dst_strd3 = dst_strd + dst_strd2;
- _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/**
@@ -259,72 +245,43 @@ void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src,
WORD32 ngbr_avail)
{
UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
- UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
- UWORD8 *pu1_left, *pu1_top;
- WORD32 dc_val, flag;
+ UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
WORD32 dst_strd2, dst_strd3;
-
- __m128i mask_full_128b, mask_low_32b;
- __m128i dcval_16x8b;
-
+ WORD32 val = 0;
UNUSED(src_strd);
UNUSED(ngbr_avail);
-
- mask_full_128b = _mm_set1_epi8(0xff);
-
u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
-
- pu1_left = pu1_src + BLK_SIZE - 1;
pu1_top = pu1_src + BLK_SIZE + 1;
+ pu1_left = pu1_src + BLK_SIZE - 1;
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
- flag = u1_useleft + u1_usetop;
-
- if(flag)
+ if(u1_useleft)
{
- WORD32 shft, ofst = 0;
-
- __m128i left_16x8b, top_16x8b, val_16x8b, tmp_8x16b, zero_vector;
-
- if(u1_useleft)
- {
- left_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
- ofst += 2;
- }
- else
- left_16x8b = _mm_setzero_si128();
-
- zero_vector = _mm_setzero_si128();
-
- if(u1_usetop)
- {
- top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
- ofst += 2;
- }
- else
- top_16x8b = _mm_setzero_si128();
-
- shft = flag + 1;
- val_16x8b = _mm_unpacklo_epi32(left_16x8b, top_16x8b);
- tmp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
-
- dc_val = _mm_extract_epi16(tmp_8x16b, 0);
- dc_val = (dc_val + ofst) >> shft;
+ val += *pu1_left--;
+ val += *pu1_left--;
+ val += *pu1_left--;
+ val += *pu1_left + 2;
}
- else
- dc_val = 128;
+ if(u1_usetop)
+ {
+ val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3)
+ + 2;
+ }
+ /* Since 2 is added if either left/top pred is there,
+ val still being zero implies both preds are not there */
+ val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128;
+
+ val = val + (val << 8) + (val << 16) + (val << 24);
dst_strd2 = dst_strd << 1;
dst_strd3 = dst_strd + dst_strd2;
- dcval_16x8b = _mm_set1_epi8(dc_val);
-
- _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ *((WORD32 *)(pu1_dst)) = val;
+ *((WORD32 *)(pu1_dst + dst_strd)) = val;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = val;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = val;
}
/**
@@ -371,7 +328,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src,
__m128i top_16x8b, top_8x16b, top_sh_8x16b;
__m128i res1_8x16b, res2_8x16b, res_16x8b;
__m128i zero_vector, const_2_8x16b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
@@ -382,13 +339,11 @@ void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src,
zero_vector = _mm_setzero_si128();
top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5 t6 t7
- mask_full_128b = _mm_set1_epi8(0xff);
top_sh_8x16b = _mm_srli_si128(top_8x16b, 2); //t1 t2 t3 t4 t5 t6 t7 0
const_2_8x16b = _mm_set1_epi16(2);
top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4); //t1 t2 t3 t4 t5 t6 t7 t7
res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
res2_8x16b = _mm_srli_si128(res1_8x16b, 2);
res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
@@ -399,13 +354,18 @@ void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src,
dst_strd3 = dst_strd + dst_strd2;
res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
- _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)pu1_dst);
+ row1 = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 1);
- _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ row2 = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 1);
- _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ row3 = _mm_cvtsi128_si32(res_16x8b);
res_16x8b = _mm_srli_si128(res_16x8b, 1);
- _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row4 = _mm_cvtsi128_si32(res_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/**
@@ -454,7 +414,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src,
__m128i res1_8x16b, res2_8x16b;
__m128i res1_16x8b, res2_16x8b;
__m128i zero_vector, const_2_8x16b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
@@ -468,13 +428,11 @@ void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src,
top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector);
top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector);
- mask_full_128b = _mm_set1_epi8(0xff);
res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
const_2_8x16b = _mm_set1_epi16(2);
res2_8x16b = _mm_srli_si128(res1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //l3+2*l2+l1+2 l2+2*l1+l0+2...
res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
@@ -483,12 +441,18 @@ void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src,
dst_strd3 = dst_strd + dst_strd2;
res2_16x8b = _mm_srli_si128(res1_16x8b, 3);
- _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)pu1_dst);
+
+ row1 = _mm_cvtsi128_si32(res2_16x8b);
res2_16x8b = _mm_srli_si128(res1_16x8b, 2);
- _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ row2 = _mm_cvtsi128_si32(res2_16x8b);
res2_16x8b = _mm_srli_si128(res1_16x8b, 1);
- _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(res1_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row3 = _mm_cvtsi128_si32(res2_16x8b);
+ row4 = _mm_cvtsi128_si32(res1_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/**
@@ -537,14 +501,11 @@ void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src,
__m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b;
__m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
__m128i zero_vector, const_2_8x16b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
- mask_full_128b = _mm_set1_epi8(0xff);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
pu1_left = pu1_src + BLK_SIZE - 1;
val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2));
@@ -575,10 +536,15 @@ void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src,
dst_strd2 = dst_strd << 1;
dst_strd3 = dst_strd + dst_strd2;
- _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row1 = _mm_cvtsi128_si32(row1_16x8b);
+ row2 = _mm_cvtsi128_si32(row2_16x8b);
+ row3 = _mm_cvtsi128_si32(row3_16x8b);
+ row4 = _mm_cvtsi128_si32(row4_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/*
@@ -629,14 +595,11 @@ void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src,
__m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
__m128i zero_vector, const_2_8x16b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
- mask_full_128b = _mm_set1_epi8(0xff);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
pu1_left = pu1_src + BLK_SIZE - 1;
val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
@@ -669,10 +632,15 @@ void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src,
row2_16x8b = _mm_srli_si128(row4_16x8b, 4);
row3_16x8b = _mm_srli_si128(row4_16x8b, 2);
- _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row1 = _mm_cvtsi128_si32(row1_16x8b);
+ row2 = _mm_cvtsi128_si32(row2_16x8b);
+ row3 = _mm_cvtsi128_si32(row3_16x8b);
+ row4 = _mm_cvtsi128_si32(row4_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/**
@@ -721,14 +689,11 @@ void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src,
__m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
__m128i zero_vector, const_2_8x16b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
- mask_full_128b = _mm_set1_epi8(0xff);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
pu1_top = pu1_src +BLK_SIZE + 1;
val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
@@ -756,10 +721,15 @@ void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src,
row3_16x8b = _mm_srli_si128(row1_16x8b, 1);
row4_16x8b = _mm_srli_si128(row2_16x8b, 1);
- _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row1 = _mm_cvtsi128_si32(row1_16x8b);
+ row2 = _mm_cvtsi128_si32(row2_16x8b);
+ row3 = _mm_cvtsi128_si32(row3_16x8b);
+ row4 = _mm_cvtsi128_si32(row4_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/**
@@ -809,14 +779,11 @@ void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src,
__m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
__m128i zero_vector, const_2_8x16b, rev_16x8b;
- __m128i mask_full_128b, mask_low_32b;
+ WORD32 row1,row2,row3,row4;
UNUSED(src_strd);
UNUSED(ngbr_avail);
- mask_full_128b = _mm_set1_epi8(0xff);
- mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
-
pu1_left = pu1_src + BLK_SIZE - 1;
zero_vector = _mm_setzero_si128();
@@ -851,10 +818,15 @@ void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src,
row3_16x8b = _mm_srli_si128(row1_16x8b, 4);
row4_16x8b = _mm_srli_si128(row1_16x8b, 6);
- _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
- _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
- _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+ row1 = _mm_cvtsi128_si32(row1_16x8b);
+ row2 = _mm_cvtsi128_si32(row2_16x8b);
+ row3 = _mm_cvtsi128_si32(row3_16x8b);
+ row4 = _mm_cvtsi128_si32(row4_16x8b);
+
+ *((WORD32 *)(pu1_dst)) = row1;
+ *((WORD32 *)(pu1_dst + dst_strd)) = row2;
+ *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
+ *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
}
/******************* 8x8 Modes *******************/
@@ -1814,9 +1786,7 @@ void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src,
{
UWORD8 *pu1_left;
WORD32 dst_strd2, dst_strd3, dst_strd4;
- WORD32 val1, val2;
- __m128i val_16x8b;
__m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
UNUSED(src_strd);
@@ -1826,60 +1796,46 @@ void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src,
dst_strd4 = dst_strd << 2;
- val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15));
-
dst_strd2 = dst_strd << 1;
dst_strd3 = dst_strd4 - dst_strd;
- val1 = _mm_extract_epi16(val_16x8b, 7);
- val2 = _mm_extract_epi16(val_16x8b, 6);
-
- row1_16x8b = _mm_set1_epi8(val1 >> 8);
- row2_16x8b = _mm_set1_epi8(val1 & 0xff);
- row3_16x8b = _mm_set1_epi8(val2 >> 8);
- row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+ row1_16x8b = _mm_set1_epi8(*(pu1_left));
+ row2_16x8b = _mm_set1_epi8(*(pu1_left - 1));
+ row3_16x8b = _mm_set1_epi8(*(pu1_left - 2));
+ row4_16x8b = _mm_set1_epi8(*(pu1_left - 3));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
- val1 = _mm_extract_epi16(val_16x8b, 5);
- val2 = _mm_extract_epi16(val_16x8b, 4);
-
pu1_dst += dst_strd4;
- row1_16x8b = _mm_set1_epi8(val1 >> 8);
- row2_16x8b = _mm_set1_epi8(val1 & 0xff);
- row3_16x8b = _mm_set1_epi8(val2 >> 8);
- row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+ row1_16x8b = _mm_set1_epi8(*(pu1_left - 4));
+ row2_16x8b = _mm_set1_epi8(*(pu1_left - 5));
+ row3_16x8b = _mm_set1_epi8(*(pu1_left - 6));
+ row4_16x8b = _mm_set1_epi8(*(pu1_left - 7));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
- val1 = _mm_extract_epi16(val_16x8b, 3);
- val2 = _mm_extract_epi16(val_16x8b, 2);
-
pu1_dst += dst_strd4;
- row1_16x8b = _mm_set1_epi8(val1 >> 8);
- row2_16x8b = _mm_set1_epi8(val1 & 0xff);
- row3_16x8b = _mm_set1_epi8(val2 >> 8);
- row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+ row1_16x8b = _mm_set1_epi8(*(pu1_left - 8));
+ row2_16x8b = _mm_set1_epi8(*(pu1_left - 9));
+ row3_16x8b = _mm_set1_epi8(*(pu1_left - 10));
+ row4_16x8b = _mm_set1_epi8(*(pu1_left - 11));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
- val1 = _mm_extract_epi16(val_16x8b, 1);
- val2 = _mm_extract_epi16(val_16x8b, 0);
-
pu1_dst += dst_strd4;
- row1_16x8b = _mm_set1_epi8(val1 >> 8);
- row2_16x8b = _mm_set1_epi8(val1 & 0xff);
- row3_16x8b = _mm_set1_epi8(val2 >> 8);
- row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+ row1_16x8b = _mm_set1_epi8(*(pu1_left - 12));
+ row2_16x8b = _mm_set1_epi8(*(pu1_left - 13));
+ row3_16x8b = _mm_set1_epi8(*(pu1_left - 14));
+ row4_16x8b = _mm_set1_epi8(*(pu1_left - 15));
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
_mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
diff --git a/common/x86/ih264_padding_ssse3.c b/common/x86/ih264_padding_ssse3.c
index 6dadd39..43ded8e 100644
--- a/common/x86/ih264_padding_ssse3.c
+++ b/common/x86/ih264_padding_ssse3.c
@@ -97,9 +97,6 @@ void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
WORD32 row;
WORD32 i;
UWORD8 *pu1_dst;
- __m128i const0_16x8b;
-
- const0_16x8b = _mm_setzero_si128();
ASSERT(pad_size % 8 == 0);
@@ -107,9 +104,8 @@ void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
{
__m128i src_temp0_16x8b;
- src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
pu1_dst = pu1_src - pad_size;
- src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
+ src_temp0_16x8b = _mm_set1_epi8(*pu1_src);
for(i = 0; i < pad_size; i += 8)
{
_mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
@@ -168,20 +164,14 @@ void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src,
WORD32 row;
WORD32 col;
UWORD8 *pu1_dst;
- __m128i const0_16x8b, const1_16x8b;
- const0_16x8b = _mm_setzero_si128();
- const1_16x8b = _mm_set1_epi8(1);
- const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
ASSERT(pad_size % 8 == 0);
for(row = 0; row < ht; row++)
{
__m128i src_temp0_16x8b;
- src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
pu1_dst = pu1_src - pad_size;
- src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
-
+ src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)pu1_src));
for(col = 0; col < pad_size; col += 8)
{
_mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
@@ -240,7 +230,6 @@ void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
WORD32 row;
WORD32 col;
UWORD8 *pu1_dst;
- __m128i const0_16x8b;
ASSERT(pad_size % 8 == 0);
@@ -248,10 +237,8 @@ void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
{
__m128i src_temp0_16x8b;
- src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1));
- const0_16x8b = _mm_setzero_si128();
pu1_dst = pu1_src;
- src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
+ src_temp0_16x8b = _mm_set1_epi8(*(pu1_src - 1));
for(col = 0; col < pad_size; col += 8)
{
_mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
@@ -310,10 +297,6 @@ void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
WORD32 row;
WORD32 col;
UWORD8 *pu1_dst;
- __m128i const0_16x8b, const1_16x8b;
- const0_16x8b = _mm_setzero_si128();
- const1_16x8b = _mm_set1_epi8(1);
- const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
ASSERT(pad_size % 8 == 0);
@@ -321,9 +304,8 @@ void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
{
__m128i src_temp0_16x8b;
- src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2));
pu1_dst = pu1_src;
- src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
+ src_temp0_16x8b = _mm_set1_epi16(*((UWORD16 *)(pu1_src - 2)));
for(col = 0; col < pad_size; col += 8)
{
_mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
diff --git a/common/x86/ih264_weighted_pred_sse42.c b/common/x86/ih264_weighted_pred_sse42.c
index b1684b7..48f1f54 100644
--- a/common/x86/ih264_weighted_pred_sse42.c
+++ b/common/x86/ih264_weighted_pred_sse42.c
@@ -96,12 +96,6 @@ void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1,
if(wd == 4)
{
- __m128i mask_full_16x8b, mask_ll4B_16x8b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
- // mask for first four bytes
-
do
{
y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
@@ -121,13 +115,10 @@ void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1,
y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
- _mm_maskmoveu_si128(y0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y0_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(y0_2_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + (dst_strd << 1)));
- _mm_maskmoveu_si128(y0_3_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd * 3));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y0_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y0_1_16x8b);
+ *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y0_2_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y0_3_16x8b);
ht -= 4;
pu1_src1 += src_strd1 << 2;
@@ -268,12 +259,6 @@ void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1,
if(wd == 2)
{
- __m128i mask_full_16x8b, mask_ll4B_16x8b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
- // mask for first four bytes
-
do
{
uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
@@ -285,9 +270,8 @@ void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1,
uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b);
uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
- _mm_maskmoveu_si128(uv0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(uv0_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(uv0_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(uv0_1_16x8b);
ht -= 2;
pu1_src1 += src_strd1 << 1;
@@ -419,12 +403,6 @@ void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src,
{
__m128i y_0_8x16b, y_2_8x16b;
- __m128i mask_full_16x8b, mask_ll4B_16x8b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
- // mask for first four bytes
-
do
{
y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
@@ -455,13 +433,10 @@ void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src,
y_2_16x8b = _mm_srli_si128(y_0_16x8b, 8);
y_3_16x8b = _mm_srli_si128(y_0_16x8b, 12);
- _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(y_2_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + (dst_strd << 1)));
- _mm_maskmoveu_si128(y_3_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd * 3));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y_1_16x8b);
+ *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y_2_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y_3_16x8b);
ht -= 4;
pu1_src += src_strd << 2;
@@ -660,12 +635,6 @@ void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src,
{
__m128i y_0_8x16b;
- __m128i mask_full_16x8b, mask_ll4B_16x8b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
- // mask for first four bytes
-
do
{
y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
@@ -686,9 +655,8 @@ void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src,
y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_0_8x16b);
y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4);
- _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y_1_16x8b);
ht -= 2;
pu1_src += src_strd << 1;
@@ -890,12 +858,6 @@ void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1,
__m128i y1_0_8x16b, y1_2_8x16b;
__m128i y2_0_8x16b, y2_2_8x16b;
- __m128i mask_ll4B_16x8b;
-
- mask_ll4B_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_ll4B_16x8b, 12);
- // mask for first four bytes
-
do
{
y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
@@ -942,13 +904,11 @@ void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1,
y1_2_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
y1_3_16x8b = _mm_srli_si128(y1_0_16x8b, 12);
- _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
- _mm_maskmoveu_si128(y1_2_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + (dst_strd << 1)));
- _mm_maskmoveu_si128(y1_3_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd * 3));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y1_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y1_1_16x8b);
+ *((WORD32 *)(pu1_dst + (dst_strd << 1))) = _mm_cvtsi128_si32(y1_2_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd * 3)) = _mm_cvtsi128_si32(y1_3_16x8b);
+
ht -= 4;
pu1_src1 += src_strd1 << 2;
@@ -1187,11 +1147,6 @@ void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1,
{
__m128i y1_0_8x16b, y2_0_8x16b;
- __m128i mask_full_16x8b, mask_ll4B_16x8b;
-
- mask_full_16x8b = _mm_set1_epi8(0xff);
- mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
-
do
{
y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
@@ -1218,9 +1173,8 @@ void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1,
y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_0_8x16b);
y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4);
- _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
- _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b,
- (char*)(pu1_dst + dst_strd));
+ *((WORD32 *)(pu1_dst)) = _mm_cvtsi128_si32(y1_0_16x8b);
+ *((WORD32 *)(pu1_dst + dst_strd)) = _mm_cvtsi128_si32(y1_1_16x8b);
ht -= 2;
pu1_src1 += src_strd1 << 1;
diff --git a/decoder.arm.mk b/decoder.arm.mk
index 556e838..e5ac2d4 100644
--- a/decoder.arm.mk
+++ b/decoder.arm.mk
@@ -2,7 +2,7 @@ libavcd_inc_dir_arm += $(LOCAL_PATH)/decoder/arm
libavcd_inc_dir_arm += $(LOCAL_PATH)/common/arm
libavcd_srcs_c_arm += decoder/arm/ih264d_function_selector.c
-libavcd_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC
+libavcd_cflags_arm += -DARM
#LOCAL_ARM_MODE := arm
@@ -43,7 +43,3 @@ libavcd_srcs_asm_arm += common/arm/ih264_arm_memory_barrier.s
LOCAL_SRC_FILES_arm += $(libavcd_srcs_c_arm) $(libavcd_srcs_asm_arm)
LOCAL_C_INCLUDES_arm += $(libavcd_inc_dir_arm)
LOCAL_CFLAGS_arm += $(libavcd_cflags_arm)
-
-# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm += $(addprefix -Wa$(comma)-I,$(libavcd_inc_dir_arm))
diff --git a/decoder.arm64.mk b/decoder.arm64.mk
index 423c34d..2140b94 100644
--- a/decoder.arm64.mk
+++ b/decoder.arm64.mk
@@ -1,5 +1,5 @@
libavcd_cflags_arm64 += -DARMV8
-libavcd_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC
+libavcd_cflags_arm64 += -DARM
libavcd_inc_dir_arm64 += $(LOCAL_PATH)/decoder/arm
libavcd_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8
@@ -46,5 +46,4 @@ LOCAL_C_INCLUDES_arm64 += $(libavcd_inc_dir_arm64)
LOCAL_CFLAGS_arm64 += $(libavcd_cflags_arm64)
# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
LOCAL_CLANG_ASFLAGS_arm64 += $(addprefix -Wa$(comma)-I,$(libavcd_inc_dir_arm64))
diff --git a/decoder.mk b/decoder.mk
index 7df8d17..8b9bd55 100644
--- a/decoder.mk
+++ b/decoder.mk
@@ -9,8 +9,8 @@ LOCAL_MODULE := libavcdec
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-LOCAL_CFLAGS += -D_LIB -DMULTICORE -fPIC -UAPPLY_CONCEALMENT -UINSERT_LOGO -DTHREAD_QUAD_CORE
-LOCAL_CFLAGS += -O3 -DANDROID
+LOCAL_CFLAGS += -fPIC
+LOCAL_CFLAGS += -O3
LOCAL_C_INCLUDES := $(LOCAL_PATH)/decoder $(LOCAL_PATH)/common
diff --git a/decoder.x86.mk b/decoder.x86.mk
index 309bc23..e7a4686 100644
--- a/decoder.x86.mk
+++ b/decoder.x86.mk
@@ -1,4 +1,4 @@
-libavcd_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+libavcd_cflags_x86 += -DX86 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
libavcd_inc_dir_x86 += $(LOCAL_PATH)/decoder/x86
libavcd_inc_dir_x86 += $(LOCAL_PATH)/common/x86
diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk
index 1b018f7..b265f4f 100644
--- a/decoder.x86_64.mk
+++ b/decoder.x86_64.mk
@@ -1,5 +1,4 @@
-libavcd_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
-libavcd_cflags_x86_64 += -UAPPLY_CONCEALMENT -ULOGO_EN -DTHREAD_QUAD_CORE
+libavcd_cflags_x86_64 += -DX86 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
libavcd_inc_dir_x86_64 += $(LOCAL_PATH)/decoder/x86
libavcd_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86
diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c
index 18e4c2e..6ea75c6 100644
--- a/decoder/ih264d_api.c
+++ b/decoder/ih264d_api.c
@@ -107,18 +107,9 @@
#define CODEC_VENDOR "ITTIAM"
#define MAXVERSION_STRLEN 511
#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \
- strncpy(version_string,"@(#)Id:", MAXVERSION_STRLEN); \
- strncat(version_string,codec_name, MAXVERSION_STRLEN); \
- strncat(version_string,"_", MAXVERSION_STRLEN); \
- strncat(version_string,codec_release_type, MAXVERSION_STRLEN); \
- strncat(version_string," Ver:", MAXVERSION_STRLEN); \
- strncat(version_string,codec_release_ver, MAXVERSION_STRLEN); \
- strncat(version_string," Released by ", MAXVERSION_STRLEN); \
- strncat(version_string,codec_vendor, MAXVERSION_STRLEN); \
- strncat(version_string," Build: ", MAXVERSION_STRLEN); \
- strncat(version_string,__DATE__, MAXVERSION_STRLEN); \
- strncat(version_string," @ ", MAXVERSION_STRLEN); \
- strncat(version_string,__TIME__, MAXVERSION_STRLEN);
+ snprintf(version_string, MAXVERSION_STRLEN, \
+ "@(#)Id:%s_%s Ver:%s Released by %s Build: %s @ %s", \
+ codec_name, codec_release_type, codec_release_ver, codec_vendor, __DATE__, __TIME__)
#define MAX_NAL_UNIT_SIZE MAX((H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_HEIGHT),MIN_NALUNIT_SIZE)
#define MIN_NALUNIT_SIZE 200000
@@ -1501,7 +1492,6 @@ void ih264d_init_decoder(void * ps_dec_params)
ps_dec->u2_mbx = 0xffff;
ps_dec->u2_mby = 0;
ps_dec->u2_total_mbs_coded = 0;
- ps_cur_slice->u1_end_of_frame_signal = 0;
/* POC initializations */
ps_prev_poc = &ps_dec->s_prev_pic_poc;
@@ -2441,9 +2431,9 @@ WORD32 ih264d_init(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
{
ih264d_init_ip_t *ps_init_ip;
ih264d_init_op_t *ps_init_op;
+ WORD32 init_status = IV_SUCCESS;
ps_init_ip = (ih264d_init_ip_t *)pv_api_ip;
ps_init_op = (ih264d_init_op_t *)pv_api_op;
- WORD32 init_status = IV_SUCCESS;
init_status = ih264d_init_video_decoder(dec_hdl, ps_init_ip, ps_init_op);
@@ -2602,11 +2592,11 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
WORD32 ret,api_ret_value = IV_SUCCESS;
WORD32 header_data_left = 0,frame_data_left = 0;
UWORD8 *pu1_bitstrm_buf;
- ithread_set_name((void*)"Parse_thread");
-
-
ivd_video_decode_ip_t *ps_dec_ip;
ivd_video_decode_op_t *ps_dec_op;
+
+ ithread_set_name((void*)"Parse_thread");
+
ps_dec_ip = (ivd_video_decode_ip_t *)pv_api_ip;
ps_dec_op = (ivd_video_decode_op_t *)pv_api_op;
ps_dec->pv_dec_out = ps_dec_op;
@@ -2859,8 +2849,9 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
ps_dec->u2_cur_slice_num = 0;
ps_dec->cur_dec_mb_num = 0;
ps_dec->cur_recon_mb_num = 0;
- ps_dec->u4_first_slice_in_pic = 1;
+ ps_dec->u4_first_slice_in_pic = 2;
ps_dec->u1_slice_header_done = 0;
+ ps_dec->u1_dangling_field = 0;
ps_dec->u4_dec_thread_created = 0;
ps_dec->u4_bs_deblk_thread_created = 0;
@@ -2914,7 +2905,6 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
{
ps_dec->u2_total_mbs_coded =
ps_dec->ps_cur_sps->u2_max_mb_addr + 1;
- ps_dec->ps_cur_slice->u1_end_of_frame_signal = 1;
}
/* close deblock thread if it is not closed yet*/
@@ -3029,16 +3019,39 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
ps_dec_op->u4_error_code = error | ret;
api_ret_value = IV_FAIL;
- if((ret == IVD_RES_CHANGED)||(ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED))
+ if((ret == IVD_RES_CHANGED) || (ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED))
{
/*dont consume the SPS*/
ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
return IV_FAIL;
}
- if(ret == ERROR_IN_LAST_SLICE_OF_PIC)
+
+ if((ret == IVD_RES_CHANGED) || (ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED))
+ {
+ /*dont consume the SPS*/
+ ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
+ return IV_FAIL;
+ }
+
+ if((ret == ERROR_UNAVAIL_PICBUF_T) || (ret == ERROR_UNAVAIL_MVBUF_T))
+ {
+ ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
+ return IV_FAIL;
+ }
+
+ if((ret == ERROR_INCOMPLETE_FRAME) || (ret == ERROR_DANGLING_FIELD_IN_PIC))
{
ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
+ api_ret_value = IV_FAIL;
+ break;
+ }
+
+ if(ret == ERROR_IN_LAST_SLICE_OF_PIC)
+ {
+ api_ret_value = IV_FAIL;
+ break;
}
+
}
if(ps_dec->u4_return_to_app)
@@ -3079,11 +3092,24 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
{
// last slice - missing/corruption
WORD32 num_mb_skipped;
+ WORD32 prev_slice_err;
pocstruct_t temp_poc;
num_mb_skipped = (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
- ps_dec->u2_total_mbs_coded;
- ih264d_mark_err_slice_skip(ps_dec, num_mb_skipped, ps_dec->u1_nal_unit_type == IDR_SLICE_NAL,&temp_poc,3);
+
+ if(ps_dec->u4_first_slice_in_pic)
+ prev_slice_err = 1;
+ else
+ prev_slice_err = 2;
+
+ ret = ih264d_mark_err_slice_skip(ps_dec, num_mb_skipped, ps_dec->u1_nal_unit_type == IDR_SLICE_NAL, ps_dec->ps_cur_slice->u2_frame_num,
+ &temp_poc, prev_slice_err);
+
+ if((ret == ERROR_UNAVAIL_PICBUF_T) || (ret == ERROR_UNAVAIL_MVBUF_T))
+ {
+ return IV_FAIL;
+ }
}
@@ -3181,19 +3207,6 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
* For field pictures, set the bottom and top picture decoded u4_flag correctly.
*/
- if(ps_dec->u4_pic_buf_got == 0)
- {
- ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op);
-
- ps_dec_op->u4_frame_decoded_flag = 0;
- /* close deblock thread if it is not closed yet*/
- if(ps_dec->u4_num_cores == 3)
- {
- ih264d_signal_bs_deblk_thread(ps_dec);
- }
- return (IV_FAIL);
- }
-
if(ps_dec->ps_cur_slice->u1_field_pic_flag)
{
if(1 == ps_dec->ps_cur_slice->u1_bottom_field_flag)
@@ -3206,10 +3219,19 @@ WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
}
}
- /* Calling Function to deblock Picture and Display */
- ret = ih264d_deblock_display(ps_dec);
- if(ret != 0)
- return IV_FAIL;
+ /* if a new frame is not found (i.e. we are still getting slices from the previous frame),
+ * ih264d_deblock_display is not called. Such frames will not be added to reference/display
+ */
+ if((ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC) == 0)
+ {
+ /* Calling Function to deblock Picture and Display */
+ ret = ih264d_deblock_display(ps_dec);
+ if(ret != 0)
+ {
+ return IV_FAIL;
+ }
+ }
+
/*set to complete ,as we dont support partial frame decode*/
if(ps_dec->i4_header_decoded == 3)
@@ -3334,7 +3356,7 @@ WORD32 ih264d_get_version(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
return (IV_FAIL);
}
- version_string_len = strnlen(version_string, MAXVERSION_STRLEN) + 1;
+ version_string_len = strlen(version_string) + 1;
if(ps_ip->u4_version_buffer_size >= version_string_len) //(WORD32)sizeof(sizeof(version_string)))
{
diff --git a/decoder/ih264d_error_handler.h b/decoder/ih264d_error_handler.h
index 1ff5c7d..5b1bc84 100644
--- a/decoder/ih264d_error_handler.h
+++ b/decoder/ih264d_error_handler.h
@@ -110,14 +110,18 @@ typedef enum
ERROR_LEVEL_UNSUPPORTED = 0x90,
ERROR_START_CODE_NOT_FOUND = 0x91,
ERROR_PIC_NUM_IS_REPEATED = 0x92,
- ERROR_IN_LAST_SLICE_OF_PIC = 0x93
+ ERROR_IN_LAST_SLICE_OF_PIC = 0x93,
+ ERROR_NEW_FRAME_EXPECTED = 0x94,
+ ERROR_INCOMPLETE_FRAME = 0x95
} h264_decoder_error_code_t;
WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
WORD32 num_mb_skip,
UWORD8 u1_is_idr_slice,
+ UWORD16 u2_frame_num,
pocstruct_t *ps_cur_poc,
WORD32 prev_slice_err);
+void ih264d_err_pic_dispbuf_mgr(dec_struct_t *ps_dec);
#endif /* _IH264D_ERROR_HANDLER_H_ */
diff --git a/decoder/ih264d_function_selector.h b/decoder/ih264d_function_selector.h
index 92ad959..22e2efe 100644
--- a/decoder/ih264d_function_selector.h
+++ b/decoder/ih264d_function_selector.h
@@ -65,10 +65,6 @@ void ih264d_init_function_ptr_generic(dec_struct_t *ps_codec);
void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec);
void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec);
-#ifndef DISABLE_AVX2
-void ih264d_init_function_ptr_avx2(dec_struct_t *ps_codec);
-#endif
-
void ih264d_init_function_ptr_a9q(dec_struct_t *ps_codec);
void ih264d_init_function_ptr_av8(dec_struct_t *ps_codec);
diff --git a/decoder/ih264d_parse_headers.c b/decoder/ih264d_parse_headers.c
index f7ae612..743b573 100644
--- a/decoder/ih264d_parse_headers.c
+++ b/decoder/ih264d_parse_headers.c
@@ -545,10 +545,6 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
u1_level_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
- if(ps_dec->u4_level_at_init < u1_level_idc)
- {
- return IH264D_UNSUPPORTED_LEVEL;
- }
COPYTHECONTEXT("SPS: u4_level_idc",u1_level_idc);
@@ -934,6 +930,10 @@ WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
ps_dec->u2_disp_width = i4_cropped_wd;
}
+ if(ps_dec->u4_level_at_init < u1_level_idc)
+ {
+ return IH264D_UNSUPPORTED_LEVEL;
+ }
ps_seq->u1_is_valid = TRUE;
@@ -1096,8 +1096,17 @@ WORD32 ih264d_parse_nal_unit(iv_obj_t *dec_hdl,
== IDR_SLICE_NAL),
u1_nal_ref_idc, ps_dec);
+ if((ps_dec->u4_first_slice_in_pic != 0)&&
+ ((ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC) == 0))
+ {
+ /* if the first slice header was not valid set to 1 */
+ ps_dec->u4_first_slice_in_pic = 1;
+ }
+
if(i_status != OK)
+ {
return i_status;
+ }
}
else
{
diff --git a/decoder/ih264d_parse_islice.c b/decoder/ih264d_parse_islice.c
index 534c785..1e4fdfa 100644
--- a/decoder/ih264d_parse_islice.c
+++ b/decoder/ih264d_parse_islice.c
@@ -132,21 +132,23 @@ WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec,
/*--------------------------------------------------------------------*/
if (!ps_cur_mb_info->u1_tran_form8x8)
{
+ UWORD8 *pu1_temp;
ih264d_read_intra_pred_modes(ps_dec,
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
ps_cur_mb_info->u1_tran_form8x8);
- UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
pu1_temp += 32;
ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
}
else
{
+ UWORD8 *pu1_temp;
ih264d_read_intra_pred_modes(ps_dec,
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
ps_cur_mb_info->u1_tran_form8x8);
- UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
pu1_temp += 8;
ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
}
@@ -403,8 +405,8 @@ WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec,
(tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
WORD16 *pi2_coeff_block =
(WORD16 *)ps_dec->pv_parse_tu_coeff_data;
- ps_tu_4x4->u2_sig_coeff_map = 0;
UWORD32 u4_num_coeff;
+ ps_tu_4x4->u2_sig_coeff_map = 0;
ret = ps_dec->pf_cavlc_parse4x4coeff[(ui_N > 7)](pi2_dc_coef, 0, ui_N,
ps_dec, &u4_num_coeff);
@@ -542,23 +544,25 @@ WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec,
/*--------------------------------------------------------------------*/
if (!ps_cur_mb_info->u1_tran_form8x8)
{
+ UWORD8 *pu1_temp;
ih264d_read_intra_pred_modes_cabac(
ps_dec,
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
ps_cur_mb_info->u1_tran_form8x8);
- UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
pu1_temp += 32;
ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
}
else
{
+ UWORD8 *pu1_temp;
ih264d_read_intra_pred_modes_cabac(
ps_dec,
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
ps_cur_mb_info->u1_tran_form8x8);
- UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
pu1_temp += 8;
ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
}
diff --git a/decoder/ih264d_parse_pslice.c b/decoder/ih264d_parse_pslice.c
index 02110eb..d56f44e 100644
--- a/decoder/ih264d_parse_pslice.c
+++ b/decoder/ih264d_parse_pslice.c
@@ -1432,6 +1432,7 @@ WORD32 ih264d_parse_inter_slice_data_cavlc(dec_struct_t * ps_dec,
WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
WORD32 num_mb_skip,
UWORD8 u1_is_idr_slice,
+ UWORD16 u2_frame_num,
pocstruct_t *ps_cur_poc,
WORD32 prev_slice_err)
{
@@ -1457,14 +1458,20 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
UWORD16 u2_total_mbs_coded;
UWORD32 u1_mbaff = ps_slice->u1_mbaff_frame_flag;
parse_part_params_t *ps_part_info;
+ WORD32 ret;
+
+
+ if(ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC)
+ {
+ ih264d_err_pic_dispbuf_mgr(ps_dec);
+ return 0;
+ }
if(prev_slice_err == 1)
{
- // first slice - missing/header corruption
- if(u1_is_idr_slice)
- ps_dec->ps_cur_slice->u2_frame_num = 0;
- else
- ps_dec->ps_cur_slice->u2_frame_num++;
+ /* first slice - missing/header corruption */
+ ps_dec->ps_cur_slice->u2_frame_num = u2_frame_num;
+
if(!ps_dec->u1_first_slice_in_stream)
{
@@ -1482,7 +1489,6 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp;
ps_dec->p_motion_compensate = ih264d_motion_compensate_bp;
- ps_dec->ps_pps->ps_sps = ps_dec->ps_cur_sps;
if(ps_dec->ps_cur_pic != NULL)
poc = ps_dec->ps_cur_pic->i4_poc + 2;
@@ -1491,10 +1497,16 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
for(i = 0; i < MAX_NUM_PIC_PARAMS; i++)
if(ps_dec->ps_pps[i].u1_is_valid == TRUE)
j = i;
+ {
+ ret = ih264d_start_of_pic(ps_dec, poc, ps_cur_poc,
+ ps_dec->ps_cur_slice->u2_frame_num,
+ &ps_dec->ps_pps[j]);
- ih264d_start_of_pic(ps_dec, poc, ps_cur_poc,
- ps_dec->ps_cur_slice->u2_frame_num,
- &ps_dec->ps_pps[j]);
+ if(ret != OK)
+ {
+ return ret;
+ }
+ }
ps_dec->ps_ref_pic_buf_lx[0][0]->u1_pic_buf_id = 0;
@@ -1617,7 +1629,7 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
>= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
{
ps_dec->u1_pic_decode_done = 1;
- return 1;
+ return 0;
}
// Inserting new slice
@@ -1685,8 +1697,6 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
/******************************************************/
/* Parsing / decoding the slice */
/******************************************************/
- ps_dec->u4_first_slice_in_pic = 0;
- ps_dec->u1_first_slice_in_stream = 0;
ps_dec->u1_slice_header_done = 2;
ps_dec->u1_qp = ps_slice->u1_slice_qp;
ih264d_update_qp(ps_dec, 0);
@@ -1823,6 +1833,11 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
H264_DEC_DEBUG_PRINT("Mbs in slice: %d\n", ps_dec->ps_cur_slice->u4_mbs_in_slice);
ps_dec->u2_cur_slice_num++;
+
+ /* incremented here only if first slice is inserted */
+ if(ps_dec->u4_first_slice_in_pic != 0)
+ ps_dec->ps_parse_cur_slice++;
+
ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
@@ -1830,7 +1845,6 @@ WORD32 ih264d_mark_err_slice_skip(dec_struct_t * ps_dec,
>= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
{
ps_dec->u1_pic_decode_done = 1;
- return 1;
}
return 0;
diff --git a/decoder/ih264d_parse_slice.c b/decoder/ih264d_parse_slice.c
index b3a7632..eef9db5 100644
--- a/decoder/ih264d_parse_slice.c
+++ b/decoder/ih264d_parse_slice.c
@@ -447,8 +447,8 @@ WORD32 ih264d_start_of_pic(dec_struct_t *ps_dec,
if(!ps_dec->ps_cur_pic)
{
- H264_DEC_DEBUG_PRINT("------- Display Buffers Reset --------\n");
WORD32 j;
+ H264_DEC_DEBUG_PRINT("------- Display Buffers Reset --------\n");
for(j = 0; j < MAX_DISP_BUFS_NEW; j++)
{
@@ -849,13 +849,6 @@ WORD32 ih264d_end_of_pic_dispbuf_mgr(dec_struct_t * ps_dec)
ps_cur_slice->u1_field_pic_flag,
ps_dec->u1_second_field);
}
- {
-
- if(!ps_cur_slice->u1_end_of_frame_signal)
- {
- ps_cur_slice->u1_end_of_frame_signal = 1;
- }
- }
if(!ps_cur_slice->u1_field_pic_flag
|| ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
@@ -961,7 +954,6 @@ WORD32 ih264d_end_of_pic(dec_struct_t *ps_dec,
dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
WORD32 ret;
- ps_dec->u4_first_slice_in_pic = 1;
ps_dec->u1_first_pb_nal_in_pic = 1;
ps_dec->u2_mbx = 0xffff;
ps_dec->u2_mby = 0;
@@ -969,9 +961,8 @@ WORD32 ih264d_end_of_pic(dec_struct_t *ps_dec,
dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
if(ps_err->u1_err_flag & REJECT_CUR_PIC)
{
- ps_err->u1_err_flag ^= REJECT_CUR_PIC;
ih264d_err_pic_dispbuf_mgr(ps_dec);
- return OK;
+ return ERROR_NEW_FRAME_EXPECTED;
}
}
@@ -1016,10 +1007,8 @@ WORD32 ih264d_end_of_pic(dec_struct_t *ps_dec,
ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field;
}
}
- if(!ps_cur_slice->u1_end_of_frame_signal)
- {
- return ERROR_END_OF_FRAME_EXPECTED_T;
- } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+ H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
return OK;
}
@@ -1294,6 +1283,22 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
u1_field_pic_flag,
u1_bottom_field_flag);
+ /* since we support only Full frame decode, every new process should
+ * process a new pic
+ */
+ if((ps_dec->u4_first_slice_in_pic == 2) && (i1_is_end_of_poc == 0))
+ {
+ /* if it is the first slice in the process call, it should be a new frame. If it is not,
+ * reject the current pic and don't add it to the dpb
+ */
+ ps_dec->ps_dec_err_status->u1_err_flag |= REJECT_CUR_PIC;
+ i1_is_end_of_poc = 1;
+ }
+ else
+ {
+ /* reset REJECT_CUR_PIC */
+ ps_dec->ps_dec_err_status->u1_err_flag &= MASK_REJECT_CUR_PIC;
+ }
}
/*--------------------------------------------------------------------*/
@@ -1310,6 +1315,7 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
&& ps_dec->u1_top_bottom_decoded
!= (TOP_FIELD_ONLY | BOT_FIELD_ONLY))
{
+ ps_dec->u1_dangling_field = 1;
if(ps_dec->u4_first_slice_in_pic)
{
// first slice - dangling field
@@ -1332,7 +1338,7 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
u1_is_idr_slice = ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL;
}
- else if(ps_dec->u4_first_slice_in_pic)
+ else if(ps_dec->u4_first_slice_in_pic == 2)
{
if(u2_first_mb_in_slice > 0)
{
@@ -1355,10 +1361,25 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
}
else
{
- // last slice - missing/corruption
- prev_slice_err = 2;
- num_mb_skipped = (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
- - ps_dec->u2_total_mbs_coded;
+
+ if(ps_dec->u4_first_slice_in_pic)
+ {
+ /* if valid slice header is not decoded do start of pic processing
+ * since in the current process call, frame num is not updated in the slice structure yet
+ * ih264d_is_end_of_pic is checked with valid frame num of previous process call,
+ * although i1_is_end_of_poc is set there could be more slices in the frame,
+ * so conceal only till cur slice */
+ prev_slice_err = 1;
+ num_mb_skipped = u2_first_mb_in_slice << u1_mbaff;
+ }
+ else
+ {
+ /* since i1_is_end_of_poc is set, a new frame num has been encountered, so conceal the current frame
+ * completely */
+ prev_slice_err = 2;
+ num_mb_skipped = (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
+ - ps_dec->u2_total_mbs_coded;
+ }
ps_cur_poc = &s_tmp_poc;
}
}
@@ -1380,13 +1401,40 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
if(prev_slice_err)
{
- end_of_frame = ih264d_mark_err_slice_skip(ps_dec,num_mb_skipped,u1_is_idr_slice,ps_cur_poc,prev_slice_err);
+ ret = ih264d_mark_err_slice_skip(ps_dec, num_mb_skipped, u1_is_idr_slice, u2_frame_num, ps_cur_poc, prev_slice_err);
+
+ if(ps_dec->u1_dangling_field == 1)
+ {
+ ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
+ ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
+ ps_dec->u2_prv_frame_num = u2_frame_num;
+ ps_dec->u1_first_slice_in_stream = 0;
+ return ERROR_DANGLING_FIELD_IN_PIC;
+ }
- if(end_of_frame)
+ if(prev_slice_err == 2)
{
- // return if all MBs in frame are parsed
+ ps_dec->u1_first_slice_in_stream = 0;
+ return ERROR_INCOMPLETE_FRAME;
+ }
+
+ if(ps_dec->u2_total_mbs_coded
+ >= ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs)
+ {
+ /* return if all MBs in frame are parsed*/
+ ps_dec->u1_first_slice_in_stream = 0;
return ERROR_IN_LAST_SLICE_OF_PIC;
}
+
+ if(ps_dec->ps_dec_err_status->u1_err_flag & REJECT_CUR_PIC)
+ {
+ ih264d_err_pic_dispbuf_mgr(ps_dec);
+ return ERROR_NEW_FRAME_EXPECTED;
+ }
+
+ if(ret != OK)
+ return ret;
+
i1_is_end_of_poc = 0;
}
@@ -1401,13 +1449,6 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
if(!ps_dec->u1_first_slice_in_stream)
{
UWORD8 uc_mbs_exceed = 0;
- /*since we support only Full frame decode, every new process should
- * process a new pic
- */
- if(ps_dec->u4_first_slice_in_pic == 1)
- {
- i1_is_end_of_poc = 1;
- }
if(ps_dec->u2_total_mbs_coded
== (ps_dec->ps_cur_sps->u2_max_mb_addr + 1))
@@ -1446,45 +1487,8 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
}
}
- ps_cur_slice->u1_end_of_frame_signal = 0;
if(u1_field_pic_flag)
{
- /*
- * Check if the frame number has changed.
- */
- H264_DEC_DEBUG_PRINT(
- "u2_frame_num: %d ps_dec->u2_prv_frame_num: %d ps_dec->u1_top_bottom_decoded: %d\n",
- u2_frame_num, ps_dec->u2_prv_frame_num,
- ps_dec->u1_top_bottom_decoded);
- if((u2_frame_num != ps_dec->u2_prv_frame_num)
- && (0 != ps_dec->u1_top_bottom_decoded))
- {
- if((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
- != ps_dec->u1_top_bottom_decoded)
- {
- H264_DEC_DEBUG_PRINT("Dangling Field, toggling second field\n");
- ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
- ps_dec->u1_dangling_field = 1;
- /*
- * Updating the u1_bottom_field_flag since its used in the concealment function.
- */
- ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
- ps_dec->u2_prv_frame_num = u2_frame_num;
-
- ret = ih264d_deblock_display(ps_dec);
- if(ret != OK)
- return ret;
-
- /*
- * The bytes consumed will be handled by the
- * video_decode function after the error is handled.
- */
- return ERROR_DANGLING_FIELD_IN_PIC;
-
- }
-
- }
-
ps_dec->u2_prv_frame_num = u2_frame_num;
}
@@ -1513,7 +1517,7 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
ps_dec->ps_cur_pic->i4_poc = i4_temp_poc;
ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
}
- if(ps_dec->u4_first_slice_in_pic)
+ if(ps_dec->u4_first_slice_in_pic == 2)
{
ret = ih264d_decode_pic_order_cnt(u1_is_idr_slice, u2_frame_num,
&ps_dec->s_prev_pic_poc,
@@ -1581,11 +1585,14 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
}
- if(ps_dec->u4_first_slice_in_pic)
+ if(ps_dec->u4_first_slice_in_pic == 2)
{
- ret = ih264d_start_of_pic(ps_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps);
- if(ret != OK)
- return ret;
+ if(u2_first_mb_in_slice == 0)
+ {
+ ret = ih264d_start_of_pic(ps_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps);
+ if(ret != OK)
+ return ret;
+ }
ps_dec->u4_output_present = 0;
@@ -1898,7 +1905,8 @@ WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
if(ps_dec->u1_slice_header_done)
{
- /*set to zero to indicate a valid slice has been decoded*/
+ /* set to zero to indicate a valid slice has been decoded */
+ /* first slice header successfully decoded */
ps_dec->u4_first_slice_in_pic = 0;
ps_dec->u1_first_slice_in_stream = 0;
}
diff --git a/decoder/ih264d_process_intra_mb.c b/decoder/ih264d_process_intra_mb.c
index d2da005..dde2a7e 100644
--- a/decoder/ih264d_process_intra_mb.c
+++ b/decoder/ih264d_process_intra_mb.c
@@ -924,7 +924,10 @@ WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
(u1_intrapred_mode ^ 2);
if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ u1_intrapred_mode = 0;
ps_dec->i4_error_code = ERROR_INTRAPRED;
+ }
}
{
UWORD8 au1_ngbr_pels[33];
@@ -1242,9 +1245,11 @@ WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
{
UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
- /*if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ if((u1_err_code & u1_packed_modes) ^ u1_err_code)
{
- }*/
+ i1_intra_pred = 0;
+ ps_dec->i4_error_code = ERROR_INTRAPRED;
+ }
}
}
@@ -1649,7 +1654,10 @@ WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ i1_intra_pred = 0;
ps_dec->i4_error_code = ERROR_INTRAPRED;
+ }
}
}
@@ -1761,7 +1769,10 @@ WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
u1_intra_chrom_pred_mode :
(u1_intra_chrom_pred_mode ^ 2);
if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ u1_intra_chrom_pred_mode = 0;
ps_dec->i4_error_code = ERROR_INTRAPRED;
+ }
}
/* CHANGED CODE */
@@ -1933,11 +1944,12 @@ WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels;
UWORD16 *pu2_left_uv;
UWORD16 *pu2_topleft_uv;
- pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
- pu2_left_uv = (UWORD16 *)pu1_uleft;
WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff);
WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8;
+ pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
+ pu2_left_uv = (UWORD16 *)pu1_uleft;
+
/* Get neighbour pixels */
/* left pels */
if(u2_use_left_mb_pack)
diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h
index 4e3f0bb..062747b 100644
--- a/decoder/ih264d_structs.h
+++ b/decoder/ih264d_structs.h
@@ -524,7 +524,6 @@ typedef struct
unsigned. LSB byte : weight and MSB byte: u4_ofst */
UWORD32 u4_wt_ofst_lx[2][MAX_REF_BUFS][3];
void * pv_codec_handle; /* For Error Handling */
- UWORD8 u1_end_of_frame_signal;
/* This is used when reordering is done in Forward or */
/* backward lists. This is because reordering can point */
@@ -607,6 +606,9 @@ typedef struct code_overlay_ctxt
#define REJECT_CUR_PIC (0x01)
#define REJECT_PB_PICS (0x02)
+#define MASK_REJECT_CUR_PIC (0xFE)
+#define MASK_REJECT_PB_PICS (0xFD)
+
#define PIC_TYPE_UNKNOWN (0xFF)
#define PIC_TYPE_I (0x00)
#define SYNC_FRM_DEFAULT (0xFFFFFFFF)
@@ -1351,6 +1353,7 @@ typedef struct _DecStruct
UWORD32 u4_cur_slice_decode_done;
UWORD32 u4_extra_mem_used;
+ /* 2: first slice not parsed, 1: first slice parsed, 0: first valid slice header parsed */
UWORD32 u4_first_slice_in_pic;
UWORD32 u4_num_cores;
IVD_ARCH_T e_processor_arch;
diff --git a/decoder/ih264d_thread_parse_decode.c b/decoder/ih264d_thread_parse_decode.c
index 910183c..f3da270 100644
--- a/decoder/ih264d_thread_parse_decode.c
+++ b/decoder/ih264d_thread_parse_decode.c
@@ -633,8 +633,6 @@ void ih264d_decode_picture_thread(dec_struct_t *ps_dec )
ps_dec->u4_fmt_conv_num_rows);
ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
}
-
- ithread_exit(0);
}
void ih264d_signal_decode_thread(dec_struct_t *ps_dec)
diff --git a/decoder/ih264d_utils.c b/decoder/ih264d_utils.c
index 31e9532..1581bd6 100644
--- a/decoder/ih264d_utils.c
+++ b/decoder/ih264d_utils.c
@@ -646,8 +646,11 @@ WORD32 ih264d_get_dpb_size(dec_seq_params_t *ps_seq, dec_struct_t *ps_dec)
case 51:
i4_size = 70778880;
break;
+ case 52:
+ i4_size = 70778880;
+ break;
default:
- i4_size = 6912000;
+ i4_size = 70778880;
break;
}
@@ -712,9 +715,12 @@ WORD32 ih264d_get_dpb_size_new(UWORD32 u4_level_idc,
case 51:
i4_size = 70778880;
break;
+ case 52:
+ i4_size = 70778880;
+ break;
default:
{
- return -1;
+ i4_size = 70778880;
}
break;
}
diff --git a/encoder.arm.mk b/encoder.arm.mk
index 874c81c..f06a6d5 100644
--- a/encoder.arm.mk
+++ b/encoder.arm.mk
@@ -1,7 +1,7 @@
libavce_inc_dir_arm += $(LOCAL_PATH)/encoder/arm
libavce_inc_dir_arm += $(LOCAL_PATH)/common/arm
-libavce_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC
+libavce_cflags_arm += -DARM
libavce_srcs_c_arm += encoder/arm/ih264e_function_selector.c
@@ -35,10 +35,8 @@ libavce_srcs_asm_arm += encoder/arm/ih264e_fmt_conv.s
#ME
libavce_srcs_asm_arm += encoder/arm/ime_distortion_metrics_a9q.s
-libavce_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q
-
else #No Neon
-libavce_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+libavce_cflags_arm += -DDISABLE_NEON
endif #Neon check
libavce_srcs_asm_arm += common/arm/ih264_arm_memory_barrier.s
@@ -46,7 +44,3 @@ libavce_srcs_asm_arm += common/arm/ih264_arm_memory_barrier.s
LOCAL_SRC_FILES_arm += $(libavce_srcs_c_arm) $(libavce_srcs_asm_arm)
LOCAL_C_INCLUDES_arm += $(libavce_inc_dir_arm)
LOCAL_CFLAGS_arm += $(libavce_cflags_arm)
-
-# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
-LOCAL_CLANG_ASFLAGS_arm += $(addprefix -Wa$(comma)-I,$(libavce_inc_dir_arm))
diff --git a/encoder.arm64.mk b/encoder.arm64.mk
index 5d2d045..f95a29f 100644
--- a/encoder.arm64.mk
+++ b/encoder.arm64.mk
@@ -1,5 +1,5 @@
libavce_cflags_arm64 += -DARMV8
-libavce_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC
+libavce_cflags_arm64 += -DARM
libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/arm
libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/armv8
@@ -35,9 +35,8 @@ libavce_srcs_asm_arm64 += encoder/armv8/ih264e_half_pel_av8.s
#ME
libavce_srcs_asm_arm64 += encoder/armv8/ime_distortion_metrics_av8.s
-libavce_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
else
-libavce_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+libavce_cflags_arm64 += -DDISABLE_NEON
endif
@@ -48,5 +47,4 @@ LOCAL_C_INCLUDES_arm64 += $(libavce_inc_dir_arm64)
LOCAL_CFLAGS_arm64 += $(libavce_cflags_arm64)
# CLANG WORKAROUNDS
-LOCAL_CLANG_ASFLAGS_arm64 += -no-integrated-as
LOCAL_CLANG_ASFLAGS_arm64 += $(addprefix -Wa$(comma)-I,$(libavce_inc_dir_arm64))
diff --git a/encoder.mk b/encoder.mk
index 5829118..7efcda2 100644
--- a/encoder.mk
+++ b/encoder.mk
@@ -9,8 +9,8 @@ LOCAL_MODULE := libavcenc
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-LOCAL_CFLAGS += -D_LIB -DMULTICORE -DANDROID -DNDEBUG -UHP_PL -DN_MB_ENABLE -URC_FIXED_POINT -fPIC
-LOCAL_CFLAGS += -O3 -DANDROID
+LOCAL_CFLAGS += -DNDEBUG -UHP_PL -DN_MB_ENABLE -fPIC
+LOCAL_CFLAGS += -O3
LOCAL_C_INCLUDES := $(LOCAL_PATH)/encoder $(LOCAL_PATH)/common
@@ -53,6 +53,9 @@ libavce_srcs_c += encoder/ih264e_utils.c
libavce_srcs_c += encoder/ih264e_version.c
libavce_srcs_c += encoder/ih264e_bitstream.c
libavce_srcs_c += encoder/ih264e_cavlc.c
+libavce_srcs_c += encoder/ih264e_cabac_init.c
+libavce_srcs_c += encoder/ih264e_cabac.c
+libavce_srcs_c += encoder/ih264e_cabac_encode.c
libavce_srcs_c += encoder/ih264e_encode_header.c
libavce_srcs_c += encoder/ih264e_function_selector_generic.c
libavce_srcs_c += encoder/ih264e_fmt_conv.c
diff --git a/encoder.x86.mk b/encoder.x86.mk
index e9b6a5f..f1e2ffa 100644
--- a/encoder.x86.mk
+++ b/encoder.x86.mk
@@ -1,4 +1,4 @@
-libavce_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+libavce_cflags_x86 += -DX86 -msse4.2 -mno-avx
libavce_inc_dir_x86 += $(LOCAL_PATH)/encoder/x86
libavce_inc_dir_x86 += $(LOCAL_PATH)/common/x86
diff --git a/encoder.x86_64.mk b/encoder.x86_64.mk
index deb004b..14205a3 100644
--- a/encoder.x86_64.mk
+++ b/encoder.x86_64.mk
@@ -1,4 +1,4 @@
-libavce_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+libavce_cflags_x86_64 += -DX86 -msse4.2 -mno-avx
libavce_inc_dir_x86_64 += $(LOCAL_PATH)/encoder/x86
libavce_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86
diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
index fe0ce17..9f5bfa9 100644
--- a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
@@ -17,7 +17,6 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
@/**
@******************************************************************************
@@ -102,11 +101,11 @@ ih264e_evaluate_intra16x16_modes_a9q:
vld1.32 {q5}, [r1]!
mov r11, #0
mov r4, #0
- @/* Left available ????
+ @/* Left available ???? */
ands r7, r5, #01
movne r10, #1
- @/* Top available ????
+ @/* Top available ???? */
ands r8, r5, #04
lsl r9, r10, #3
movne r11, #1
@@ -114,7 +113,7 @@ ih264e_evaluate_intra16x16_modes_a9q:
adds r8, r9, r12
- @/* None available :(
+ @/* None available :( */
moveq r4, #128
diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
index 568e623..6137054 100644
--- a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
@@ -17,9 +17,7 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
-@/**
.data
.p2align 2
@@ -39,7 +37,6 @@ scratch_intrapred_luma_4x4_prediction_addr1:
@/**
-@/**
@******************************************************************************
@*
@* @brief :Evaluate best intra 4x4 mode
diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
index e4dfca8..bdbaa02 100644
--- a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
+++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
@@ -17,7 +17,6 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
@/**
@******************************************************************************
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
index 2c04141..f8f5e42 100644
--- a/encoder/arm/ih264e_fmt_conv.s
+++ b/encoder/arm/ih264e_fmt_conv.s
@@ -17,11 +17,9 @@
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
-@/**
.text
.p2align 2
-@/**
@/*****************************************************************************
@* *
@@ -268,7 +266,6 @@ ih264e_fmt_conv_422i_to_420sp_a9q:
@ SUB r10,r10,r7,ASR #1 ;// u2_offset3 = u4_stride_v - u4_width >> 1
mov r14, r14, lsl #1 @// u2_offset_yuv422i = u2_offset_yuv422i * 2
- mov r7, r7, asr #4 @// u4_width = u4_width / 16 (u4_width >> 4)
mov r11, r11, asr #1 @// u4_width = u4_width / 2 (u4_width >> 1)
add r4, r12, r4 @// u2_offset1 = u2_offset1 + u4_stride_y
@@ -288,14 +285,14 @@ ih264e_fmt_conv_422i_to_420sp_a9q:
@// u4_width / 16 - r7
@// u4_height / 2 - r11
@// inner loop count - r12
-yuv420_to_yuv422i_hight_loop:
+yuv422i_to_420sp_height_loop:
mov r12, r7 @// Inner loop count = u4_width / 16
-yuv420_to_yuv422i_width_loop:
+yuv422i_to_420sp_width_loop:
vld4.8 {d0, d1, d2, d3}, [r3]! @// Load the 16 elements of row 1
vld4.8 {d4, d5, d6, d7}, [r8]! @// Load the 16 elements of row 2
- subs r12, r12, #1
+ sub r12, r12, #16
vrhadd.u8 d0, d0, d4
vrhadd.u8 d2, d2, d6
@@ -305,8 +302,34 @@ yuv420_to_yuv422i_width_loop:
vst2.8 {d0, d2}, [r1]! @// Store the 8 elements of row1/2 U
- bgt yuv420_to_yuv422i_width_loop
+ cmp r12, #15
+ bgt yuv422i_to_420sp_width_loop
+ cmp r12, #0
+ beq yuv422i_to_420sp_row_loop_end
+ @//If the width is not a multiple of 16, step back a few pixels so that a full 16 pixels can be read
+ @//Ex: if the width is 162, the loop above processes 160 pixels. Then
+ @//both source and destination are moved back to the 146th pixel and 16 pixels are read
+ @// and written using VLD4 and VST2
+ rsb r12, r12, #16
+ sub r3, r3, r12, lsl #1
+ sub r8, r8, r12, lsl #1
+ sub r0, r0, r12
+ sub r6, r6, r12
+ sub r1, r1, r12
+
+ vld4.8 {d0, d1, d2, d3}, [r3]! @// Load the 16 elements of row 1
+ vld4.8 {d4, d5, d6, d7}, [r8]! @// Load the 16 elements of row 2
+
+ vrhadd.u8 d0, d0, d4
+ vrhadd.u8 d2, d2, d6
+
+ vst2.8 {d1, d3}, [r0]! @// Store the 16 elements of row1 Y
+ vst2.8 {d5, d7}, [r6]! @// Store the 16 elements of row2 Y
+
+ vst2.8 {d0, d2}, [r1]! @// Store the 8 elements of row1/2 U
+
+yuv422i_to_420sp_row_loop_end:
@// Update the buffer pointer so that they will refer to next pair of rows
add r0, r0, r4 @// pu1_y = pu1_y + u2_offset1
add r6, r6, r4 @// pu1_y_nxt_row = pu1_y_nxt_row + u2_offset1
@@ -317,7 +340,7 @@ yuv420_to_yuv422i_width_loop:
add r3, r3, r5 @// pu2_yuv422i = pu2_yuv422i + u2_offset_yuv422i
add r8, r8, r5 @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row + u2_offset_yuv422i
- bgt yuv420_to_yuv422i_hight_loop
+ bgt yuv422i_to_420sp_height_loop
ldmfd sp!, {r4-r12, pc} @// Restore the register which are used
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
index e4f67a0..0486200 100644
--- a/encoder/arm/ih264e_function_selector.c
+++ b/encoder/arm/ih264e_function_selector.c
@@ -58,8 +58,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -68,14 +68,15 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
-#include "ih264e_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_platform_macros.h"
/**
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
index 8b2879b..30d7795 100644
--- a/encoder/arm/ih264e_function_selector_a9q.c
+++ b/encoder/arm/ih264e_function_selector_a9q.c
@@ -58,8 +58,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -68,23 +68,18 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_platform_macros.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264e_defs.h"
-#include "ih264e_structs.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264e_cabac.h"
#include "ih264e_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_cavlc.h"
-#include "ih264_padding.h"
#include "ih264e_intra_modes_eval.h"
-#include "ih264_mem_fns.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
@@ -109,144 +104,144 @@
void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec)
{
WORD32 i= 0;
-
- /* curr proc ctxt */
- process_ctxt_t *ps_proc = NULL;
- me_ctxt_t *ps_me_ctxt = NULL;
-
- /* Init function pointers for intra pred leaf level functions luma
- * Intra 16x16 */
- ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
- ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
- ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
- ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
-
- /* Init function pointers for intra pred leaf level functions luma
- * Intra 4x4 */
- ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
- ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
- ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
- ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
- ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
- ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
- ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
- ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
- ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
-
- /* Init function pointers for intra pred leaf level functions luma
- * Intra 8x8 */
- ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
- ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
- ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
- ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
- ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
- ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
- ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
- ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
-
- /* Init function pointers for intra pred leaf level functions chroma
- * Intra 8x8 */
- ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
- ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
- ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
- ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
-
- /* Init forward transform fn ptr */
- ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
- ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9;
- ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9;
- ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9;
- ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9;
-
- /* Init inverse transform fn ptr */
- ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
- ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9;
- ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
- ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
- ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
- ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
- ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
- ps_codec->pf_interleave_copy = ih264_interleave_copy_a9;
-
- /* Init fn ptr luma core coding */
- ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
- ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
- ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
-
- /* Init fn ptr chroma core coding */
- ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
- ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
-
- /* Init fn ptr luma deblocking */
- ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
- ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
- ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
- ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
-
- /* Init fn ptr chroma deblocking */
- ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
- ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
- ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
- ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
-
- /* write mb syntax layer */
- ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
- ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
-
- /* Padding Functions */
- ps_codec->pf_pad_top = ih264_pad_top_a9q;
- ps_codec->pf_pad_bottom = ih264_pad_bottom;
- ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
- ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
- ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
- ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
-
- /* Inter pred leaf level functions */
- ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
- ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
- ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
- ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
- ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
-
- /* sad me level functions */
- ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
- ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
- ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
-
- /* memor handling operations */
- ps_codec->pf_mem_cpy = ih264_memcpy_a9q;
- ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
- ps_codec->pf_mem_set = ih264_memset_a9q;
- ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
-
- /* sad me level functions */
- for(i = 0; i < (MAX_PROCESS_CTXT); i++)
- {
- ps_proc = &ps_codec->as_process[i];
- ps_me_ctxt = &ps_proc->s_me_ctxt;
- ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
- ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
- ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
- ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
- ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
- ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
- ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
- ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
- }
-
- /* intra mode eval -encoder level function */
- ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
- ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
- ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
-
- /* csc */
- ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q;
- ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q;
-
- /* Halp pel generation function - encoder level*/
- ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q;
- ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q;
-
- return ;
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+ me_ctxt_t *ps_me_ctxt = NULL;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+ ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+ ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+ ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+ ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+ ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+ ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+ ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+ ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+ ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+ /* Init forward transform fn ptr */
+ ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+ ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9;
+ ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9;
+ ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9;
+ ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9;
+
+ /* Init inverse transform fn ptr */
+ ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+ ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+ ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
+ ps_codec->pf_interleave_copy = ih264_interleave_copy_a9;
+
+ /* Init fn ptr luma core coding */
+ ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+ ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+ ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+ /* Init fn ptr chroma core coding */
+ ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+ ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+ /* write mb syntax layer */
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = ih264e_write_pslice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_top = ih264_pad_top_a9q;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom;
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+ /* Inter pred leaf level functions */
+ ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
+ ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
+ ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
+ ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+ /* sad me level functions */
+ ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+ ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+ ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+
+ /* memory handling operations */
+ ps_codec->pf_mem_cpy = ih264_memcpy_a9q;
+ ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
+ ps_codec->pf_mem_set = ih264_memset_a9q;
+ ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
+
+ /* sad me level functions */
+ for (i = 0; i < (MAX_PROCESS_CTXT); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+ ps_me_ctxt = &ps_proc->s_me_ctxt;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+ ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
+ ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
+ ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
+ ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
}
+ /* intra mode eval -encoder level function */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
+
+ /* csc */
+ ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q;
+ ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q;
+
+ /* Half pel generation function - encoder level */
+ ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q;
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q;
+
+}
+
diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c
index 173c2d5..1679af3 100644
--- a/encoder/arm/ih264e_function_selector_av8.c
+++ b/encoder/arm/ih264e_function_selector_av8.c
@@ -62,8 +62,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -72,23 +72,18 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_platform_macros.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264e_defs.h"
-#include "ih264e_structs.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264e_cabac.h"
#include "ih264e_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_cavlc.h"
-#include "ih264_padding.h"
#include "ih264e_intra_modes_eval.h"
-#include "ih264_mem_fns.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
@@ -197,8 +192,12 @@ void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec)
ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
/* write mb syntax layer */
- ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
- ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+ /* write mb syntax layer */
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = ih264e_write_pslice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = ih264e_write_bslice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac;
/* Padding Functions */
ps_codec->pf_pad_top = ih264_pad_top_av8;
diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s
index 1b9a87a..3ae6130 100644
--- a/encoder/arm/ih264e_half_pel.s
+++ b/encoder/arm/ih264e_half_pel.s
@@ -43,7 +43,6 @@
.text
.p2align 2
-@ /**
@/*******************************************************************************
@*
@* @brief
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
index e768c21..df06d41 100644
--- a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -17,7 +17,6 @@
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
-///**
///**
//******************************************************************************
@@ -97,7 +96,6 @@ ih264e_evaluate_intra16x16_modes_av8:
ldr x16, [sp, #80]
mov x17, x4
- mov x18, x5
mov x14, x6
mov x15, x7
@@ -503,9 +501,9 @@ sad_comp:
///----------------------
//DO VERTICAL PREDICTION
- str x8 , [x7] //MIN SAD
- mov x8, #0
- str x8 , [x6] // MODE
+ str w8 , [x7] //MIN SAD
+ mov w8, #0
+ str w8 , [x6] // MODE
add x6, x1, #17
ld1 {v30.16b}, [x6]
b do_dc_vert
@@ -515,9 +513,9 @@ not_vert: cmp x9, x10
///----------------------
//DO HORIZONTAL
- str x9 , [x7] //MIN SAD
- mov x9, #1
- str x9 , [x6] // MODE
+ str w9 , [x7] //MIN SAD
+ mov w9, #1
+ str w9 , [x6] // MODE
ld1 {v0.16b}, [x1]
dup v10.16b, v0.b[15]
@@ -562,9 +560,9 @@ not_vert: cmp x9, x10
do_dc: ///---------------------------------
//DO DC
- str x10 , [x7] //MIN SAD
- mov x10, #2
- str x10 , [x6] // MODE
+ str w10 , [x7] //MIN SAD
+ mov w10, #2
+ str w10 , [x6] // MODE
do_dc_vert:
st1 {v30.4s}, [x2], x4 //0
st1 {v30.4s}, [x2], x4 //1
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
index b02afd1..bb2526d 100644
--- a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -17,7 +17,6 @@
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
-///**
///**
//******************************************************************************
@@ -401,10 +400,10 @@ sad_comp:
///----------------------
//DO DC PREDICTION
- str x10 , [x7] //MIN SAD
+ str w10 , [x7] //MIN SAD
- mov x10, #0
- str x10 , [x6] // MODE
+ mov w10, #0
+ str w10 , [x6] // MODE
b do_dc_vert
//-----------------------------
@@ -414,10 +413,10 @@ not_dc:
bgt do_vert
///----------------------
//DO HORIZONTAL
- str x9 , [x7] //MIN SAD
+ str w9 , [x7] //MIN SAD
- mov x10, #1
- str x10 , [x6] // MODE
+ mov w10, #1
+ str w10 , [x6] // MODE
ld1 {v0.8h}, [x1]
dup v10.8h, v0.h[7]
@@ -441,9 +440,9 @@ not_dc:
do_vert:
//DO VERTICAL PREDICTION
- str x8 , [x7] //MIN SAD
- mov x8, #2
- str x8 , [x6] // MODE
+ str w8 , [x7] //MIN SAD
+ mov w8, #2
+ str w8 , [x6] // MODE
add x6, x1, #18
ld1 {v28.8b, v29.8b}, [x6] // vertical values
ld1 {v30.8b, v31.8b}, [x6] // vertical values
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
index 817faa6..8f27104 100644
--- a/encoder/armv8/ih264e_half_pel_av8.s
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -44,7 +44,6 @@
.p2align 2
.include "ih264_neon_macros.s"
-// /**
///*******************************************************************************
//*
//* @brief
@@ -280,8 +279,8 @@ ih264e_sixtap_filter_2dvh_vert_av8:
ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3
mov x14, #20
ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3
- mov v0.4h[0], w12
- mov v0.4h[1], w14
+ mov v0.h[0], w12
+ mov v0.h[1], w14
ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3
movi v1.8b, #20
@@ -333,10 +332,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v4.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -344,10 +343,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
mov v21.d[0], v20.d[1]
ext v2.8b, v2.8b , v3.8b , #2
@@ -362,10 +361,10 @@ filter_2dvh_loop:
saddl v2.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v2.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v2.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v2.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v2.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v2.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v2.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v2.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v2.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -376,10 +375,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -389,10 +388,10 @@ filter_2dvh_loop:
shrn v28.4h, v2.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
@@ -449,10 +448,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v7.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -460,10 +459,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
ext v5.8b, v5.8b , v6.8b , #2
@@ -478,10 +477,10 @@ filter_2dvh_loop:
saddl v6.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v6.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v6.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v6.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v6.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v6.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v6.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v6.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v6.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -492,10 +491,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -505,10 +504,10 @@ filter_2dvh_loop:
shrn v28.4h, v6.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
@@ -564,10 +563,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v10.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -575,10 +574,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
ext v8.8b, v8.8b , v9.8b , #2
@@ -593,10 +592,10 @@ filter_2dvh_loop:
saddl v8.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v8.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v8.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v8.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v8.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v8.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v8.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v8.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v8.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -607,10 +606,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -620,10 +619,10 @@ filter_2dvh_loop:
shrn v28.4h, v8.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
@@ -678,10 +677,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v13.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -689,10 +688,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
ext v11.8b, v11.8b , v12.8b , #2
@@ -707,10 +706,10 @@ filter_2dvh_loop:
saddl v12.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v12.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v12.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v12.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v12.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v12.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v12.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v12.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v12.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -721,10 +720,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -734,10 +733,10 @@ filter_2dvh_loop:
shrn v28.4h, v12.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
@@ -792,10 +791,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v16.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -803,10 +802,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
ext v14.8b, v14.8b , v15.8b , #2
@@ -821,10 +820,10 @@ filter_2dvh_loop:
saddl v14.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v14.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v14.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v14.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v14.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v14.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v14.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v14.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v14.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -835,10 +834,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -848,10 +847,10 @@ filter_2dvh_loop:
shrn v28.4h, v14.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
@@ -909,10 +908,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
ext v31.8b, v22.8b , v23.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
- smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
ext v30.8b, v21.8b , v22.8b , #4
sqrshrun v19.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
@@ -920,10 +919,10 @@ filter_2dvh_loop:
ext v28.8b, v21.8b , v22.8b , #2
saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
- smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
- smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
- smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
- smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ smlal v20.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
ext v31.8b, v23.8b , v24.8b , #2
ext v17.8b, v17.8b , v18.8b , #2
@@ -938,10 +937,10 @@ filter_2dvh_loop:
saddl v18.4s, v31.4h, v22.4h //// a0 + a5 (set3)
ext v28.8b, v22.8b , v23.8b , #2
- smlal v18.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
- smlal v18.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
- smlsl v18.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
- smlsl v18.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ smlal v18.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v18.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v18.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v18.4s, v23.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
ext v31.8b, v24.8b , v25.8b , #2
shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
@@ -952,10 +951,10 @@ filter_2dvh_loop:
saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
ext v28.8b, v23.8b , v24.8b , #2
ext v31.8b, v25.8b , v25.8b , #2
- smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
- smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
- smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
- smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ smlal v26.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
ext v30.8b, v24.8b , v25.8b , #4
saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
@@ -965,10 +964,10 @@ filter_2dvh_loop:
shrn v28.4h, v18.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3 //// Load next Row data
- smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
- smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
- smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
- smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ smlal v22.4s, v30.4h, v0.h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
mov v20.d[1], v21.d[0]
sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
index 8a478bb..96122de 100644
--- a/encoder/ih264e_api.c
+++ b/encoder/ih264e_api.c
@@ -93,6 +93,7 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264e_defs.h"
#include "ih264e_globals.h"
@@ -109,10 +110,10 @@
#include "ime_defs.h"
#include "ime_distortion_metrics.h"
#include "ime_structs.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_utils.h"
#include "ih264e_core_coding.h"
-#include "ih264_buf_mgr.h"
#include "ih264_platform_macros.h"
#include "ih264e_platform_macros.h"
#include "ih264_list.h"
@@ -399,7 +400,8 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
return (IV_FAIL);
}
- if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1)
+ if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_PIC_CNT ||
+ ps_ip->s_ive_ip.u4_max_ref_cnt < MIN_REF_PIC_CNT)
{
ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
@@ -482,7 +484,15 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
return (IV_FAIL);
}
- if (ps_ip->s_ive_ip.u4_max_num_bframes != 0)
+ if (ps_ip->s_ive_ip.u4_num_bframes > MAX_NUM_BFRAMES)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_num_bframes
+ && (ps_ip->s_ive_ip.u4_max_ref_cnt < 2))
{
ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED;
@@ -1472,15 +1482,6 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
return IV_FAIL;
}
- if (ps_ip->s_ive_ip.u4_num_b_frames != 0)
- {
- ps_op->s_ive_op.u4_error_code |= 1
- << IVE_UNSUPPORTEDPARAM;
- ps_op->s_ive_op.u4_error_code |=
- IH264E_BFRAMES_NOT_SUPPORTED;
- return IV_FAIL;
- }
-
break;
}
@@ -1560,7 +1561,11 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
return IV_FAIL;
}
- if ((ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max)
+ /* We do not support QP < 4 */
+ if ((ps_ip->s_ive_ip.u4_i_qp_min < 4)
+ || (ps_ip->s_ive_ip.u4_p_qp_min < 4)
+ || (ps_ip->s_ive_ip.u4_b_qp_min < 4)
+ || (ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max)
|| (ps_ip->s_ive_ip.u4_p_qp_min > ps_ip->s_ive_ip.u4_p_qp_max)
|| (ps_ip->s_ive_ip.u4_b_qp_min > ps_ip->s_ive_ip.u4_b_qp_max))
{
@@ -1743,7 +1748,8 @@ static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
return IV_FAIL;
}
- if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE)
+ if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE &&
+ ps_ip->s_ive_ip.e_profile != IV_PROFILE_MAIN)
{
ps_op->s_ive_op.u4_error_code |= 1
<< IVE_UNSUPPORTEDPARAM;
@@ -1832,7 +1838,6 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4;
ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4;
- ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD;
/* If number of MBs in a frame changes the air map also changes.
@@ -1864,7 +1869,7 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
u4_init_rc = 1;
/* when the dimension changes, the header needs to be regenerated */
- ps_codec->i4_header_mode = 1;
+ ps_codec->i4_gen_header = 1;
}
}
else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMERATE)
@@ -2091,7 +2096,6 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval;
- ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames;
}
else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS)
{
@@ -2188,6 +2192,7 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS)
{
ps_codec->s_cfg.e_profile = ps_cfg->e_profile;
+ ps_codec->s_cfg.u4_entropy_coding_mode = ps_cfg->u4_entropy_coding_mode;
}
else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES)
{
@@ -2259,8 +2264,9 @@ IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
ps_codec->s_cfg.u4_target_bitrate,
ps_codec->s_cfg.u4_max_bitrate,
ps_codec->s_cfg.u4_vbv_buffer_delay,
- ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
- H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_i_frm_interval,
+ ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp,
+ ps_codec->s_cfg.u4_num_bframes + 2, au1_min_max_qp,
ps_codec->s_cfg.u4_max_level);
}
@@ -2302,7 +2308,7 @@ static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
ps_cfg->e_rc_mode = DEFAULT_RC;
ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE;
ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE;
- ps_cfg->u4_max_num_bframes = 0;
+ ps_cfg->u4_num_bframes = DEFAULT_MAX_NUM_BFRAMES;
ps_cfg->e_content_type = IV_PROGRESSIVE;
ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
@@ -2350,7 +2356,6 @@ static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL;
ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL;
- ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES;
ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL;
ps_cfg->e_profile = DEFAULT_PROFILE;
ps_cfg->u4_timestamp_low = 0;
@@ -2396,7 +2401,7 @@ static WORD32 ih264e_init(codec_t *ps_codec)
WORD32 i;
/* coded pic count */
- ps_codec->i4_coded_pic_cnt = 0;
+ ps_codec->i4_poc = 0;
/* Number of API calls to encode are made */
ps_codec->i4_encode_api_call_cnt = -1;
@@ -2422,7 +2427,7 @@ static WORD32 ih264e_init(codec_t *ps_codec)
ps_codec->i4_disable_deblk_pic_cnt = 0;
/* frame num */
- ps_codec->i4_frame_num = -1;
+ ps_codec->i4_frame_num = 0;
/* set the current frame type to I frame, since we are going to start encoding*/
ps_codec->force_curr_frame_type = IV_NA_FRAME;
@@ -2502,7 +2507,7 @@ static WORD32 ih264e_init(codec_t *ps_codec)
{
WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
- WORD32 num_jobs = max_mb_rows * 2;
+ WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 clz;
/* Use next power of two number of entries*/
@@ -2619,10 +2624,11 @@ static WORD32 ih264e_init(codec_t *ps_codec)
*/
static WORD32 ih264e_get_num_rec(void *pv_api_ip, void *pv_api_op)
{
- UNUSED(pv_api_ip);
/* api call I/O structures */
ih264e_num_mem_rec_op_t *ps_op = pv_api_op;
+ UNUSED(pv_api_ip);
+
ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
return IV_SUCCESS;
@@ -2674,8 +2680,6 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
/* error status */
IV_STATUS_T status = IV_SUCCESS;
- /* profile / level info */
- level = ps_ip->s_ive_ip.u4_max_level;
num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
@@ -2692,6 +2696,9 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
max_mb_cols = max_wd_luma / MB_SIZE;
max_mb_cnt = max_mb_rows * max_mb_cols;
+ /* profile / level info */
+ level = ih264e_get_min_level(max_ht_luma, max_wd_luma);
+
/* validate params */
if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
{
@@ -2739,10 +2746,30 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size);
/************************************************************************
+ * Request memory for CABAC context *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(cabac_ctxt_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CABAC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for CABAC MB info *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC_MB_INFO];
+ {
+ ps_mem_rec->u4_mem_size = ((max_mb_cols + 1) + 1)
+ * sizeof(mb_info_ctxt_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CABAC_MB_INFO, ps_mem_rec->u4_mem_size);
+
+
+ /************************************************************************
* Request memory for entropy context *
* In multi core encoding, each row is assumed to be launched on a *
* thread. The rows below can only start after its neighbors are coded *
- * The status of an mb coded/uncoded is signaled via entropy map. *
+ * The status of an mb coded/uncoded is signaled via entropy map. *
* 1. One word32 to store skip run cnt *
* 2. mb entropy map (mb status entropy coded/uncoded). The size*
* of the entropy map is max mb cols. Further allocate one *
@@ -3042,7 +3069,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
{
/* One process job per row of MBs */
/* Allocate for two pictures, so that wrap around can be handled easily */
- WORD32 num_jobs = max_mb_rows * 2;
+ WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
@@ -3057,7 +3084,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
{
/* One process job per row of MBs */
/* Allocate for two pictures, so that wrap around can be handled easily */
- WORD32 num_jobs = max_mb_rows * 2;
+ WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
@@ -3177,6 +3204,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
{
WORD32 total_size = 0;
+ WORD32 i4_tmp_size;
/* size to hold prediction buffer */
total_size += sizeof(UWORD8) * 16 * 16;
@@ -3215,14 +3243,8 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
total_size = ALIGN64(total_size);
/* Buffers for holding half_x , half_y and half_xy planes */
- total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
- total_size = ALIGN64(total_size);
-
- total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
- total_size = ALIGN64(total_size);
-
- total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
- total_size = ALIGN64(total_size);
+ i4_tmp_size = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size += (ALIGN64(i4_tmp_size) * SUBPEL_BUFF_CNT);
/* Allocate for each process thread */
total_size *= MAX_PROCESS_CTXT;
@@ -3449,9 +3471,9 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
************************************************************************/
ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
{
- ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
- * (sizeof(mb_info_nmb_t)
- + MB_SIZE * MB_SIZE * sizeof(UWORD8));
+ ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * max_mb_cols *
+ (sizeof(mb_info_nmb_t) + MB_SIZE * MB_SIZE
+ * sizeof(UWORD8));
}
DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
@@ -3517,6 +3539,9 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
/* codec variables */
codec_t * ps_codec;
+ cabac_ctxt_t *ps_cabac;
+ mb_info_ctxt_t *ps_mb_map_ctxt_inc;
+
cfg_params_t *ps_cfg;
/* frame dimensions */
@@ -3524,7 +3549,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
/* temp var */
- WORD32 i;
+ WORD32 i, j;
WORD32 status = IV_SUCCESS;
/* frame dimensions */
@@ -3543,11 +3568,23 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base;
ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
}
+ /* Init mem records_cabac ctxt */
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC];
+ {
+ ps_cabac = (cabac_ctxt_t *)(ps_mem_rec->pv_base);
+ }
+
+ /* Init mem records mb info array for CABAC */
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CABAC_MB_INFO];
+ {
+ ps_mb_map_ctxt_inc = (mb_info_ctxt_t *)(ps_mem_rec->pv_base);
+ }
/* Note this memset can not be done in init() call, since init will called
during reset as well. And calling this during reset will mean all pointers
need to reinitialized */
memset(ps_codec, 0, sizeof(codec_t));
+ memset(ps_cabac, 0, sizeof(cabac_ctxt_t));
/* Set default Config Params */
ps_cfg = &ps_codec->s_cfg;
@@ -3565,7 +3602,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt;
ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate;
ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate;
- ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes;
+ ps_cfg->u4_num_bframes = ps_ip->s_ive_ip.u4_num_bframes;
ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type;
ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
@@ -3611,7 +3648,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
/* base ptr */
UWORD8 *pu1_buf = ps_mem_rec->pv_base;
@@ -3652,6 +3689,8 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
size += (max_mb_cols * 4 * sizeof(UWORD8));
size = ALIGN128(size);
offset = size;
+ /* cabac Context */
+ ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac;
}
else
{
@@ -3693,8 +3732,12 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
(void *) (pu1_buf + size);
size += (max_mb_cols * 4 * sizeof(UWORD8));
size = ALIGN128(size);
+ /* cabac Context */
+ ps_codec->as_process[i].s_entropy.ps_cabac = ps_cabac;
}
}
+ ps_codec->as_process[0].s_entropy.ps_cabac->ps_mb_map_ctxt_inc_base =
+ ps_mb_map_ctxt_inc;
}
ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
@@ -3720,7 +3763,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
@@ -3758,7 +3801,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
@@ -3838,7 +3881,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
}
@@ -3860,7 +3903,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
}
@@ -3885,7 +3928,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
}
@@ -3945,7 +3988,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
}
@@ -3976,7 +4019,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
@@ -4006,7 +4049,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
}
@@ -4102,18 +4145,11 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
size += size_inv;
size = ALIGN64(size);
- /* Buffers for holding half_x , half_y and half_xy values */
- ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size);
- size += size_hp;
- size = ALIGN64(size);
-
- ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size);
- size += size_hp;
- size = ALIGN64(size);
-
- ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size);
- size += size_hp;
- size = ALIGN64(size);
+ for (j = 0; j < SUBPEL_BUFF_CNT; j++)
+ {
+ ps_codec->as_process[i].apu1_subpel_buffs[j] = (pu1_buf + size);
+ size += ALIGN64(size_hp);
+ }
}
}
@@ -4209,7 +4245,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
(mb_info_t *) pu1_buf;
@@ -4260,7 +4296,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
- if (i < MAX_PROCESS_CTXT / 2)
+ if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
@@ -4341,9 +4377,9 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
UWORD8 *pu1_buf = ps_mem_rec->pv_base;
/* size of nmb ctxt */
- WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
+ WORD32 size = max_mb_cols * sizeof(mb_info_nmb_t);
- UWORD32 nmb_cntr, subpel_buf_size;
+ WORD32 nmb_cntr, subpel_buf_size;
/* init nmb info structure pointer in all proc ctxts */
for (i = 0; i < MAX_PROCESS_CTXT; i++)
@@ -4361,7 +4397,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
mb_info_nmb_t* ps_mb_info_nmb =
&ps_codec->as_process[i].ps_nmb_info[0];
- for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
+ for (nmb_cntr = 0; nmb_cntr < max_mb_cols; nmb_cntr++)
{
ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;
@@ -4477,13 +4513,14 @@ static WORD32 ih264e_set_flush_mode(iv_obj_t *ps_codec_obj,
void *pv_api_ip,
void *pv_api_op)
{
- UNUSED(pv_api_ip);
/* codec ctxt */
codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
/* ctrl call I/O structures */
ih264e_ctl_flush_op_t *ps_ctl_op = pv_api_op;
+ UNUSED(pv_api_ip);
+
ps_ctl_op->s_ive_op.u4_error_code = 0;
/* signal flush frame control call */
@@ -4522,7 +4559,6 @@ static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
void *pv_api_ip,
void *pv_api_op)
{
- UNUSED(ps_codec_obj);
/* ctrl call I/O structures */
ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
@@ -4532,6 +4568,8 @@ static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
WORD32 ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
WORD32 i;
+ UNUSED(ps_codec_obj);
+
ps_op->s_ive_op.u4_error_code = 0;
/* Number of components in input buffers required for codec &
@@ -4584,7 +4622,7 @@ static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
for (i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++)
{
- ps_op->s_ive_op.au4_min_out_buf_size[i] = (wd * ht * 3) >> 1;
+ ps_op->s_ive_op.au4_min_out_buf_size[i] = MAX(((wd * ht * 3) >> 1), MIN_STREAM_SIZE);
}
ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS;
@@ -5073,7 +5111,6 @@ static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip,
ps_cfg->u4_i_frm_interval = ps_ip->s_ive_ip.u4_i_frm_interval;
ps_cfg->u4_idr_frm_interval = ps_ip->s_ive_ip.u4_idr_frm_interval;
- ps_cfg->u4_num_b_frames = ps_ip->s_ive_ip.u4_num_b_frames;
ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
@@ -5117,6 +5154,8 @@ static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip,
ps_cfg->e_profile = ps_ip->s_ive_ip.e_profile;
+ ps_cfg->u4_entropy_coding_mode = ps_ip->s_ive_ip.u4_entropy_coding_mode;
+
ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
@@ -5237,13 +5276,14 @@ static WORD32 ih264e_reset(iv_obj_t *ps_codec_obj,
void *pv_api_ip,
void *pv_api_op)
{
- UNUSED(pv_api_ip);
/* codec ctxt */
codec_t * ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
/* ctrl call I/O structures */
ih264e_ctl_reset_op_t *ps_op = pv_api_op;
+ UNUSED(pv_api_ip);
+
ps_op->s_ive_op.u4_error_code = 0;
if (ps_codec != NULL)
@@ -5297,7 +5337,7 @@ static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj,
IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd;
/* error status */
- IV_STATUS_T ret = 0;
+ IV_STATUS_T ret = IV_SUCCESS;
/* temp var */
WORD32 i;
diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c
index e5bfbe4..d79f637 100644
--- a/encoder/ih264e_bitstream.c
+++ b/encoder/ih264e_bitstream.c
@@ -151,7 +151,6 @@ IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm,
if(code_len < WORD_SIZE)
ASSERT((u4_code_val >> code_len) == 0);
-
/* sanity check on the bitstream engine state */
ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE);
diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h
index 21360cc..9cd2b81 100644
--- a/encoder/ih264e_bitstream.h
+++ b/encoder/ih264e_bitstream.h
@@ -65,6 +65,14 @@
#define EPB_BYTE 0x03
+/**
+******************************************************************************
+ * @brief Stream buffer allocated per frame should be at least MIN_STREAM_SIZE
+******************************************************************************
+ */
+#define MIN_STREAM_SIZE 0x800
+
+
/*****************************************************************************/
/* Function Macros */
/*****************************************************************************/
@@ -106,12 +114,12 @@
* @brief returns bits required to code a value
******************************************************************************
*/
-#define UE_LENGTH(bits,x) \
-{ \
- UWORD32 r_bit; \
- GETRANGE(r_bit,x+1) \
- bits =(((r_bit - 1) << 1)+1); \
-} \
+#define UE_LENGTH(bits,x) \
+{ \
+ UWORD32 r_bit; \
+ GETRANGE(r_bit,x+1) \
+ bits =(((r_bit - 1) << 1)+1);\
+} \
/**
******************************************************************************
@@ -140,6 +148,51 @@
*/
#define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm)
+/**
+******************************************************************************
+ * @brief Gets number of bits coded (bytes in stream buffer * 8 + bits used in current word)
+******************************************************************************
+ */
+
+#define GET_NUM_BITS(ps_bitstream) (((ps_bitstream)->u4_strm_buf_offset << 3) \
+ + 32 - (ps_bitstream)->i4_bits_left_in_cw)
+
+
+
+/**
+******************************************************************************
+ * @macro Align bitstream to byte - Remaining bits are filled with '1' (expands to a bare if-block)
+******************************************************************************
+*/
+#define BITSTREAM_BYTE_ALIGN(ps_bitstrm) \
+ if (ps_bitstrm->i4_bits_left_in_cw & 0x07) \
+ { \
+ const WORD32 len = (WORD32)((ps_bitstrm->i4_bits_left_in_cw) & 0x07);\
+ ih264e_put_bits(ps_bitstrm, (UWORD32)((1 << len) - 1), len); \
+ }
+
+
+/**
+******************************************************************************
+* @macro Flush the filled bytes of the current word to the stream, MSB first, *
+* then reset the word (current word is assumed to be byte aligned) *
+******************************************************************************
+*/
+#define BITSTREAM_FLUSH(ps_bitstrm) \
+{ \
+ WORD32 i; \
+ for (i = WORD_SIZE; i > ps_bitstrm->i4_bits_left_in_cw; i -= 8) \
+ { \
+ UWORD8 u1_next_byte = (ps_bitstrm->u4_cur_word >> (i - 8)) & 0xFF; \
+ PUTBYTE_EPB(ps_bitstrm->pu1_strm_buffer, ps_bitstrm->u4_strm_buf_offset,\
+ u1_next_byte, ps_bitstrm->i4_zero_bytes_run); \
+ } \
+ ps_bitstrm->u4_cur_word = 0; \
+ ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE; \
+} \
+
+
+
/*****************************************************************************/
/* Structures */
diff --git a/encoder/ih264e_cabac.c b/encoder/ih264e_cabac.c
new file mode 100644
index 0000000..64ff7cd
--- /dev/null
+++ b/encoder/ih264e_cabac.c
@@ -0,0 +1,819 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_cabac.c
+*
+* @brief
+* Contains all leaf level functions for CABAC entropy coding.
+*
+*
+* @author
+* Doney Alex
+*
+* @par List of Functions:
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264_macros.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
+#include "ih264e_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * k-th order Exp-Golomb (UEGk) binarization process: Implements the suffix
+ * part of the concatenated unary / k-th order Exp-Golomb (UEGk) binarization,
+ * where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402
+ *
+ * @param[in] i2_sufs
+ * Suffix value to be binarized
+ *
+ * @param[out] pi1_bins_len
+ * Receives the length of the bin string in bits (2 * prefix length + 1)
+ *
+ * @returns Binarized value (bin string packed in the low bits of the result)
+ *
+ * @remarks
+ * Assumes i2_sufs >= 0 and i2_sufs + 1 fits in WORD16 - TODO confirm at callers
+ *
+ *******************************************************************************
+ */
+UWORD32 ih264e_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len)
+{
+ UWORD32 u4_bins;
+ WORD32 i4_len;
+ WORD16 x, y;
+
+ x = i2_sufs + 1;
+ i4_len = CLZ(x); /* presumably counts leading zeros of a 32-bit word - confirm */
+ i4_len = 31 - i4_len; /* under that assumption: index of the top set bit of x */
+ y = 1 << i4_len;
+ y = y - 1; /* y = i4_len one-bits */
+ i2_sufs = i2_sufs - y; /* remainder that is coded in i4_len bits */
+ u4_bins = y << 1; /* prefix: i4_len ones followed by a terminating zero */
+ u4_bins = u4_bins << i4_len; /* make room for the i4_len remainder bits */
+ u4_bins = u4_bins + i2_sufs;
+
+ REV(u4_bins, u4_bins); /* presumably a 32-bit bit reversal - confirm */
+ u4_bins = u4_bins >> (31 - 2 * i4_len); /* keep only the 2 * i4_len + 1 bins */
+ (*pi1_bins_len) = 2 * i4_len + 1;
+
+ return (u4_bins);
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Get cabac context for the MB: sets the current, left and top neighbor
+ * context pointers in the cabac context depending upon neighbor availability.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure (MB position, slice map, cabac ctxt)
+ *
+ * @param[in] u4_mb_type
+ * Type of MB; I16x16, I8x8 and I4x4 are treated as intra
+ *
+ * @returns None
+ *
+ * @remarks
+ * Unavailable neighbors are replaced by default values that depend on intra
+ *
+ *******************************************************************************
+ */
+void ih264e_get_cabac_context(entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type)
+{
+
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+ mb_info_ctxt_t *ps_ctx_inc_mb_map;
+ cab_csbp_t *ps_lft_csbp;
+
+ WORD32 i4_lft_avail, i4_top_avail, i4_is_intra;
+ WORD32 i4_mb_x, i4_mb_y;
+ UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+
+ i4_is_intra = ((u4_mb_type == I16x16) || (u4_mb_type == I8x8)
+ || (u4_mb_type == I4x4));
+
+ /* derive neighbor availability */
+ i4_mb_x = ps_ent_ctxt->i4_mb_x;
+ i4_mb_y = ps_ent_ctxt->i4_mb_y;
+ pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs); /* start of the current MB row */
+ /* left macroblock availability: not in first column and same slice index */
+ i4_lft_avail = (i4_mb_x == 0
+ || (pu1_slice_idx[i4_mb_x - 1] != pu1_slice_idx[i4_mb_x])) ?
+ 0 : 1;
+ /* top macroblock availability: not in first row and same slice index */
+ i4_top_avail = (i4_mb_y == 0
+ || (pu1_slice_idx[i4_mb_x - ps_ent_ctxt->i4_wd_mbs]
+ != pu1_slice_idx[i4_mb_x])) ? 0 : 1;
+ i4_mb_x = ps_ent_ctxt->i4_mb_x; /* re-read; same value as assigned above */
+ ps_ctx_inc_mb_map = ps_cabac_ctxt->ps_mb_map_ctxt_inc;
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info = ps_ctx_inc_mb_map + i4_mb_x;
+ ps_cabac_ctxt->ps_left_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; /* default until proven available */
+ ps_cabac_ctxt->ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_def_ctxt_mb_info; /* default until proven available */
+ ps_lft_csbp = ps_cabac_ctxt->ps_lft_csbp;
+ ps_cabac_ctxt->pu1_left_y_ac_csbp = &ps_lft_csbp->u1_y_ac_csbp_top_mb;
+ ps_cabac_ctxt->pu1_left_uv_ac_csbp = &ps_lft_csbp->u1_uv_ac_csbp_top_mb;
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp = &ps_lft_csbp->u1_yuv_dc_csbp_top_mb;
+ ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc =
+ &ps_cabac_ctxt->i1_left_ref_idx_ctx_inc_arr[0][0];
+ ps_cabac_ctxt->pu1_left_mv_ctxt_inc =
+ ps_cabac_ctxt->u1_left_mv_ctxt_inc_arr[0];
+
+ if (i4_lft_avail) /* left neighbor is the previous entry of the row map */
+ ps_cabac_ctxt->ps_left_ctxt_mb_info =
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info - 1;
+ if (i4_top_avail) /* presumably the entry still holds the MB above until overwritten - confirm */
+ ps_cabac_ctxt->ps_top_ctxt_mb_info =
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+ if (!i4_lft_avail)
+ {
+ UWORD8 u1_def_csbp = i4_is_intra ? 0xf : 0; /* left defaults: 0xf for intra, else 0 */
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = u1_def_csbp;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = u1_def_csbp;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = u1_def_csbp;
+ *((UWORD32 *) ps_cabac_ctxt->pi1_left_ref_idx_ctxt_inc) = 0; /* clears 4 bytes with one 32-bit store */
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ }
+ if (!i4_top_avail)
+ {
+ UWORD8 u1_def_csbp = i4_is_intra ? 0xff : 0; /* top defaults: 0xff for intra, else 0 */
+ ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_ac_csbp = u1_def_csbp; /* top info is the default entry here */
+ ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_yuv_dc_csbp = u1_def_csbp;
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[0] =
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[1] =
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[2] =
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->i1_ref_idx[3] = 0;
+ memset(ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv, 0, 16);
+ }
+
+}
+
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
+ *
+ * @param[in] ps_cabac_ctxt
+ * pointer to cabac context (handle); its bitstream state is updated
+ *
+ * @returns IH264E_SUCCESS, or IH264E_BITSTREAM_BUFFER_OVERFLOW if out of space
+ *
+ * @remarks
+ * On success the bitstream's current word and bits-left count are reset
+ *
+ *******************************************************************************
+ */
+WORD32 ih264e_cabac_flush(cabac_ctxt_t *ps_cabac_ctxt)
+{
+
+ /* bit stream ptr */
+ bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+ UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+ UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
+ UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
+ UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
+ WORD32 zero_run = ps_stream->i4_zero_bytes_run;
+ UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
+
+ /************************************************************************/
+ /* Insert the carry (propagated in previous byte) along with */
+ /* outstanding bytes (if any) and flush remaining bits */
+ /************************************************************************/
+ {
+ /* carry = 1 => putbit(1); carry propagated due to L renorm */
+ WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1;
+ WORD32 last_byte;
+ WORD32 bits_left;
+ WORD32 rem_bits;
+
+ /*********************************************************************/
+ /* Bitstream overflow check */
+ /* NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */
+ /*********************************************************************/
+ if ((u4_strm_buf_offset + u4_out_standing_bytes + 1)
+ >= ps_stream->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+ }
+
+ if (carry)
+ {
+ /* CORNER CASE: if the previous data is 0x000003, then EPB will be inserted
+ and the data will become 0x00000303 and if the carry is present, it will
+ be added with the last byte and it will become 0x00000304 which is not correct
+ as per standard */
+ /* so check for previous four bytes and if it is equal to 0x00000303
+ then subtract u4_strm_buf_offset by 1. NOTE(review): reads up to 4 bytes before the offset - confirm offset >= 4 */
+ if (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03
+ && pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03
+ && pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00
+ && pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00)
+ {
+ u4_strm_buf_offset -= 1;
+ }
+ /* previous byte carry add will not result in overflow to */
+ /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
+ pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+ zero_run = 0;
+ }
+
+ /* Insert outstanding bytes (if any): 0x00 after a carry, 0xFF otherwise */
+ while (u4_out_standing_bytes)
+ {
+ UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+ u4_out_standing_bytes--;
+ }
+
+ /* clear the carry in low */
+ u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1);
+
+ /* extract the remaining bits; */
+ /* includes additional msb bit of low as per Figure 9-12 */
+ bits_left = u4_bits_gen + 1;
+ rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left));
+
+ if (bits_left >= 8) /* emit one whole byte first so fewer than 8 bits remain */
+ {
+ last_byte = (rem_bits >> (bits_left - 8)) & 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+ bits_left -= 8;
+ }
+
+ /* insert last byte along with rbsp stop bit(1) and 0's in the end */
+ last_byte = (rem_bits << (8 - bits_left))
+ | (1 << (7 - bits_left) | (1 << (7 - bits_left - 1))); /* NOTE(review): shift count is negative if bits_left == 7 - confirm range */
+ last_byte &= 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+
+ /* update the state variables and return success */
+ ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
+ ps_stream->i4_zero_bytes_run = 0; /* stored run is reset; local zero_run is not written back */
+ /* Default init values for scratch variables of bitstream context */
+ ps_stream->u4_cur_word = 0;
+ ps_stream->i4_bits_left_in_cw = WORD_SIZE;
+
+ return (IH264E_SUCCESS);
+ }
+}
+
+/**
+ ******************************************************************************
+ *
+ * @brief Puts new byte (and outstanding bytes) into bitstream after cabac
+ * renormalization
+ *
+ * @par Description
+ * 1. Extract the leading byte of low(L)
+ * 2. If leading byte=0xff increment outstanding bytes and return
+ * (as the actual bits depend on carry propagation later)
+ * 3. If leading byte is not 0xff check for any carry propagation
+ * 4. Insert the carry (propagated in previous byte) along with outstanding
+ * bytes (if any) and leading byte
+ *
+ *
+ * @param[inout] ps_cabac_ctxt
+ * pointer to cabac context (handle)
+ *
+ * @return
+ * none (void); bitstream state and encoding environment are updated in place
+ ******************************************************************************
+ */
+void ih264e_cabac_put_byte(cabac_ctxt_t *ps_cabac_ctxt)
+{
+
+ /* bit stream ptr and cabac encoding environment (low, bits generated) */
+ bitstrm_t *ps_stream = ps_cabac_ctxt->ps_bitstrm;
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+ UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+ UWORD32 u4_bits_gen = ps_cab_enc_env->u4_bits_gen;
+ WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8);
+
+ /* Sanity checks */
+ ASSERT((ps_cab_enc_env->u4_code_int_range >= 256)
+ && (ps_cab_enc_env->u4_code_int_range < 512));
+ ASSERT((u4_bits_gen >= 8));
+
+ /* update bits generated and low after extracting leading byte */
+ u4_bits_gen -= 8;
+ ps_cab_enc_env->u4_code_int_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1);
+ ps_cab_enc_env->u4_bits_gen = u4_bits_gen;
+
+ /************************************************************************/
+ /* 1. Extract the leading byte of low(L) */
+ /* 2. If leading byte=0xff increment outstanding bytes and return */
+ /* (as the actual bits depend on carry propagation later) */
+ /* 3. If leading byte is not 0xff check for any carry propagation */
+ /* 4. Insert the carry (propagated in previous byte) along with */
+ /* outstanding bytes (if any) and leading byte */
+ /************************************************************************/
+ if (lead_byte == 0xff)
+ {
+ /* actual bits depend on carry propagation */
+ ps_cab_enc_env->u4_out_standing_bytes++;
+ return ;
+ }
+ else
+ {
+ /* bit 8 of lead_byte is the carry; carry = 1 => putbit(1), propagated due to L renorm */
+ WORD32 carry = (lead_byte >> 8) & 0x1;
+ UWORD8 *pu1_strm_buf = ps_stream->pu1_strm_buffer;
+ UWORD32 u4_strm_buf_offset = ps_stream->u4_strm_buf_offset;
+ WORD32 zero_run = ps_stream->i4_zero_bytes_run;
+ UWORD32 u4_out_standing_bytes = ps_cab_enc_env->u4_out_standing_bytes;
+
+ /* NOTE(review): no check against u4_max_strm_size is visible in this function - confirm callers guarantee space */
+ /*********************************************************************/
+ /* Insert the carry propagated in previous byte */
+ /* */
+ /* Note : Do not worry about corruption into slice header align byte */
+ /* This is because the first bin cannot result in overflow */
+ /*********************************************************************/
+ if (carry)
+ {
+ /* CORNER CASE: if the previous data is 0x000003, then EPB will be inserted
+ and the data will become 0x00000303 and if the carry is present, it will
+ be added with the last byte and it will become 0x00000304 which is not correct
+ as per standard */
+ /* so check for previous four bytes and if it is equal to 0x00000303
+ then subtract u4_strm_buf_offset by 1 */
+ if (pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03
+ && pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03
+ && pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00
+ && pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00)
+ {
+ u4_strm_buf_offset -= 1;
+ }
+ /* previous byte carry add will not result in overflow to */
+ /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
+ pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+ zero_run = 0;
+ }
+
+ /* Insert outstanding bytes (if any): 0x00 each when carry is set, 0xFF otherwise */
+ while (u4_out_standing_bytes)
+ {
+ UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+
+ u4_out_standing_bytes--;
+ }
+ ps_cab_enc_env->u4_out_standing_bytes = 0;
+
+ /* Insert the leading byte (carry bit stripped) */
+ lead_byte &= 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run);
+
+ /* write back the stream offset and zero-byte run (function is void) */
+ ps_stream->u4_strm_buf_offset = u4_strm_buf_offset;
+ ps_stream->i4_zero_bytes_run = zero_run;
+
+ }
+}
+
+
+
+
+ /**
+ ******************************************************************************
+ *
+ * @brief Codes a bin based on probability and mps packed context model
+ *
+ * @par Description
+ * 1. Apart from encoding bin, context model is updated as per state transition
+ * 2. Range and Low renormalization is done based on bin and original state
+ * 3. After renorm bitstream is updated (if required)
+ *
+ * @param[inout] ps_cabac
+ * pointer to cabac context (handle)
+ *
+ * @param[in] bin
+ * bin(boolean) to be encoded
+ *
+ * @param[inout] pu1_bin_ctxts
+ * pointer to cabac context model containing pState[bits 5-0] | MPS[bit6]; rewritten with the new state
+ *
+ * @return
+ * none (void)
+ ******************************************************************************
+ */
+void ih264e_cabac_encode_bin(cabac_ctxt_t *ps_cabac, WORD32 bin,
+ bin_ctxt_model *pu1_bin_ctxts)
+{
+
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
+ UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
+ UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+ UWORD32 u4_rlps;
+ UWORD8 state_mps = (*pu1_bin_ctxts) & 0x3F;
+ UWORD8 u1_mps = !!((*pu1_bin_ctxts) & (0x40));
+ WORD32 shift;
+ UWORD32 u4_table_val;
+ /* Sanity checks */
+ ASSERT((bin == 0) || (bin == 1));
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+
+ /* LUT entry by (state, quantized range): bits 7-0 rLPS, 13-8 next state on MPS, 20-15 next state on LPS */
+ u4_table_val= gau4_ih264_cabac_table[state_mps][(u4_range >> 6) & 0x3];
+ u4_rlps = u4_table_val & 0xFF;
+ u4_range -= u4_rlps;
+
+ /* check if bin is mps or lps */
+ if (u1_mps ^ bin)
+ {
+ /* lps path; L= L + R; R = RLPS */
+ u4_low += u4_range;
+ u4_range = u4_rlps;
+ if (state_mps == 0)
+ {
+ /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */
+ u1_mps = 1 - u1_mps;
+ } /* update the context model from state transition LUT */
+
+ state_mps = (u4_table_val >> 15) & 0x3F;
+ }
+ else
+ { /* update the context model from state transition LUT */
+ state_mps = (u4_table_val >> 8) & 0x3F;
+ }
+
+ (*pu1_bin_ctxts) = (u1_mps << 6) | state_mps;
+
+ /*****************************************************************/
+ /* Renormalization; calculate bits generated based on range(R) */
+ /* Note : 6 <= R < 512; R is 2 only for terminating encode */
+ /*****************************************************************/
+ GETRANGE(shift, u4_range);
+ shift = 9 - shift;
+ u4_low <<= shift;
+ u4_range <<= shift;
+
+ /* bits to be inserted in the bitstream */
+ ps_cab_enc_env->u4_bits_gen += shift;
+ ps_cab_enc_env->u4_code_int_range = u4_range;
+ ps_cab_enc_env->u4_code_int_low = u4_low;
+
+ /* generate stream once more than CABAC_BITS bits have been generated */
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ ih264e_cabac_put_byte(ps_cabac);
+ }
+
+}
+
+
+
+
+ /**
+ *******************************************************************************
+ *
+ * @brief
+ * Encoding process for a binary decision :implements encoding process of a decision
+ * as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol. Implements
+ * flowchart Figure 9-7( ITU_T_H264-201402)
+ *
+ * @param[in] u4_bins
+ * bin values packed one per bit, bin0 in the LSB
+ *
+ * @param[in] i1_bins_len
+ * Length of bins, maximum 32
+ *
+ * @param[in] u4_ctx_inc
+ * CtxInc packed 4 bits per bin: nibble0 - bin0, nibble1 - bin1 ..
+ *
+ * @param[in] i1_valid_len
+ * valid length of bins, after that CtxInc is constant
+ *
+ * @param[in] pu1_bin_ctxt_type
+ * Pointer to binary contexts
+ *
+ * @param[inout] ps_cabac
+ * Pointer to cabac_context_structure
+ *
+ * @returns
+ * none (void)
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len,
+ UWORD32 u4_ctx_inc, WORD8 i1_valid_len,
+ bin_ctxt_model *pu1_bin_ctxt_type,
+ cabac_ctxt_t *ps_cabac)
+{
+ WORD8 i;
+ UWORD8 u1_ctx_inc, u1_bin;
+ /* bins are consumed LSB first; CtxInc advances one nibble per bin while i < i1_valid_len */
+ for (i = 0; i < i1_bins_len; i++)
+ {
+ u1_bin = (u4_bins & 0x01);
+ u4_bins = u4_bins >> 1;
+ u1_ctx_inc = u4_ctx_inc & 0x0f;
+ if (i < i1_valid_len)
+ u4_ctx_inc = u4_ctx_inc >> 4;
+ /* Encode the bin with the context model at offset u1_ctx_inc */
+ ih264e_cabac_encode_bin(ps_cabac, u1_bin,
+ pu1_bin_ctxt_type + u1_ctx_inc);
+ }
+
+}
+
+
+
+
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encoding process for a binary decision before termination:Encoding process
+ * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
+ *
+ * @param[inout] ps_cabac
+ * Pointer to cabac structure
+ *
+ * @param[in] term_bin
+ * Symbol value, end of slice or not, term_bin is binary
+ *
+ * @returns
+ * none (void)
+ * @remarks
+ * When term_bin is 1 the cabac engine is flushed via ih264e_cabac_flush()
+ *
+ *******************************************************************************
+ */
+void ih264e_cabac_encode_terminate(cabac_ctxt_t *ps_cabac, WORD32 term_bin)
+{
+
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
+
+ UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
+ UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+ UWORD32 u4_rlps;
+ WORD32 shift;
+
+ /* Sanity checks */
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+ ASSERT((term_bin == 0) || (term_bin == 1));
+
+ /* term_bin = 1 has lps range = 2 */
+ u4_rlps = 2;
+ u4_range -= u4_rlps;
+
+ /* if terminate L is incremented by curR and R=2 */
+ if (term_bin)
+ {
+ /* lps path; L= L + R; R = RLPS */
+ u4_low += u4_range;
+ u4_range = u4_rlps;
+ }
+
+ /*****************************************************************/
+ /* Renormalization; calculate bits generated based on range(R) */
+ /* Note : 6 <= R < 512; R is 2 only for terminating encode */
+ /*****************************************************************/
+ GETRANGE(shift, u4_range);
+ shift = 9 - shift;
+ u4_low <<= shift;
+ u4_range <<= shift;
+
+ /* bits to be inserted in the bitstream */
+ ps_cab_enc_env->u4_bits_gen += shift;
+ ps_cab_enc_env->u4_code_int_range = u4_range;
+ ps_cab_enc_env->u4_code_int_low = u4_low;
+
+ /* generate stream once more than CABAC_BITS bits have been generated */
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ ih264e_cabac_put_byte(ps_cabac);
+ }
+ /* end of slice: flush the engine; NOTE(review): the WORD32 status of the flush is discarded here */
+ if (term_bin)
+ {
+ ih264e_cabac_flush(ps_cabac);
+ }
+
+}
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Bypass encoding process for binary decisions: Explained (9.3.4.4 :ITU_T_H264-201402)
+ * , flowchart 9-10.
+ *
+ * @param[inout] ps_cabac : pointer to cabac context (handle)
+ *
+ * @param[in] bin : bypass bin(0/1) to be encoded
+ *
+ * @returns
+ * none (void)
+ * @remarks
+ * Range is left unchanged; only low and bits generated are updated
+ *
+ *******************************************************************************
+ */
+
+void ih264e_cabac_encode_bypass_bin(cabac_ctxt_t *ps_cabac, WORD32 bin)
+{
+
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
+
+ UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
+ UWORD32 u4_low = ps_cab_enc_env->u4_code_int_low;
+
+ /* Sanity checks */
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+ ASSERT((bin == 0) || (bin == 1));
+
+ u4_low <<= 1;
+ /* add range if bin is 1 */
+ if (bin)
+ {
+ u4_low += u4_range;
+ }
+
+ /* 1 bit to be inserted in the bitstream */
+ ps_cab_enc_env->u4_bits_gen++;
+ ps_cab_enc_env->u4_code_int_low = u4_low;
+
+ /* generate stream once more than CABAC_BITS bits have been generated */
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ ih264e_cabac_put_byte(ps_cabac);
+ }
+
+}
+
+
+ /**
+ ******************************************************************************
+ *
+ * @brief Encodes a series of bypass bins (FLC bypass bins)
+ *
+ * @par Description
+ * This function is more optimal than calling ih264e_cabac_encode_bypass_bin()
+ * in a loop as cabac low, renorm and generating the stream (8bins at a time)
+ * can be done in one operation
+ *
+ * @param[inout]ps_cabac
+ * pointer to cabac context (handle)
+ *
+ * @param[in] u4_bins
+ * syntax element to be coded (as FLC bins), first bin in the LSB
+ *
+ * @param[in] num_bins
+ * This is the FLC length for u4_bins (1 to 32)
+ *
+ * @return
+ * none (void)
+ ******************************************************************************
+ */
+
+void ih264e_cabac_encode_bypass_bins(cabac_ctxt_t *ps_cabac, UWORD32 u4_bins,
+ WORD32 num_bins)
+{
+
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac->s_cab_enc_env);
+
+ UWORD32 u4_range = ps_cab_enc_env->u4_code_int_range;
+ WORD32 next_byte;
+ UWORD32 rev_next_byte;
+
+ /* Sanity checks */
+ ASSERT((num_bins < 33) && (num_bins > 0));
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+
+ /* Bins are taken from the LSB side of u4_bins; each group is bit-reversed */
+ /* so that the first bin ends up in the most significant position */
+
+ /* Encode 8bins at a time and put in the bit-stream */
+ while (num_bins > 8)
+ {
+ num_bins -= 8;
+
+ /* extract the leading 8 bins */
+ next_byte = (u4_bins) & 0xff;
+ u4_bins >>= 8;
+ REV_NBITS(next_byte, 8, rev_next_byte);
+
+ /* L = (L << 8) + (R * next_byte) */
+ ps_cab_enc_env->u4_code_int_low <<= 8;
+ ps_cab_enc_env->u4_code_int_low += (rev_next_byte * u4_range);
+ ps_cab_enc_env->u4_bits_gen += 8;
+
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ /* insert the leading byte of low into stream */
+ ih264e_cabac_put_byte(ps_cabac);
+ }
+ }
+
+ /* Update low with remaining bins (1 to 8 of them) */
+ next_byte = (u4_bins & ((1 << num_bins) - 1));
+
+ REV_NBITS(next_byte, num_bins, rev_next_byte);
+
+ ps_cab_enc_env->u4_code_int_low <<= num_bins;
+ ps_cab_enc_env->u4_code_int_low += (rev_next_byte * u4_range);
+ ps_cab_enc_env->u4_bits_gen += num_bins;
+
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ /* insert the leading byte of low into stream */
+ ih264e_cabac_put_byte(ps_cabac);
+ }
+
+}
+
+
+
+
+
+
+
diff --git a/encoder/ih264e_cabac.h b/encoder/ih264e_cabac.h
new file mode 100644
index 0000000..e781783
--- /dev/null
+++ b/encoder/ih264e_cabac.h
@@ -0,0 +1,452 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_cabac.h
+ *
+ * @brief
+ * This file contains cabac related macros, enums, tables and function declarations.
+ *
+ * @author
+ * Doney Alex
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_CABAC_H_
+#define IH264E_CABAC_H_
+
+
+
+/*******************************************************************************
+@brief Bit precision of cabac engine; a byte is emitted once bits generated exceed it
+*******************************************************************************
+*/
+#define CABAC_BITS 9
+
+
+
+
+/**
+******************************************************************************
+ * @macro Reverse the order of the lowest 'size' bits of word into rev_word
+******************************************************************************
+*/
+#define REV_NBITS(word, size, rev_word) \
+{ \
+ WORD32 i; \
+ rev_word = 0; \
+ for (i = 0; i < (size); i++) \
+ { \
+ UWORD32 bit = ((word) >> i) & 1; \
+ rev_word += (1 << ((size) - i - 1)) * bit; \
+ } \
+} \
+
+/**
+******************************************************************************
+ * @macro Reverse all 32 bits of an unsigned integer (u4_input) into u4_output
+******************************************************************************
+*/
+#define REV(u4_input, u4_output) \
+{ \
+ UWORD32 u4_temp = (u4_input); \
+ WORD8 i; \
+ u4_output = 0; \
+ for (i = 0; i < 32; i++) \
+ { \
+ u4_output = (u4_output << 1) + \
+ ((u4_temp >> i) & 0x01); \
+ } \
+}
+
+/**
+******************************************************************************
+*! Bit manipulation macros: set / clear bit i of a (a is modified in place)
+******************************************************************************
+*/
+#define SETBIT(a, i) ((a) |= (1 << (i)))
+#define CLEARBIT(a, i) ((a) &= ~(1 << (i)))
+
+
+/**
+******************************************************************************
+*! Cabac module expects at least MIN_STREAM_SIZE_MB bytes left in stream buffer
+*! for encoding an MB
+******************************************************************************
+*/
+#define MIN_STREAM_SIZE_MB 1024
+
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Initialize default context values and pointers.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_init_cabac_table(entropy_ctxt_t *ps_ent_ctxt);
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Initialize cabac context: Initialize all contexts with init values given in the spec.
+ * Called at the beginning of entropy coding of each slice for CABAC encoding.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_init_cabac_ctxt(entropy_ctxt_t *ps_ent_ctxt);
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * k-th order Exp-Golomb (UEGk) binarization process: Implements concatenated
+ * unary/ k-th order Exp-Golomb (UEGk) binarization process,
+ * where k = 0 as defined in 9.3.2.3 of ITU_T_H264-201402
+ *
+ * @param[in] i2_sufs
+ * Suffix bit string
+ *
+ * @param[in] pi1_bins_len
+ * Pointer to length of the string
+ *
+ * @returns Binarized value
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+UWORD32 ih264e_cabac_UEGk0_binarization(WORD16 i2_sufs, WORD8 *pi1_bins_len);
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Get cabac context for the MB :calculates the pointers to Top and left
+ * cabac neighbor context depending upon neighbor availability.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure
+ *
+ * @param[in] u4_mb_type
+ * Type of MB
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_get_cabac_context(entropy_ctxt_t *ps_ent_ctxt, WORD32 u4_mb_type);
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * flushing at termination: Explained in flowchart 9-12(ITU_T_H264-201402).
+ *
+ * @param[in] ps_cabac_ctxt
+ * pointer to cabac context (handle)
+ *
+ * @returns success or failure error code
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+WORD32 ih264e_cabac_flush(cabac_ctxt_t *ps_cabac_ctxt);
+
+
+/**
+ ******************************************************************************
+ *
+ * @brief Puts new byte (and outstanding bytes) into bitstream after cabac
+ * renormalization
+ *
+ * @par Description
+ * 1. Extract the leading byte of low(L)
+ * 2. If leading byte=0xff increment outstanding bytes and return
+ * (as the actual bits depend on carry propagation later)
+ * 3. If leading byte is not 0xff check for any carry propagation
+ * 4. Insert the carry (propagated in previous byte) along with outstanding
+ * bytes (if any) and leading byte
+ *
+ *
+ * @param[inout] ps_cabac_ctxt
+ * pointer to cabac context (handle)
+ *
+ * @return
+ *
+ ******************************************************************************
+ */
+void ih264e_cabac_put_byte(cabac_ctxt_t *ps_cabac_ctxt);
+
+
+/**
+ ******************************************************************************
+ *
+ * @brief Codes a bin based on probability and mps packed context model
+ *
+ * @par Description
+ * 1. Apart from encoding bin, context model is updated as per state transition
+ * 2. Range and Low renormalization is done based on bin and original state
+ * 3. After renorm bitstream is updated (if required)
+ *
+ * @param[inout] ps_cabac
+ * pointer to cabac context (handle)
+ *
+ * @param[in] bin
+ * bin(boolean) to be encoded
+ *
+ * @param[in] pu1_bin_ctxts
+ * index of cabac context model containing pState[bits 5-0] | MPS[bit6]
+ *
+ * @return
+ *
+ ******************************************************************************
+ */
+void ih264e_cabac_encode_bin(cabac_ctxt_t *ps_cabac, WORD32 bin,
+ bin_ctxt_model *pu1_bin_ctxts);
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Encoding process for a binary decision :implements encoding process of a decision
+ * as defined in 9.3.4.2 . This function encodes multiple bins, of a symbol. Implements
+ * flowchart Figure 9-7( ITU_T_H264-201402)
+ *
+ * @param[in] u4_bins
+ * array of bin values
+ *
+ * @param[in] i1_bins_len
+ * Length of bins, maximum 32
+ *
+ * @param[in] u4_ctx_inc
+ * CtxInc packed 4 bits per bin: nibble0 - bin0, nibble1 - bin1 ..
+ *
+ * @param[in] i1_valid_len
+ * valid length of bins, after that CtxInc is constant
+ *
+ * @param[in] pu1_bin_ctxt_type
+ * Pointer to binary contexts
+
+ * @param[in] ps_cabac
+ * Pointer to cabac_context_structure
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_encode_decision_bins(UWORD32 u4_bins, WORD8 i1_bins_len,
+ UWORD32 u4_ctx_inc, WORD8 i1_valid_len,
+ bin_ctxt_model *pu1_bin_ctxt_type,
+ cabac_ctxt_t *ps_cabac);
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encoding process for a binary decision before termination:Encoding process
+ * of a termination(9.3.4.5 :ITU_T_H264-201402) . Explained in flowchart 9-11.
+ *
+ * @param[in] ps_cabac
+ * Pointer to cabac structure
+ *
+ * @param[in] term_bin
+ * Symbol value, end of slice or not, term_bin is binary
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264e_cabac_encode_terminate(cabac_ctxt_t *ps_cabac, WORD32 term_bin);
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Bypass encoding process for binary decisions: Explained (9.3.4.4 :ITU_T_H264-201402)
+ * , flowchart 9-10.
+ *
+ * @param[in] ps_cabac : pointer to cabac context (handle)
+ *
+ * @param[in] bin : bypass bin(0/1) to be encoded
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+void ih264e_cabac_encode_bypass_bin(cabac_ctxt_t *ps_cabac, WORD32 bin);
+
+
+
+/**
+ ******************************************************************************
+ *
+ * @brief Encodes a series of bypass bins (FLC bypass bins)
+ *
+ * @par Description
+ * This function is more optimal than calling ih264e_cabac_encode_bypass_bin()
+ * in a loop as cabac low, renorm and generating the stream (8bins at a time)
+ * can be done in one operation
+ *
+ * @param[inout]ps_cabac
+ * pointer to cabac context (handle)
+ *
+ * @param[in] u4_bins
+ * syntax element to be coded (as FLC bins)
+ *
+ * @param[in] num_bins
+ * This is the FLC length for u4_bins
+ *
+ * @return
+ *
+ ******************************************************************************
+ */
+
+void ih264e_cabac_encode_bypass_bins(cabac_ctxt_t *ps_cabac, UWORD32 u4_bins,
+ WORD32 num_bins);
+
+
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for an Intra Slice.
+ *
+ * @description
+ * The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+ * (if present), mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification.
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt);
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for Inter slices
+ *
+ * @description
+ * The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+ * (if present), mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt);
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for B slices
+ *
+ * @description
+ * The mb syntax layer for inter slices constitutes luma mb mode,
+ * mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt);
+
+
+#endif /* IH264E_CABAC_H_ */
diff --git a/encoder/ih264e_cabac_encode.c b/encoder/ih264e_cabac_encode.c
new file mode 100644
index 0000000..ebcd418
--- /dev/null
+++ b/encoder/ih264e_cabac_encode.c
@@ -0,0 +1,2391 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_cabac_encode.c
+*
+* @brief
+* Contains all functions to encode in CABAC entropy mode
+*
+*
+* @author
+* Doney Alex
+*
+* @par List of Functions:
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264_macros.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
+#include "ih264e_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encodes mb_skip_flag using CABAC entropy coding mode.
+ *
+ * @description
+ *  The context increment is the number of neighbours (left, top) whose
+ *  u1_mb_type does not have CAB_SKIP_MASK set, i.e. a value in 0..2.
+ *
+ * @param[in] u1_mb_skip_flag
+ *  mb_skip_flag
+ *
+ * @param[in] ps_cabac_ctxt
+ *  Pointer to cabac context structure
+ *
+ * @param[in] u4_ctxidx_offset
+ *  ctxIdxOffset for mb_skip_flag context
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_mb_skip(UWORD8 u1_mb_skip_flag,
+                                     cabac_ctxt_t *ps_cabac_ctxt,
+                                     UWORD32 u4_ctxidx_offset)
+{
+    bin_ctxt_model *pu1_skip_ctxt;
+    UWORD8 u1_not_skipped = 0;
+
+    /* each neighbour that is not marked as skipped contributes one */
+    if (!(ps_cabac_ctxt->ps_left_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK))
+    {
+        u1_not_skipped++;
+    }
+    if (!(ps_cabac_ctxt->ps_top_ctxt_mb_info->u1_mb_type & CAB_SKIP_MASK))
+    {
+        u1_not_skipped++;
+    }
+
+    /* select the context model and code the single bin */
+    pu1_skip_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctxidx_offset
+                    + u1_not_skipped;
+    ih264e_cabac_encode_bin(ps_cabac_ctxt, (UWORD32) u1_mb_skip_flag,
+                            pu1_skip_ctxt);
+}
+
+
+/* ! < Table 9-36 – Binarization for macroblock types in I slices in ITU_T_H264-201402
+ * One entry per intra mb type (index 0..25).
+ * Bits 0-7 : binarised value
+ * Bits 8-15: length of binary sequence
+ * ih264e_cabac_enc_intra_mb_type() reads only the low 4 bits of the length
+ * field and codes the bins from bit (length - 1) down to bit 0.
+ */
+static const UWORD32 u4_mb_type_intra[26] =
+ { 0x0100, 0x0620, 0x0621, 0x0622, 0x0623, 0x0748, 0x0749, 0x074a, 0x074b,
+ 0x074c, 0x074d, 0x074e, 0x074f, 0x0628, 0x0629, 0x062a, 0x062b, 0x0758,
+ 0x0759, 0x075a, 0x075b, 0x075c, 0x075d, 0x075e, 0x075f, 0x0203 };
+
+
+/* CtxInc for mb types.
+ * Row 0 is used when the slice is an I slice, row 1 for every other slice type.
+ * Each entry packs one ctxInc per bin as a hex nibble: nibble i (bits 4i..4i+3)
+ * belongs to bit i of the matching u4_mb_type_intra entry.
+ * In I slices the nibble of the first coded bin is OR-ed in at run time by
+ * ih264e_cabac_enc_intra_mb_type(), which is why it is 0 in row 0.
+ */
+static const UWORD32 u4_mb_ctxinc[2][26] =
+{
+ /* Intra CtxInc's */
+ { 0x00,
+ 0x03467, 0x03467, 0x03467, 0x03467, 0x034567, 0x034567, 0x034567,
+ 0x034567, 0x034567, 0x034567, 0x034567, 0x034567, 0x03467, 0x03467,
+ 0x03467, 0x03467, 0x034567, 0x034567, 0x034567, 0x034567, 0x034567,
+ 0x034567, 0x034567, 0x034567, 0x00},
+ /* Inter CtxInc's */
+ { 0x00,
+ 0x001233, 0x001233, 0x001233, 0x001233, 0x0012233, 0x0012233, 0x0012233,
+ 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x001233, 0x001233,
+ 0x001233, 0x001233, 0x0012233, 0x0012233, 0x0012233, 0x0012233, 0x0012233,
+ 0x0012233, 0x0012233, 0x0012233, 0x00}
+};
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Encodes mb_type for an intra MB.
+ *
+ * @description
+ * The bin string and the per-bin ctxInc values are looked up in
+ * u4_mb_type_intra and u4_mb_ctxinc and coded from the highest bin index down
+ * to bin 0. The arithmetic coding step is carried out inline on the encoding
+ * environment stored in the cabac context.
+ *
+ * @param[in] u4_slice_type
+ * slice type
+ *
+ * @param[in] u4_intra_mb_type
+ * MB type (Table 7-11); used as index into the 26-entry lookup tables
+ *
+ * @param[in] ps_cabac_ctxt
+ * Pointer to cabac context structure
+ *
+ * @param[in] u4_ctx_idx_offset
+ * ctxIdxOffset for mb_type context
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+static void ih264e_cabac_enc_intra_mb_type(UWORD32 u4_slice_type,
+ UWORD32 u4_intra_mb_type,
+ cabac_ctxt_t *ps_cabac_ctxt,
+ UWORD32 u4_ctx_idx_offset)
+{
+
+ encoding_envirnoment_t *ps_cab_enc_env = &(ps_cabac_ctxt->s_cab_enc_env);
+ bin_ctxt_model *pu1_mb_bin_ctxt, *pu1_bin_ctxt;
+ UWORD8 u1_bin;
+ mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info;
+ mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ UWORD32 u4_bins;
+ UWORD32 u4_ctx_inc;
+ WORD8 i1_bins_len;
+ UWORD32 u4_code_int_range;
+ UWORD32 u4_code_int_low;
+ UWORD16 u2_quant_code_int_range;
+ UWORD16 u4_code_int_range_lps;
+ WORD8 i;
+ UWORD8 u1_ctx_inc;
+ UWORD32 u4_table_val;
+
+ pu1_mb_bin_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset;
+
+ /* packed bin string, its length and the packed per-bin ctxInc nibbles */
+ u4_bins = u4_mb_type_intra[u4_intra_mb_type];
+ i1_bins_len = (WORD8) ((u4_bins >> 8) & 0x0f);
+ u4_ctx_inc = u4_mb_ctxinc[(u4_slice_type != ISLICE)][u4_intra_mb_type];
+ u1_ctx_inc = 0;
+ if (u4_slice_type == ISLICE)
+ {
+ /* ctxInc of the first coded bin: one for every neighbour that is not */
+ /* the default context and whose mb type is not CAB_I4x4 */
+ if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u1_ctx_inc += ((ps_left_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
+ if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u1_ctx_inc += ((ps_top_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
+
+ /* store it in the nibble that belongs to bin (i1_bins_len - 1) */
+ u4_ctx_inc = (u4_ctx_inc | (u1_ctx_inc << ((i1_bins_len - 1) << 2)));
+ }
+ else
+ {
+ /* non-I slices start 3 context models further on, B slices 2 more */
+ pu1_mb_bin_ctxt += 3;
+ if (u4_slice_type == BSLICE)
+ pu1_mb_bin_ctxt += 2;
+
+ }
+
+ /* work on local copies of the coding interval */
+ u4_code_int_range = ps_cab_enc_env->u4_code_int_range;
+ u4_code_int_low = ps_cab_enc_env->u4_code_int_low;
+
+ /* bins are coded from the highest bin index down to bin 0 */
+ for (i = (i1_bins_len - 1); i >= 0; i--)
+ {
+ WORD32 shift;
+
+ u1_ctx_inc = ((u4_ctx_inc >> (i << 2)) & 0x0f);
+ u1_bin = ((u4_bins >> i) & 0x01);
+ /* Encode the bin */
+ pu1_bin_ctxt = pu1_mb_bin_ctxt + u1_ctx_inc;
+ /* every bin except the second one coded uses a context model */
+ if (i != (i1_bins_len - 2))
+ {
+ /* context byte: bit 6 holds the MPS, bits 0-5 the state index */
+ WORD8 i1_mps = !!((*pu1_bin_ctxt) & (0x40));
+ WORD8 i1_state = (*pu1_bin_ctxt) & 0x3F;
+
+ /* table entry: bits 0-7 LPS range, bits 8-13 next state after an */
+ /* MPS, bits 15-20 next state after an LPS */
+ u2_quant_code_int_range = ((u4_code_int_range >> 6) & 0x03);
+ u4_table_val =
+ gau4_ih264_cabac_table[i1_state][u2_quant_code_int_range];
+ u4_code_int_range_lps = u4_table_val & 0xFF;
+
+ u4_code_int_range -= u4_code_int_range_lps;
+ if (u1_bin != i1_mps)
+ {
+ /* LPS path: move low past the MPS interval */
+ u4_code_int_low += u4_code_int_range;
+ u4_code_int_range = u4_code_int_range_lps;
+ if (i1_state == 0)
+ {
+ /* MPS(CtxIdx) = 1 - MPS(CtxIdx) */
+ i1_mps = 1 - i1_mps;
+ }
+
+ i1_state = (u4_table_val >> 15) & 0x3F;
+ }
+ else
+ {
+ i1_state = (u4_table_val >> 8) & 0x3F;
+
+ }
+
+ /* write the updated model back */
+ (*pu1_bin_ctxt) = (i1_mps << 6) | i1_state;
+ }
+ else
+ {
+ /* second coded bin: no context model is used, the range shrinks */
+ /* by 2 and low is left untouched. */
+ /* NOTE(review): this ignores u1_bin, so it only matches a bin */
+ /* value of 0; table entry 25 (0x0203) has a 1 here - confirm that */
+ /* mb type is never passed in. */
+ u4_code_int_range -= 2;
+ }
+
+ /* Renormalize */
+ /*****************************************************************/
+ /* Renormalization; calculate bits generated based on range(R) */
+ /* Note : 6 <= R < 512; R is 2 only for terminating encode */
+ /*****************************************************************/
+ GETRANGE(shift, u4_code_int_range);
+ shift = 9 - shift;
+ u4_code_int_low <<= shift;
+ u4_code_int_range <<= shift;
+
+ /* bits to be inserted in the bitstream */
+ ps_cab_enc_env->u4_bits_gen += shift;
+ ps_cab_enc_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_enc_env->u4_code_int_low = u4_code_int_low;
+
+ /* generate stream when a byte is ready */
+ if (ps_cab_enc_env->u4_bits_gen > CABAC_BITS)
+ {
+ ih264e_cabac_put_byte(ps_cabac_ctxt);
+ /* reload the local copies from the environment after the call */
+ u4_code_int_range = ps_cab_enc_env->u4_code_int_range;
+ u4_code_int_low = ps_cab_enc_env->u4_code_int_low;
+
+ }
+ }
+}
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encodes prev_intra4x4_pred_mode_flag and
+ *  rem_intra4x4_pred_mode using CABAC entropy coding mode
+ *
+ * @description
+ *  Eight bytes are consumed, each holding two 4-bit entries (low nibble first).
+ *  Bit 0 of an entry is prev_intra4x4_pred_mode_flag. When it is set only that
+ *  flag is coded; otherwise all four bits of the entry (flag followed by the
+ *  three bits above it) are coded as one bin string.
+ *
+ * @param[in] ps_cabac_ctxt
+ *  Pointer to cabac context structure
+ *
+ * @param[in] pu1_intra_4x4_modes
+ *  Pointer to array containing prev_intra4x4_pred_mode_flag and
+ *  rem_intra4x4_pred_mode
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  The packed byte is kept in an unsigned variable so that bytes with the top
+ *  bit set are neither converted to a negative value nor right shifted as a
+ *  negative value (both implementation-defined in C).
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_4x4mb_modes(cabac_ctxt_t *ps_cabac_ctxt,
+                                         UWORD8 *pu1_intra_4x4_modes)
+{
+    WORD32 i, j;
+
+    /* 16 sub blocks, two per byte */
+    for (i = 0; i < 16; i += 2)
+    {
+        UWORD8 u1_packed = *pu1_intra_4x4_modes++;
+
+        /* low nibble first, then the high nibble */
+        for (j = 0; j < 2; j++)
+        {
+            UWORD8 u1_entry = u1_packed & 0xF;
+
+            if (u1_entry & 0x1)
+            {
+                /* prev_intra4x4_pred_mode_flag = 1, nothing else to send */
+                ih264e_cabac_encode_bin(ps_cabac_ctxt,
+                                        1,
+                                        ps_cabac_ctxt->au1_cabac_ctxt_table
+                                                        + PREV_INTRA4X4_PRED_MODE_FLAG);
+            }
+            else
+            {
+                /* flag (0) + 3 bits of rem mode; binarization is FL, Cmax=7 */
+                ih264e_encode_decision_bins(u1_entry,
+                                            4,
+                                            0x05554,
+                                            4,
+                                            ps_cabac_ctxt->au1_cabac_ctxt_table
+                                                            + REM_INTRA4X4_PRED_MODE - 5,
+                                            ps_cabac_ctxt);
+            }
+
+            u1_packed >>= 4;
+        }
+    }
+}
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encodes chroma intrapred mode for the MB.
+ *
+ * @description
+ *  The mode is written as a truncated unary string (Cmax = 3): one '1' bin per
+ *  unit of the mode value, followed by a terminating '0' when the mode is
+ *  below 3. The mode is also stored in the current MB context.
+ *
+ * @param[in] u1_chroma_pred_mode
+ *  Chroma intra prediction mode
+ *
+ * @param[in] ps_cabac_ctxt
+ *  Pointer to cabac context structure
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_chroma_predmode(UWORD8 u1_chroma_pred_mode,
+                                             cabac_ctxt_t *ps_cabac_ctxt)
+{
+    mb_info_ctxt_t *ps_cur_mb = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+    mb_info_ctxt_t *ps_left_mb = ps_cabac_ctxt->ps_left_ctxt_mb_info;
+    mb_info_ctxt_t *ps_top_mb = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+    UWORD32 u4_bin_string = 0;
+    UWORD32 u4_ctx_incs = 0;
+    WORD8 i1_num_bins = 1;
+    WORD8 i1_ones_left;
+    UWORD8 u1_left_nonzero, u1_top_nonzero;
+
+    /* neighbour terms are sampled before the current context is updated */
+    u1_left_nonzero = (ps_left_mb->u1_intrapred_chroma_mode != 0) ? 1 : 0;
+    u1_top_nonzero = (ps_top_mb->u1_intrapred_chroma_mode != 0) ? 1 : 0;
+
+    ps_cur_mb->u1_intrapred_chroma_mode = u1_chroma_pred_mode;
+
+    /* bin 0 uses the neighbour based increment, bins 1 and 2 use 3 */
+    u4_ctx_incs = u1_left_nonzero + u1_top_nonzero;
+    u4_ctx_incs |= 0x330;
+
+    /* Binarization is TU and Cmax=3 */
+    if (u1_chroma_pred_mode)
+    {
+        u4_bin_string = 1;
+
+        /* one further '1' for every unit above 1 */
+        i1_ones_left = u1_chroma_pred_mode;
+        for (i1_ones_left--; i1_ones_left; i1_ones_left--)
+        {
+            u4_bin_string |= (1 << i1_num_bins);
+            i1_num_bins++;
+        }
+
+        /* below Cmax the string is closed with a '0' bin */
+        if (u1_chroma_pred_mode < 3)
+        {
+            i1_num_bins++;
+        }
+    }
+
+    ih264e_encode_decision_bins(u4_bin_string, i1_num_bins, u4_ctx_incs, 3,
+                                ps_cabac_ctxt->au1_cabac_ctxt_table
+                                                + INTRA_CHROMA_PRED_MODE,
+                                ps_cabac_ctxt);
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encodes CBP for the MB.
+ *
+ * @description
+ *  Bins 0-3 carry the luma cbp bits (bits 0-3 of u4_cbp), bin 4 tells whether
+ *  the chroma cbp (u4_cbp >> 4) is non zero and, when it is, bin 5 carries
+ *  bit 1 of the chroma cbp. All bins are sent with a single call using
+ *  packed context increments (one nibble per bin).
+ *
+ * @param[in] u4_cbp
+ *  CBP for the MB
+ *
+ * @param[in] ps_cabac_ctxt
+ *  Pointer to cabac context structure
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_cbp(UWORD32 u4_cbp, cabac_ctxt_t *ps_cabac_ctxt)
+{
+    mb_info_ctxt_t *ps_left_mb = ps_cabac_ctxt->ps_left_ctxt_mb_info;
+    mb_info_ctxt_t *ps_top_mb = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+    UWORD32 u4_ctx_incs = 0;
+    UWORD32 u4_bin_string = 0;
+    WORD8 i1_num_bins = 5;
+    WORD8 i1_chroma_cbp;
+    WORD8 i1_blk;
+    UWORD8 u1_left_term, u1_top_term, u1_inc;
+
+    /* CBP Luma, FL, Cmax = 15, L = 4 */
+    for (i1_blk = 0; i1_blk < 4; i1_blk++)
+    {
+        /* ctxInc = CondTerm(A) + 2 * CondTerm(B), A: left, B: top;  */
+        /* a term is 1 when the neighbouring 8x8 block is not coded   */
+
+        /* top neighbour: inside this MB for the lower row of blocks */
+        if (i1_blk & 0x02)
+        {
+            u1_top_term = ((u4_cbp >> (i1_blk - 2)) & 0x01) ? 0 : 1;
+        }
+        else
+        {
+            u1_top_term = ((ps_top_mb->u1_cbp >> (i1_blk + 2)) & 0x01) ? 0 : 1;
+        }
+
+        /* left neighbour: inside this MB for the right column of blocks */
+        if (i1_blk & 0x01)
+        {
+            u1_left_term = ((u4_cbp >> (i1_blk - 1)) & 0x01) ? 0 : 1;
+        }
+        else
+        {
+            u1_left_term = ((ps_left_mb->u1_cbp >> (i1_blk + 1)) & 0x01) ? 0 : 1;
+        }
+
+        u1_inc = u1_left_term + 2 * u1_top_term;
+        u4_ctx_incs |= (u1_inc << (i1_blk << 2));
+        u4_bin_string |= (((u4_cbp >> i1_blk) & 0x01) << i1_blk);
+    }
+
+    /* CBP Chroma, TU, Cmax = 2 */
+    i1_chroma_cbp = u4_cbp >> 4;
+
+    /* bin 4: neighbours with any chroma coded (cbp > 15) */
+    u1_left_term = (ps_left_mb->u1_cbp > 15) ? 1 : 0;
+    u1_top_term = (ps_top_mb->u1_cbp > 15) ? 1 : 0;
+    u1_inc = u1_left_term + 2 * u1_top_term;
+    u4_ctx_incs |= ((4 + u1_inc) << 16);
+
+    if (i1_chroma_cbp)
+    {
+        u4_bin_string |= 0x10;
+
+        /* bin 5: neighbours with cbp > 31 */
+        u1_left_term = (ps_left_mb->u1_cbp > 31) ? 1 : 0;
+        u1_top_term = (ps_top_mb->u1_cbp > 31) ? 1 : 0;
+        u1_inc = u1_left_term + 2 * u1_top_term;
+        u4_ctx_incs |= ((8 + u1_inc) << 20);
+
+        u4_bin_string |= (((i1_chroma_cbp >> 1) & 0x01) << i1_num_bins);
+        i1_num_bins++;
+    }
+
+    ih264e_encode_decision_bins(u4_bin_string, i1_num_bins, u4_ctx_incs, 8,
+                                ps_cabac_ctxt->au1_cabac_ctxt_table + CBP_LUMA,
+                                ps_cabac_ctxt);
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Encodes mb_qp_delta for the MB.
+ *
+ * @description
+ *  The signed delta is mapped to a non negative code number (2 * d - 1 for
+ *  d > 0, 2 * |d| for d < 0, 0 for d == 0) which is written as that many '1'
+ *  bins followed by a '0' bin. Strings longer than 32 bins are sent in two
+ *  calls. The delta is also stored in the cabac context for the next MB.
+ *
+ * @param[in] i1_mb_qp_delta
+ *  mb_qp_delta
+ *
+ * @param[in] ps_cabac_ctxt
+ *  Pointer to cabac context structure
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_mb_qp_delta(WORD8 i1_mb_qp_delta,
+                                         cabac_ctxt_t *ps_cabac_ctxt)
+{
+    bin_ctxt_model *pu1_qp_ctxt;
+    UWORD8 u1_ones;
+    UWORD8 u1_prev_nonzero;
+    UWORD32 u4_ctx_incs;
+    UWORD32 u4_bin_string;
+    WORD8 i1_num_bins;
+
+    /* Range of mb_qp_delta = -26 to +25 inclusive */
+    ASSERT((i1_mb_qp_delta < 26) && (i1_mb_qp_delta > -27));
+
+    /* signed value -> code number, i.e. the count of leading '1' bins */
+    if (i1_mb_qp_delta > 0)
+    {
+        u1_ones = (i1_mb_qp_delta << 1) - 1;
+    }
+    else if (i1_mb_qp_delta < 0)
+    {
+        u1_ones = (ABS(i1_mb_qp_delta)) << 1;
+    }
+    else
+    {
+        u1_ones = 0;
+    }
+
+    /* ctxInc of bin 0 depends on whether the previous delta was non zero */
+    u1_prev_nonzero = (!(!(ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt)));
+    ps_cabac_ctxt->i1_prevps_mb_qp_delta_ctxt = i1_mb_qp_delta;
+
+    pu1_qp_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + MB_QP_DELTA;
+
+    /* code number 0: a lone '0' bin */
+    if (u1_ones == 0)
+    {
+        ih264e_cabac_encode_bin(ps_cabac_ctxt, 0,
+                                pu1_qp_ctxt + (u1_prev_nonzero & 0x0f));
+        return;
+    }
+
+    /* b0 uses the neighbour based ctxInc, b1 uses ctxInc 2 */
+    u4_ctx_incs = u1_prev_nonzero;
+    u4_ctx_incs |= 0x20;
+
+    /* code number 1: bins "1 0" */
+    if (u1_ones == 1)
+    {
+        ih264e_encode_decision_bins(1, 2, u4_ctx_incs, 3, pu1_qp_ctxt,
+                                    ps_cabac_ctxt);
+        return;
+    }
+
+    /* b0 and b1 are both '1'; bins from b2 onwards use ctxInc 3 */
+    u1_ones -= 2;
+    u4_ctx_incs |= 0x300;
+
+    if (u1_ones < 30)
+    {
+        /* whole string, terminating '0' included, fits in 32 bins */
+        u4_bin_string = 0x3;
+        i1_num_bins = 2;
+        for (; u1_ones; u1_ones--)
+        {
+            u4_bin_string |= (1 << i1_num_bins);
+            i1_num_bins++;
+        }
+        i1_num_bins++;
+        ih264e_encode_decision_bins(u4_bin_string, i1_num_bins, u4_ctx_incs,
+                                    2, pu1_qp_ctxt, ps_cabac_ctxt);
+        return;
+    }
+
+    /* longer strings: send the first 32 '1' bins on their own ... */
+    ih264e_encode_decision_bins(0xffffffff, 32, u4_ctx_incs, 2, pu1_qp_ctxt,
+                                ps_cabac_ctxt);
+    u1_ones -= 30;
+
+    /* ... then the remaining '1' bins and the terminating '0' */
+    u4_bin_string = 0;
+    i1_num_bins = 0;
+    for (; u1_ones; u1_ones--)
+    {
+        u4_bin_string |= (1 << i1_num_bins);
+        i1_num_bins++;
+    }
+    i1_num_bins++;
+    ih264e_encode_decision_bins(u4_bin_string, i1_num_bins, 0x333, 1,
+                                pu1_qp_ctxt, ps_cabac_ctxt);
+}
+
+
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encodes residual_block_cabac as defined in 7.3.5.3.3 for one block: the
+ * significance map followed by coeff_abs_level_minus1 and coeff_sign_flag.
+ * coded_block_flag is not written here; callers write it beforehand.
+ *
+ * @param[in] pi2_res_block
+ * pointer to the array of residues; u1_nnz entries are read, starting with
+ * entry u1_nnz - 1 and walking backwards
+ *
+ * @param[in] u1_nnz
+ * Number of non zero coeffs in the block
+ *
+ * @param[in] u1_max_num_coeffs
+ * Highest coefficient position of the block; the significance map loop stops
+ * once the position exceeds u1_max_num_coeffs - 1 (callers in this file pass
+ * 15, 14 or 3)
+ *
+ * @param[in] u2_sig_coeff_map
+ * Significant coeff map, bit i set when position i holds a non zero coeff;
+ * must be non zero
+ *
+ * @param[in] u4_ctx_cat_offset
+ * Offset into the context table of the absolute level contexts
+ *
+ * @param[in] pu1_ctxt_sig_coeff
+ * Pointer to the significant_coeff_flag context of position 0; the matching
+ * last_significant_coeff_flag contexts are reached by adding
+ * LAST_SIGNIFICANT_COEFF_FLAG_FRAME - SIGNIFICANT_COEFF_FLAG_FRAME
+ *
+ * @param[in] ps_cabac_ctxt
+ * Pointer to cabac context structure
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_write_coeff4x4(WORD16 *pi2_res_block, UWORD8 u1_nnz,
+ UWORD8 u1_max_num_coeffs,
+ UWORD16 u2_sig_coeff_map,
+ UWORD32 u4_ctx_cat_offset,
+ bin_ctxt_model *pu1_ctxt_sig_coeff,
+ cabac_ctxt_t *ps_cabac_ctxt)
+{
+
+ WORD8 i;
+ WORD16 *pi16_coeffs;
+ UWORD32 u4_sig_coeff, u4_bins;
+ UWORD32 u4_ctx_inc;
+ /* position of the highest set bit in the significance map */
+ UWORD8 u1_last_sig_coef_index = (31 - CLZ(u2_sig_coeff_map));
+
+ /* coded_block_flag (= 1) has already been written by the caller */
+
+ pi16_coeffs = pi2_res_block;
+ /* Encode the significance map. uc_last == 1 means the bin about to be */
+ /* coded is a significant_coeff_flag, uc_last == 0 means it is the */
+ /* last_significant_coeff_flag of the same position. */
+ {
+ bin_ctxt_model *pu1_bin_ctxt;
+ UWORD8 u1_bin, uc_last;
+
+ i = 0;
+ pu1_bin_ctxt = pu1_ctxt_sig_coeff;
+ u4_sig_coeff = 0;
+ /* position 0 is significant by construction when it is also the last */
+ u1_bin = 1;
+ if ((u1_last_sig_coef_index))
+ {
+ u1_bin = !!(u2_sig_coeff_map & 01);
+ }
+ uc_last = 1;
+
+ do
+ {
+ /* Encode Decision */
+ ih264e_cabac_encode_bin(ps_cabac_ctxt, u1_bin, pu1_bin_ctxt);
+
+ if (u1_bin & uc_last)
+ {
+ /* a significant flag of 1 was coded: remember the position */
+ /* and code the last flag of the same position next */
+ u4_sig_coeff = (u4_sig_coeff | (1 << i));
+ pu1_bin_ctxt = pu1_ctxt_sig_coeff + i
+ + LAST_SIGNIFICANT_COEFF_FLAG_FRAME
+ - SIGNIFICANT_COEFF_FLAG_FRAME;
+ u1_bin = (i == u1_last_sig_coef_index);
+ uc_last = 0;
+ }
+ else
+ {
+ /* move on to the significant flag of the next position */
+ i = i + 1;
+ pu1_bin_ctxt = pu1_ctxt_sig_coeff + i;
+ u1_bin = (i == u1_last_sig_coef_index);
+ uc_last = 1;
+ if ((i != u1_last_sig_coef_index))
+ {
+ u1_bin = !!((u2_sig_coeff_map >> i) & 01);
+ }
+ }
+ }while (!((i > u1_last_sig_coef_index)
+ || (i > (u1_max_num_coeffs - 1))));
+ }
+
+ /* Encode coeff_abs_level_minus1 and coeff_sign_flag */
+ /* Levels are coded from the last significant position backwards */
+ {
+ UWORD8 u1_sign;
+ UWORD16 u2_abs_level;
+ /* u1_abs_level_equal1 starts at 1, grows by one per coefficient while */
+ /* no level above 1 has been seen and is forced to 0 afterwards; */
+ /* u1_abs_level_gt1 counts the levels above 1 coded so far */
+ UWORD8 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0;
+ UWORD8 u1_ctx_inc;
+ UWORD8 u1_coff;
+ WORD16 i2_sufs;
+ WORD8 i1_bins_len;
+ i = u1_last_sig_coef_index;
+ pi16_coeffs = pi2_res_block + u1_nnz - 1;
+ do
+ {
+ {
+ /* drop position i and everything above it from the pending set */
+ u4_sig_coeff = u4_sig_coeff & ((1 << i) - 1);
+ u4_bins = 0;
+ u4_ctx_inc = 0;
+ i1_bins_len = 1;
+ /* Encode the AbsLevelMinus1 */
+ u2_abs_level = ABS(*(pi16_coeffs)) - 1;
+ /* CtxInc for bin0 */
+ u4_ctx_inc = MIN(u1_abs_level_equal1, 4);
+ /* CtxInc for remaining */
+ u1_ctx_inc = 5 + MIN(u1_abs_level_gt1, 4);
+ u4_ctx_inc = u4_ctx_inc + (u1_ctx_inc << 4);
+ /* update the counters for the next coefficient */
+ if (u2_abs_level)
+ {
+ u1_abs_level_gt1++;
+ u1_abs_level_equal1 = 0;
+ }
+ if (!u1_abs_level_gt1)
+ u1_abs_level_equal1++;
+
+ /* prefix is truncated unary with a cut off of 14 */
+ u1_coff = 14;
+ if (u2_abs_level >= u1_coff)
+ {
+ /* Prefix TU i.e string of 14 1's */
+ u4_bins = 0x3fff;
+ i1_bins_len = 14;
+ ih264e_encode_decision_bins(u4_bins, i1_bins_len,
+ u4_ctx_inc, 1, ps_cabac_ctxt->au1_cabac_ctxt_table
+ + u4_ctx_cat_offset,
+ ps_cabac_ctxt);
+
+ /* Suffix, uses EncodeBypass */
+ i2_sufs = u2_abs_level - u1_coff;
+
+ u4_bins = ih264e_cabac_UEGk0_binarization(i2_sufs,
+ &i1_bins_len);
+
+ ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins,
+ i1_bins_len);
+
+ }
+ else
+ {
+ /* Prefix only: u2_abs_level ones followed by a zero */
+ u4_bins = (1 << u2_abs_level) - 1;
+ i1_bins_len = u2_abs_level + 1;
+ /* Encode Terminating bit */
+ ih264e_encode_decision_bins(u4_bins, i1_bins_len,
+ u4_ctx_inc, 1, ps_cabac_ctxt->au1_cabac_ctxt_table
+ + u4_ctx_cat_offset,
+ ps_cabac_ctxt);
+ }
+ }
+ /* encode coeff_sign_flag[i] */
+ u1_sign = ((*pi16_coeffs) < 0) ? 1 : 0;
+ ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u1_sign, 1);
+ /* next lower significant position and its residue entry */
+ i = CLZ(u4_sig_coeff);
+ i = 31 - i;
+ pi16_coeffs--;
+ }while (u4_sig_coeff);
+ }
+
+}
+
+
+/**
+ *******************************************************************************
+ * @brief
+ *  Write DC coeffs for intra predicted luma block
+ *
+ * @description
+ *  Reads one packed coefficient block from the entropy context, writes its
+ *  coded_block_flag and, when the block has non zero coefficients, the
+ *  coefficients themselves. Bit 0 of the left and current DC csbp is updated
+ *  and the packed coefficient pointer is advanced.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_encode_residue_luma_dc(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* CABAC context and the MB contexts involved */
+    cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+    mb_info_ctxt_t *ps_top_mb = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+    mb_info_ctxt_t *ps_cur_mb = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+    /* packed residue */
+    void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+    tu_sblk_coeff_data_t *ps_mb_coeff_data;
+    WORD16 *pi2_res_block;
+    UWORD16 u2_sig_coeff_map;
+    UWORD8 u1_nnz;
+
+    bin_ctxt_model *pu1_cbf_ctxt;
+    UWORD32 u4_cbf_ctx_inc;
+    UWORD8 u1_left_coded, u1_top_coded;
+    UWORD8 u1_coded;
+
+    PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u1_nnz,
+                               u2_sig_coeff_map, pi2_res_block);
+
+    u1_coded = !!(u1_nnz);
+
+    /* coded_block_flag: ctxInc = left bit + 2 * top bit (bit 0 of DC csbp) */
+    u1_left_coded = ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] & 0x1;
+    u1_top_coded = ps_top_mb->u1_yuv_dc_csbp & 0x1;
+    u4_cbf_ctx_inc = u1_left_coded + (u1_top_coded << 1);
+
+    pu1_cbf_ctxt = ps_cabac_ctxt->au1_cabac_ctxt_table + CBF
+                   + (LUMA_DC_CTXCAT << 2) + u4_cbf_ctx_inc;
+    ih264e_cabac_encode_bin(ps_cabac_ctxt, u1_coded, pu1_cbf_ctxt);
+
+    if (!u1_coded)
+    {
+        /* nothing to write: drop the luma DC bit from both csbp values */
+        ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+        ps_cur_mb->u1_yuv_dc_csbp &= 0x6;
+    }
+    else
+    {
+        /* write the coefficients, then flag luma DC as coded */
+        ih264e_cabac_write_coeff4x4(pi2_res_block,
+                                    u1_nnz,
+                                    15,
+                                    u2_sig_coeff_map,
+                                    COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET,
+                                    ps_cabac_ctxt->au1_cabac_ctxt_table
+                                                    + SIGNIFICANT_COEFF_FLAG_FRAME
+                                                    + SIG_COEFF_CTXT_CAT_0_OFFSET,
+                                    ps_cabac_ctxt);
+
+        ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= 0x1;
+        ps_cur_mb->u1_yuv_dc_csbp |= 0x1;
+    }
+
+    /* hand the advanced read pointer back to the entropy context */
+    ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+}
+
+
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Write chroma residues to the bitstream
+ *
+ * @description
+ * Two chroma DC blocks are always parsed and written. The eight chroma AC
+ * blocks are parsed and written only when u1_chroma_cbp equals 2. The DC and
+ * AC coded sub block patterns (csbp) of the left and current MB contexts are
+ * updated, and the packed coefficient pointer of the entropy context is
+ * advanced past the consumed blocks.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure
+ *
+ * @param[in] u1_chroma_cbp
+ * coded block pattern, chroma
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_write_chroma_residue(entropy_ctxt_t *ps_ent_ctxt,
+ UWORD8 u1_chroma_cbp)
+{
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+ tu_sblk_coeff_data_t *ps_mb_coeff_data;
+ /* packed residue */
+ void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+ UWORD16 u2_sig_coeff_map;
+ UWORD8 u1_nnz;
+ mb_info_ctxt_t *ps_top_ctxt_mb_info, *ps_curr_ctxt;
+
+ ps_top_ctxt_mb_info = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+ /********************/
+ /* Write Chroma DC */
+ /********************/
+ {
+ WORD16 *pi2_res_block;
+ UWORD8 u1_left_dc_csbp, u1_top_dc_csbp, u1_uv, u1_cbf;
+
+ /* bit 0 of the DC csbp is luma; after the shift bit u1_uv is the */
+ /* flag of chroma DC block u1_uv */
+ u1_left_dc_csbp = (ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0]) >> 1;
+ u1_top_dc_csbp = (ps_top_ctxt_mb_info->u1_yuv_dc_csbp) >> 1;
+
+ for (u1_uv = 0; u1_uv < 2; u1_uv++)
+ {
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data,
+ u1_nnz, u2_sig_coeff_map, pi2_res_block);
+ u1_cbf = !!(u1_nnz);
+ {
+ /* coded_block_flag, ctxInc = left flag + 2 * top flag */
+ UWORD8 u1_a, u1_b;
+ UWORD32 u4_ctx_inc;
+ u1_a = (u1_left_dc_csbp >> u1_uv) & 0x01;
+ u1_b = (u1_top_dc_csbp >> u1_uv) & 0x01;
+ u4_ctx_inc = (u1_a + (u1_b << 1));
+
+ ih264e_cabac_encode_bin(ps_cabac_ctxt,
+ u1_cbf,
+ ps_cabac_ctxt->au1_cabac_ctxt_table + CBF
+ + (CHROMA_DC_CTXCAT << 2)
+ + u4_ctx_inc);
+ }
+
+ if (u1_cbf)
+ {
+ /* chroma DC block: highest coefficient position is 3 */
+ ih264e_cabac_write_coeff4x4(pi2_res_block,
+ u1_nnz,
+ 3,
+ u2_sig_coeff_map,
+ COEFF_ABS_LEVEL_MINUS1
+ + COEFF_ABS_LEVEL_CAT_3_OFFSET,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + SIGNIFICANT_COEFF_FLAG_FRAME
+ + SIG_COEFF_CTXT_CAT_3_OFFSET,
+ ps_cabac_ctxt);
+
+ SETBIT(u1_top_dc_csbp, u1_uv);
+ SETBIT(u1_left_dc_csbp, u1_uv);
+ }
+ else
+ {
+ CLEARBIT(u1_top_dc_csbp, u1_uv);
+ CLEARBIT(u1_left_dc_csbp, u1_uv);
+ }
+ }
+ /*************************************************************/
+ /* Update the DC csbp */
+ /* keep only the luma bit (bit 0), then merge the chroma bits */
+ /*************************************************************/
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1;
+ ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1);
+ ps_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1);
+ }
+ /*******************/
+ /* Write Chroma AC */
+ /*******************/
+ {
+ if (u1_chroma_cbp == 2)
+ {
+ UWORD8 u1_uv_blkno, u1_left_ac_csbp, u1_top_ac_csbp;
+ WORD16 *pi2_res_block;
+ /* chroma AC flags of the top MB live in the upper nibble */
+ u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_uv_ac_csbp[0];
+ u1_top_ac_csbp = ps_top_ctxt_mb_info->u1_yuv_ac_csbp >> 4;
+
+ for (u1_uv_blkno = 0; u1_uv_blkno < 8; u1_uv_blkno++)
+ {
+ UWORD8 u1_cbf;
+ UWORD8 u1_b2b0, u1_b2b1;
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data,
+ u1_nnz, u2_sig_coeff_map,
+ pi2_res_block);
+
+ u1_cbf = !!(u1_nnz);
+ /* u1_b2b0 (block bits 2 and 0) indexes the top csbp, */
+ /* u1_b2b1 (block bits 2 and 1) indexes the left csbp */
+ u1_b2b0 = ((u1_uv_blkno & 0x4) >> 1) | (u1_uv_blkno & 0x1);
+ u1_b2b1 = ((u1_uv_blkno & 0x4) >> 1)
+ | ((u1_uv_blkno & 0x2) >> 1);
+
+ {
+ UWORD8 u1_a, u1_b;
+ UWORD32 u4_ctx_inc;
+ /* write coded_block_flag */
+ u1_a = (u1_left_ac_csbp >> u1_b2b1) & 0x1;
+ u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x1;
+ u4_ctx_inc = u1_a + (u1_b << 1);
+
+ ih264e_cabac_encode_bin(ps_cabac_ctxt,
+ u1_cbf,
+ ps_cabac_ctxt->au1_cabac_ctxt_table + CBF
+ + (CHROMA_AC_CTXCAT << 2)
+ + u4_ctx_inc);
+
+ }
+ if (u1_cbf)
+ {
+ /* chroma AC block: highest coefficient position is 14 */
+ ih264e_cabac_write_coeff4x4(pi2_res_block,
+ u1_nnz,
+ 14,
+ u2_sig_coeff_map,
+ COEFF_ABS_LEVEL_MINUS1
+ + COEFF_ABS_LEVEL_CAT_4_OFFSET,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + +SIGNIFICANT_COEFF_FLAG_FRAME
+ + SIG_COEFF_CTXT_CAT_4_OFFSET,
+ ps_cabac_ctxt);
+
+ SETBIT(u1_left_ac_csbp, u1_b2b1);
+ SETBIT(u1_top_ac_csbp, u1_b2b0);
+ }
+ else
+ {
+ CLEARBIT(u1_left_ac_csbp, u1_b2b1);
+ CLEARBIT(u1_top_ac_csbp, u1_b2b0);
+
+ }
+ }
+ /*************************************************************/
+ /* Update the AC csbp */
+ /* the lower nibble of the current csbp is left unchanged */
+ /*************************************************************/
+ ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = u1_left_ac_csbp;
+ ps_curr_ctxt->u1_yuv_ac_csbp &= 0x0f;
+ ps_curr_ctxt->u1_yuv_ac_csbp |= (u1_top_ac_csbp << 4);
+ }
+ else
+ {
+ /* no chroma AC coded: clear the chroma AC flags */
+ ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0;
+ ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf;
+ }
+ }
+ /* hand the advanced read pointer back to the entropy context */
+ ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+}
+
+
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encodes Residues for the MB as defined in 7.3.5.3
+ *
+ * @description
+ * Writes the luma 4x4 blocks of every 8x8 block flagged in bits 0-3 of u4_cbp,
+ * then the chroma residue when u4_cbp >> 4 is non zero. The AC/DC coded sub
+ * block patterns (csbp) of the left and current MB contexts are updated along
+ * the way.
+ *
+ * @param[in] ps_ent_ctxt
+ * Pointer to entropy context structure
+ *
+ * @param[in] u4_cbp
+ * coded block pattern
+ *
+ * @param[in] u1_ctx_cat
+ * Context category, LUMA_AC_CTXCAT or LUMA_4x4_CTXCAT
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
+ UWORD32 u4_cbp, UWORD8 u1_ctx_cat)
+{
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+
+ tu_sblk_coeff_data_t *ps_mb_coeff_data;
+ /* packed residue */
+ void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+ UWORD16 u2_sig_coeff_map;
+ UWORD8 u1_nnz;
+ mb_info_ctxt_t *ps_curr_ctxt;
+ mb_info_ctxt_t *ps_top_ctxt;
+ UWORD8 u1_left_ac_csbp;
+ UWORD8 u1_top_ac_csbp;
+ UWORD32 u4_ctx_idx_offset_sig_coef, u4_ctx_idx_offset_abs_lvl;
+ ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+ ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ /* working copies of the neighbouring luma AC csbp; only the low four */
+ /* bits of each are written back below */
+ u1_left_ac_csbp = ps_cabac_ctxt->pu1_left_y_ac_csbp[0];
+ u1_top_ac_csbp = ps_top_ctxt->u1_yuv_ac_csbp;
+
+ if (u4_cbp & 0xf)
+ {
+ /* Write luma residue */
+ UWORD8 u1_offset;
+ WORD16 *pi2_res_block;
+ UWORD8 u1_subblk_num;
+ /* LUMA_AC_CTXCAT blocks use the category 1 contexts and stop one */
+ /* coefficient position earlier (15 - u1_offset) than the others */
+ if (u1_ctx_cat == LUMA_AC_CTXCAT)
+ {
+ u1_offset = 1;
+ u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_1_OFFSET;
+ u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1
+ + COEFF_ABS_LEVEL_CAT_1_OFFSET;
+ }
+ else
+ {
+ u1_offset = 0;
+ u4_ctx_idx_offset_sig_coef = SIG_COEFF_CTXT_CAT_2_OFFSET;
+ u4_ctx_idx_offset_abs_lvl = COEFF_ABS_LEVEL_MINUS1
+ + COEFF_ABS_LEVEL_CAT_2_OFFSET;
+ }
+
+ for (u1_subblk_num = 0; u1_subblk_num < 16; u1_subblk_num++)
+ {
+ /* u1_b3b2 selects the cbp bit of the enclosing 8x8 block, */
+ /* u1_b2b0 indexes the top csbp and u1_b3b1 the left csbp */
+ UWORD8 u1_b0, u1_b1, u1_b2, u1_b3, u1_b2b0, u1_b3b1, u1_b3b2;
+ u1_b0 = (u1_subblk_num & 0x1);
+ u1_b1 = (u1_subblk_num & 0x2) >> 1;
+ u1_b2 = (u1_subblk_num & 0x4) >> 2;
+ u1_b3 = (u1_subblk_num & 0x8) >> 3;
+ u1_b2b0 = (u1_b2 << 1) | (u1_b0);
+ u1_b3b1 = (u1_b3 << 1) | (u1_b1);
+ u1_b3b2 = (u1_b3 << 1) | (u1_b2);
+
+ if (!((u4_cbp >> u1_b3b2) & 0x1))
+ {
+ /* ---------------------------------------------------------- */
+ /* The current 8x8 block is not coded, so skip its four sub */
+ /* blocks and clear the two top and two left csbp bits that */
+ /* belong to it */
+ /* ---------------------------------------------------------- */
+ CLEARBIT(u1_top_ac_csbp, u1_b2b0);
+ CLEARBIT(u1_top_ac_csbp, (u1_b2b0 + 1));
+ CLEARBIT(u1_left_ac_csbp, u1_b3b1);
+ CLEARBIT(u1_left_ac_csbp, (u1_b3b1 + 1));
+
+ /* together with the loop increment this steps over 4 blocks */
+ u1_subblk_num += 3;
+ }
+ else
+ {
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data,
+ u1_nnz, u2_sig_coeff_map,
+ pi2_res_block);
+
+ UWORD8 u1_csbf = !!(u1_nnz);
+ {
+ /* coded_block_flag, ctxInc = left flag + 2 * top flag */
+ UWORD8 u1_a, u1_b;
+ UWORD32 u4_ctx_inc;
+ u1_b = (u1_top_ac_csbp >> u1_b2b0) & 0x01;
+ u1_a = (u1_left_ac_csbp >> u1_b3b1) & 0x01;
+ u4_ctx_inc = u1_a + (u1_b << 1);
+
+ /* Encode the bin */
+ ih264e_cabac_encode_bin(ps_cabac_ctxt,
+ u1_csbf,
+ ps_cabac_ctxt->au1_cabac_ctxt_table + CBF
+ + (u1_ctx_cat << 2) + u4_ctx_inc);
+
+ }
+ /*************************************************/
+ /* Write the coefficients when the block is coded */
+ /*************************************************/
+ if (u1_csbf)
+ {
+ ih264e_cabac_write_coeff4x4(pi2_res_block,
+ u1_nnz,
+ (UWORD8) (15 - u1_offset),
+ u2_sig_coeff_map,
+ u4_ctx_idx_offset_abs_lvl,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + SIGNIFICANT_COEFF_FLAG_FRAME
+ + u4_ctx_idx_offset_sig_coef,
+ ps_cabac_ctxt);
+
+ SETBIT(u1_top_ac_csbp, u1_b2b0);
+ SETBIT(u1_left_ac_csbp, u1_b3b1);
+ }
+ else
+ {
+ CLEARBIT(u1_top_ac_csbp, u1_b2b0);
+ CLEARBIT(u1_left_ac_csbp, u1_b3b1);
+ }
+ }
+ }
+ /**************************************************************************/
+ /* Update the AC csbp */
+ /* luma flags occupy the low nibble; the high nibble is left unchanged */
+ /**************************************************************************/
+ ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = u1_left_ac_csbp & 0xf;
+ u1_top_ac_csbp &= 0x0f;
+ ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0;
+ ps_curr_ctxt->u1_yuv_ac_csbp |= u1_top_ac_csbp;
+ }
+ else
+ {
+ /* no luma block coded: clear the luma AC flags */
+ ps_cabac_ctxt->pu1_left_y_ac_csbp[0] = 0;
+ ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf0;
+ }
+
+ /* Write chroma residue */
+
+ /* publish the read pointer first: the chroma writer reads it from the */
+ /* entropy context */
+ ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+ {
+ UWORD8 u1_cbp_chroma;
+ u1_cbp_chroma = u4_cbp >> 4;
+ if (u1_cbp_chroma)
+ {
+ ih264e_cabac_write_chroma_residue(ps_ent_ctxt, u1_cbp_chroma);
+ }
+ else
+ {
+ /* no chroma coded: clear the chroma DC and AC flags */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x1;
+ ps_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
+ ps_cabac_ctxt->pu1_left_uv_ac_csbp[0] = 0;
+ ps_curr_ctxt->u1_yuv_ac_csbp &= 0xf;
+ }
+ }
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encodes one motion vector difference component (9.3.3.1.1.7 )
+ *
+ * @description
+ * The context increment of the first bin is chosen from ui2_abs_mvd:
+ * 0 if it is below 3, 2 if it is above 32 and 1 otherwise.
+ * A zero value is coded as a single 0 bin with that context.
+ * A non-zero value is coded as
+ * - a context coded prefix: a run of 1 bins, at most 9 of them, followed by
+ * a terminating 0 bin only when the magnitude is below 9,
+ * - a suffix sent through ih264e_cabac_encode_bypass_bins when the magnitude
+ * is 9 or more (magnitude - 9, escalating order starting at k = 3),
+ * - a sign bin sent through ih264e_cabac_encode_bypass_bins
+ * (0 for a positive value, 1 for a negative value).
+ *
+ * @param[in] u1_mvd
+ * Signed motion vector difference component to be encoded
+ *
+ * @param[in] u4_ctx_idx_offset
+ * ctxIdxOffset of the context group to use (callers in this file pass
+ * MVD_X or MVD_Y)
+ *
+ * @param[in] ui2_abs_mvd
+ * sum of absolute value of corresponding neighboring motion vectors
+ *
+ * @param[in] ps_cabac_ctxt
+ * Pointer to cabac context structure
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_ctx_mvd(WORD16 u1_mvd, UWORD32 u4_ctx_idx_offset,
+ UWORD16 ui2_abs_mvd,
+ cabac_ctxt_t *ps_cabac_ctxt)
+{
+
+ UWORD8 u1_bin, u1_ctxt_inc;
+ WORD8 k = 3, u1_coff = 9; /* initial suffix order and prefix cut-off */
+ WORD16 i2_abs_mvd, i2_sufs;
+ UWORD32 u4_ctx_inc;
+ UWORD32 u4_bins;
+ WORD8 i1_bins_len;
+
+ /* if mvd < u1_coff
+ only Prefix
+ else
+ Prefix + Suffix
+
+ encode sign bit
+
+ Prefix TU encoding Cmax = u1_coff and Suffix 3rd order Exp-Golomb
+
+ The context increment of the first bin depends on the neighbour sum
+ ui2_abs_mvd and is selected first
+ */
+
+ if (ui2_abs_mvd < 3)
+ u4_ctx_inc = 0;
+ else if (ui2_abs_mvd > 32)
+ u4_ctx_inc = 2;
+ else
+ u4_ctx_inc = 1;
+
+ u4_bins = 0;
+ i1_bins_len = 1;
+
+ if (u1_mvd == 0)
+ {
+ /* zero mvd: a single 0 bin, no sign bin follows */
+ ih264e_cabac_encode_bin(ps_cabac_ctxt,
+ 0,
+ ps_cabac_ctxt->au1_cabac_ctxt_table + u4_ctx_idx_offset
+ + u4_ctx_inc);
+ }
+ else
+ {
+ i2_abs_mvd = ABS(u1_mvd);
+ if (i2_abs_mvd >= u1_coff)
+ {
+ /* Prefix TU i.e string of 9 1's */
+ u4_bins = 0x1ff;
+ i1_bins_len = 9;
+ u4_ctx_inc = (u4_ctx_inc | 0x065430); /* keeps the low nibble chosen above; nibbles 1..4 hold 3, 4, 5, 6 */
+
+ /* NOTE(review): the literal 4 is presumably the index of the last */
+ /* valid ctx inc nibble, reused for the remaining bins - confirm */
+ /* against ih264e_encode_decision_bins */
+ ih264e_encode_decision_bins(u4_bins,
+ i1_bins_len,
+ u4_ctx_inc,
+ 4,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + u4_ctx_idx_offset,
+ ps_cabac_ctxt);
+
+ /* Suffix, uses EncodeBypass */
+ u4_bins = 0;
+ i1_bins_len = 0;
+ i2_sufs = i2_abs_mvd - u1_coff; /* part of the magnitude not covered by the prefix */
+ while (1)
+ {
+ if (i2_sufs >= (1 << k))
+ {
+ u4_bins = (u4_bins | (1 << i1_bins_len)); /* unary part: store a 1 */
+ i1_bins_len++;
+ i2_sufs = i2_sufs - (1 << k);
+ k++; /* and move to the next order */
+ }
+ else
+ {
+ i1_bins_len++; /* terminating 0 of the unary part (bit is left clear) */
+ while (k--)
+ {
+ /* k bits of the remainder, most significant one stored first */
+ u1_bin = ((i2_sufs >> k) & 0x01);
+ u4_bins = (u4_bins | (u1_bin << i1_bins_len));
+ i1_bins_len++;
+ }
+ break;
+ }
+ }
+ ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, u4_bins,
+ i1_bins_len);
+ }
+ else
+ {
+ /* Prefix only */
+ /* b0 */
+ u4_bins = 1;
+ i2_abs_mvd--;
+ u1_ctxt_inc = 3; /* ctx inc of the second bin; grows by one per bin up to 6 */
+ while (i2_abs_mvd)
+ {
+ i2_abs_mvd--;
+ u4_bins = (u4_bins | (1 << i1_bins_len)); /* one more 1 bin */
+ if (u1_ctxt_inc <= 6)
+ {
+ /* ctx inc of bin i1_bins_len goes into nibble i1_bins_len */
+ u4_ctx_inc = (u4_ctx_inc
+ | (u1_ctxt_inc << (i1_bins_len << 2)));
+ u1_ctxt_inc++;
+ }
+ i1_bins_len++;
+ }
+ /* Encode Terminating bit */
+ if (i1_bins_len <= 4)
+ u4_ctx_inc = (u4_ctx_inc | (u1_ctxt_inc << (i1_bins_len << 2)));
+ i1_bins_len++; /* the terminating bin is the 0 left clear in u4_bins */
+ ih264e_encode_decision_bins(u4_bins,
+ i1_bins_len,
+ u4_ctx_inc,
+ 4,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + u4_ctx_idx_offset,
+ ps_cabac_ctxt);
+ }
+ /* sign bit, uses EncodeBypass */
+ if (u1_mvd > 0)
+ ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, 0, 1);
+ else
+ ih264e_cabac_encode_bypass_bins(ps_cabac_ctxt, 1, 1);
+ }
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encodes all motion vectors for a P16x16 MB
+ *
+ * @description
+ * Reads two WORD16 values (x component first, then y) from pi2_mv_ptr and
+ * codes each with ih264e_cabac_enc_ctx_mvd. The neighbour sum passed for
+ * context selection is the left entry plus the top entry of the matching
+ * component. Afterwards the absolute value of each coded component, clipped
+ * to [0, 127], is written to both the top and the left context entries.
+ *
+ * @param[in] ps_cabac_ctxt
+ * Pointer to cabac context structure
+ *
+ * @param[in] pi2_mv_ptr
+ * Pointer to array of motion vectors
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_mvds_p16x16(cabac_ctxt_t *ps_cabac_ctxt,
+ WORD16 *pi2_mv_ptr)
+{
+
+
+ /* Encode the differential component of the motion vectors */
+
+ {
+ UWORD8 u1_abs_mvd_x, u1_abs_mvd_y;
+ UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt;
+ WORD16 u2_mv; /* signed despite the u2_ prefix */
+ u1_abs_mvd_x = 0;
+ u1_abs_mvd_y = 0;
+ pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0];
+ pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0];
+ {
+ UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a,
+ u2_abs_mvd_y_b;
+ /* suffix _b is read from the top context, suffix _a from the left context */
+ u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0];
+ u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1];
+ u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0];
+ u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1];
+ u2_mv = *(pi2_mv_ptr++); /* x component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X,
+ (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv)); /* value kept for the neighbouring MBs */
+ u2_mv = *(pi2_mv_ptr++); /* y component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y,
+ (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv));
+ }
+ /***************************************************************/
+ /* Store abs_mvd_values cabac contexts */
+ /***************************************************************/
+ pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x;
+ pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y;
+ }
+}
+
+
+/**
+ *******************************************************************************
+ * @brief
+ * Encodes all motion vectors for a B MB (Assumes that the mb type is
+ * B_L0_16x16, B_L1_16x16 or B_Bi_16x16)
+ *
+ * @description
+ * Up to two (x, y) pairs are read sequentially from pi2_mv_ptr. The first
+ * pair is coded unless the prediction mode is PRED_L1 and uses context
+ * entries [0] and [1]; the second pair is coded unless the prediction mode
+ * is PRED_L0 and uses context entries [2] and [3]. For each pair the top and
+ * left context entries are then overwritten with the clipped absolute values
+ * of the coded components, or with 0 when the pair was not coded.
+ *
+ * @param[in] ps_cabac_ctxt
+ * Pointer to cabac context structure
+ *
+ * @param[in] pi2_mv_ptr
+ * Pointer to array of motion vectors
+ *
+ * @param[in] i4_mb_part_pred_mode
+ * Prediction mode of the MB; compared against PRED_L0 and PRED_L1 here
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+static void ih264e_cabac_enc_mvds_b16x16(cabac_ctxt_t *ps_cabac_ctxt,
+ WORD16 *pi2_mv_ptr,
+ WORD32 i4_mb_part_pred_mode )
+{
+
+ /* Encode the differential component of the motion vectors */
+
+ {
+ UWORD8 u1_abs_mvd_x, u1_abs_mvd_y;
+ UWORD8 *pu1_top_mv_ctxt, *pu1_lft_mv_ctxt;
+ WORD16 u2_mv; /* signed despite the u2_ prefix */
+ u1_abs_mvd_x = 0;
+ u1_abs_mvd_y = 0;
+ pu1_top_mv_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_mv[0];
+ pu1_lft_mv_ctxt = ps_cabac_ctxt->pu1_left_mv_ctxt_inc[0];
+ if (i4_mb_part_pred_mode != PRED_L1)/* first pair is coded for every mode except PRED_L1 */
+ {
+ UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a,
+ u2_abs_mvd_y_b;
+ /* suffix _b is read from the top context, suffix _a from the left context */
+ u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[0];
+ u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[1];
+ u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[0];
+ u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[1];
+ u2_mv = *(pi2_mv_ptr++); /* x component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X,
+ (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv));
+ u2_mv = *(pi2_mv_ptr++); /* y component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y,
+ (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv));
+ }
+ /***************************************************************/
+ /* Store abs_mvd_values cabac contexts */
+ /* (zero when the first pair was not coded) */
+ /***************************************************************/
+ pu1_top_mv_ctxt[0] = pu1_lft_mv_ctxt[0] = u1_abs_mvd_x;
+ pu1_top_mv_ctxt[1] = pu1_lft_mv_ctxt[1] = u1_abs_mvd_y;
+
+ u1_abs_mvd_x = 0;
+ u1_abs_mvd_y = 0;
+ if (i4_mb_part_pred_mode != PRED_L0)/* second pair is coded for every mode except PRED_L0 */
+ {
+ UWORD16 u2_abs_mvd_x_a, u2_abs_mvd_x_b, u2_abs_mvd_y_a,
+ u2_abs_mvd_y_b;
+ /* the second pair uses context entries [2] and [3] */
+ u2_abs_mvd_x_b = (UWORD16) pu1_top_mv_ctxt[2];
+ u2_abs_mvd_y_b = (UWORD16) pu1_top_mv_ctxt[3];
+ u2_abs_mvd_x_a = (UWORD16) pu1_lft_mv_ctxt[2];
+ u2_abs_mvd_y_a = (UWORD16) pu1_lft_mv_ctxt[3];
+ u2_mv = *(pi2_mv_ptr++); /* x component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_X,
+ (UWORD16) (u2_abs_mvd_x_a + u2_abs_mvd_x_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_x = CLIP3(0, 127, ABS(u2_mv));
+ u2_mv = *(pi2_mv_ptr++); /* y component */
+
+ ih264e_cabac_enc_ctx_mvd(u2_mv, MVD_Y,
+ (UWORD16) (u2_abs_mvd_y_a + u2_abs_mvd_y_b),
+ ps_cabac_ctxt);
+
+ u1_abs_mvd_y = CLIP3(0, 127, ABS(u2_mv));
+ }
+ /***************************************************************/
+ /* Store abs_mvd_values cabac contexts */
+ /* (zero when the second pair was not coded) */
+ /***************************************************************/
+ pu1_top_mv_ctxt[2] = pu1_lft_mv_ctxt[2] = u1_abs_mvd_x;
+ pu1_top_mv_ctxt[3] = pu1_lft_mv_ctxt[3] = u1_abs_mvd_y;
+ }
+}
+
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for an Intra Slice.
+ *
+ * @description
+ * The mb syntax layer for intra slices constitutes luma mb mode, mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification.
+ *
+ * The packed MB header at ps_ent_ctxt->pv_mb_header_data is read as:
+ * byte 0 - mb_tpm: bits 0-3 mb type, bits 4-5 feed the I16x16 mb type
+ * computation, bits 6 and above chroma intra mode
+ * byte 1 - cbp: bits 0-3 luma, bits 4 and above chroma
+ * byte 2 - mb_qp_delta
+ * followed by 8 more bytes that are consumed only for I4x4 MBs.
+ * On success pv_mb_header_data is advanced past the consumed bytes and the
+ * bits written are added to u4_header_bits[0] and u4_residue_bits[0].
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ * IH264E_BITSTREAM_BUFFER_OVERFLOW when fewer than MIN_STREAM_SIZE_MB bytes
+ * remain in the stream buffer (nothing is written in that case),
+ * IH264E_SUCCESS otherwise
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_islice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+ /* packed header data */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+ mb_info_ctxt_t *ps_curr_ctxt;
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+ UWORD32 u4_cbp_l, u4_cbp_c;
+ WORD32 byte_count = 0; /* number of header bytes consumed for this MB */
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+
+ if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
+ >= ps_bitstream->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+ }
+ /* mb header info */
+ mb_tpm = *pu1_byte++;
+ byte_count++;
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+
+ ih264e_get_cabac_context(ps_ent_ctxt, mb_type);
+ ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ if (mb_type == I16x16)
+ {
+ /* 1 + bits 4-5 of mb_tpm + 4 * chroma cbp + 12 when all four luma cbp bits are set */
+ luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2)
+ + (u4_cbp_l == 15) * 12;
+ }
+ else
+ {
+ luma_intra_mode = 0;
+ }
+
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ /* Encode Intra pred mode, Luma */
+ ih264e_cabac_enc_intra_mb_type(ISLICE, luma_intra_mode, ps_cabac_ctxt,
+ MB_TYPE_I_SLICE);
+
+ if (mb_type == I4x4)
+ { /* Encode 4x4 MB modes */
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
+ byte_count += 8; /* the I4x4 modes occupy 8 header bytes */
+ }
+ /* Encode chroma mode */
+ ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt);
+
+ if (mb_type != I16x16)
+ { /* Encode MB cbp */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+ }
+
+ if ((cbp > 0) || (mb_type == I16x16))
+ {
+ /* Encode mb_qp_delta */
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ ps_curr_ctxt->u1_cbp = cbp;
+ ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt);
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT);
+ }
+ else
+ {
+ ps_curr_ctxt->u1_cbp = cbp;
+ ps_curr_ctxt->u1_mb_type = I4x4; /* NOTE(review): dead store, overwritten by the next line */
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT);
+ /* bit 0 of the dc csbp is cleared in both the left and the current context */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6;
+ }
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ }
+ else
+ {
+ /* no residue is written: clear every coded sub block pattern */
+ ps_curr_ctxt->u1_yuv_ac_csbp = 0;
+ ps_curr_ctxt->u1_yuv_dc_csbp = 0;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0;
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+
+ /* The number of bits used for the MB header has been accumulated above */
+ }
+ /* intra MB: the mvd contexts of the current MB and of the left neighbour are reset */
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = cbp;
+ ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+
+ }
+ else
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+
+ }
+ return IH264E_SUCCESS;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for Inter slices
+ *
+ * @description
+ * The mb syntax layer for inter slices constitutes luma mb mode, mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification
+ *
+ * The packed MB header at ps_ent_ctxt->pv_mb_header_data is read as:
+ * intra MB - mb_tpm, cbp, mb_qp_delta, plus 8 bytes for I4x4
+ * PSKIP MB - mb_tpm only
+ * other MB - mb_tpm, cbp, mb_qp_delta, plus 4 bytes read as two WORD16
+ * motion vector components
+ * On success pv_mb_header_data is advanced past the consumed bytes. Bits
+ * written for intra MBs are added to u4_header_bits[0] / u4_residue_bits[0],
+ * bits written for inter MBs to u4_header_bits[1] / u4_residue_bits[1].
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ * IH264E_BITSTREAM_BUFFER_OVERFLOW when fewer than MIN_STREAM_SIZE_MB bytes
+ * remain in the stream buffer (nothing is written in that case),
+ * IH264E_SUCCESS otherwise
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_pslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+
+ mb_info_ctxt_t *ps_curr_ctxt;
+
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+ UWORD32 u4_cbp_l, u4_cbp_c;
+ WORD32 byte_count = 0; /* number of header bytes consumed for this MB */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+ if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
+ >= ps_bitstream->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+ }
+ /* mb header info */
+ mb_tpm = *pu1_byte++;
+ byte_count++;
+
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+ /* CABAC contexts for the MB */
+ ih264e_get_cabac_context(ps_ent_ctxt, mb_type);
+ ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+ /* if Intra MB */
+ if (mb_type == I16x16 || mb_type == I4x4)
+ {
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+ /* Encode mb_skip_flag */
+ ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE);
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ if (mb_type == I16x16)
+ {
+ /* 1 + bits 4-5 of mb_tpm + 4 * chroma cbp + 12 when all four luma cbp bits are set */
+ luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2)
+ + (u4_cbp_l == 15) * 12;
+ }
+ else
+ {
+ luma_intra_mode = 0;
+ }
+ /* Encode intra mb type */
+ {
+ /* a 1 bin on the first P slice mb type context precedes the intra mb type */
+ ih264e_cabac_encode_bin(ps_cabac_ctxt,
+ 1,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + MB_TYPE_P_SLICE);
+
+ ih264e_cabac_enc_intra_mb_type(PSLICE, (UWORD8) luma_intra_mode,
+ ps_cabac_ctxt, MB_TYPE_P_SLICE);
+ }
+
+ if (mb_type == I4x4)
+ { /* Intra 4x4 modes */
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
+ byte_count += 8; /* the I4x4 modes occupy 8 header bytes */
+ }
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt);
+
+ if (mb_type != I16x16)
+ {
+ /* encode CBP */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+ }
+
+ if ((cbp > 0) || (mb_type == I16x16))
+ {
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+
+ /* Encoding Residue */
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ ps_curr_ctxt->u1_cbp = (UWORD8) cbp;
+ ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt);
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT);
+ }
+ else
+ {
+ ps_curr_ctxt->u1_cbp = (UWORD8) cbp;
+ ps_curr_ctxt->u1_mb_type = I4x4; /* NOTE(review): dead store, overwritten by the next line */
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT);
+ /* bit 0 of the dc csbp is cleared in both the left and the current context */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6;
+ }
+
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ }
+ else
+ {
+ /* no residue is written: clear every coded sub block pattern */
+ ps_curr_ctxt->u1_yuv_ac_csbp = 0;
+ ps_curr_ctxt->u1_yuv_dc_csbp = 0;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0;
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ }
+
+ /* intra MB: the mvd contexts of the current MB and of the left neighbour are reset */
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = (UWORD8) cbp;
+
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ }
+ else
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ }
+
+ ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+
+ return IH264E_SUCCESS;
+ }
+ else /* Inter MB */
+ {
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+ /* Encoding P16x16 */
+ if (mb_type != PSKIP)
+ {
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+
+ /* Encoding mb_skip */
+ ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE);
+
+ /* Encoding mb_type as P16x16 */
+ {
+ UWORD32 u4_ctx_inc_p;
+ u4_ctx_inc_p = (0x010 + ((2) << 8)); /* 0x210: nibbles 0, 1, 2 */
+
+ /* three 0 bins */
+ ih264e_encode_decision_bins(0, 3, u4_ctx_inc_p, 3,
+ &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_P_SLICE]),
+ ps_cabac_ctxt);
+ }
+ ps_curr_ctxt->u1_mb_type = CAB_P;
+ {
+ /* NOTE(review): pu1_byte is 3 bytes past the start of this MB's */
+ /* header here; confirm the packed header keeps this WORD16 */
+ /* access suitably aligned */
+ WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte;
+ byte_count += 4; /* one (x, y) pair of WORD16 */
+ ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type
+ | CAB_NON_BD16x16);
+ /* Encoding motion vector for P16x16 */
+ ih264e_cabac_enc_mvds_p16x16(ps_cabac_ctxt, pi2_mv_ptr);
+ }
+ /* Encode CBP */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+
+ if (cbp)
+ {
+ /* encode mb_qp_delta */
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+ }
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+
+ }
+ else/* MB = PSKIP */
+ {
+ ih264e_cabac_enc_mb_skip(1, ps_cabac_ctxt, MB_SKIP_FLAG_P_SLICE);
+
+ ps_curr_ctxt->u1_mb_type = CAB_P_SKIP;
+ (*ps_ent_ctxt->pi4_mb_skip_run)++;
+
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ cbp = 0; /* forces the no-residue branch below */
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* No residue follows a skipped MB, so the start offset is not updated */
+
+ }
+
+ if (cbp > 0)
+ {
+ /* Encode residue */
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT);
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+
+ /* bit 0 of the dc csbp is cleared in both the left and the current context */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+ ps_curr_ctxt->u1_yuv_dc_csbp &= 0x6;
+ }
+ else
+ {
+ /* no residue is written: clear every coded sub block pattern */
+ ps_curr_ctxt->u1_yuv_ac_csbp = 0;
+ ps_curr_ctxt->u1_yuv_dc_csbp = 0;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0;
+ }
+ ps_curr_ctxt->u1_intrapred_chroma_mode = 0;
+ ps_curr_ctxt->u1_cbp = cbp;
+ ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ return IH264E_SUCCESS;
+ }
+}
+
+
+/* ! < Table 9-37 – Binarization for macroblock types in B slices in ITU_T_H264-201402
+ * Bits 0-7 : binarised value
+ * Bits 8-15: length of binary sequence
+ * Only 23 of the 27 entries are initialised explicitly; the remaining
+ * entries are zero. ih264e_write_bslice_mb_cabac reads the length with
+ * (entry >> 8) & 0x0F and the bins with entry & 0xFF. */
+
+
+static const UWORD32 u4_b_mb_type[27] = { 0x0100, 0x0301, 0x0305, 0x0603,
+ 0x0623, 0x0613, 0x0633, 0x060b,
+ 0x062b, 0x061b, 0x063b, 0x061f,
+ 0x0707, 0x0747, 0x0727, 0x0767,
+ 0x0717, 0x0757, 0x0737, 0x0777,
+ 0x070f, 0x074f, 0x063f };
+/* CtxInc for mb types in B slices, indexed like u4_b_mb_type.
+ * The low nibble of every entry is 0; ih264e_write_bslice_mb_cabac ORs the
+ * neighbour dependent increment into it. The higher nibbles are presumably
+ * the ctxInc of the following bins, one nibble per bin - confirm against
+ * ih264e_encode_decision_bins. Only 23 of the 27 entries are initialised
+ * explicitly; the remaining entries are zero. */
+static const UWORD32 ui_b_mb_type_ctx_inc[27] = { 0x00, 0x0530, 0x0530,
+ 0x0555430, 0x0555430,
+ 0x0555430, 0x0555430,
+ 0x0555430, 0x0555430,
+ 0x0555430, 0x0555430,
+ 0x0555430, 0x05555430,
+ 0x05555430, 0x05555430,
+ 0x05555430, 0x05555430,
+ 0x05555430, 0x05555430,
+ 0x05555430, 0x05555430,
+ 0x05555430, 0x0555430 };
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function generates CABAC coded bit stream for B slices
+ *
+ * @description
+ * The mb syntax layer for inter slices constitutes luma mb mode,
+ * mb qp delta, coded block pattern, chroma mb mode and
+ * luma/chroma residue. These syntax elements are written as directed by table
+ * 7.3.5 of h264 specification
+ *
+ * The packed MB header at ps_ent_ctxt->pv_mb_header_data is read as:
+ * intra MB - mb_tpm, cbp, mb_qp_delta, plus 8 bytes for I4x4
+ * BDIRECT - mb_tpm, cbp, mb_qp_delta
+ * BSKIP - mb_tpm only
+ * other MB - mb_tpm, cbp, mb_qp_delta, plus 4 bytes of motion vector data
+ * (8 bytes when the prediction mode is PRED_BI)
+ * On success pv_mb_header_data is advanced past the consumed bytes. Bits
+ * written for intra MBs are added to u4_header_bits[0] / u4_residue_bits[0],
+ * bits written for inter MBs to u4_header_bits[1] / u4_residue_bits[1].
+ *
+ * @param[in] ps_ent_ctxt
+ * pointer to entropy context
+ *
+ * @returns error code
+ * IH264E_BITSTREAM_BUFFER_OVERFLOW when fewer than MIN_STREAM_SIZE_MB bytes
+ * remain in the stream buffer (nothing is written in that case),
+ * IH264E_SUCCESS otherwise
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+IH264E_ERROR_T ih264e_write_bslice_mb_cabac(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+ /* CABAC context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+
+ mb_info_ctxt_t *ps_curr_ctxt;
+
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+ UWORD32 u4_cbp_l, u4_cbp_c;
+ WORD32 byte_count = 0; /* number of header bytes consumed for this MB */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+ if ((ps_bitstream->u4_strm_buf_offset + MIN_STREAM_SIZE_MB)
+ >= ps_bitstream->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+ }
+ /* mb header info */
+ mb_tpm = *pu1_byte++;
+ byte_count++;
+
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+ /* CABAC contexts for the MB */
+ ih264e_get_cabac_context(ps_ent_ctxt, mb_type);
+ ps_curr_ctxt = ps_cabac_ctxt->ps_curr_ctxt_mb_info;
+
+ /* if Intra MB */
+ if (mb_type == I16x16 || mb_type == I4x4)
+ {
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+ /* Encode mb_skip_flag */
+ ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ if (mb_type == I16x16)
+ {
+ /* 1 + bits 4-5 of mb_tpm + 4 * chroma cbp + 12 when all four luma cbp bits are set */
+ luma_intra_mode = ((mb_tpm >> 4) & 3) + 1 + (u4_cbp_c << 2)
+ + (u4_cbp_l == 15) * 12;
+ }
+ else
+ {
+ luma_intra_mode = 0;
+ }
+ /* Encode intra mb type */
+ {
+ mb_info_ctxt_t *ps_left_ctxt = ps_cabac_ctxt->ps_left_ctxt_mb_info;
+ mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ UWORD32 u4_ctx_inc = 0;
+
+ /* first bin ctx inc: one for each of left and top that is not the */
+ /* default context and whose masked mb type is not CAB_BD16x16 */
+ if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+ if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+
+ /* Intra Prefix Only "111101" */
+ u4_ctx_inc = (u4_ctx_inc | 0x05555430);
+ ih264e_encode_decision_bins(0x2f, /* 101111b, i.e. "111101" read from bit 0 upwards */
+ 6,
+ u4_ctx_inc,
+ 3,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + MB_TYPE_B_SLICE,
+ ps_cabac_ctxt);
+
+ ih264e_cabac_enc_intra_mb_type(BSLICE, (UWORD8) luma_intra_mode,
+ ps_cabac_ctxt, MB_TYPE_B_SLICE);
+
+ }
+
+ if (mb_type == I4x4)
+ { /* Intra 4x4 modes */
+ ih264e_cabac_enc_4x4mb_modes(ps_cabac_ctxt, pu1_byte);
+ byte_count += 8; /* the I4x4 modes occupy 8 header bytes */
+ }
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ ih264e_cabac_enc_chroma_predmode(chroma_intra_mode, ps_cabac_ctxt);
+
+ if (mb_type != I16x16)
+ {
+ /* encode CBP */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+ }
+
+ if ((cbp > 0) || (mb_type == I16x16))
+ {
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+
+ /* Encoding Residue */
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ ps_curr_ctxt->u1_cbp = (UWORD8) cbp;
+ ih264e_cabac_encode_residue_luma_dc(ps_ent_ctxt);
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_AC_CTXCAT);
+ }
+ else
+ {
+ ps_curr_ctxt->u1_cbp = (UWORD8) cbp;
+ ps_curr_ctxt->u1_mb_type = I4x4; /* NOTE(review): dead store, overwritten by the next line */
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT);
+ /* bit 0 of the dc csbp is cleared in both the left and the current context */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_yuv_dc_csbp &= 0x6;
+ }
+
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ }
+ else
+ {
+ /* no residue is written: clear every coded sub block pattern */
+ ps_curr_ctxt->u1_yuv_ac_csbp = 0;
+ ps_curr_ctxt->u1_yuv_dc_csbp = 0;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0;
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset
+ - bitstream_start_offset;
+ }
+
+ /* intra MB: the mvd contexts of the current MB and of the left neighbour are reset */
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ ps_cabac_ctxt->ps_curr_ctxt_mb_info->u1_cbp = (UWORD8) cbp;
+
+ if (mb_type == I16x16)
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I16x16;
+ }
+ else
+ {
+ ps_curr_ctxt->u1_mb_type = CAB_I4x4;
+ }
+
+ ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+
+ return IH264E_SUCCESS;
+ }
+
+ else /* Inter MB */
+ {
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+ /* Encoding B_Direct_16x16 */
+ if (mb_type == BDIRECT)
+ {
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+
+ /* Encoding mb_skip */
+ ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
+
+ /* Encoding mb_type as B_Direct_16x16 */
+ {
+
+ mb_info_ctxt_t *ps_left_ctxt =
+ ps_cabac_ctxt->ps_left_ctxt_mb_info;
+ mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ UWORD32 u4_ctx_inc = 0;
+
+ /* ctx inc: one for each of left and top that is not the default */
+ /* context and whose masked mb type is not CAB_BD16x16 */
+ if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+ if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+ /* Encode the bin */
+ ih264e_cabac_encode_bin(
+ ps_cabac_ctxt,
+ 0,
+ ps_cabac_ctxt->au1_cabac_ctxt_table
+ + MB_TYPE_B_SLICE + u4_ctx_inc);
+
+ }
+ ps_curr_ctxt->u1_mb_type = CAB_BD16x16;
+ /* no mvd is coded for this MB type: reset the mvd contexts */
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+
+ /* Encode CBP */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+
+ if (cbp)
+ {
+ /* encode mb_qp_delta */
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+ }
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+ /* The residue, if any, is written after this if / else chain */
+
+ }
+
+ else if (mb_type == BSKIP)/* MB = BSKIP */
+ {
+ /* NOTE(review): unlike the PSKIP path of ih264e_write_pslice_mb_cabac, */
+ /* pi4_mb_skip_run is not incremented here - confirm this is intended */
+ ih264e_cabac_enc_mb_skip(1, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
+
+ ps_curr_ctxt->u1_mb_type = CAB_B_SKIP;
+
+ memset(ps_curr_ctxt->u1_mv, 0, 16);
+ memset(ps_cabac_ctxt->pu1_left_mv_ctxt_inc, 0, 16);
+ cbp = 0; /* forces the no-residue branch below */
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* No residue follows a skipped MB, so the start offset is not updated */
+
+ }
+
+ else /* mb type is B_L0_16x16, B_L1_16x16 or B_Bi_16x16 */
+ {
+ WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4);
+ /* index into u4_b_mb_type / ui_b_mb_type_ctx_inc */
+ UWORD32 u4_mb_type = mb_type - B16x16 + B_L0_16x16
+ + i4_mb_part_pred_mode;
+ cbp = *pu1_byte++;
+ byte_count++;
+ mb_qp_delta = *pu1_byte++;
+ byte_count++;
+
+ /* Encoding mb_skip */
+ ih264e_cabac_enc_mb_skip(0, ps_cabac_ctxt, MB_SKIP_FLAG_B_SLICE);
+
+ /* Encoding mb_type as B16x16 */
+ {
+ mb_info_ctxt_t *ps_left_ctxt =
+ ps_cabac_ctxt->ps_left_ctxt_mb_info;
+ mb_info_ctxt_t *ps_top_ctxt = ps_cabac_ctxt->ps_top_ctxt_mb_info;
+ UWORD32 u4_ctx_inc = 0;
+
+ UWORD32 u4_mb_type_bins = u4_b_mb_type[u4_mb_type];
+ UWORD32 u4_bin_len = (u4_mb_type_bins >> 8) & 0x0F; /* length field of the table entry */
+ u4_mb_type_bins = u4_mb_type_bins & 0xFF; /* binarised value field of the table entry */
+
+ /* first bin ctx inc: one for each of left and top that is not the */
+ /* default context and whose masked mb type is not CAB_BD16x16 */
+ if (ps_left_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+ if (ps_top_ctxt != ps_cabac_ctxt->ps_def_ctxt_mb_info)
+ u4_ctx_inc += ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK)
+ != CAB_BD16x16) ? 1 : 0;
+
+ u4_ctx_inc = u4_ctx_inc | ui_b_mb_type_ctx_inc[u4_mb_type];
+
+ ih264e_encode_decision_bins(u4_mb_type_bins,
+ u4_bin_len,
+ u4_ctx_inc,
+ u4_bin_len,
+ &(ps_cabac_ctxt->au1_cabac_ctxt_table[MB_TYPE_B_SLICE]),
+ ps_cabac_ctxt);
+ }
+
+ ps_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
+ {
+ /* NOTE(review): pu1_byte is 3 bytes past the start of this MB's */
+ /* header here; confirm the packed header keeps this WORD16 */
+ /* access suitably aligned */
+ WORD16 *pi2_mv_ptr = (WORD16 *) pu1_byte;
+ /* 4 bytes of mv data, 8 bytes when the pred mode is PRED_BI */
+
+ byte_count += 4 * (1 + (i4_mb_part_pred_mode == PRED_BI));
+
+ ps_curr_ctxt->u1_mb_type = (ps_curr_ctxt->u1_mb_type
+ | CAB_NON_BD16x16);
+ /* Encoding motion vector for B16x16 */
+ ih264e_cabac_enc_mvds_b16x16(ps_cabac_ctxt, pi2_mv_ptr,
+ i4_mb_part_pred_mode);
+ }
+ /* Encode CBP */
+ ih264e_cabac_enc_cbp(cbp, ps_cabac_ctxt);
+
+ if (cbp)
+ {
+ /* encode mb_qp_delta */
+ ih264e_cabac_enc_mb_qp_delta(mb_qp_delta, ps_cabac_ctxt);
+ }
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_header_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+ }
+
+ if (cbp > 0)
+ {
+ /* Encode residue */
+ ih264e_cabac_encode_residue(ps_ent_ctxt, cbp, LUMA_4X4_CTXCAT);
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[1] += bitstream_end_offset
+ - bitstream_start_offset;
+
+ /* bit 0 of the dc csbp is cleared in both the left and the current context */
+ ps_cabac_ctxt->pu1_left_yuv_dc_csbp[0] &= 0x6;
+ ps_curr_ctxt->u1_yuv_dc_csbp &= 0x6;
+ }
+ else
+ {
+ /* no residue is written: clear every coded sub block pattern */
+ ps_curr_ctxt->u1_yuv_ac_csbp = 0;
+ ps_curr_ctxt->u1_yuv_dc_csbp = 0;
+ *(ps_cabac_ctxt->pu1_left_uv_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_y_ac_csbp) = 0;
+ *(ps_cabac_ctxt->pu1_left_yuv_dc_csbp) = 0;
+ }
+ ps_curr_ctxt->u1_intrapred_chroma_mode = 0;
+ ps_curr_ctxt->u1_cbp = cbp;
+ ps_ent_ctxt->pv_mb_header_data = ((WORD8 *)ps_ent_ctxt->pv_mb_header_data) + byte_count;
+ return IH264E_SUCCESS;
+ }
+}
diff --git a/encoder/ih264e_cabac_init.c b/encoder/ih264e_cabac_init.c
new file mode 100644
index 0000000..347842c
--- /dev/null
+++ b/encoder/ih264e_cabac_init.c
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_cabac_init.c
+*
+* @brief
+* Contains all initialization functions for cabac contexts
+*
+* @author
+* Doney Alex
+*
+* @par List of Functions:
+* - ih264e_init_cabac_enc_envirnoment()
+* - ih264e_init_cabac_table()
+* - ih264e_init_cabac_ctxt()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
+#include "ime_structs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_platform_macros.h"
+#include "ih264_macros.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_common_tables.h"
+#include "ih264_cabac_tables.h"
+#include "ih264_list.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
+#include "ih264e_process.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_globals.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_deblk.h"
+#include "ih264e_me.h"
+#include "ih264e_debug.h"
+#include "ih264e_master.h"
+#include "ih264e_utils.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_platform_macros.h"
+#include "ime_statistics.h"
+
+
+
+/*****************************************************************************/
+/* Function definitions . */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Put the cabac encoding environment into its initial state
+ *
+ * @param[in] ps_cab_enc_env
+ *  Pointer to the encoding_envirnoment_t structure to reset. The interval
+ *  low, the outstanding byte count and the generated bit count are cleared
+ *  and the interval range is set to 0x1fe.
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Called from ih264e_init_cabac_ctxt()
+ *
+ *******************************************************************************
+*/
+static void ih264e_init_cabac_enc_envirnoment(encoding_envirnoment_t *ps_cab_enc_env)
+{
+    /* nothing generated / pending yet */
+    ps_cab_enc_env->u4_bits_gen = 0;
+    ps_cab_enc_env->u4_out_standing_bytes = 0;
+
+    /* coding interval: range R = 0x1fe, low L = 0 */
+    ps_cab_enc_env->u4_code_int_range = 0x1fe;
+    ps_cab_enc_env->u4_code_int_low = 0;
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize default context values and pointers (Called once at the
+ *  beginning of encoding).
+ *
+ * @description
+ *  Sets ps_mb_map_ctxt_inc one entry past ps_mb_map_ctxt_inc_base, points
+ *  ps_lft_csbp at the embedded s_lft_csbp, copies the bitstream pointer from
+ *  the entropy context, and fills entry 0 of the mb info map with the default
+ *  values used when a neighbouring MB is not available.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure. Its ps_cabac and ps_bitstrm members
+ *  are read; ps_cabac->ps_mb_map_ctxt_inc_base must already point to the map.
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  u1_yuv_ac_csbp and u1_yuv_dc_csbp of the default entry are not written here
+ *
+ *******************************************************************************
+*/
+void ih264e_init_cabac_table(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* CABAC context */
+    cabac_ctxt_t *ps_cabac_ctxt = ps_ent_ctxt->ps_cabac;
+    ps_cabac_ctxt->ps_mb_map_ctxt_inc = ps_cabac_ctxt->ps_mb_map_ctxt_inc_base + 1;
+    ps_cabac_ctxt->ps_lft_csbp = &ps_cabac_ctxt->s_lft_csbp;
+    ps_cabac_ctxt->ps_bitstrm = ps_ent_ctxt->ps_bitstrm;
+
+    {
+        /* 0th entry of mb_map_ctxt_inc will always be containing default values */
+        /* for CABAC context representing MB not available */
+        mb_info_ctxt_t *ps_def_ctxt = ps_cabac_ctxt->ps_mb_map_ctxt_inc - 1;
+
+        ps_def_ctxt->u1_mb_type = CAB_SKIP;
+        ps_def_ctxt->u1_cbp = 0x0f;
+        ps_def_ctxt->u1_intrapred_chroma_mode = 0;
+
+        /* mb_info_ctxt_t holds only byte sized members, so i1_ref_idx and    */
+        /* u1_mv are not guaranteed to be 32 bit aligned. Clear them bytewise */
+        /* instead of storing through a casted UWORD32 pointer (misaligned    */
+        /* access / aliasing violation). Same 4 + 16 bytes are zeroed.        */
+        memset(ps_def_ctxt->i1_ref_idx, 0, sizeof(ps_def_ctxt->i1_ref_idx));
+        memset(ps_def_ctxt->u1_mv, 0, sizeof(ps_def_ctxt->u1_mv));
+
+        ps_cabac_ctxt->ps_def_ctxt_mb_info = ps_def_ctxt;
+    }
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Initialize cabac context: load every context model with the init values
+ *  from gau1_ih264_cabac_ctxt_init_table and reset the encoding environment.
+ *  Called at the beginning of entropy coding of each slice for CABAC encoding.
+ *
+ * @description
+ *  The init table is indexed by [cabac init idc][slice qp]. For I slices the
+ *  index 3 is used in place of the slice header's i1_cabac_init_idc.
+ *
+ * @param[in] ps_ent_ctxt
+ *  Pointer to entropy context structure; ps_cabac and ps_slice_hdr_base are
+ *  read from it
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void ih264e_init_cabac_ctxt(entropy_ctxt_t *ps_ent_ctxt)
+{
+    cabac_ctxt_t *ps_cabac = ps_ent_ctxt->ps_cabac;
+    slice_header_t *ps_hdr = ps_ent_ctxt->ps_slice_hdr_base;
+
+    /* table selectors taken from the slice header */
+    const UWORD8 u1_type = ps_hdr->u1_slice_type;
+    UWORD8 u1_qp = ps_hdr->i1_slice_qp;
+    WORD8 i1_init_idx = (ISLICE == u1_type) ? 3 : ps_hdr->i1_cabac_init_idc;
+
+    /* arithmetic coder state and qp delta history start afresh */
+    ps_cabac->i1_prevps_mb_qp_delta_ctxt = 0;
+    ih264e_init_cabac_enc_envirnoment(&ps_cabac->s_cab_enc_env);
+
+    /* load all context models for this (init idc, qp) pair */
+    memcpy(ps_cabac->au1_cabac_ctxt_table,
+           gau1_ih264_cabac_ctxt_init_table[i1_init_idx][u1_qp],
+           sizeof(ps_cabac->au1_cabac_ctxt_table));
+}
diff --git a/encoder/ih264e_cabac_structs.h b/encoder/ih264e_cabac_structs.h
new file mode 100644
index 0000000..82938ca
--- /dev/null
+++ b/encoder/ih264e_cabac_structs.h
@@ -0,0 +1,221 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_cabac_structs.h
+ *
+ * @brief
+ * This file contains cabac related structure definitions.
+ *
+ * @author
+ * Doney Alex
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_CABAC_STRUCTS_H_
+#define IH264E_CABAC_STRUCTS_H_
+
+
+
+#define CABAC_INIT_IDC 2 /* NOTE(review): presumably the cabac_init_idc value the encoder signals - confirm against slice header setup */
+
+
+/**
+ ******************************************************************************
+ * @brief typedef for context model
+ *
+ * One byte per CABAC context; cabac_ctxt_t keeps NUM_CABAC_CTXTS of them in
+ * au1_cabac_ctxt_table.
+ ******************************************************************************
+ */
+
+/* bits 0 to 5 : state
+ bit 6 : mps
+ NOTE(review): bit 7 is not described - presumably unused, confirm */
+typedef UWORD8 bin_ctxt_model;
+
+/**
+ ******************************************************************************
+ * @brief MB info for cabac
+ *
+ * Element type of the mb info map in cabac_ctxt_t (ps_mb_map_ctxt_inc_base /
+ * ps_mb_map_ctxt_inc) and of its current / left / top / default MB pointers.
+ *
+ * Every member is a UWORD8 / WORD8, so (assuming these are 1 byte types) the
+ * arrays i1_ref_idx and u1_mv start at odd byte offsets and must not be
+ * accessed through wider pointer types.
+ ******************************************************************************
+ */
+typedef struct
+{
+ /* Neighbour availability variables needed to get CtxtInc, for CABAC */
+ UWORD8 u1_mb_type; /* !< macroblock type: I/P/B/SI/SP */
+
+ UWORD8 u1_cbp; /* !< Coded Block Pattern */
+ UWORD8 u1_intrapred_chroma_mode; /* !< intra chroma prediction mode */
+
+ /*************************************************************************/
+ /* Arrangement of AC CSBP */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: V1 V0 U1 U0 Y3 Y2 Y1 Y0 */
+ /*************************************************************************/
+ UWORD8 u1_yuv_ac_csbp;
+ /*************************************************************************/
+ /* Arrangement of DC CSBP */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: x x x x x Vdc Udc Ydc */
+ /*************************************************************************/
+ UWORD8 u1_yuv_dc_csbp;
+
+ WORD8 i1_ref_idx[4]; /* !< reference index info, 4 entries; NOTE(review): index meaning not visible here - confirm */
+ UWORD8 u1_mv[4][4]; /* !< motion vector info, 4x4 bytes; NOTE(review): index meaning not visible here - confirm */
+} mb_info_ctxt_t;
+
+
+/**
+ ******************************************************************************
+ * @brief CSBP info for CABAC
+ *
+ * Holds a top MB and a bottom MB copy of each left-MB csbp byte. The members
+ * are plain values; the "left" pointers that select between them are not part
+ * of this structure.
+ ******************************************************************************
+ */
+typedef struct
+{
+ /*************************************************************************/
+ /* Arrangement of Luma AC CSBP for leftMb */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: X X X X Y3 Y2 Y1 Y0 */
+ /*************************************************************************/
+ /*************************************************************************/
+ /* Storage for the top MB and bottom MB luma AC csbp. */
+ /* NOTE(review): presumably cabac_ctxt_t::pu1_left_y_ac_csbp points to */
+ /* either u1_y_ac_csbp_top_mb or u1_y_ac_csbp_bot_mb - confirm */
+ /*************************************************************************/
+ UWORD8 u1_y_ac_csbp_top_mb;
+ UWORD8 u1_y_ac_csbp_bot_mb;
+
+ /*************************************************************************/
+ /* Arrangement of Chroma AC CSBP for leftMb */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: X X X X V1 V0 U1 U0 */
+ /*************************************************************************/
+ /*************************************************************************/
+ /* Storage for the top MB and bottom MB chroma AC csbp. */
+ /* NOTE(review): presumably cabac_ctxt_t::pu1_left_uv_ac_csbp points to */
+ /* either u1_uv_ac_csbp_top_mb or u1_uv_ac_csbp_bot_mb - confirm */
+ /*************************************************************************/
+ UWORD8 u1_uv_ac_csbp_top_mb;
+ UWORD8 u1_uv_ac_csbp_bot_mb;
+
+ /*************************************************************************/
+ /* Arrangement of DC CSBP */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: x x x x x Vdc Udc Ydc */
+ /*************************************************************************/
+ /*************************************************************************/
+ /* Storage for the top MB and bottom MB DC csbp. */
+ /* NOTE(review): presumably cabac_ctxt_t::pu1_left_yuv_dc_csbp points to */
+ /* either u1_yuv_dc_csbp_top_mb or u1_yuv_dc_csbp_bot_mb - confirm */
+ /*************************************************************************/
+ UWORD8 u1_yuv_dc_csbp_top_mb;
+ UWORD8 u1_yuv_dc_csbp_bot_mb;
+} cab_csbp_t;
+
+/**
+ ******************************************************************************
+ * @brief CABAC Encoding Environment
+ *
+ * State of the arithmetic coder. ih264e_init_cabac_enc_envirnoment() sets
+ * u4_code_int_range to 0x1fe and every other member to 0.
+ ******************************************************************************
+ */
+
+typedef struct
+{
+ /** cabac interval start L */
+ UWORD32 u4_code_int_low;
+
+ /** cabac interval range R */
+ UWORD32 u4_code_int_range;
+
+ /** bytes outstanding; number of 0xFF bytes that occur during renorm
+ * (NOTE(review): the comment previously said "0xFF bits"; the member name
+ * suggests bytes - confirm).
+ * These will be accumulated till the carry bit is known
+ */
+ UWORD32 u4_out_standing_bytes;
+
+ /** bits generated during renormalization
+ * A byte is put to stream/u4_out_standing_bytes from u4_code_int_low (L)
+ * when u4_bits_gen exceeds 8
+ */
+ UWORD32 u4_bits_gen;
+} encoding_envirnoment_t;
+
+
+/**
+ ******************************************************************************
+ * @brief CABAC Context structure : Variables to handle Cabac
+ *
+ * ih264e_init_cabac_table() sets up ps_mb_map_ctxt_inc, ps_lft_csbp,
+ * ps_bitstrm and ps_def_ctxt_mb_info; ih264e_init_cabac_ctxt() reloads
+ * au1_cabac_ctxt_table, s_cab_enc_env and i1_prevps_mb_qp_delta_ctxt.
+ ******************************************************************************
+ */
+typedef struct
+{
+
+ /* Table of all the cabac context models (an embedded array of
+ * NUM_CABAC_CTXTS entries, not a pointer) */
+ bin_ctxt_model au1_cabac_ctxt_table[NUM_CABAC_CTXTS];
+
+
+ cab_csbp_t s_lft_csbp; /* left csbp storage; ps_lft_csbp is pointed at it by ih264e_init_cabac_table() */
+
+ /**
+ * pointer to Bitstream structure
+ */
+ bitstrm_t *ps_bitstrm;
+
+ /* Pointer to the base of the mb_info_ctxt_t map; entry 0 holds the
+ * defaults for an MB that is not available */
+ mb_info_ctxt_t *ps_mb_map_ctxt_inc_base;
+
+ /* CABAC encoding environment (embedded structure, not a pointer) */
+ encoding_envirnoment_t s_cab_enc_env;
+
+ /* These things need to be updated at each MbLevel */
+
+ /* Prev ps_mb_qp_delta_ctxt; reset to 0 by ih264e_init_cabac_ctxt() */
+ WORD8 i1_prevps_mb_qp_delta_ctxt;
+
+ /* Pointer to mb_info_ctxt_t map; set to ps_mb_map_ctxt_inc_base + 1 by
+ * ih264e_init_cabac_table() */
+ mb_info_ctxt_t *ps_mb_map_ctxt_inc;
+
+ /* Pointer to default mb_info_ctxt_t (entry 0 of the map) */
+ mb_info_ctxt_t *ps_def_ctxt_mb_info;
+
+ /* Pointer to current mb_info_ctxt_t */
+ mb_info_ctxt_t *ps_curr_ctxt_mb_info;
+
+ /* Pointer to left mb_info_ctxt_t */
+ mb_info_ctxt_t *ps_left_ctxt_mb_info;
+
+ /* Pointer to top mb_info_ctxt_t */
+ mb_info_ctxt_t *ps_top_ctxt_mb_info;
+
+ /* Pointer to left csbp structure, followed by pointers to the left MB
+ * csbp bytes. NOTE(review): the three byte pointers presumably point into
+ * *ps_lft_csbp - confirm */
+ cab_csbp_t *ps_lft_csbp;
+ UWORD8 *pu1_left_y_ac_csbp;
+ UWORD8 *pu1_left_uv_ac_csbp;
+ UWORD8 *pu1_left_yuv_dc_csbp;
+
+ /***************************************************************************/
+ /* Ref_idx contexts are stored in the following way */
+ /* Array Idx 0,1 for reference indices in Forward direction */
+ /* Array Idx 2,3 for reference indices in backward direction */
+ /***************************************************************************/
+ /* Dimensions for i1_left_ref_idx_ctx_inc_arr is [2][4] for Mbaff:Top and Bot */
+ WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
+ WORD8 *pi1_left_ref_idx_ctxt_inc;
+
+ /* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
+ UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
+ UWORD8 (*pu1_left_mv_ctxt_inc)[4];
+
+} cabac_ctxt_t;
+
+#endif /* IH264E_CABAC_STRUCTS_H_ */
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
index 1f98b6a..5d819d9 100644
--- a/encoder/ih264e_cavlc.c
+++ b/encoder/ih264e_cavlc.c
@@ -35,8 +35,8 @@
* - ih264e_write_coeff4x4_cavlc()
* - ih264e_write_coeff8x8_cavlc()
* - ih264e_encode_residue()
-* - ih264e_write_islice_mb()
-* - ih264e_write_pslice_mb()
+* - ih264e_write_islice_mb_cavlc()
+* - ih264e_write_pslice_mb_cavlc()
*
* @remarks
* None
@@ -65,8 +65,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -75,9 +75,11 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_encode_header.h"
#include "ih264_cavlc_tables.h"
@@ -712,8 +714,8 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
/* temp var */
UWORD32 u4_nC, u4_ngbr_avlb;
UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz;
- UWORD16 au2_sig_coeff_map[4];
- WORD16 *pi2_res_block[4];
+ UWORD16 au2_sig_coeff_map[4] = {0};
+ WORD16 *pi2_res_block[4] = {NULL};
UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
tu_sblk_coeff_data_t *ps_mb_coeff_data;
ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4;
@@ -925,7 +927,6 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
return error_status;
}
-#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + 32 - ps_bitstream->i4_bits_left_in_cw)
/**
*******************************************************************************
@@ -948,7 +949,7 @@ static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
*
*******************************************************************************
*/
-IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt)
+IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
{
/* error status */
IH264E_ERROR_T error_status = IH264E_SUCCESS;
@@ -1170,7 +1171,7 @@ IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt)
*
*******************************************************************************
*/
-IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
+IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
{
/* error status */
IH264E_ERROR_T error_status = IH264E_SUCCESS;
@@ -1406,7 +1407,6 @@ IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
for (i = 0; i < (WORD32)u4_part_cnt; i++)
{
PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x");
-
PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y");
}
@@ -1425,6 +1425,323 @@ IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
}
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+ /* start bitstream offset for residue in bits */
+ bitstream_start_offset = bitstream_end_offset;
+
+ /* residual */
+ error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+ /* store the index of the next mb syntax layer */
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+ return error_status;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for B slices
+*
+* @description
+* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+ /* packed header data */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+ /* mb header info */
+ /*
+ * mb_tpm : mb type plus mode
+ * mb_type : luma mb type and chroma mb type are packed
+ * cbp : coded block pattern
+ * mb_qp_delta : mb qp delta
+ * chroma_intra_mode : chroma intra mode
+ * luma_intra_mode : luma intra mode
+ * ps_pu : Pointer to the array of structures having motion vectors, size
+ * and position of sub partitions
+ */
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+
+ /* temp var */
+ WORD32 i, mb_type_stream, cbptable = 1;
+
+ WORD32 is_inter = 0;
+
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+ /********************************************************************/
+ /* BEGIN HEADER GENERATION */
+ /********************************************************************/
+
+ mb_tpm = *pu1_byte++;
+
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+
+ /* check for skip */
+ if (mb_type == BSKIP)
+ {
+ UWORD32 *nnz;
+
+ is_inter = 1;
+
+ /* increment skip counter */
+ (*ps_ent_ctxt->pi4_mb_skip_run)++;
+
+ /* store the index of the next mb syntax layer */
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+ /* set nnz to zero */
+ ps_ent_ctxt->u4_left_nnz_luma = 0;
+ nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+ *nnz = 0;
+ ps_ent_ctxt->u4_left_nnz_cbcr = 0;
+ nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+ *nnz = 0;
+
+ /* residual */
+ error_status = ih264e_encode_residue(ps_ent_ctxt, B16x16, 0);
+
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset
+ - bitstream_start_offset;
+
+ return error_status;
+ }
+
+
+ /* remaining mb header info */
+ cbp = *pu1_byte++;
+ mb_qp_delta = *pu1_byte++;
+
+ /* mb skip run */
+ PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
+
+ /* reset skip counter */
+ *ps_ent_ctxt->pi4_mb_skip_run = 0;
+
+ /* is intra ? */
+ if (mb_type == I16x16)
+ {
+ UWORD32 u4_cbp_l, u4_cbp_c;
+
+ is_inter = 0;
+
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ luma_intra_mode = (mb_tpm >> 4) & 3;
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+ mb_type_stream += 23;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I4x4)
+ {
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ is_inter = 0;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+ cbptable = 0;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type");
+
+ for (i = 0; i < 16; i += 2)
+ {
+ /* sub blk idx 1 */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* sub blk idx 2 */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I8x8)
+ {
+ /* transform 8x8 flag */
+ UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ is_inter = 0;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+ cbptable = 0;
+
+ ASSERT(0);
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, 23, error_status, "mb type");
+
+ /* u4_transform_size_8x8_flag */
+ PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+ /* write sub block modes */
+ for (i = 0; i < 4; i++)
+ {
+ /* sub blk idx 1 */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* sub blk idx 2 */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if(mb_type == BDIRECT)
+ {
+ is_inter = 1;
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, B_DIRECT_16x16, error_status, "mb type");
+ }
+ else /* if mb_type == B16x16 */
+ {
+ /* inter macro block partition cnt for 16x16 16x8 8x16 8x8 */
+ const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
+
+ /* mv ptr */
+ WORD16 *pi2_mvd_ptr = (WORD16 *)pu1_byte;
+
+ /* number of partitions for the current mb */
+ UWORD32 u4_part_cnt = au1_part_cnt[mb_type - B16x16];
+
+ /* Get the pred modes */
+ WORD32 i4_mb_part_pred_mode = (mb_tpm >> 4);
+
+ is_inter = 1;
+
+ mb_type_stream = mb_type - B16x16 + B_L0_16x16 + i4_mb_part_pred_mode;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+ for (i = 0; i < (WORD32)u4_part_cnt; i++)
+ {
+ if (i4_mb_part_pred_mode != PRED_L1)/* || PRED_BI */
+ {
+ PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l0 x");
+ PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l0 y");
+ }
+ if (i4_mb_part_pred_mode != PRED_L0)/* || PRED_BI */
+ {
+ PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l1 x");
+ PUT_BITS_SEV(ps_bitstream, *pi2_mvd_ptr++, error_status, "mv l1 y");
+ }
+ }
+
+ pu1_byte = (UWORD8 *)pi2_mvd_ptr;
+ }
+
+ /* coded_block_pattern */
+ if (mb_type != I16x16)
+ {
+ PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern");
+ }
+
+ if (cbp || mb_type == I16x16)
+ {
+ /* mb_qp_delta */
+ PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+ }
/* Ending bitstream offset for header in bits */
bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h
index acd0def..8da2cea 100644
--- a/encoder/ih264e_cavlc.h
+++ b/encoder/ih264e_cavlc.h
@@ -42,23 +42,6 @@
/* Function macro definitions */
/*****************************************************************************/
-#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \
-{\
- ps_mb_coeff_data = pv_mb_coeff_data; \
- u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; \
- if (u4_nnz)\
- {\
- u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; \
- pi2_res_block = ps_mb_coeff_data->ai2_residue; \
- pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz); \
- }\
- else\
- {\
- pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue;\
- }\
-}
-
-
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
@@ -84,7 +67,7 @@
*
*******************************************************************************
*/
-IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt);
+IH264E_ERROR_T ih264e_write_islice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt);
/**
*******************************************************************************
@@ -107,6 +90,29 @@ IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt);
*
*******************************************************************************
*/
-IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt);
+IH264E_ERROR_T ih264e_write_pslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for Inter(B) slices
+*
+* @description
+* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_bslice_mb_cavlc(entropy_ctxt_t *ps_ent_ctxt);
#endif /* IH264E_CAVLC_H_ */
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c
index 89243a5..76266d7 100644
--- a/encoder/ih264e_core_coding.c
+++ b/encoder/ih264e_core_coding.c
@@ -65,6 +65,7 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -73,9 +74,11 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_globals.h"
#include "ih264e_core_coding.h"
@@ -1249,7 +1252,7 @@ void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
* For that there are two paths we need to look for
* One is the path to bitstream , these variables should have the proper input
* configured UV or VU
- * For the other path the inverse transform variables should have ehat ever 0ordering the
+ * For the other path the inverse transform variables should have what ever ordering the
* input had
*/
@@ -2019,7 +2022,7 @@ UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
/* strides */
- WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
WORD32 i4_res_strd = ps_proc->i4_res_strd;
@@ -2281,7 +2284,7 @@ UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
/* strides */
- WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
WORD32 i4_res_strd = ps_proc->i4_res_strd;
diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c
index 8a11bdb..db176ac 100644
--- a/encoder/ih264e_deblk.c
+++ b/encoder/ih264e_deblk.c
@@ -63,6 +63,7 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -71,12 +72,13 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264_trans_data.h"
-#include "ih264_deblk_edge_filters.h"
#include "ih264_deblk_tables.h"
#include "ih264e_deblk.h"
@@ -150,20 +152,15 @@ static const UWORD16 ih264e_gu2_4x4_v2h_reorder[16] =
* @param[in] u4_left_mb_csbp
* coded sub block pattern of top mb
*
-* @param[in] ps_leftMvPred
-* MV of left mb
-*
-* @param[in] ps_topMvPred
-* MV of top mb
+* @param[in] ps_left_pu
+* PU for left MB
*
-* @param[in] ps_curMvPred
-* MV of curr mb
+* @param[in] ps_top_pu
+* PU for top MB
*
-* @param[in] u1_left_intra
-* is left intra
+* @param[in] ps_curr_pu
+* PU for current MB
*
-* @param[in] u1_top_intra
-* is top intra
*
* @returns none
*
@@ -176,64 +173,65 @@ static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs,
UWORD32 u4_left_mb_csbp,
UWORD32 u4_top_mb_csbp,
UWORD32 u4_cur_mb_csbp,
- mv_t *ps_leftMvPred,
- mv_t *ps_topMvPred,
- mv_t *ps_curMvPred,
- UWORD8 u1_left_intra,
- UWORD8 u1_top_intra)
+ enc_pu_t *ps_left_pu,
+ enc_pu_t *ps_top_pu,
+ enc_pu_t *ps_curr_pu)
{
/* motion vectors of blks p & q */
- WORD16 i16_qMv0, i16_qMv1, i16_pMv0, i16_pMv1;
+ WORD16 i16_qMvl0_x, i16_qMvl0_y, i16_pMvl0_x, i16_pMvl0_y;
+ WORD16 i16_qMvl1_x, i16_qMvl1_y, i16_pMvl1_x, i16_pMvl1_y;
/* temp var */
- UWORD32 u4_lft_flag, u4_top_flag;
- const UWORD32 *bs_map;
- UWORD32 u4_reordered_vert_bs_enc, u4_temp;
+ UWORD32 u4_left_flag, u4_top_flag;
+ const UWORD32 *bs_map;
+ UWORD32 u4_reordered_vert_bs_enc, u4_temp;
/* Coded Pattern for Horizontal Edge */
/*-----------------------------------------------------------------------*/
/*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */
/*-----------------------------------------------------------------------*/
- UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
- UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp;
+ UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
+ UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp;
/* Coded Pattern for Vertical Edge */
/*-----------------------------------------------------------------------*/
/*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */
/*-----------------------------------------------------------------------*/
- UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
+ UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
/*-----------------------------------------------------------------------*/
/*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */
/*-----------------------------------------------------------------------*/
- UWORD32 u4_cur_mb_masked_csbp =(u4_cur_mb_csbp<<1)&(~CSBP_LEFT_BLOCK_MASK);
+ UWORD32 u4_cur_mb_masked_csbp = (u4_cur_mb_csbp << 1)
+ & (~CSBP_LEFT_BLOCK_MASK);
/*-----------------------------------------------------------------------*/
/*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */
/*-----------------------------------------------------------------------*/
- UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3);
- UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp;
+ UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp)
+ | (u4_left_mb_masked_csbp >> 3);
+ UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp;
/* BS Calculation for MB Boundary Edges */
/* BS calculation for 1 2 3 horizontal boundary */
- bs_map = gu4_bs_table[0];
+ bs_map = gu4_bs_table[0];
pu4_horz_bs[1] = bs_map[(u4_horz_bs_enc >> 4) & 0xF];
pu4_horz_bs[2] = bs_map[(u4_horz_bs_enc >> 8) & 0xF];
pu4_horz_bs[3] = bs_map[(u4_horz_bs_enc >> 12) & 0xF];
/* BS calculation for 5 6 7 vertical boundary */
/* Do 4x4 tranpose of u4_vert_bs_enc by using look up table for reorder */
- u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF];
+ u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF];
- u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF];
- u4_reordered_vert_bs_enc |= (u4_temp << 1);
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 1);
- u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF];
- u4_reordered_vert_bs_enc |= (u4_temp << 2);
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 2);
- u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF];
- u4_reordered_vert_bs_enc |= (u4_temp << 3);
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 3);
pu4_vert_bs[1] = bs_map[(u4_reordered_vert_bs_enc >> 4) & 0xF];
pu4_vert_bs[2] = bs_map[(u4_reordered_vert_bs_enc >> 8) & 0xF];
@@ -241,39 +239,96 @@ static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs,
/* BS Calculation for MB Boundary Edges */
- i16_qMv0 = ps_curMvPred->i2_mvx;
- i16_qMv1 = ps_curMvPred->i2_mvy;
-
- if (u1_top_intra)
+ if (ps_top_pu->b1_intra_flag)
{
pu4_horz_bs[0] = 0x04040404;
}
else
{
- i16_pMv0 = ps_topMvPred->i2_mvx;
- i16_pMv1 = ps_topMvPred->i2_mvy;
+ if (ps_curr_pu->b2_pred_mode != ps_top_pu->b2_pred_mode)
+ {
+ u4_top_flag = 1;
+ }
+ else if(ps_curr_pu->b2_pred_mode != 2)
+ {
+ i16_pMvl0_x = ps_top_pu->s_me_info[ps_top_pu->b2_pred_mode].s_mv.i2_mvx;
+ i16_pMvl0_y = ps_top_pu->s_me_info[ps_top_pu->b2_pred_mode].s_mv.i2_mvy;
+
+ i16_qMvl0_x = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvx;
+ i16_qMvl0_y = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvy;
- u4_top_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
- (ABS((i16_pMv1 - i16_qMv1)) >= 4);
- bs_map = gu4_bs_table[!!u4_top_flag];
+ u4_top_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4)
+ | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4);
+ }
+ else
+ {
+
+ i16_pMvl0_x = ps_top_pu->s_me_info[PRED_L0].s_mv.i2_mvx;
+ i16_pMvl0_y = ps_top_pu->s_me_info[PRED_L0].s_mv.i2_mvy;
+ i16_pMvl1_x = ps_top_pu->s_me_info[PRED_L1].s_mv.i2_mvx;
+ i16_pMvl1_y = ps_top_pu->s_me_info[PRED_L1].s_mv.i2_mvy;
+
+ i16_qMvl0_x = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvx;
+ i16_qMvl0_y = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvy;
+ i16_qMvl1_x = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvx;
+ i16_qMvl1_y = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvy;
+
+
+ u4_top_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4)
+ | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4)
+ | (ABS((i16_pMvl1_x - i16_qMvl1_x)) >= 4)
+ | (ABS((i16_pMvl1_y - i16_qMvl1_y)) >= 4);
+ }
+
+ bs_map = gu4_bs_table[!!u4_top_flag];
pu4_horz_bs[0] = bs_map[u4_horz_bs_enc & 0xF];
}
- if (u1_left_intra)
+
+ if (ps_left_pu->b1_intra_flag)
{
pu4_vert_bs[0] = 0x04040404;
}
else
{
- i16_pMv0 = ps_leftMvPred->i2_mvx;
- i16_pMv1 = ps_leftMvPred->i2_mvy;
+ if (ps_curr_pu->b2_pred_mode != ps_left_pu->b2_pred_mode)
+ {
+ u4_left_flag = 1;
+ }
+ else if(ps_curr_pu->b2_pred_mode != 2)/* Not bipred */
+ {
+ i16_pMvl0_x = ps_left_pu->s_me_info[ps_left_pu->b2_pred_mode].s_mv.i2_mvx;
+ i16_pMvl0_y = ps_left_pu->s_me_info[ps_left_pu->b2_pred_mode].s_mv.i2_mvy;
+
+ i16_qMvl0_x = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvx;
+ i16_qMvl0_y = ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv.i2_mvy;
- u4_lft_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
- (ABS((i16_pMv1 - i16_qMv1)) >= 4);
+ u4_left_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4)
+ | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4);
+ }
+ else
+ {
+
+ i16_pMvl0_x = ps_left_pu->s_me_info[PRED_L0].s_mv.i2_mvx;
+ i16_pMvl0_y = ps_left_pu->s_me_info[PRED_L0].s_mv.i2_mvy;
+ i16_pMvl1_x = ps_left_pu->s_me_info[PRED_L1].s_mv.i2_mvx;
+ i16_pMvl1_y = ps_left_pu->s_me_info[PRED_L1].s_mv.i2_mvy;
+
+ i16_qMvl0_x = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvx;
+ i16_qMvl0_y = ps_curr_pu->s_me_info[PRED_L0].s_mv.i2_mvy;
+ i16_qMvl1_x = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvx;
+ i16_qMvl1_y = ps_curr_pu->s_me_info[PRED_L1].s_mv.i2_mvy;
+
+
+ u4_left_flag = (ABS((i16_pMvl0_x - i16_qMvl0_x)) >= 4)
+ | (ABS((i16_pMvl0_y - i16_qMvl0_y)) >= 4)
+ | (ABS((i16_pMvl1_x - i16_qMvl1_x)) >= 4)
+ | (ABS((i16_pMvl1_y - i16_qMvl1_y)) >= 4);
+ }
- bs_map = gu4_bs_table[!!u4_lft_flag];
+ bs_map = gu4_bs_table[!!u4_left_flag];
pu4_vert_bs[0] = bs_map[u4_reordered_vert_bs_enc & 0xF];
}
}
@@ -331,8 +386,7 @@ static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc)
*
* @returns none
*
-* @remarks In this module it is assumed that their is only single reference
-* frame and is always the most recently used anchor frame
+* @remarks
*
*******************************************************************************
*/
@@ -394,14 +448,18 @@ void ih264e_compute_bs(process_ctxt_t * ps_proc)
if (i4_mb_x == 0)
{
ps_left_mb_syntax_ele->u4_csbp = 0;
- ps_left_mb_syntax_ele->u2_is_intra = 0;
- ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+ ps_proc->s_left_mb_pu.b1_intra_flag = 0;
+ ps_proc->s_left_mb_pu.b2_pred_mode = ps_proc->ps_pu->b2_pred_mode;
+ ps_proc->s_left_mb_pu.s_me_info[0].s_mv = ps_proc->ps_pu->s_me_info[0].s_mv;
+ ps_proc->s_left_mb_pu.s_me_info[1].s_mv = ps_proc->ps_pu->s_me_info[1].s_mv;
}
if (i4_mb_y == 0)
{
ps_top_mb_syntax_ele->u4_csbp = 0;
- ps_top_mb_syntax_ele->u2_is_intra = 0;
- ps_top_row_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+ ps_top_row_pu->b1_intra_flag = 0;
+ ps_top_row_pu->b2_pred_mode = ps_proc->ps_pu->b2_pred_mode;
+ ps_top_row_pu->s_me_info[0].s_mv = ps_proc->ps_pu->s_me_info[0].s_mv;
+ ps_top_row_pu->s_me_info[1].s_mv = ps_proc->ps_pu->s_me_info[1].s_mv;
}
ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_pic_horz_bs,
@@ -409,11 +467,9 @@ void ih264e_compute_bs(process_ctxt_t * ps_proc)
ps_left_mb_syntax_ele->u4_csbp,
ps_top_mb_syntax_ele->u4_csbp,
ps_proc->u4_csbp,
- &ps_proc->s_left_mb_pu.s_l0_mv,
- &ps_top_row_pu->s_l0_mv,
- &ps_proc->ps_pu->s_l0_mv,
- ps_left_mb_syntax_ele->u2_is_intra,
- ps_top_mb_syntax_ele->u2_is_intra);
+ &ps_proc->s_left_mb_pu,
+ ps_top_row_pu,
+ ps_proc->ps_pu);
}
return ;
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
index 76929ef..aee270e 100644
--- a/encoder/ih264e_defs.h
+++ b/encoder/ih264e_defs.h
@@ -38,6 +38,22 @@
#define IH264E_DEFS_H_
+#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \
+{ \
+ ps_mb_coeff_data = pv_mb_coeff_data; \
+ u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; \
+ if (u4_nnz) \
+ { \
+ u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; \
+ pi2_res_block = ps_mb_coeff_data->ai2_residue; \
+ pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz); \
+ } \
+ else \
+ { \
+ pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue; \
+ } \
+}
+
/*****************************************************************************/
/* Width and height restrictions */
/*****************************************************************************/
@@ -104,11 +120,31 @@
/* Number of frame restrictions */
/*****************************************************************************/
/**
+ * Maximum number of reference pictures
+ */
+#define MAX_REF_PIC_CNT 2
+
+/**
+ * Minimum number of reference pictures
+ */
+#define MIN_REF_PIC_CNT 1
+
+/**
+ * Maximum number of B pictures between two I/P pictures
+ */
+#define MAX_NUM_BFRAMES 10
+
+/**
* Maximum number of reference buffers in DPB manager
*/
#define MAX_REF_CNT 32
/*****************************************************************************/
+/* Minimum size of inter prediction unit supported by encoder */
+/*****************************************************************************/
+#define ENC_MIN_PU_SIZE 16
+
+/*****************************************************************************/
/* Num cores releated defs */
/*****************************************************************************/
/**
@@ -125,7 +161,7 @@
* Maximum process context sets
* Used to stagger encoding of MAX_CTXT_SETS in parallel
*/
-#define MAX_CTXT_SETS 2
+#define MAX_CTXT_SETS 1
/**
* Maximum number of contexts
* Kept as twice the number of threads, to make it easier to initialize the contexts
@@ -165,6 +201,7 @@
#define DEFAULT_RC IVE_RC_STORAGE
#define DEFAULT_MAX_FRAMERATE 120000
#define DEFAULT_MAX_BITRATE 20000000
+#define DEFAULT_MAX_NUM_BFRAMES 0
#define DEFAULT_MAX_SRCH_RANGE_X 256
#define DEFAULT_MAX_SRCH_RANGE_Y 256
#define DEFAULT_SLICE_PARAM 256
@@ -206,6 +243,7 @@
#define DEFAULT_ENC_SPEED_PRESET IVE_USER_DEFINED
#define DEFAULT_PRE_ENC_ME 0
#define DEFAULT_PRE_ENC_IPE 0
+#define DEFAULT_ENTROPY_CODING_MODE 0
/** Maximum number of entries in input buffer list */
#define MAX_INP_BUF_LIST_ENTRIES 32
@@ -217,7 +255,10 @@
#define MAX_REC_LIST_ENTRIES 16
/** Number of buffers created to hold half-pel planes for every reference buffer */
- #define HPEL_PLANES_CNT 1
+#define HPEL_PLANES_CNT 1
+
+/** Number of buffers Needed for SUBPEL and BIPRED computation */
+#define SUBPEL_BUFF_CNT 4
/**
*****************************************************************************
@@ -262,6 +303,16 @@ enum
MEM_REC_CODEC,
/**
+ * Cabac context
+ */
+ MEM_REC_CABAC,
+
+ /**
+ * Cabac context_mb_info
+ */
+ MEM_REC_CABAC_MB_INFO,
+
+ /**
* entropy context
*/
MEM_REC_ENTROPY,
@@ -483,8 +534,6 @@ enum
#define MIN_RAW_BUFS_RGBA8888_COMP 1
#define MIN_RAW_BUFS_420SP_COMP 2
-#define MAX_NMB 120
-
/** Maximum number of active config paramter sets */
#define MAX_ACTIVE_CONFIG_PARAMS 32
@@ -525,9 +574,9 @@ enum
/* [0 - 00 - 00110] */
#define NAL_SEI_FIRST_BYTE 0x06
-#define H264_ALLOC_INTER_FRM_INTV 1
+#define H264_ALLOC_INTER_FRM_INTV 2
-#define H264_MPEG_QP_MAP 191
+#define H264_MPEG_QP_MAP 255
#define MPEG2_QP_ELEM (H264_MPEG_QP_MAP + 1)
#define H264_QP_ELEM (MAX_H264_QP + 1)
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
index ffc6fb7..c027321 100644
--- a/encoder/ih264e_encode.c
+++ b/encoder/ih264e_encode.c
@@ -48,7 +48,7 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
-
+#include <limits.h>
/* User Include files */
#include "ih264e_config.h"
#include "ih264_typedefs.h"
@@ -63,26 +63,25 @@
#include "ih264_platform_macros.h"
#include "ih264_error.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
-#include "ih264_error.h"
-#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264_list.h"
#include "ih264e_error.h"
#include "ih264e_defs.h"
-#include "ih264_padding.h"
#include "ih264e_bitstream.h"
#include "irc_mem_req_and_acq.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
#include "ih264e_time_stamp.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_master.h"
#include "ih264e_process.h"
@@ -90,7 +89,6 @@
#include "ih264_dpb_mgr.h"
#include "ih264e_utils.h"
#include "ih264e_fmt_conv.h"
-#include "ih264e_config.h"
#include "ih264e_statistics.h"
#include "ih264e_trace.h"
#include "ih264e_debug.h"
@@ -217,7 +215,7 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
out_buf_t s_out_buf;
/* temp var */
- WORD32 ctxt_sel = 0, i;
+ WORD32 ctxt_sel = 0, i, i4_rc_pre_enc_skip;
/********************************************************************/
/* BEGIN INIT */
@@ -228,30 +226,27 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
ps_video_encode_op->s_ive_op.dump_recon = 0;
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
- /* copy input info. to internal structure */
- s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
- s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
- s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
- s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
- s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info;
- s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type;
- s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info;
- s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type;
+ /* Check for output memory allocation size */
+ if (ps_video_encode_ip->s_ive_ip.s_out_buf.u4_bufsize < MIN_STREAM_SIZE)
+ {
+ error_status |= IH264E_INSUFFICIENT_OUTPUT_BUFFER;
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_UNSUPPORTEDPARAM,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ }
/* copy output info. to internal structure */
s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf;
- s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+ s_out_buf.u4_is_last = 0;
s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
/* api call cnt */
ps_codec->i4_encode_api_call_cnt += 1;
- /* curr pic cnt */
- ps_codec->i4_pic_cnt += 1;
-
/* codec context selector */
- ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+ ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* reset status flags */
ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
@@ -274,8 +269,8 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
if (1 == ps_cfg->u4_is_valid)
{
- if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) &&
- (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) ||
+ if ( ((ps_cfg->u4_timestamp_high == ps_video_encode_ip->s_ive_ip.u4_timestamp_high) &&
+ (ps_cfg->u4_timestamp_low == ps_video_encode_ip->s_ive_ip.u4_timestamp_low)) ||
((WORD32)ps_cfg->u4_timestamp_high == -1) ||
((WORD32)ps_cfg->u4_timestamp_low == -1) )
{
@@ -309,6 +304,12 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
}
#endif /*LOGO_EN*/
+ /* In case of alt ref and B pics we will have non reference frame in stream */
+ if (ps_codec->s_cfg.u4_enable_alt_ref || ps_codec->s_cfg.u4_num_bframes)
+ {
+ ps_codec->i4_non_ref_frames_in_stream = 1;
+ }
+
if (ps_codec->i4_encode_api_call_cnt == 0)
{
/********************************************************************/
@@ -355,11 +356,9 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
/* api call cnt */
ps_codec->i4_encode_api_call_cnt --;
- /* curr pic cnt */
- ps_codec->i4_pic_cnt --;
-
/* header mode tag is not sticky */
ps_codec->i4_header_mode = 0;
+ ps_codec->i4_gen_header = 0;
/* send the input to app */
ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
@@ -381,8 +380,18 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
return IV_SUCCESS;
}
+ /* curr pic cnt */
+ ps_codec->i4_pic_cnt += 1;
+
+ i4_rc_pre_enc_skip = 0;
+ i4_rc_pre_enc_skip = ih264e_input_queue_update(
+ ps_codec, &ps_video_encode_ip->s_ive_ip, &s_inp_buf);
+
+ s_out_buf.u4_is_last = s_inp_buf.u4_is_last;
+ ps_video_encode_op->s_ive_op.u4_is_last = s_inp_buf.u4_is_last;
- if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL)
+ /* Only encode if the current frame is not pre-encode skip */
+ if (!i4_rc_pre_enc_skip && s_inp_buf.s_raw_buf.apv_bufs[0])
{
/* array giving pic cnt that is being processed in curr context set */
ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
@@ -394,172 +403,282 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
ps_video_encode_op->s_ive_op.u4_error_code,
IV_FAIL);
- if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
- {
- /* proc ctxt base idx */
- WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
- /* proc ctxt */
- process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
- WORD32 ret = 0;
+ WORD32 ret = 0;
- /* number of addl. threads to be created */
- WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
+ /* number of addl. threads to be created */
+ WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
- for (i = 0; i < num_thread_cnt; i++)
+ for (i = 0; i < num_thread_cnt; i++)
+ {
+ ret = ithread_create(ps_codec->apv_proc_thread_handle[i],
+ NULL,
+ (void *)ih264e_process_thread,
+ &ps_codec->as_process[i + 1]);
+ if (ret != 0)
{
- ret = ithread_create(ps_codec->apv_proc_thread_handle[i],
- NULL,
- (void*)ih264e_process_thread,
- &ps_codec->as_process[i + 1]);
- if (ret != 0)
- {
- printf("pthread Create Failed");
- assert(0);
- }
+ printf("pthread Create Failed");
+ assert(0);
+ }
- ps_codec->ai4_process_thread_created[i] = 1;
+ ps_codec->ai4_process_thread_created[i] = 1;
- ps_codec->i4_proc_thread_cnt++;
- }
+ ps_codec->i4_proc_thread_cnt++;
+ }
- /* launch job */
- ih264e_process_thread(ps_proc);
+ /* launch job */
+ ih264e_process_thread(ps_proc);
- /* Join threads at the end of encoding a frame */
- ih264e_join_threads(ps_codec);
+ /* Join threads at the end of encoding a frame */
+ ih264e_join_threads(ps_codec);
- ih264_list_reset(ps_codec->pv_proc_jobq);
+ ih264_list_reset(ps_codec->pv_proc_jobq);
- ih264_list_reset(ps_codec->pv_entropy_jobq);
- }
+ ih264_list_reset(ps_codec->pv_entropy_jobq);
}
- if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel])
- {
- /* proc ctxt base idx */
- WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
- /* proc ctxt */
- process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+ /****************************************************************************
+ * RECON
+ * Since we have forward dependent frames, we cannot return recon in encoding
+ * order. It must be in poc order, or input pic order. To achieve this we
+ * introduce a delay of 1 to the recon wrt encode. Now since we have that
+ * delay, at any point minimum of pic_cnt in our ref buffer will be the
+ * correct frame. For ex let our GOP be IBBP [1 2 3 4] . The encode order
+ * will be [1 4 2 3] .Now since we have a delay of 1, when we are done with
+ * encoding 4, the min in the list will be 1. After encoding 2, it will be
+ * 2, 3 after 3 and 4 after 4. Hence we can return in sequence. Note
+ * that the 1 delay is critical. Hence if we have post enc skip, we must
+ * skip here too. Note that since post enc skip already frees the recon
+ * buffer we need not do any thing here
+ *
+ * We need to return a recon whenever we consume an input buffer. This
+ * consumption includes a pre or post enc skip. Thus dump recon is set for
+ * all cases except when
+ * 1) We are waiting -> ps_codec->i4_frame_num > 1
+ * 2) When the input buffer is null [i.e. we are not consuming any input]
+ * An exception needs to be made for the case when we have the last buffer
+ * since we need to flush out the remaining recon.
+ ****************************************************************************/
- /* receive output back from codec */
- s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
- /* send the output to app */
- ps_video_encode_op->s_ive_op.output_present = 1;
- ps_video_encode_op->s_ive_op.dump_recon = 1;
- ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf;
- ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+ if (ps_codec->s_cfg.u4_enable_recon && (ps_codec->i4_frame_num > 1)
+ && (s_inp_buf.s_raw_buf.apv_bufs[0] || s_inp_buf.u4_is_last))
+ {
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+ pic_buf_t *ps_pic_buf = NULL;
+ WORD32 i4_buf_status, i4_curr_poc = 32768;
- /* receive input back from codec */
- s_inp_buf = ps_proc->s_inp_buf;
+ /* In case of skips we return recon, but indicate that buffer is zero size */
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
+ || i4_rc_pre_enc_skip)
+ {
- /* send the input to app */
- ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+ ps_video_encode_op->s_ive_op.dump_recon = 1;
+ ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[0] = 0;
+ ps_video_encode_op->s_ive_op.s_recon_buf.au4_wd[1] = 0;
- if (ps_codec->s_cfg.u4_enable_recon &&
- ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+ }
+ else
{
- /* error status */
- IH264_ERROR_T ret = IH264_SUCCESS;
-
- /* recon buffer */
- rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel];
-
- ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
-
- /* copy/convert the recon buffer and return */
- ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf,
- ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
- ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
- ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
- ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
- ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1],
- 0,
- ps_codec->s_cfg.u4_disp_ht);
-
- ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO);
- if (IH264_SUCCESS != ret)
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
- SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
- IVE_FATALERROR,
+ if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
+ continue;
+
+ i4_buf_status = ih264_buf_mgr_get_status(
+ ps_codec->pv_ref_buf_mgr,
+ ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
+
+ if ((i4_buf_status & BUF_MGR_IO)
+ && (ps_codec->as_ref_set[i].i4_poc < i4_curr_poc))
+ {
+ ps_pic_buf = ps_codec->as_ref_set[i].ps_pic_buf;
+ i4_curr_poc = ps_codec->as_ref_set[i].i4_poc;
+ }
+ }
+
+ ps_video_encode_op->s_ive_op.s_recon_buf =
+ ps_video_encode_ip->s_ive_ip.s_recon_buf;
+
+ /*
+ * If we get a valid buffer, output and free recon.
+ *
+ * We may get an invalid buffer if num_b_frames is 0. This is because
+ * we assume that there will be a ref frame in ref list after encoding
+ * the last frame. With B frames this is correct since its forward ref
+ * pic will be in the ref list. But if num_b_frames is 0, we will not
+ * have a forward ref pic.
+ */
+
+ if (ps_pic_buf)
+ {
+ /* copy/convert the recon buffer and return */
+ ih264e_fmt_conv(ps_codec,
+ ps_pic_buf,
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1],
+ 0, ps_codec->s_cfg.u4_disp_ht);
+
+ ps_video_encode_op->s_ive_op.dump_recon = 1;
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
+ ps_pic_buf->i4_buf_id, BUF_MGR_IO);
+
+ if (IH264_SUCCESS != ret)
+ {
+ SET_ERROR_ON_RETURN(
+ (IH264E_ERROR_T)ret, IVE_FATALERROR,
ps_video_encode_op->s_ive_op.u4_error_code,
IV_FAIL);
+ }
}
}
+ }
- /* release buffers from ref list */
- if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1)
- {
- /* pic info */
- pic_buf_t *ps_cur_pic;
- /* mv info */
- mv_buf_t *ps_cur_mv_buf;
+ /***************************************************************************
+ * Free reference buffers:
+ * In case of a post enc skip, we have to ensure that those pics will not
+ * be used as reference anymore. In all other cases we will not even mark
+ * the ref buffers
+ ***************************************************************************/
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+ {
+ /* pic info */
+ pic_buf_t *ps_cur_pic;
+
+ /* mv info */
+ mv_buf_t *ps_cur_mv_buf;
- /* error status */
- IH264_ERROR_T ret = IH264_SUCCESS;
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
- /* Decrement coded pic count */
- ps_codec->i4_coded_pic_cnt--;
+ /* Decrement the POC to account for the post encode skipped picture */
+ ps_codec->i4_poc--;
- /* loop through to get the min pic cnt among the list of pics stored in ref list */
- /* since the skipped frame may not be on reference list, we may not have an MV bank
- * hence free only if we have allocated */
- for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ /* loop through to get the min pic cnt among the list of pics stored in ref list */
+ /* since the skipped frame may not be on reference list, we may not have an MV bank
+ * hence free only if we have allocated */
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
{
- if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
- {
- ps_codec->as_ref_set[i].i4_pic_cnt = -1;
- ps_codec->as_ref_set[i].i4_poc = -1;
-
- ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
-
- ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf;
-
- /* release this frame from reference list */
- ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF);
- SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
- IVE_FATALERROR,
- ps_video_encode_op->s_ive_op.u4_error_code,
- IV_FAIL);
-
- ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF);
- SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
- IVE_FATALERROR,
- ps_video_encode_op->s_ive_op.u4_error_code,
- IV_FAIL);
- break;
- }
+
+ ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+
+ ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf;
+
+ /* release this frame from reference list and recon list */
+ ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF);
+ ret |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_IO);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF);
+ ret |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_IO);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ break;
}
}
+ }
+
+ /*
+ * Recon is not in sync with output, i.e. there can be frames still to be
+ * given back as recon even after the last output. In that case we need to
+ * mark that the output is not the last.
+ * Hence search through the ref list and mark appropriately.
+ */
+ if (ps_codec->s_cfg.u4_enable_recon)
+ {
+ WORD32 i4_buf_status = 0;
- if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) ||
- (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1))
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
- ps_video_encode_op->s_ive_op.dump_recon = 0;
+ if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
+ continue;
+
+ i4_buf_status |= ih264_buf_mgr_get_status(
+ ps_codec->pv_ref_buf_mgr,
+ ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
}
- else
+
+ if (i4_buf_status & BUF_MGR_IO)
{
- /* set output pic type */
- if (ps_codec->i4_slice_type == PSLICE)
- {
- ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
- }
- else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1)
- {
+ s_out_buf.u4_is_last = 0;
+ ps_video_encode_op->s_ive_op.u4_is_last = 0;
+ }
+ }
+
+
+ /**************************************************************************
+ * Signaling to APP
+ * 1) If we have a valid output, mark it so
+ * 2) Set the codec output ps_video_encode_op
+ * 3) Set the error status
+ * 4) Set the return Pic type
+ * Note that we have already marked recon properly
+ * 5) Send the consumed input back to app so that it can free it if possible
+ *
+ * We will have to return the output and input buffers unconditionally
+ * so that app can release them
+ **************************************************************************/
+ if (!i4_rc_pre_enc_skip
+ && !ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
+ && s_inp_buf.s_raw_buf.apv_bufs[0])
+ {
+
+ /* receive output back from codec */
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ /* send the output to app */
+ ps_video_encode_op->s_ive_op.output_present = 1;
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+ /* Set the time stamps of the encoded input */
+ ps_video_encode_op->s_ive_op.u4_timestamp_low = s_inp_buf.u4_timestamp_low;
+ ps_video_encode_op->s_ive_op.u4_timestamp_high = s_inp_buf.u4_timestamp_high;
+
+
+ switch (ps_codec->pic_type)
+ {
+ case PIC_IDR:
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type =IV_IDR_FRAME;
+ break;
+
+ case PIC_I:
ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
- }
- else
- {
- ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
- }
+ break;
+
+ case PIC_P:
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
+ break;
+
+ case PIC_B:
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_B_FRAME;
+ break;
+
+ default:
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+ break;
}
- /* loop through to get the error status */
for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
{
error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code;
@@ -569,6 +688,36 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
ps_video_encode_op->s_ive_op.u4_error_code,
IV_FAIL);
}
+ else
+ {
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+ /* receive output back from codec */
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ ps_video_encode_op->s_ive_op.output_present = 0;
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+ /* Set the time stamps of the encoded input */
+ ps_video_encode_op->s_ive_op.u4_timestamp_low = 0;
+ ps_video_encode_op->s_ive_op.u4_timestamp_high = 0;
+
+ /* receive input back from codec and send it to app */
+ s_inp_buf = ps_proc->s_inp_buf;
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+
+ }
+
+ /* Send the output and input buffers back to the app so that it can free them if possible */
+ ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf;
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
if (1 == s_inp_buf.u4_is_last)
{
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
index 67e5409..cc81e1b 100644
--- a/encoder/ih264e_encode_header.c
+++ b/encoder/ih264e_encode_header.c
@@ -65,14 +65,13 @@
#include "ithread.h"
#include "ih264e_config.h"
#include "ih264e_trace.h"
-#include "ih264_typedefs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ih264_debug.h"
#include "ih264_defs.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -81,14 +80,17 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_encode_header.h"
#include "ih264_common_tables.h"
#include "ih264_macros.h"
+#include "ih264e_utils.h"
/*****************************************************************************/
@@ -523,11 +525,12 @@ WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm,
{
/* num_ref_idx_l0_active_minus1 */
PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1");
- }
- if (ps_slice_hdr->u1_slice_type == BSLICE)
- {
- /* num_ref_idx_l1_active_minus1 */
- PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1");
+
+ if (ps_slice_hdr->u1_slice_type == BSLICE)
+ {
+ /* num_ref_idx_l1_active_minus1 */
+ PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1");
+ }
}
}
@@ -544,9 +547,20 @@ WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm,
}
}
+ if (ps_slice_hdr->u1_slice_type == BSLICE)
+ {
+ /* ref_pic_list_reordering_flag_l1 */
+ PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l1, 1, return_status, "ref_pic_list_reordering_flag_l1");
+
+ if (ps_slice_hdr->u1_ref_idx_reordering_flag_l1)
+ {
+
+ }
+ }
+
if ((ps_pps->i1_weighted_pred_flag &&
(ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) ||
- (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE))
+ (ps_slice_hdr->u1_slice_type == BSLICE && ps_pps->i1_weighted_bipred_idc == 1))
{
/* TODO_LATER: Currently there is no support for weighted prediction.
This needs to be updated when the support is added */
@@ -662,8 +676,8 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
* To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames,
* support for weighted prediction and introduce CABAC entropy coding then we have Main Profile.
*/
- if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) ||
- (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction))
+ if ((ps_cfg->u4_num_bframes) || (ps_cfg->e_content_type != IV_PROGRESSIVE) ||
+ (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction))
{
ps_sps->u1_profile_idc = IH264_PROFILE_MAIN;
}
@@ -673,17 +687,8 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
}
/* level */
- ps_sps->u1_level_idc = ps_cfg->u4_max_level;
-// i4_err_code = ih264e_get_level(ps_cfg, &level_idc);
-// if (i4_err_code == IH264E_SUCCESS)
-// {
-// ps_sps->u1_level_idc = level_idc;
-//
-// }
-// else
-// {
-// return i4_err_code;
-// }
+ ps_sps->u1_level_idc = MAX(ps_cfg->u4_max_level,
+ (UWORD32)ih264e_get_min_level(ps_cfg->u4_max_wd, ps_cfg->u4_max_ht));
/* constrained flags */
/*
@@ -748,8 +753,10 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
/* pic_order_cnt_type */
ps_sps->i1_pic_order_cnt_type = 2;
- if(ps_cfg->u4_enable_alt_ref)
+ if (ps_codec->i4_non_ref_frames_in_stream)
+ {
ps_sps->i1_pic_order_cnt_type = 0;
+ }
/* log2_max_pic_order_cnt_lsb_minus4 */
ps_sps->i1_log2_max_pic_order_cnt_lsb = 8;
@@ -765,8 +772,15 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
}
/* num_ref_frames */
- /* FIXME : Fix this hard coding */
- ps_sps->u1_max_num_ref_frames = 1;
+ /* TODO : Should we have a flexible num ref frames */
+ if (ps_codec->s_cfg.u4_num_bframes > 0)
+ {
+ ps_sps->u1_max_num_ref_frames = 2;
+ }
+ else
+ {
+ ps_sps->u1_max_num_ref_frames = 1;
+ }
/* gaps_in_frame_num_value_allowed_flag */
ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0;
@@ -852,7 +866,7 @@ IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps)
/* entropy_coding_mode */
ps_pps->u1_entropy_coding_mode_flag = ps_cfg->u4_entropy_coding_mode;
- /* pic_order_present_flag is unset for POC type 2 */
+ /* pic_order_present_flag is unset if we don't have fields */
ps_pps->u1_pic_order_present_flag = 0;
/* Currently number of slice groups supported are 1 */
@@ -980,18 +994,17 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
if (ps_sps->i1_pic_order_cnt_type == 0)
{
- WORD32 val;
- val = ps_codec->i4_coded_pic_cnt;
- val %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb);
- ps_slice_hdr->i4_pic_order_cnt_lsb = val;
+ WORD32 i4_poc;
+ i4_poc = ps_codec->i4_poc;
+ i4_poc %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb);
+ ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc;
}
+ /* TODO add support for poc type 1 */
else if (ps_sps->i1_pic_order_cnt_type == 1)
{
}
- if(0 == ps_slice_hdr->u2_first_mb_in_slice)
- ps_codec->i4_coded_pic_cnt++;
/*
* redundant slices are not currently supported.
@@ -1005,7 +1018,7 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
/* direct spatial mv pred flag */
if (ps_proc->i4_slice_type == BSLICE)
{
-
+ ps_slice_hdr->u1_direct_spatial_mv_pred_flag = 1;
}
if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE)
@@ -1036,11 +1049,23 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
{
}
+
+ /* ref_pic_list_reordering_flag_l1 */
+ ps_slice_hdr->u1_ref_idx_reordering_flag_l1 = 0;
+
+ if (ps_slice_hdr->u1_ref_idx_reordering_flag_l1)
+ {
+
+ }
}
+
+ /* Currently we do not support weighted pred */
+ /* ps_slice_hdr->u1_weighted_bipred_idc = 0; */
+
if ((ps_pps->i1_weighted_pred_flag &&
(ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE)) ||
- (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_proc->i4_slice_type == BSLICE))
+ (ps_proc->i4_slice_type == BSLICE && ps_pps->i1_weighted_bipred_idc == 1))
{
/* TODO_LATER: Currently there is no support for weighted prediction.
This needs to be updated when the support is added */
@@ -1114,6 +1139,8 @@ WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
* If this is not the case, we have to add Slice group map type to the bit stream */
}
+ ps_slice_hdr->i1_cabac_init_idc = CABAC_INIT_IDC;
+
return IH264E_SUCCESS;
}
diff --git a/encoder/ih264e_error.h b/encoder/ih264e_error.h
index 8fe9dac..1eba46c 100644
--- a/encoder/ih264e_error.h
+++ b/encoder/ih264e_error.h
@@ -218,7 +218,10 @@ typedef enum
IH264E_INVALID_ALT_REF_OPTION = IH264E_CODEC_ERROR_START + 0x2E,
/**No free picture buffer available to store recon pic */
- IH264E_NO_FREE_RECONBUF = IH264E_CODEC_ERROR_START + 0x2F,
+ IH264E_NO_FREE_RECONBUF = IH264E_CODEC_ERROR_START + 0x2F,
+
+ /**Not enough memory allocated as output buffer */
+ IH264E_INSUFFICIENT_OUTPUT_BUFFER = IH264E_CODEC_ERROR_START + 0x30,
/**max failure error code to ensure enum is 32 bits wide */
IH264E_FAIL = -1,
diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c
index 393d6ca..e06aea1 100644
--- a/encoder/ih264e_fmt_conv.c
+++ b/encoder/ih264e_fmt_conv.c
@@ -65,8 +65,8 @@
#include "ih264_defs.h"
#include "ih264_debug.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -75,9 +75,9 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
-#include "ih264_error.h"
#include "ih264_buf_mgr.h"
#include "ih264e_defs.h"
#include "ih264e_error.h"
@@ -85,6 +85,7 @@
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_fmt_conv.h"
diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c
index 65f943a..8305fd2 100644
--- a/encoder/ih264e_function_selector_generic.c
+++ b/encoder/ih264e_function_selector_generic.c
@@ -60,8 +60,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -70,24 +70,21 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_platform_macros.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264e_defs.h"
-#include "ih264e_structs.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264e_cabac.h"
#include "ih264e_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_cavlc.h"
-#include "ih264_padding.h"
#include "ih264e_intra_modes_eval.h"
-#include "ih264_mem_fns.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
+#include "ih264e_me.h"
/*****************************************************************************/
@@ -197,8 +194,12 @@ void ih264e_init_function_ptr_generic(codec_t *ps_codec)
ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
/* write mb syntax layer */
- ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
- ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][ISLICE] = ih264e_write_islice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][PSLICE] = ih264e_write_pslice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CAVLC][BSLICE] = ih264e_write_bslice_mb_cavlc;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][ISLICE] = ih264e_write_islice_mb_cabac;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][PSLICE] = ih264e_write_pslice_mb_cabac;
+ ps_codec->pf_write_mb_syntax_layer[CABAC][BSLICE] = ih264e_write_bslice_mb_cabac;
/* Padding Functions */
ps_codec->pf_pad_top = ih264_pad_top;
@@ -255,5 +256,14 @@ void ih264e_init_function_ptr_generic(codec_t *ps_codec)
ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
+ /* ME compute */
+ ps_codec->apf_compute_me[PSLICE] = &ih264e_compute_me_single_reflist;
+ ps_codec->apf_compute_me[BSLICE] = &ih264e_compute_me_multi_reflist;
+
+ /* skip decision */
+ ps_codec->apf_find_skip_params_me[PSLICE] = &ih264e_find_pskip_params_me;
+ ps_codec->apf_find_skip_params_me[BSLICE] = &ih264e_find_bskip_params_me;
+
+
return;
}
diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c
index e2b46a4..6719c5f 100644
--- a/encoder/ih264e_globals.c
+++ b/encoder/ih264e_globals.c
@@ -196,66 +196,138 @@ const WORD8 gi1_mv_pred_condition[8] =
-1, 0, 1, -1, 2, -1, -1, -1
};
-/**
-******************************************************************************
-* @brief maps the h264 quantizer to the mpeg2 quantizer scale
-* input : h264 qp
-* output : equivalent mpeg 2 qp
-* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
-******************************************************************************
-*/
+
+/*******************************************************************************
+ * Translation of MPEG QP to H264 QP
+ ******************************************************************************/
+/*
+ * Note : RC library models QP and bits assuming the QP to be MPEG2.
+ * Since MPEG qp varies linearly, when the relationship is computed,
+ * it learns that delta(qp) => delta(bits). Now what we are doing by the
+ * translation of qp is that
+ * QPrc = a + b*2^(QPen)
+ * By not considering the weight matrix in both MPEG and H264 we are in effect
+ * only changing the relation to
+ * QPrc = c + d*2^(QPen)
+ * This will only entail changing the RC model parameters, and this will
+ * not affect rc relation at all
+ *
+ *
+ * We have MPEG qp which varies from 0-228. The quantization factor has a linear
+ * relationship with the size of quantized values
+ *
+ * We also have H264 Qp, which varies such that for a change in QP of 6 , we
+ * double the corresponding scaling factor. Hence the scaling is linear in terms
+ * of 2^(QPh/6)
+ *
+ * Now we want to have translation between QPm and QPh. Hence we can write
+ *
+ * QPm = a + b*2^(QPh/6)
+ *
+ * Applying the boundary conditions that
+ * 1) QPm = 0.625 if QPh = 0
+ * 2) QPm = 224 if QPh = 51,
+ *
+ * we will have
+ * a = 0.0063, b = 0.6187
+ *
+ * Hence the relationship is
+ * QPm = a + b*2^(QPh/6)
+ * QPh = 6*log2((QPm - a)/b)
+ *
+ *
+ * Unrounded values for gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] =
+ *
+ * 0.625 0.70077 0.78581 0.88127 0.98843 1.10870
+ * 1.24370 1.39523 1.56533 1.75625 1.97055 2.21110
+ * 2.48110 2.78417 3.12435 3.50620 3.93480 4.41589
+ * 4.95590 5.56204 6.24241 7.00609 7.86330 8.82548
+ * 9.90550 11.11778 12.47851 14.00588 15.72030 17.64467
+ * 19.80470 22.22925 24.95072 28.00547 31.43430 35.28304
+ * 39.60310 44.45221 49.89514 56.00463 62.86230 70.55978
+ * 79.19990 88.89811 99.78398 112.00296 125.71830 141.11325
+ * 158.39350 177.78992 199.56167 223.99963
+ *
+ *
+ *
+ * Unrounded values for gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM]
+ *
+ * 0 4.1014 10.1288 13.6477 16.1425 18.0768 19.6568
+ * 20.9925 22.1493 23.1696 24.0822 24.9078 25.6614 26.3546
+ * 26.9964 27.5938 28.1527 28.6777 29.1726 29.6408 30.0850
+ * 30.5074 30.9102 31.2951 31.6636 32.0171 32.3567 32.6834
+ * 32.9983 33.3021 33.5957 33.8795 34.1544 34.4208 34.6793
+ * 34.9303 35.1742 35.4114 35.6423 35.8671 36.0863 36.3001
+ * 36.5087 36.7124 36.9115 37.1060 37.2963 37.4825 37.6648
+ * 37.8433 38.0182 38.1896 38.3577 38.5226 38.6844 38.8433
+ * 38.9993 39.1525 39.3031 39.4511 39.5966 39.7397 39.8804
+ * 40.0189 40.1553 40.2895 40.4217 40.5518 40.6801 40.8065
+ * 40.9310 41.0538 41.1749 41.2943 41.4121 41.5283 41.6430
+ * 41.7561 41.8678 41.9781 42.0870 42.1946 42.3008 42.4057
+ * 42.5094 42.6118 42.7131 42.8132 42.9121 43.0099 43.1066
+ * 43.2023 43.2969 43.3905 43.4831 43.5747 43.6653 43.7550
+ * 43.8438 43.9317 44.0187 44.1049 44.1901 44.2746 44.3582
+ * 44.4411 44.5231 44.6044 44.6849 44.7647 44.8438 44.9221
+ * 44.9998 45.0767 45.1530 45.2286 45.3035 45.3779 45.4515
+ * 45.5246 45.5970 45.6689 45.7401 45.8108 45.8809 45.9504
+ * 46.0194 46.0878 46.1557 46.2231 46.2899 46.3563 46.4221
+ * 46.4874 46.5523 46.6166 46.6805 46.7439 46.8069 46.8694
+ * 46.9314 46.9930 47.0542 47.1150 47.1753 47.2352 47.2947
+ * 47.3538 47.4125 47.4708 47.5287 47.5862 47.6433 47.7001
+ * 47.7565 47.8125 47.8682 47.9235 47.9785 48.0331 48.0874
+ * 48.1413 48.1949 48.2482 48.3011 48.3537 48.4060 48.4580
+ * 48.5097 48.5611 48.6122 48.6629 48.7134 48.7636 48.8135
+ * 48.8631 48.9124 48.9615 49.0102 49.0587 49.1069 49.1549
+ * 49.2026 49.2500 49.2972 49.3441 49.3908 49.4372 49.4834
+ * 49.5293 49.5750 49.6204 49.6656 49.7106 49.7553 49.7998
+ * 49.8441 49.8882 49.9320 49.9756 50.0190 50.0622 50.1051
+ * 50.1479 50.1904 50.2327 50.2749 50.3168 50.3585 50.4000
+ * 50.4413 50.4825 50.5234 50.5641 50.6047 50.6450 50.6852
+ * 50.7252 50.7650 50.8046 50.8440 50.8833 50.9224 50.9613
+ * 51.0000
+ */
+
const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] =
{
- 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 3, 3, 3, 4,
- 4, 4, 5, 6, 6, 7, 8, 9,
- 10, 11, 13, 14, 16, 18, 20, 23,
- 25, 29, 32, 36, 40, 45, 51, 57,
- 64, 72, 81, 91, 102, 114, 128, 144,
- 161, 181, 203, 228,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 3, 3, 4,
+ 4, 4, 5, 6, 6, 7, 8, 9,
+ 10, 11, 12, 14, 16, 18, 20, 22,
+ 25, 28, 31, 35, 40, 44, 50, 56,
+ 63, 71, 79, 89, 100, 112, 126, 141,
+ 158, 178, 200, 224
};
-/**
-******************************************************************************
-* @brief maps the mpeg2 quantizer to the h264 quantizer scale
-* input : mpeg2 qp
-* output : equivalent h264qp
-* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
-* k = 0 (for intra) k = sign(QFij)
-* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
-*
-* Excluding the portion of R(QP%6,i,j) that is due to
-* the DCT scale factors, the 6 entries after dividing by 64 (2^6)
-* correspond to dequant values of
-* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
-* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
-*
-* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
-* (the actual mapping seems to be to MPEG2 qscale of 2.5),
-* and the fact that the effective h264 quantizer changes by
-* a factor of 2 for every 6 steps, the following mapping is
-* obtained:
-* h264qp = 6*(log2(mpeg2qscale/2)) + 12.
-*
-* Note that the quant matrix entry assumed for the above
-* equality is 16. Hence when the mpeg2 quant matrix entries
-* are all 16, this lookup can be used as is (which is the
-* default inter quant matrix in mpeg-2).
-******************************************************************************
-*/
const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] =
{
- 0, 4, 10, 14, 16, 18, 20, 21, 22, 23, 24, 25, 26, 26, 27, 27,
- 28, 29, 29, 29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34,
- 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37,
- 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40,
- 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
- 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45,
- 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46,
- 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47,
- 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48,
- 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49,
- 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 0, 4, 10, 14, 16, 18, 20, 21,
+ 22, 23, 24, 25, 26, 26, 27, 28,
+ 28, 29, 29, 30, 30, 31, 31, 31,
+ 32, 32, 32, 33, 33, 33, 34, 34,
+ 34, 34, 35, 35, 35, 35, 36, 36,
+ 36, 36, 37, 37, 37, 37, 37, 37,
+ 38, 38, 38, 38, 38, 39, 39, 39,
+ 39, 39, 39, 39, 40, 40, 40, 40,
+ 40, 40, 40, 41, 41, 41, 41, 41,
+ 41, 41, 41, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 43, 43, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 45, 45, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48,
+ 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51,
+ 51
};
diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c
index cb475a1..4871f40 100644
--- a/encoder/ih264e_half_pel.c
+++ b/encoder/ih264e_half_pel.c
@@ -55,7 +55,6 @@
#include "ih264_defs.h"
#include "ih264e_half_pel.h"
#include "ih264_macros.h"
-#include "ih264e_half_pel.h"
#include "ih264e_debug.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c
index b41d717..52b3034 100644
--- a/encoder/ih264e_intra_modes_eval.c
+++ b/encoder/ih264e_intra_modes_eval.c
@@ -74,15 +74,17 @@
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
#include "ih264_padding.h"
-#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ime_distortion_metrics.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_globals.h"
@@ -372,9 +374,10 @@ void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps
UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
/* init temp var */
- if (ps_proc->i4_slice_type == PSLICE)
+ if (ps_proc->i4_slice_type != ISLICE)
{
- offset = 5;
+ /* Offset for MBtype */
+ offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
}
@@ -1315,7 +1318,7 @@ void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_
UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
/* strides */
- WORD32 i4_src_strd_c = ps_proc->i4_src_strd;
+ WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd;
WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd;
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
index 2dd0974..2b19dd1 100644
--- a/encoder/ih264e_mc.c
+++ b/encoder/ih264e_mc.c
@@ -19,25 +19,25 @@
*/
/**
-*******************************************************************************
-* @file
-* ih264e_mc.c
-*
-* @brief
-* Contains definition of functions for motion compensation
-*
-* @author
-* ittiam
-*
-* @par List of Functions:
-* - ih264e_motion_comp_luma()
-* - ih264e_motion_comp_chroma()
-*
-* @remarks
-* None
-*
-*******************************************************************************
-*/
+ *******************************************************************************
+ * @file
+ * ih264e_mc.c
+ *
+ * @brief
+ * Contains definition of functions for motion compensation
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_motion_comp_luma()
+ * - ih264e_motion_comp_chroma()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
/*****************************************************************************/
/* File Includes */
@@ -52,6 +52,7 @@
#include "iv2.h"
#include "ive2.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264_structs.h"
#include "ih264_inter_pred_filters.h"
@@ -60,57 +61,52 @@
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264_inter_pred_filters.h"
-#include "ih264_mem_fns.h"
-#include "ih264_padding.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_mc.h"
#include "ih264e_half_pel.h"
-
/*****************************************************************************/
/* Function Definitions */
/*****************************************************************************/
/**
-******************************************************************************
-*
-* @brief
-* performs motion compensation for a luma mb for the given mv.
-*
-* @par Description
-* This routine performs motion compensation of an inter mb. When the inter
-* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
-* to pred buffer. In this case the function returns pointer and stride of the
-* ref. buffer and this info is used in place of pred buffer else where.
-* In other cases, the pred buffer is populated via copy / filtering + copy
-* (q pel cases) and returned.
-*
-* @param[in] ps_proc
-* pointer to current proc ctxt
-*
-* @param[out] pu1_pseudo_pred
-* pseudo prediction buffer
-*
-* @param[out] u4_pseudo_pred_strd
-* pseudo pred buffer stride
-*
-* @return none
-*
-* @remarks Assumes half pel buffers for the entire frame are populated.
-*
-******************************************************************************
-*/
-void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
- UWORD8 **pu1_pseudo_pred,
+ ******************************************************************************
+ *
+ * @brief
+ * performs motion compensation for a luma mb for the given mv.
+ *
+ * @par Description
+ * This routine performs motion compensation of an inter mb. When the inter
+ * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+ * to pred buffer. In this case the function returns pointer and stride of the
+ * ref. buffer and this info is used in place of pred buffer else where.
+ * In other cases, the pred buffer is populated via copy / filtering + copy
+ * (q pel cases) and returned.
+ *
+ * @param[in] ps_proc
+ * pointer to current proc ctxt
+ *
+ * @param[out] pu1_pseudo_pred
+ * pseudo prediction buffer
+ *
+ * @param[out] u4_pseudo_pred_strd
+ * pseudo pred buffer stride
+ *
+ * @return none
+ *
+ * @remarks Assumes half pel buffers for the entire frame are populated.
+ *
+ ******************************************************************************
+ */
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred,
WORD32 *pi4_pseudo_pred_strd)
{
/* codec context */
@@ -152,51 +148,96 @@ void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
/* half / qpel coefficient */
UWORD32 u4_subpel_factor;
+ /* BIPRED Flag */
+ WORD32 i4_bipred_flag;
+
/* temp var */
UWORD32 u4_lkup_idx1;
/* Init */
i4_ref_strd[0] = ps_proc->i4_rec_strd;
- i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_hp_buf_strd;
+ i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] =
+ ps_me_ctxt->u4_subpel_buf_strd;
- for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+ for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
+ u4_num_prtn++)
{
+ mv_t *ps_curr_mv;
+
/* update ptr to curr partition */
ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
+ /* Default to no bipred */
+ i4_bipred_flag = 0;
+
+ switch (ps_curr_pu->b2_pred_mode)
+ {
+ case PRED_L0:
+ ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+ pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+ break;
+
+ case PRED_L1:
+ ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv;
+ pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1];
+ break;
+
+ case PRED_BI:
+ /*
+ * In case of PRED_BI, we only need to ensure that
+ * the reference buffer that gets selected is
+ * ps_proc->pu1_best_subpel_buf
+ */
+
+ /* Dummy */
+ ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+ pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+
+ i4_bipred_flag = 1;
+ break;
+
+ default:
+ ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+ pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+ break;
+
+ }
/* get full pel mv's (full pel units) */
- u4_mv_x_full = ps_curr_pu->s_l0_mv.i2_mvx >> 2;
- u4_mv_y_full = ps_curr_pu->s_l0_mv.i2_mvy >> 2;
+ u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
+ u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
/* get half pel mv's */
- u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
- u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+ u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+ u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
/* get quarter pel mv's */
- u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
- u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+ u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+ u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
/* width and height of partition */
wd = (ps_curr_pu->b4_wd + 1) << 2;
ht = (ps_curr_pu->b4_ht + 1) << 2;
/* decision ? qpel/hpel, fpel */
- u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
+ u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2)
+ + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
- /* update ref buffer ptrs */
- pu1_ref[0] = ps_proc->pu1_ref_buf_luma + (u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full;
+ /* Move ref to position given by MV */
+ pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
- pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
+ /* Sub pel ptrs / Bipred pointers init */
+ pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
-
/* update pred buff ptr */
- pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 4 * ps_curr_pu->b4_pos_x;
+ pu1_pred = ps_proc->pu1_pred_mb
+ + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+ + 4 * ps_curr_pu->b4_pos_x;
- /*u4_lkup_idx1 will be non zero for half pel*/
- u4_lkup_idx1 = (u4_subpel_factor >> 2 ) != 0 ;
+ /* u4_lkup_idx1 will be non zero for half pel and bipred */
+ u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
{
/********************************************************************/
@@ -218,7 +259,11 @@ void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
*/
else
{
- ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred, i4_ref_strd[u4_lkup_idx1], i4_pred_strd, ht, wd, NULL, 0);
+ ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1],
+ pu1_pred,
+ i4_ref_strd[u4_lkup_idx1],
+ i4_pred_strd, ht, wd, NULL,
+ 0);
}
}
@@ -226,24 +271,24 @@ void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
}
/**
-******************************************************************************
-*
-* @brief
-* performs motion compensation for chroma mb
-*
-* @par Description
-* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
-* according to the motion vectors given
-*
-* @param[in] ps_proc
-* pointer to current proc ctxt
-*
-* @return none
-*
-* @remarks Assumes half pel and quarter pel buffers for the entire frame are
-* populated.
-******************************************************************************
-*/
+ ******************************************************************************
+ *
+ * @brief
+ * performs motion compensation for chroma mb
+ *
+ * @par Description
+ * Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+ * according to the motion vectors given
+ *
+ * @param[in] ps_proc
+ * pointer to current proc ctxt
+ *
+ * @return none
+ *
+ * @remarks Assumes half pel and quarter pel buffers for the entire frame are
+ * populated.
+ ******************************************************************************
+ */
void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
{
/* codec context */
@@ -283,38 +328,122 @@ void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
WORD32 u4_mv_y;
UWORD8 u1_dx, u1_dy;
- for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+ for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
+ u4_num_prtn++)
{
- ps_curr_pu =ps_proc->ps_pu + u4_num_prtn;
+ mv_t *ps_curr_mv;
- u4_mv_x = ps_curr_pu->s_l0_mv.i2_mvx >> 3;
- u4_mv_y = ps_curr_pu->s_l0_mv.i2_mvy >> 3;
+ ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
- /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy =4*/
- u4_mv_x_full = (ps_curr_pu->s_l0_mv.i2_mvx & 0x4) >> 2;
- u4_mv_y_full = (ps_curr_pu->s_l0_mv.i2_mvy & 0x4) >> 2;
+ if (ps_curr_pu->b2_pred_mode != PRED_BI)
+ {
+ ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv;
+ pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode];
- /* get half pel mv's */
- u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
- u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+ u4_mv_x = ps_curr_mv->i2_mvx >> 3;
+ u4_mv_y = ps_curr_mv->i2_mvy >> 3;
- /* get quarter pel mv's */
- u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
- u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+ /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy = 4 */
+ u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
+ u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
+
+ /* get half pel mv's */
+ u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+ u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
+
+ /* get quarter pel mv's */
+ u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+ u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
- /* width and height of sub macro block */
- wd = (ps_curr_pu->b4_wd + 1) << 1;
- ht = (ps_curr_pu->b4_ht + 1) << 1;
+ /* width and height of sub macro block */
+ wd = (ps_curr_pu->b4_wd + 1) << 1;
+ ht = (ps_curr_pu->b4_ht + 1) << 1;
- /* move the pointers so that they point to the motion compensated locations */
- pu1_ref = ps_proc->pu1_ref_buf_chroma + (u4_mv_y * i4_ref_strd) + (u4_mv_x << 1);
+ /* move the pointers so that they point to the motion compensated locations */
+ pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
- pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 2 * ps_curr_pu->b4_pos_x;
+ pu1_pred = ps_proc->pu1_pred_mb
+ + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+ + 2 * ps_curr_pu->b4_pos_x;
- u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
- u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
+ u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
+ u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
- ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
- u1_dx, u1_dy, ht, wd);
+ /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
+ * separate functions for better performance
+ *
+ * ih264_inter_pred_chroma_dx_zero_a9q
+ * and
+ * ih264_inter_pred_chroma_dy_zero_a9q
+ */
+
+ ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd,
+ i4_pred_strd, u1_dx, u1_dy, ht, wd);
+ }
+ else /* If the pred mode is PRED_BI */
+ {
+ /*
+ * We need to interpolate the L0 and L1 ref pics with the chroma MV
+ * then average them for bilinear inter prediction
+ */
+ WORD32 i4_predmode;
+ UWORD8 *pu1_ref_buf[2];
+
+ /* Temporary buffers to store the interpolated value from L0 and L1 */
+ pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0];
+ pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1];
+
+
+ for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++)
+ {
+ ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv;
+ pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode];
+
+ u4_mv_x = ps_curr_mv->i2_mvx >> 3;
+ u4_mv_y = ps_curr_mv->i2_mvy >> 3;
+
+ /*
+ * corresponds to full pel motion vector in luma, but in chroma
+ * corresponds to pel formed with dx, dy = 4
+ */
+ u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
+ u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
+
+ /* get half pel mv's */
+ u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+ u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
+
+ /* get quarter pel mv's */
+ u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+ u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
+
+ /* width and height of sub macro block */
+ wd = (ps_curr_pu->b4_wd + 1) << 1;
+ ht = (ps_curr_pu->b4_ht + 1) << 1;
+
+ /* move the pointers so that they point to the motion compensated locations */
+ pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
+
+ pu1_pred = ps_proc->pu1_pred_mb
+ + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+ + 2 * ps_curr_pu->b4_pos_x;
+
+ u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1)
+ + (u4_mv_x_qpel);
+ u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1)
+ + (u4_mv_y_qpel);
+
+ ps_codec->pf_inter_pred_chroma(pu1_ref,
+ pu1_ref_buf[i4_predmode],
+ i4_ref_strd, MB_SIZE, u1_dx,
+ u1_dy, ht, wd);
+ }
+
+ ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0],
+ pu1_ref_buf[PRED_L1], pu1_pred,
+ MB_SIZE, MB_SIZE,
+ i4_pred_strd, MB_SIZE >> 1,
+ MB_SIZE);
+ }
}
}
diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c
index 9e8d7a3..68bdea6 100644
--- a/encoder/ih264e_me.c
+++ b/encoder/ih264e_me.c
@@ -75,20 +75,20 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_defs.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_globals.h"
#include "ih264_macros.h"
#include "ih264e_me.h"
#include "ime.h"
-#include "ime_distortion_metrics.h"
#include "ih264_debug.h"
-#include "ithread.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_core_coding.h"
#include "ih264e_mc.h"
@@ -164,6 +164,8 @@ void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
}
}
+
+
/**
*******************************************************************************
*
@@ -204,37 +206,25 @@ void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
* number of such MVs
*
* @remarks
-* Assumptions : 1. Assumes Single reference frame
-* 2. Assumes Only partition of size 16x16
+* Assumptions : 1. Assumes Only partition of size 16x16
*
*******************************************************************************
*/
static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
- me_ctxt_t *ps_me_ctxt)
+ me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_reflist)
{
/* curr mb indices */
WORD32 i4_mb_x = ps_proc->i4_mb_x;
- /* left mb motion vector */
- mv_t *ps_left_mv;
-
- /* top left mb motion vector */
- mv_t *ps_top_mv;
-
- /* top left mb motion vector */
- mv_t *ps_top_left_mv;
+ /* Motion vector */
+ mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
- /* top left mb motion vector */
- mv_t *ps_top_right_mv;
-
- /* skip mv */
- mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+ /* Pred modes */
+ WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
/* mb part info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
-
- /* num of candidate search candidates */
- UWORD32 u4_num_candidates = 0;
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
/* mvs */
WORD32 mvx, mvy;
@@ -242,29 +232,36 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
/* ngbr availability */
block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+ /* Complementary pred mode (PRED_L1 for list 0, PRED_L0 for list 1) */
+ WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
+
/* srch range*/
WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
- ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv;
- ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv;
- ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv;
- ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv;
+ ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
+ ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
+ ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
+ ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
+
+ i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
+ i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
+ i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
+ i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
+
+ /* num of candidate search candidates */
+ UWORD32 u4_num_candidates =0 ;
- /************************************************************/
/* Taking the Zero motion vector as one of the candidates */
- /************************************************************/
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
u4_num_candidates++;
- /************************************************************/
/* Taking the Left MV Predictor as one of the candidates */
- /************************************************************/
- if (ps_ngbr_avbl->u1_mb_a)
+ if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
{
mvx = (ps_left_mv->i2_mvx + 2) >> 2;
mvy = (ps_left_mv->i2_mvy + 2) >> 2;
@@ -272,21 +269,14 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
u4_num_candidates ++;
}
- /*else
- {
- ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvx = 0;
- ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvy = 0;
- }*/
- /************************************************************/
/* Taking the Top MV Predictor as one of the candidates */
- /************************************************************/
- if (ps_ngbr_avbl->u1_mb_b)
+ if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
{
mvx = (ps_top_mv->i2_mvx + 2) >> 2;
mvy = (ps_top_mv->i2_mvy + 2) >> 2;
@@ -294,15 +284,13 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
u4_num_candidates ++;
- /************************************************************/
/* Taking the TopRt MV Predictor as one of the candidates */
- /************************************************************/
- if (ps_ngbr_avbl->u1_mb_c)
+ if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
{
mvx = (ps_top_right_mv->i2_mvx + 2) >> 2;
mvy = (ps_top_right_mv->i2_mvy + 2)>> 2;
@@ -310,15 +298,13 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
u4_num_candidates ++;
}
- /************************************************************/
/* Taking the TopLt MV Predictor as one of the candidates */
- /************************************************************/
- else if (ps_ngbr_avbl->u1_mb_d)
+ else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
{
mvx = (ps_top_left_mv->i2_mvx + 2) >> 2;
mvy = (ps_top_left_mv->i2_mvy + 2) >> 2;
@@ -326,84 +312,84 @@ static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
u4_num_candidates ++;
}
- /*else
- {
- ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
- ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
- }*/
}
- /*else
- {
- ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvx = 0;
- ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvy = 0;
-
- ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
- ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
- }*/
/********************************************************************/
/* MV Prediction */
/********************************************************************/
- ih264e_mv_pred_me(ps_proc);
+ ih264e_mv_pred_me(ps_proc, i4_reflist);
- ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
- ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+ ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
+ ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
- /************************************************************/
/* Get the skip motion vector */
- /************************************************************/
- ih264e_find_skip_motion_vector(ps_proc, 1);
+ {
+ ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
+ [ps_proc->i4_slice_type](ps_proc, i4_reflist);
- /************************************************************/
- /* Taking the Skip motion vector as one of the candidates */
- /************************************************************/
- mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
- mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
+ /* Taking the Skip motion vector as one of the candidates */
+ mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
+ mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
- mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
- mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
- ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
+ u4_num_candidates++;
- u4_num_candidates++;
+ if (ps_proc->i4_slice_type == BSLICE)
+ {
+ /* Taking the temporal Skip motion vector as one of the candidates */
+ mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
+ mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
- ASSERT(u4_num_candidates <= 5);
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
+ u4_num_candidates++;
+ }
+ }
- ps_me_ctxt->u4_num_candidates = u4_num_candidates;
+ ASSERT(u4_num_candidates <= 6);
+
+ ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
}
/**
*******************************************************************************
*
-* @brief The function gives the skip motion vector
+* @brief The function computes parameters for a PSKIP MB
*
* @par Description:
-* The function gives the skip motion vector
+* The function updates the skip motion vector and checks if the current
+* MB can be a PSKIP MB or not
*
-* @param[in] ps_left_mb_pu
-* pointer to left mb motion vector info
+* @param[in] ps_proc
+* Pointer to process context
*
-* @param[in] ps_top_row_pu
-* pointer to top & top right mb motion vector info
+* @param[in] u4_for_me
+* Flag to indicate function is called for ME or not
*
-* @param[out] ps_pred_mv
-* pointer to candidate predictors for the current block
+* @param[in] i4_reflist
+* Current active reference list (unused by this function)
*
-* @returns The x & y components of the MV predictor.
+* @returns Flag indicating if the current MB can be marked as skip
*
-* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
* specification.
*
*******************************************************************************
*/
-void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
+WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
{
/* left mb motion vector */
enc_pu_t *ps_left_mb_pu ;
@@ -411,35 +397,116 @@ void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
/* top mb motion vector */
enc_pu_t *ps_top_mb_pu ;
- /* skip mv */
- mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+ /* Skip mv */
+ mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
+
+ UNUSED(i4_reflist);
+
+ ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
+ ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
+
+ if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
+ (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
+ (
+ (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
+ (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
+ (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
+ ) ||
+ (
+ (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
+ (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
+ (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
+ )
+ )
- if (u4_for_me == 1)
{
- ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
- ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+ ps_skip_mv->i2_mvx = 0;
+ ps_skip_mv->i2_mvy = 0;
}
else
{
- ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
- ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
+ ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
+ ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
+ }
+
+ if ( (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
+ && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
+ {
+ return 1;
}
- if ( (!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
- (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
- ((ps_left_mb_pu->i1_l0_ref_idx | ps_left_mb_pu->s_l0_mv.i2_mvx | ps_left_mb_pu->s_l0_mv.i2_mvy) == 0) ||
- ((ps_top_mb_pu->i1_l0_ref_idx | ps_top_mb_pu->s_l0_mv.i2_mvx | ps_top_mb_pu->s_l0_mv.i2_mvy) == 0) )
+ return 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief The function computes parameters for a PSKIP MB
+*
+* @par Description:
+* The function updates the PSKIP motion vector using s_left_mb_pu_ME and
+* ps_top_row_pu_ME; it does not test whether the current MB is a skip MB
+*
+* @param[in] ps_proc
+* Pointer to process context
+*
+* @param[in] u4_for_me
+* Flag to indicate function is called for ME or not
+*
+* @param[in] i4_reflist
+* Current active reference list (unused by this function)
+*
+* @returns Always PRED_L0
+*
+* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
+* specification.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
+{
+ /* left mb motion vector */
+ enc_pu_t *ps_left_mb_pu ;
+
+ /* top mb motion vector */
+ enc_pu_t *ps_top_mb_pu ;
+
+ /* Skip mv */
+ mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
+
+ UNUSED(i4_reflist);
+
+ ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+ ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+ if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
+ (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
+ (
+ (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
+ (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
+ (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
+ ) ||
+ (
+ (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
+ (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
+ (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
+ )
+ )
+
{
ps_skip_mv->i2_mvx = 0;
ps_skip_mv->i2_mvy = 0;
}
else
{
- ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
- ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+ ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
+ ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
}
+
+ return PRED_L0;
}
+
/**
*******************************************************************************
*
@@ -469,61 +536,64 @@ void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
*/
void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
enc_pu_t *ps_top_row_pu,
- mv_t *ps_pred_mv)
+ enc_pu_mv_t *ps_pred_mv,
+ WORD32 i4_ref_list)
{
- /* curr frame ref idx */
- /* we are assuming that we are operating on single reference frame
- * hence the ref idx is insignificant during mv prediction.
- */
- WORD32 u4_ref_idx = 0;
- /* temp var */
- WORD32 pred_algo = 3, a, b, c;
-
- /* If only one of the candidate blocks has a reference frame equal to
- * the current block then use the same block as the final predictor */
- a = (ps_left_mb_pu->i1_l0_ref_idx == u4_ref_idx)? 0:-1;
- b = (ps_top_row_pu[0].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
- c = (ps_top_row_pu[1].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
-
- if (a == 0 && b == -1 && c == -1)
- pred_algo = 0; /* LEFT */
- else if (a == -1 && b == 0 && c == -1)
- pred_algo = 1; /* TOP */
- else if (a == -1 && b == -1 && c == 0)
- pred_algo = 2; /* TOP RIGHT */
-
- switch (pred_algo)
- {
- case 0:
- /* left */
- ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx;
- ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy;
- break;
- case 1:
- /* top */
- ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx;
- ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy;
- break;
- case 2:
- /* top right */
- ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx;
- ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy;
- break;
- case 3:
- /* median */
- MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx,
- ps_top_row_pu[0].s_l0_mv.i2_mvx,
- ps_top_row_pu[1].s_l0_mv.i2_mvx,
- ps_pred_mv->i2_mvx);
- MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy,
- ps_top_row_pu[0].s_l0_mv.i2_mvy,
- ps_top_row_pu[1].s_l0_mv.i2_mvy,
- ps_pred_mv->i2_mvy);
+ /* Indicates the current ref */
+ WORD8 i1_ref_idx;
- break;
- default:
- break;
+ /* For pred L0 */
+ i1_ref_idx = -1;
+ {
+ /* temp var */
+ WORD32 pred_algo = 3, a, b, c;
+
+ /* If only one of the candidate blocks has a reference frame equal to
+ * the current block then use the same block as the final predictor */
+ a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
+ b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
+ c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
+
+ if (a == 0 && b == -1 && c == -1)
+ pred_algo = 0; /* LEFT */
+ else if(a == -1 && b == 0 && c == -1)
+ pred_algo = 1; /* TOP */
+ else if(a == -1 && b == -1 && c == 0)
+ pred_algo = 2; /* TOP RIGHT */
+
+ switch (pred_algo)
+ {
+ case 0:
+ /* left */
+ ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
+ ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
+ break;
+ case 1:
+ /* top */
+ ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
+ ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
+ break;
+ case 2:
+ /* top right */
+ ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
+ ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
+ break;
+ case 3:
+ /* median */
+ MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
+ ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
+ ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
+ ps_pred_mv->s_mv.i2_mvx);
+ MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
+ ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
+ ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
+ ps_pred_mv->s_mv.i2_mvy);
+
+ break;
+ default:
+ break;
+ }
}
}
@@ -545,31 +615,34 @@ void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
*
*******************************************************************************
*/
-void ih264e_mv_pred(process_ctxt_t *ps_proc)
+void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
{
/* left mb motion vector */
- enc_pu_t *ps_left_mb_pu ;
+ enc_pu_t *ps_left_mb_pu;
/* top left mb motion vector */
- enc_pu_t *ps_top_left_mb_pu ;
+ enc_pu_t *ps_top_left_mb_pu;
/* top row motion vector info */
enc_pu_t *ps_top_row_pu;
/* predicted motion vector */
- mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
+ enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
/* zero mv */
- mv_t zero_mv = {0, 0};
+ mv_t zero_mv = { 0, 0 };
/* mb neighbor availability */
block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
/* mb syntax elements of neighbors */
- mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
- mb_info_t *ps_top_left_syn;
- UWORD32 u4_left_is_intra;
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+ mb_info_t *ps_top_left_syn;
+ UWORD32 u4_left_is_intra;
+
+ /* Temp var */
+ WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
@@ -577,44 +650,58 @@ void ih264e_mv_pred(process_ctxt_t *ps_proc)
ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
- /* Before performing mv prediction prepare the ngbr information and
- * reset motion vectors basing on their availability */
- if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) )
- {
- /* left mv */
- ps_left_mb_pu->i1_l0_ref_idx = -1;
- ps_left_mb_pu->s_l0_mv = zero_mv;
- }
- if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra)
- {
- /* top mv */
- ps_top_row_pu[0].i1_l0_ref_idx = -1;
- ps_top_row_pu[0].s_l0_mv = zero_mv;
- }
- if (!ps_ngbr_avbl->u1_mb_c)
+ /* Number of ref lists to process */
+ max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
+
+ for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
{
- /* top right mv - When top right partition is not available for
- * prediction if top left is available use it for prediction else
- * set the mv information to -1 and (0, 0)
- * */
- if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra)
+ i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
+
+ /* Before performing mv prediction prepare the ngbr information and
+ * reset motion vectors basing on their availability */
+ if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
+ || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
{
- ps_top_row_pu[1].i1_l0_ref_idx = -1;
- ps_top_row_pu[1].s_l0_mv = zero_mv;
+ /* left mv */
+ ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
+ ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
}
- else
+ if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
+ || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
{
- ps_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx;
- ps_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv;
+ /* top mv */
+ ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
+ ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
}
- }
- else if (ps_top_syn[1].u2_is_intra)
- {
- ps_top_row_pu[1].i1_l0_ref_idx = -1;
- ps_top_row_pu[1].s_l0_mv = zero_mv;
+
+ if (!ps_ngbr_avbl->u1_mb_c)
+ {
+ /* top right mv - When top right partition is not available for
+ * prediction if top left is available use it for prediction else
+ * set the ref idx to 0 and the mv to (0, 0)
+ * */
+ if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
+ || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
+ {
+ ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
+ ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
+ }
+ else
+ {
+ ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
+ ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
+ }
+ }
+ else if(ps_top_syn[1].u2_is_intra
+ || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
+ {
+ ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
+ ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
+ }
+
+ ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
}
- ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, ps_pred_mv);
}
/**
@@ -635,7 +722,7 @@ void ih264e_mv_pred(process_ctxt_t *ps_proc)
*
*******************************************************************************
*/
-void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
+void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
{
/* left mb motion vector */
enc_pu_t *ps_left_mb_pu ;
@@ -649,11 +736,14 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
enc_pu_t s_top_row_pu[2];
/* predicted motion vector */
- mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
+ enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
/* zero mv */
mv_t zero_mv = {0, 0};
+ /* Complementary pred mode */
+ WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
+
/* mb neighbor availability */
block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
@@ -664,19 +754,23 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
s_top_row_pu[0] = ps_top_row_pu[0];
s_top_row_pu[1] = ps_top_row_pu[1];
- /* Before performing mv prediction prepare the ngbr information and
- * reset motion vectors basing on their availability */
- if (!ps_ngbr_avbl->u1_mb_a )
+ /*
+ * Before performing mv prediction prepare the ngbr information and
+ * reset motion vectors basing on their availability
+ */
+
+ if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
{
/* left mv */
- ps_left_mb_pu->i1_l0_ref_idx = -1;
- ps_left_mb_pu->s_l0_mv = zero_mv;
+ ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
+ ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
}
- if (!ps_ngbr_avbl->u1_mb_b )
+ if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
{
/* top mv */
- s_top_row_pu[0].i1_l0_ref_idx = -1;
- s_top_row_pu[0].s_l0_mv = zero_mv;
+ s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
+ s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
+
}
if (!ps_ngbr_avbl->u1_mb_c)
{
@@ -684,19 +778,28 @@ void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
* prediction if top left is available use it for prediction else
* set the mv information to -1 and (0, 0)
* */
- if (!ps_ngbr_avbl->u1_mb_d)
+ if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
{
- s_top_row_pu[1].i1_l0_ref_idx = -1;
- s_top_row_pu[1].s_l0_mv = zero_mv;
+ s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
+ s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
+
+ s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
+ s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
}
else
{
- s_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx;
- s_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv;
+ s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
+ s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
}
}
+ else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
+ {
+ ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
+ ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
+ }
- ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]), ps_pred_mv);
+ ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
+ &ps_pred_mv[i4_ref_list], i4_ref_list);
}
/**
@@ -722,20 +825,40 @@ void ih264e_init_me(process_ctxt_t *ps_proc)
/* me ctxt */
me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
+
+ if (ps_codec->s_cfg.u4_num_bframes == 0)
+ {
+ ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
+ }
+ else
+ {
+ ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P;
+ }
+
/* src ptr */
ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
+ /* src stride */
+ ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
- /* ref ptr */
- ps_me_ctxt->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma;
+ /* ref ptrs and corresponding lagrange params */
+ ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
+ ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
- /* lagrange param */
ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
+
+
}
+
/**
*******************************************************************************
*
-* @brief This function performs motion estimation for the current mb
+* @brief This function performs motion estimation for the current mb using
+* single reference list
*
* @par Description:
* The current mb is compared with a list of mb's in the reference frame for
@@ -753,7 +876,7 @@ void ih264e_init_me(process_ctxt_t *ps_proc)
*
*******************************************************************************
*/
-void ih264e_compute_me(process_ctxt_t *ps_proc)
+void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
{
/* me ctxt */
me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
@@ -761,20 +884,6 @@ void ih264e_compute_me(process_ctxt_t *ps_proc)
/* codec context */
codec_t *ps_codec = ps_proc->ps_codec;
-// /* mb syntax elements of neighbors */
-// mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
-// mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
-
- /* mb part info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
- mb_part_ctxt skip_mb_part_info;
-
- /* temp var */
- WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad;
-
- /* Motion vectors in full-pel units */
- WORD16 mv_x, mv_y;
-
/* recon stride */
WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
@@ -787,118 +896,104 @@ void ih264e_compute_me(process_ctxt_t *ps_proc)
/* Sad therholds */
ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
- /*Best half pel buffer*/
- UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf;
- UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd;
-
- /* During evaluation for motion vectors do not search through padded regions */
- /* Obtain number of rows and columns that are effective for computing for me evaluation */
- rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
- rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
- columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
- columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+ /* Mb part ctxts for SKIP */
+ mb_part_ctxt s_skip_mbpart;
- /* init srch range */
- /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
- * on all sides.
- */
-// ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]);
-// ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]);
-// ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]);
-// ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]);
-
- ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
- ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
- ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
- ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
-
- /* this is to facilitate fast sub pel computation with minimal loads */
- if (ps_me_ctxt->u4_enable_hpel)
{
+ WORD32 rows_above, rows_below, columns_left, columns_right;
+
+ /* During evaluation for motion vectors do not search through padded regions */
+ /* Obtain number of rows and columns that are effective for computing for me evaluation */
+ rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+ rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+ columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+ columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+ /* init srch range */
+ /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+ * on all sides.
+ */
+ ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+ ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+ /* this is to facilitate fast sub pel computation with minimal loads */
ps_me_ctxt->i4_srch_range_w += 1;
ps_me_ctxt->i4_srch_range_e -= 1;
ps_me_ctxt->i4_srch_range_n += 1;
ps_me_ctxt->i4_srch_range_s -= 1;
}
- /*Initialize the min sad option*/
- ps_me_ctxt->u4_min_sad_reached = 0; /*Not yet found min sad*/
- ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+ /* Compute ME and store the MVs */
- /************************************************************/
- /* Get the seed motion vector candidates */
- /************************************************************/
- ih264e_get_search_candidates(ps_proc, ps_me_ctxt);
-
- /************************************************************/
- /* Init the MB part ctxt structure */
- /************************************************************/
- ps_mb_part->s_mv_curr.i2_mvx = 0;
- ps_mb_part->s_mv_curr.i2_mvy = 0;
- ps_mb_part->i4_mb_cost = INT_MAX;
- ps_mb_part->i4_mb_distortion = INT_MAX;
-
- /* With NMB changes this logic will not work as we cannot exit NME in between*/
- /********************************************************************/
- /* Analyse skip */
- /********************************************************************/
-// if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0
-// && u4_frame_level_me == 0)
-// {
-// if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) ||
-// (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) ||
-// (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) )
-// {
-// if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) )
-// {
-// return;
-// }
-// }
-// }
-
- /********************************************************************/
- /* compute skip cost */
- /********************************************************************/
- /* See if we need to use modified sad */
- u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1);
-
- /* init the cost of skip MB */
- skip_mb_part_info.i4_mb_cost = INT_MAX;
- ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad);
+ /***********************************************************************
+ * Compute ME for list L0
+ ***********************************************************************/
+ /* Init SATQD for the current list */
+ ps_me_ctxt->u4_min_sad_reached = 0;
+ ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
- if (ps_me_ctxt->u4_min_sad_reached == 0)
+ /* Get the seed motion vector candidates */
+ ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
+
+ /* ****************************************************************
+ *Evaluate the SKIP for current list
+ * ****************************************************************/
+ s_skip_mbpart.s_mv_curr.i2_mvx = 0;
+ s_skip_mbpart.s_mv_curr.i2_mvy = 0;
+ s_skip_mbpart.i4_mb_cost = INT_MAX;
+ s_skip_mbpart.i4_mb_distortion = INT_MAX;
+
+ ime_compute_skip_cost( ps_me_ctxt,
+ (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
+ &s_skip_mbpart,
+ ps_proc->ps_codec->s_cfg.u4_enable_satqd,
+ PRED_L0,
+ 0 /* Not a Bslice */ );
+
+ s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
+ s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
+
+ /******************************************************************
+ * Evaluate ME For current list
+ *****************************************************************/
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
+ ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
+ ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
+
+ /* Init Hpel */
+ ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
+
+ /* In case we found out the minimum SAD, exit the ME eval */
+ if (!ps_me_ctxt->u4_min_sad_reached)
{
- /************************************************************/
- /* Evaluate search candidates for initial mv pt. */
- /************************************************************/
- ime_evaluate_init_srchposn_16x16(ps_me_ctxt);
+ /* Evaluate search candidates for initial mv pt */
+ ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
/********************************************************************/
/* full pel motion estimation */
/********************************************************************/
- ime_full_pel_motion_estimation_16x16(ps_me_ctxt);
+ ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
- DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
- (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+ /* Scale the MV to qpel resolution */
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
- DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
- /********************************************************************/
- /* sub pel motion estimation */
- /********************************************************************/
if (ps_me_ctxt->u4_enable_hpel)
{
- /* motion vectors in terms of full pel values */
- mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
- mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
-
/* moving src pointer to the converged motion vector location*/
- pu1_hpel_src = ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd);
+ pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
+ + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
+ + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2)* i4_rec_strd;
- ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x;
- ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y;
- ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy;
- ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD;
+ ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
+ ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
+ ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
+
+ ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
/* half pel search is done for both sides of full pel,
* hence half_x of width x height = 17x16 is created
@@ -907,9 +1002,9 @@ void ih264e_compute_me(process_ctxt_t *ps_proc)
/* computing half_x */
ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
- ps_proc->pu1_half_x,
+ ps_me_ctxt->apu1_subpel_buffs[0],
i4_rec_strd,
- ps_me_ctxt->u4_hp_buf_strd);
+ ps_me_ctxt->u4_subpel_buf_strd);
/*
* Halfpel search is done for both sides of full pel,
@@ -918,61 +1013,57 @@ void ih264e_compute_me(process_ctxt_t *ps_proc)
* for half_xy top_left is required
* hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
*/
-
pu1_hpel_src -= i4_rec_strd;
/* computing half_y , and half_xy*/
ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
- pu1_hpel_src, ps_proc->pu1_half_y,
- ps_proc->pu1_half_xy, i4_rec_strd,
- ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3,
- ps_me_ctxt->u4_hp_buf_strd);
+ pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
+ ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
+ ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
+ ps_me_ctxt->u4_subpel_buf_strd);
- ime_sub_pel_motion_estimation_16x16(ps_me_ctxt);
+ ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
}
}
- {
- /* if skip gives a better cost than other search, copy the cost accordingly*/
- if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost)
- {
- ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost;
- ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion;
- ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx;
- ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy;
- }
- else
- {
- /*
- * If the current MB has a sub pel component,
- * we need to copy that to the best subpel buffer
- */
- if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf)
- {
- ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf,
- pu1_best_subpel_buf,
- ps_me_ctxt->u4_hp_buf_strd,
- u4_bst_spel_strd, MB_SIZE,
- MB_SIZE, NULL, 0);
- }
- }
+ /***********************************************************************
+ * If a particular skip MV is giving better SAD, copy to the corresponding
+ * MBPART
+ * In B slices this loop should go only to PREDL1: If we found min sad
+ * we will go to the skip ref list only
+ * Have to find a way to make it without too much change or new vars
+ **********************************************************************/
+ if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
+ {
+ ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
+ ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
}
-
- DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0);
-
- /* update the type of the mb if necessary */
- if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+ else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
{
- /* mb cost */
- ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost;
+ /* Now we have to copy the buffers */
+ ps_codec->pf_inter_pred_luma_copy(
+ ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
+ ps_proc->pu1_best_subpel_buf,
+ ps_me_ctxt->u4_subpel_buf_strd,
+ ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
+ NULL, 0);
+ }
- /* mb distortion */
- ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion;
+ /**********************************************************************
+ * Now get the minimum of MB part sads by searching over all ref lists
+ **********************************************************************/
+ ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
+ ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
+ ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
+ ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
+ ps_proc->ps_cur_mb->u4_mb_type = P16x16;
+ ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
- /* mb type */
- ps_proc->ps_cur_mb->u4_mb_type = P16x16;
- }
+ /* Mark the reflists */
+ ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
+ ps_proc->ps_pu->s_me_info[1].i1_ref_idx = 0;
/* number of partitions */
ps_proc->u4_num_sub_partitions = 1;
@@ -986,19 +1077,13 @@ void ih264e_compute_me(process_ctxt_t *ps_proc)
ps_proc->ps_pu->b4_wd = 3;
ps_proc->ps_pu->b4_ht = 3;
- /* ref idx */
- ps_proc->ps_pu->i1_l0_ref_idx = 0;
-
- /* motion vector L0 */
- ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx;
- ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy;
-
/* Update min sad conditions */
if (ps_me_ctxt->u4_min_sad_reached == 1)
{
ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
}
+
}
/**
@@ -1054,9 +1139,9 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
}
}
- ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv);
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
- ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
@@ -1080,7 +1165,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
/* init me */
ih264e_init_me(ps_proc);
- ih264e_compute_me(ps_proc);
+ /* Compute ME according to slice type */
+ ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
/* update top and left structs */
{
@@ -1119,7 +1205,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
/* update buffers pointers */
ps_proc->pu1_src_buf_luma += MB_SIZE;
ps_proc->pu1_rec_buf_luma += MB_SIZE;
- ps_proc->pu1_ref_buf_luma += MB_SIZE;
+ ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
+ ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
/*
* Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
@@ -1127,7 +1214,9 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
*/
ps_proc->pu1_src_buf_chroma += MB_SIZE;
ps_proc->pu1_rec_buf_chroma += MB_SIZE;
- ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+ ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
+ ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
+
ps_proc->pu4_mb_pu_cnt += 1;
}
@@ -1139,7 +1228,8 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
/* update buffers pointers */
ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
- ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count;
+ ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
+ ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
/*
* Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
@@ -1147,7 +1237,892 @@ void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
*/
ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
- ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count;
+ ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
+ ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
+
ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
}
+
+
+/**
+*******************************************************************************
+*
+* @brief The function computes the B-skip candidate motion vectors used by ME
+*
+* @par Description:
+*  The function derives the co-located MB parameters, then writes
+*   - the spatial skip motion vectors to ps_proc->ps_skip_mv[0] (L0) and
+*     ps_proc->ps_skip_mv[1] (L1), and
+*   - the temporal skip motion vectors to ps_proc->ps_skip_mv[2] (L0) and
+*     ps_proc->ps_skip_mv[3] (L1).
+*  Neighbour information is read from the ME copies of the left / top /
+*  top-left PUs (s_left_mb_pu_ME, ps_top_row_pu_ME, s_top_left_mb_pu_ME).
+*
+* @param[in] ps_proc
+*  Pointer to process context
+*
+* @param[in] i4_reflist
+*  Unused
+*
+* @returns Prediction mode of the spatial skip candidate: PRED_L0, PRED_L1 or
+*  PRED_BI (PRED_BI is also returned when no neighbour offers a usable list)
+*
+* @remarks
+*  The code implements the logic as described in sec 8.4.1.2.2.
+*  It also computes co-located MB params according to sec 8.4.1.2.1.
+*  Temporal scaling follows sec 8.4.1.2.3; when both reference pictures have
+*  the same POC the scaling is bypassed (mvL0 = mvCol, mvL1 = 0) so that no
+*  division by zero can occur.
+*
+*  Need to add condition for this function to be used in ME
+*
+*******************************************************************************/
+WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
+{
+    /* Colzero for co-located MB */
+    WORD32 i4_colzeroflag;
+
+    /* motion vectors for neighbouring MBs */
+    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
+
+    /* Variables to check if a particular MB is available */
+    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
+
+    /* Mode availability: bit 0 is raised for L0, bit 1 for L1 */
+    WORD32 i4_mode_avail;
+
+    /* mb neighbor availability */
+    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* Temp var */
+    WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
+
+    /* Colocated motion vector */
+    mv_t s_mvcol;
+
+    /* Colocated picture idx */
+    WORD32 i4_refidxcol;
+
+    UNUSED(i4_reflist);
+
+    /**************************************************************************
+     * Find co-located MB parameters
+     * See sec 8.4.1.2.1 for reference
+     **************************************************************************/
+    {
+        /*
+         * Find the co-located Mb and update the skip and pred appropriately
+         * 1) Default colpic is forward ref : Table 8-6
+         * 2) Default mb col is current MB : Table 8-8
+         */
+
+        if (ps_proc->ps_colpu->b1_intra_flag)
+        {
+            /* Intra co-located MB: no motion, reference index marked invalid */
+            s_mvcol.i2_mvx = 0;
+            s_mvcol.i2_mvy = 0;
+            i4_refidxcol = -1;
+        }
+        else
+        {
+            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
+            {
+                /* Co-located MB is not L1-only: take its L0 vector */
+                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
+                i4_refidxcol = 0;
+            }
+            else
+            {
+                /* Co-located MB is L1-only: take its L1 vector */
+                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
+                i4_refidxcol = 0;
+            }
+        }
+
+        /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */
+        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
+                        && (ABS(s_mvcol.i2_mvy) <= 1));
+
+    }
+
+    /***************************************************************************
+     * Evaluating skip params : Spatial Skip
+     **************************************************************************/
+    {
+        /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
+        ps_a_pu = &ps_proc->s_left_mb_pu_ME;
+        ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
+
+        /* Neighbour C is top-right when available, otherwise top-left (D) */
+        i4_c_avail = 0;
+        if (ps_ngbr_avbl->u1_mb_c)
+        {
+            ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
+            i4_c_avail = 1;
+        }
+        else
+        {
+            ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
+            i4_c_avail = ps_ngbr_avbl->u1_mb_d;
+        }
+
+        i4_a = ps_ngbr_avbl->u1_mb_a;
+        i4_b = ps_ngbr_avbl->u1_mb_b;
+        i4_c = i4_c_avail;
+
+        /* Init to no mode avail */
+        i4_mode_avail = 0;
+        for (i = 0; i < 2; i++)
+        {
+            /* A neighbour predicting only from the other list cannot vouch for list i */
+            i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
+
+            i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
+            i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
+            i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
+        }
+
+        /* Both lists or neither list => bi-pred skip, else the single list found */
+        if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
+        {
+            i4_skip_type= PRED_BI;
+        }
+        else if(i4_mode_avail == 0x1)
+        {
+            i4_skip_type = PRED_L0;
+        }
+        else if(i4_mode_avail == 0x2)
+        {
+            i4_skip_type = PRED_L1;
+        }
+
+        /* Update skip MV for L0 */
+        if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
+        {
+            ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
+            ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
+        }
+        else
+        {
+            ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
+            ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
+        }
+
+        /* Update skip MV for L1 */
+        if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
+        {
+            ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
+            ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
+        }
+        else
+        {
+            ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
+            ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
+        }
+
+    }
+
+    /***************************************************************************
+     * Evaluating skip params : Temporal skip
+     **************************************************************************/
+    {
+        pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
+        WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
+
+        /* Temporal candidates live right after the two spatial ones */
+        enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
+
+        ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
+        ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
+
+        i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
+        i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
+
+        i4_tb = CLIP3(-128, 127, i4_tb);
+        i4_td = CLIP3(-128, 127, i4_td);
+
+        if (i4_td == 0)
+        {
+            /*
+             * Both references share the same POC. Sec 8.4.1.2.3 specifies
+             * mvL0 = mvCol and mvL1 = 0 here; scaling would divide by zero.
+             * The two LSBs are cleared as in the scaled path below.
+             */
+            ps_skip_mv[PRED_L0].s_mv.i2_mvx = s_mvcol.i2_mvx & 0xfffc;
+            ps_skip_mv[PRED_L0].s_mv.i2_mvy = s_mvcol.i2_mvy & 0xfffc;
+
+            ps_skip_mv[PRED_L1].s_mv.i2_mvx = 0;
+            ps_skip_mv[PRED_L1].s_mv.i2_mvy = 0;
+        }
+        else
+        {
+            i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
+            i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
+
+            /* Motion vectors taken in full pel resolution , hence -> (& 0xfffc) operation */
+            ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
+            ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
+
+            ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
+            ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
+        }
+
+    }
+
+    return i4_skip_type;
+}
+
+/**
+*******************************************************************************
+*
+* @brief The function computes the skip motion vectors for a B MB and tells
+*  whether the ME result of the current MB coincides with them
+*
+* @par Description:
+*  The spatial skip motion vectors for L0 and L1 are written to
+*  ps_proc->ps_skip_mv[0] and ps_proc->ps_skip_mv[1]. The motion information
+*  held in ps_proc->ps_pu is then compared against them.
+*
+* @param[in] ps_proc
+*  Pointer to process context
+*
+* @param[in] i4_reflist
+*  Unused
+*
+* @returns 1 if the prediction mode and motion vectors of the current MB match
+*  the skip parameters (the MB can be coded as skip), 0 otherwise
+*
+* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
+*  specification. It also computes co-located MB params according to sec 8.4.1.2.1
+*
+*******************************************************************************/
+WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
+{
+    /* neighbour availability of the current MB */
+    block_neighbors_t *ps_avail = ps_proc->ps_ngbr_avbl;
+
+    /* PU and syntax info of neighbours A (left), B (top), C (top right / top left) */
+    enc_pu_t *ps_pu_a, *ps_pu_b, *ps_pu_c;
+    mb_info_t *ps_syn_a, *ps_syn_b, *ps_syn_c;
+
+    /* neighbour usable for prediction : available and not intra */
+    WORD32 i4_use_a, i4_use_b, i4_use_c;
+
+    /* co-located MB parameters */
+    mv_t s_col_mv;
+    WORD32 i4_col_ref_idx;
+    WORD32 i4_col_zero;
+
+    /* bit 0 : L0 offered by a neighbour, bit 1 : L1 offered by a neighbour */
+    WORD32 i4_lists = 0;
+
+    /* loop index over the reference lists and the list to exclude */
+    WORD32 i4_list, i4_other_mode;
+
+    /* result of comparing the ME vectors with the skip vectors */
+    WORD32 i4_l0_same, i4_l1_same, i4_cur_mode;
+
+    UNUSED(i4_reflist);
+
+    /**************************************************************************
+     * Co-located MB parameters, sec 8.4.1.2.1
+     * Default colpic is forward ref (Table 8-6), default mb col is the
+     * current MB (Table 8-8)
+     **************************************************************************/
+    i4_col_ref_idx = ps_proc->ps_colpu->b1_intra_flag ? -1 : 0;
+
+    if (i4_col_ref_idx < 0)
+    {
+        /* intra co-located MB carries no motion */
+        s_col_mv.i2_mvx = 0;
+        s_col_mv.i2_mvy = 0;
+    }
+    else if (ps_proc->ps_colpu->b2_pred_mode == PRED_L1)
+    {
+        s_col_mv = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
+    }
+    else
+    {
+        s_col_mv = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
+    }
+
+    /* RefPicList1[ 0 ] is marked as "used for short-term reference", as default */
+    i4_col_zero = ((i4_col_ref_idx == 0)
+                    && (ABS(s_col_mv.i2_mvx) <= 1)
+                    && (ABS(s_col_mv.i2_mvy) <= 1));
+
+    /**************************************************************************
+     * Neighbours, sec 8.4.1.2.2
+     **************************************************************************/
+    ps_syn_a = &ps_proc->s_left_mb_syntax_ele;
+    ps_pu_a = &ps_proc->s_left_mb_pu;
+
+    ps_syn_b = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+    ps_pu_b = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
+
+    if (ps_avail->u1_mb_c)
+    {
+        /* top right MB */
+        ps_syn_c = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1;
+        ps_pu_c = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x + 1;
+        i4_use_c = 1;
+    }
+    else
+    {
+        /* fall back to the top left MB */
+        ps_syn_c = &ps_proc->s_top_left_mb_syntax_ele;
+        ps_pu_c = &ps_proc->s_top_left_mb_pu;
+        i4_use_c = ps_avail->u1_mb_d;
+    }
+
+    /* intra neighbours do not contribute */
+    i4_use_a = ps_avail->u1_mb_a & !ps_syn_a->u2_is_intra;
+    i4_use_b = ps_avail->u1_mb_b & !ps_syn_b->u2_is_intra;
+    i4_use_c = i4_use_c & !ps_syn_c->u2_is_intra;
+
+    /* collect, per list, whether any neighbour offers it */
+    for (i4_list = 0; i4_list < 2; i4_list++)
+    {
+        i4_other_mode = (i4_list == 0) ? PRED_L1 : PRED_L0;
+
+        i4_lists |= (i4_use_a && (ps_pu_a->b2_pred_mode != i4_other_mode)
+                        && (ps_pu_a->s_me_info[i4_list].i1_ref_idx != 0)) << i4_list;
+        i4_lists |= (i4_use_b && (ps_pu_b->b2_pred_mode != i4_other_mode)
+                        && (ps_pu_b->s_me_info[i4_list].i1_ref_idx != 0)) << i4_list;
+        i4_lists |= (i4_use_c && (ps_pu_c->b2_pred_mode != i4_other_mode)
+                        && (ps_pu_c->s_me_info[i4_list].i1_ref_idx != 0)) << i4_list;
+    }
+
+    /* skip MV of a list : predicted MV if the list is offered and colzero is not set, else zero */
+    for (i4_list = 0; i4_list < 2; i4_list++)
+    {
+        if ((i4_lists & (1 << i4_list)) && (!i4_col_zero))
+        {
+            ps_proc->ps_skip_mv[i4_list].s_mv.i2_mvx = ps_proc->ps_pred_mv[i4_list].s_mv.i2_mvx;
+            ps_proc->ps_skip_mv[i4_list].s_mv.i2_mvy = ps_proc->ps_pred_mv[i4_list].s_mv.i2_mvy;
+        }
+        else
+        {
+            ps_proc->ps_skip_mv[i4_list].s_mv.i2_mvx = 0;
+            ps_proc->ps_skip_mv[i4_list].s_mv.i2_mvy = 0;
+        }
+    }
+
+    /**************************************************************************
+     * Compare the ME information with the SKIP information
+     **************************************************************************/
+    i4_l0_same = (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
+                    && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy);
+
+    i4_l1_same = (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
+                    && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy);
+
+    i4_cur_mode = ps_proc->ps_pu->b2_pred_mode;
+
+    if (i4_cur_mode == PRED_BI)
+    {
+        /* bi-pred skip needs both lists, or none, offered by the neighbours */
+        return (i4_l0_same && i4_l1_same && (i4_lists == 0x3 || i4_lists == 0x0)) ? 1 : 0;
+    }
+
+    if (i4_cur_mode == PRED_L0)
+    {
+        return (i4_l0_same && (i4_lists == 0x1)) ? 1 : 0;
+    }
+
+    if (i4_cur_mode == PRED_L1)
+    {
+        return (i4_l1_same && (i4_lists == 0x2)) ? 1 : 0;
+    }
+
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function evaluates the bi-predictive candidates of the current
+*  mb and keeps the best one
+*
+* @par Description:
+*  The PRED_BI candidate list holds motion vector pairs: even entries are the
+*  L0 vectors and odd entries the matching L1 vectors. For every pair the two
+*  reference blocks are combined with pf_inter_pred_luma_bilinear into one of
+*  the first two sub pel buffers, the SAD against the source mb is computed
+*  and the cost is formed as (mv bits - skip bias) * lambda + distortion.
+*  The skip bias is applied to the first pair only, and only when the skip
+*  type is PRED_BI. When a pair beats the cost stored in ps_mb_ctxt_bi, its
+*  index, cost, distortion and prediction buffer are recorded there.
+*
+* @param[in] ps_me_ctxt
+*  Pointer to me context (candidates, reference/source buffers, mv bit table)
+*
+* @param[in] ps_proc
+*  Pointer to process context (predicted mvs and codec function pointers)
+*
+* @param[inout] ps_mb_ctxt_bi
+*  Best bi-pred result so far; i4_mb_cost and i4_mb_distortion are read as the
+*  values to beat and must be initialised by the caller
+*
+* @returns none
+*
+* @remarks The candidate vectors are reduced to full pel (>> 2) before use.
+*  When a candidate vector has a sub pel component, the best half pel buffer
+*  of the corresponding list is used as reference instead of the full pel
+*  reference buffer.
+*
+*******************************************************************************
+*/
+void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
+                            process_ctxt_t *ps_proc,
+                            mb_part_ctxt *ps_mb_ctxt_bi)
+{
+
+    UWORD32 i, u4_fast_sad;
+
+    /* Index of the sub pel buffer the next prediction is written to */
+    WORD32 i4_dest_buff;
+
+    mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
+
+    UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
+
+    UWORD8 *pu1_dst_buf;
+
+    WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
+
+    WORD32 i4_mb_distortion, i4_mb_cost;
+
+    u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    i4_dest_buff = 0;
+    for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
+    {
+        pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
+
+        /* Candidate pair in full pel units : entry i is L0, entry i + 1 is L1 */
+        s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
+        s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
+        s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
+        s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
+
+        ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
+        ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
+
+        /* L0 reference : half pel buffer if the vector has a sub pel part */
+        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
+                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
+        {
+            pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
+            i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
+        }
+        else
+        {
+            pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
+            i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
+        }
+
+
+        /* L1 reference : half pel buffer if the vector has a sub pel part */
+        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
+                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
+        {
+            pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
+            i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
+        }
+        else
+        {
+            pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
+            i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
+        }
+
+        /* Combine both references into the destination buffer */
+        ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
+                        pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
+                        i4_ref_l0_stride, i4_ref_l1_stride,
+                        ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
+
+        /* SAD of the combined prediction; the best distortion so far is passed as the limit */
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
+                        ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
+                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
+                        ps_mb_ctxt_bi->i4_mb_distortion, &i4_mb_distortion);
+
+        /* compute cost : bits of the mv difference, per component and per list */
+        i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[( s_l0_mv.i2_mvx << 2 ) - ps_l0_pred_mv->i2_mvx];
+        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l0_mv.i2_mvy << 2 ) - ps_l0_pred_mv->i2_mvy];
+        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l1_mv.i2_mvx << 2 ) - ps_l1_pred_mv->i2_mvx];
+        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[( s_l1_mv.i2_mvy << 2 ) - ps_l1_pred_mv->i2_mvy];
+
+        /* Favour the first pair when the skip type is bi-pred */
+        i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
+
+
+        i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
+        i4_mb_cost += i4_mb_distortion;
+
+        if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
+        {
+            ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
+            ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
+            ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
+            ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
+
+            /* Switch buffers so that the best prediction is not overwritten */
+            i4_dest_buff = (i4_dest_buff + 1) % 2;
+        }
+    }
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The mb that offers least cost is chosen as predicted mb and the
+* displacement of the predicted mb from index location of the current mb is
+* signaled as mv. The list of the mb's that are chosen in the reference frame
+* are dependent on the speed of the ME configured.
+*
+* The function proceeds in the following order:
+* 1) The search range is limited to the picture area around the current mb
+* 2) For every reference list (L0 only for P slices, L0 and L1 otherwise)
+* the skip candidate, the initial search candidates, full pel ME and, when
+* enabled, half pel ME are evaluated
+* 3) A skip candidate that is cheaper than the ME result of its list replaces it
+* 4) If both lists were evaluated and the minimum sad was not reached,
+* bi-prediction candidates are evaluated
+* 5) The cheapest mode over all evaluated lists updates the current mb and pu
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns motion vector of the pred mb, sad, cost.
+* (the function itself returns void; results are written to ps_proc)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
+{
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* Temp variables for looping over ref lists */
+ WORD32 i4_reflist, i4_max_reflist;
+
+ /* recon stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* source buffer for half pel generation functions */
+ UWORD8 *pu1_hpel_src;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* SAD thresholds, taken from the quantization parameters */
+ ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
+
+ /* Mb part ctxts for SKIP, one per reference list (L0, L1) */
+ mb_part_ctxt as_skip_mbpart[2];
+
+ {
+ WORD32 rows_above, rows_below, columns_left, columns_right;
+
+ /* During evaluation for motion vectors do not search through padded regions */
+ /* Obtain number of rows and columns that are effective for computing for me evaluation */
+ rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+ rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+ columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+ columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+ /* init srch range */
+ /* NOTE : For now, let's limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+ * (DEFAULT_MAX_SRCH_RANGE_Y / 2 vertically) on all sides.
+ */
+ ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+ ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+ /* this is to facilitate fast sub pel computation with minimal loads:
+ * the range is shrunk by one pel on every side when hpel is enabled */
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ ps_me_ctxt->i4_srch_range_w += 1;
+ ps_me_ctxt->i4_srch_range_e -= 1;
+ ps_me_ctxt->i4_srch_range_n += 1;
+ ps_me_ctxt->i4_srch_range_s -= 1;
+ }
+ }
+
+ /* Compute ME and store the MVs */
+ {
+ /***********************************************************************
+ * Compute ME for lists L0 and L1
+ * For L0 -> L0 skip + L0
+ * for L1 -> L0 skip + L0 + L1 skip + L1
+ * P slices evaluate L0 only, all other slice types evaluate L0 and L1
+ ***********************************************************************/
+ i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
+
+ /* Init the min sad state that is used for the early exit below */
+ ps_me_ctxt->u4_min_sad_reached = 0;
+ ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+ for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
+ {
+
+ /* Get the seed motion vector candidates */
+ ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
+
+ /* ****************************************************************
+ *Evaluate the SKIP for current list
+ * The skip cost is computed only when the skip type matches the
+ * current list; otherwise the cost stays at INT_MAX
+ * ****************************************************************/
+ as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
+ as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
+ as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
+ as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
+
+ if (ps_me_ctxt->i4_skip_type == i4_reflist)
+ {
+ ime_compute_skip_cost( ps_me_ctxt,
+ (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
+ &as_skip_mbpart[i4_reflist],
+ ps_proc->ps_codec->s_cfg.u4_enable_satqd,
+ i4_reflist,
+ (ps_proc->i4_slice_type == BSLICE) );
+ }
+
+ as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
+ as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
+
+ /******************************************************************
+ * Evaluate ME For current list
+ *****************************************************************/
+ ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
+ ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
+ ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
+ ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
+
+ /* Init Hpel */
+ ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
+
+ /* In case we found out the minimum SAD, exit the ME eval.
+ * i4_max_reflist is clamped so that the loops below only visit
+ * the lists that were initialized so far */
+ if (ps_me_ctxt->u4_min_sad_reached)
+ {
+ i4_max_reflist = i4_reflist;
+ break;
+ }
+
+
+ /* Evaluate search candidates for initial mv pt */
+ ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
+
+ /********************************************************************/
+ /* full pel motion estimation */
+ /********************************************************************/
+ ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
+
+ DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
+ (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+
+ DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
+
+ /* Scale the MV to qpel resolution */
+ ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
+ ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
+
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ /* moving src pointer to the converged motion vector location */
+ pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
+ + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
+ + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
+
+ ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
+ ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
+ ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
+
+ /* Init the search position to an invalid number */
+ ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
+
+ /* In case a buffer is still in use by L0, replace it with the spare buffer */
+ ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
+ ps_proc->apu1_subpel_buffs[3];
+
+
+ ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
+
+ /* half pel search is done for both sides of full pel,
+ * hence half_x of width x height = 17x16 is created
+ * starting from left half_x of converged full pel */
+ pu1_hpel_src -= 1;
+
+ /* computing half_x */
+ ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
+ ps_me_ctxt->apu1_subpel_buffs[0],
+ i4_rec_strd,
+ ps_me_ctxt->u4_subpel_buf_strd);
+
+ /*
+ * Halfpel search is done for both sides of full pel,
+ * hence half_y of width x height = 16x17 is created
+ * starting from top half_y of converged full pel
+ * for half_xy top_left is required
+ * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
+ */
+ pu1_hpel_src -= i4_rec_strd;
+
+ /* computing half_y and half_xy */
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+ pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
+ ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
+ ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
+ ps_me_ctxt->u4_subpel_buf_strd);
+
+ ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
+
+ }
+ }
+
+ /***********************************************************************
+ * If a particular skip MV is giving better cost, copy it to the
+ * corresponding MBPART
+ * In B slices this loop should go only to PREDL1: If we found min sad
+ * we will go to the skip ref list only
+ * Have to find a way to make it without too much change or new vars
+ **********************************************************************/
+ for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
+ {
+ if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
+ {
+ ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
+ ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
+ ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
+ }
+ }
+
+ /***********************************************************************
+ * Compute ME for BI
+ * In case of BI we do ME for three candidate pairs (6 MVs)
+ * 1) The skip MVs of L0 and L1 (two pairs, taken from the last two
+ * search candidates of each list)
+ * 2) The best L0 and L1 MVs after ME
+ *
+ * TODO
+ * one of the search candidates is skip. Hence it may be duplicated
+ ***********************************************************************/
+ if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
+ {
+ WORD32 i, j = 0;
+ WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
+ WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
+
+ /* Get the free buffers */
+ l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
+ l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
+
+ /* Search for the free buffers in subpel list, i.e. the ones not
+ * referenced by the L0 or L1 search position */
+ for (i = 0; i < SUBPEL_BUFF_CNT; i++)
+ {
+ if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
+ {
+ ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
+ j++;
+ }
+ }
+ ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
+
+ /* Copy the spatial SKIP MV of each list, scaled to qpel */
+ i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
+ i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
+
+ /* Copy the temporal SKIP MV of each list, scaled to qpel */
+ i4_l0_skip_mv_idx++;
+ i4_l1_skip_mv_idx++;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
+
+ /* Copy the best MV after ME (already in qpel units) */
+ ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
+ ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
+
+ ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
+
+ ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
+ ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
+
+ ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
+ &ps_me_ctxt->as_mb_part[PRED_BI]);
+
+ i4_max_reflist = PRED_BI;
+ }
+
+ /**********************************************************************
+ * Now get the minimum of MB part costs by searching over all ref lists
+ * b2_pred_mode is preset to 0x3 and is only overwritten when a list
+ * beats the cost already stored for the current mb
+ **********************************************************************/
+ ps_proc->ps_pu->b2_pred_mode = 0x3;
+
+ for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
+ {
+ if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+ {
+ ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
+ ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
+ ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
+ ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
+ }
+ }
+
+ /**********************************************************************
+ * In case we have a BI MB, we have to copy the buffers and set proper MV's
+ * 1)In case its BI, we need to get the best MVs given by BI and update
+ * to their corresponding MB part
+ * 2)We also need to copy the buffer in which bipred buff is populated
+ *
+ * For L0/L1 the best hpel buffer is copied only if one was selected
+ * NOTE(review): the original remark here ("Not that if we have") was
+ * left unfinished
+ **********************************************************************/
+ if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
+ {
+ WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
+ UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
+
+ ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
+ ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
+
+ /* Now we have to copy the buffers */
+ ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
+ ps_proc->pu1_best_subpel_buf,
+ ps_me_ctxt->u4_subpel_buf_strd,
+ ps_proc->u4_bst_spel_buf_strd,
+ MB_SIZE, MB_SIZE, NULL, 0);
+
+ }
+ else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
+ {
+ /* Now we have to copy the buffers */
+ ps_codec->pf_inter_pred_luma_copy(
+ ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
+ ps_proc->pu1_best_subpel_buf,
+ ps_me_ctxt->u4_subpel_buf_strd,
+ ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
+ NULL, 0);
+ }
+ }
+
+ /**************************************************************************
+ *Now copy the MVs to the current PU; they are copied as is, since the
+ *MB part MVs were already scaled to qpel above
+ ***************************************************************************/
+ ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
+ ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
+ ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
+ ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
+
+
+ ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
+ ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
+
+ /* number of partitions */
+ ps_proc->u4_num_sub_partitions = 1;
+ *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+ /* position in-terms of PU */
+ ps_proc->ps_pu->b4_pos_x = 0;
+ ps_proc->ps_pu->b4_pos_y = 0;
+
+ /* PU size */
+ ps_proc->ps_pu->b4_wd = 3;
+ ps_proc->ps_pu->b4_ht = 3;
+
+ /* Update min sad conditions of the current mb if the minimum sad was reached */
+ if (ps_me_ctxt->u4_min_sad_reached == 1)
+ {
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+ ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+ }
+}
+
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
index c4834a1..bd88a01 100644
--- a/encoder/ih264e_me.h
+++ b/encoder/ih264e_me.h
@@ -42,10 +42,10 @@
/*****************************************************************************/
/**
-******************************************************************************
+ ******************************************************************************
* @brief compute median of 3 elements (a, b, c) and store the output
* in to result. This is used for mv prediction
-******************************************************************************
+ ******************************************************************************
*/
#define MEDIAN(a, b, c, result) if (a > b){\
@@ -69,210 +69,285 @@
}\
}
-
-
/*****************************************************************************/
/* Extern Function Declarations */
/*****************************************************************************/
/**
-*******************************************************************************
-*
-* @brief
-* This function populates the length of the codewords for motion vectors in the
-* range (-search range, search range) in pixels
-*
-* @param[in] ps_me
-* Pointer to me ctxt
-*
-* @param[out] pu1_mv_bits
-* length of the codeword for all mv's
-*
-* @remarks The length of the code words are derived from signed exponential
-* goloumb codes.
-*
-*******************************************************************************
-*/
-void ih264e_init_mv_bits
- (
- me_ctxt_t *ps_me
- );
+ *******************************************************************************
+ *
+ * @brief
+ * This function populates the length of the codewords for motion vectors in the
+ * range (-search range, search range) in pixels
+ *
+ * @param[in] ps_me
+ * Pointer to me ctxt
+ *
+ * @param[out] pu1_mv_bits
+ * length of the codeword for all mv's
+ *
+ * @remarks The length of the code words are derived from signed exponential
+ * goloumb codes.
+ *
+ *******************************************************************************
+ */
+void ih264e_init_mv_bits(me_ctxt_t *ps_me);
/**
-*******************************************************************************
-*
-* @brief The function gives the skip motion vector
-*
-* @par Description:
-* The function gives the skip motion vector
-*
-* @param[in] ps_left_mb_pu
-* pointer to left mb motion vector info
-*
-* @param[in] ps_top_row_pu
-* pointer to top & top right mb motion vector info
-*
-* @param[out] ps_pred_mv
-* pointer to candidate predictors for the current block
-*
-* @returns The x & y components of the MV predictor.
-*
-* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
-* specification.
-*
-*******************************************************************************
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a P skip MB
+ *
+ * @par Description:
+ * The function computes the parameters for a P skip MB
+ *
+ * @param[in] ps_proc
+ * Process context
+ *
+ * @param[in] u4_for_me
+ * Flag to indicate the purpose of computing skip
+ *
+ * @param[out] ps_pred_mv
+ * Flag to indicate the current active refernce list
+ *
+ * @returns
+ * 1) Updates skip MV in proc
+ * 2) Returns if the current MB can be coded as skip or not
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ * specification.
+ *
+ *******************************************************************************
*/
-void ih264e_find_skip_motion_vector
- (
- process_ctxt_t *ps_proc,
- UWORD32 u4_for_me
- );
+ih264e_skip_params_ft ih264e_find_pskip_params;
/**
-*******************************************************************************
-*
-* @brief motion vector predictor
-*
-* @par Description:
-* The routine calculates the motion vector predictor for a given block,
-* given the candidate MV predictors.
-*
-* @param[in] ps_left_mb_pu
-* pointer to left mb motion vector info
-*
-* @param[in] ps_top_row_pu
-* pointer to top & top right mb motion vector info
-*
-* @param[out] ps_pred_mv
-* pointer to candidate predictors for the current block
-*
-* @returns The x & y components of the MV predictor.
-*
-* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
-* specification.
-* Assumptions : 1. Assumes Single reference frame
-* 2. Assumes Only partition of size 16x16
-*
-*******************************************************************************
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a P skip MB
+ *
+ * @par Description:
+ * The function computes the parameters for a P skip MB
+ *
+ * @param[in] ps_proc
+ * Process context
+ *
+ * @param[in] u4_for_me
+ * Flag to indicate the purpose of computing skip
+ *
+ * @param[out] ps_pred_mv
+ * Flag to indicate the current active refernce list
+ *
+ * @returns
+ * 1) Updates skip MV in proc
+ * 2) Returns if the current MB can be coded as skip or not
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ * specification.
+ *
+ *******************************************************************************
*/
-void ih264e_get_mv_predictor
- (
- enc_pu_t *ps_left_mb_pu,
- enc_pu_t *ps_top_row_pu,
- mv_t *ps_pred_mv
- );
+ih264e_skip_params_ft ih264e_find_pskip_params_me;
/**
-*******************************************************************************
-*
-* @brief This function computes the best motion vector for the current mb
-*
-* @par Description:
-* This function currently does nothing except set motion vectors from external
-* source
-*
-* @param[in] ps_proc
-* Process context corresponding to the job
-*
-* @returns none
-*
-* @remarks none
-*
-*******************************************************************************
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a B skip MB
+ *
+ * @par Description:
+ * The function computes the parameters for a B skip MB
+ *
+ * @param[in] ps_proc
+ * Process context
+ *
+ * @param[in] u4_for_me
+ * Flag to indicate the purpose of computing skip
+ *
+ * @param[out] ps_pred_mv
+ * Flag to indicate the current active refernce list
+ *
+ * @returns
+ * 1) Updates skip MV in proc
+ * 2) Returns if the current MB can be coded as skip or not
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+ * specification.
+ *
+ *******************************************************************************
*/
-void ih264e_compute_me
- (
- process_ctxt_t *ps_proc
- );
+ih264e_skip_params_ft ih264e_find_bskip_params;
/**
-*******************************************************************************
-*
-* @brief This function initializes me ctxt
-*
-* @par Description:
-* Before dispatching the current job to me thread, the me context associated
-* with the job is initialized.
-*
-* @param[in] ps_proc
-* Process context corresponding to the job
-*
-* @returns none
-*
-* @remarks none
-*
-*******************************************************************************
+ *******************************************************************************
+ *
+ * @brief The function computes the parameters for a B skip MB
+ *
+ * @par Description:
+ * The function computes the parameters for a B skip MB
+ *
+ * @param[in] ps_proc
+ * Process context
+ *
+ * @param[in] u4_for_me
+ * Flag to indicate the purpose of computing skip
+ *
+ * @param[out] ps_pred_mv
+ * Flag to indicate the current active refernce list
+ *
+ * @returns
+ * 1) Updates skip MV in proc
+ * 2) The type of SKIP [L0/L1/BI]
+ *
+ * @remarks
+ *******************************************************************************
*/
+ih264e_skip_params_ft ih264e_find_bskip_params_me;
+
+/**
+ *******************************************************************************
+ *
+ * @brief motion vector predictor
+ *
+ * @par Description:
+ * The routine calculates the motion vector predictor for a given block,
+ * given the candidate MV predictors.
+ *
+ * @param[in] ps_left_mb_pu
+ * pointer to left mb motion vector info
+ *
+ * @param[in] ps_top_row_pu
+ * pointer to top & top right mb motion vector info
+ *
+ * @param[out] ps_pred_mv
+ * pointer to candidate predictors for the current block
+ *
+ * @returns The x & y components of the MV predictor.
+ *
+ * @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+ * specification.
+ * Assumptions : 1. Assumes Only partition of size 16x16
+ *
+ *******************************************************************************
+ */
+void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu, enc_pu_t *ps_top_row_pu,
+ enc_pu_mv_t *ps_pred_mv, WORD32 i4_ref_list);
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function evaluates ME for 2 reference lists
+ *
+ * @par Description:
+ * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+ih264e_compute_me_ft ih264e_compute_me_multi_reflist;
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function evaluates ME for single reflist [Pred L0]
+ *
+ * @par Description:
+ * It evaluates skip, full-pel and half-pel and assigns the correct MV in proc
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+ih264e_compute_me_ft ih264e_compute_me_single_reflist;
+
+/**
+ *******************************************************************************
+ *
+ * @brief This function initializes me ctxt
+ *
+ * @par Description:
+ * Before dispatching the current job to me thread, the me context associated
+ * with the job is initialized.
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
void ih264e_init_me(process_ctxt_t *ps_proc);
/**
-*******************************************************************************
-*
-* @brief This function performs motion estimation for the current NMB
-*
-* @par Description:
-* Intializes input and output pointers required by the function ih264e_compute_me
-* and calls the function ih264e_compute_me in a loop to process NMBs.
-*
-* @param[in] ps_proc
-* Process context corresponding to the job
-*
-* @returns
-*
-* @remarks none
-*
-*******************************************************************************
-*/
-void ih264e_compute_me_nmb
- (
- process_ctxt_t *ps_proc,
- UWORD32 u4_nmb_count
- );
+ *******************************************************************************
+ *
+ * @brief This function performs motion estimation for the current NMB
+ *
+ * @par Description:
+ * Initializes input and output pointers required by the function ih264e_compute_me
+ * and calls the function ih264e_compute_me in a loop to process NMBs.
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count);
/**
-*******************************************************************************
-*
-* @brief This function performs MV prediction
-*
-* @par Description:
-*
-* @param[in] ps_proc
-* Process context corresponding to the job
-*
-* @returns none
-*
-* @remarks none
-* This function will update the MB availability since intra inter decision
-* should be done before the call
-*
-*******************************************************************************
-*/
-void ih264e_mv_pred
- (
- process_ctxt_t *ps_proc
- );
+ *******************************************************************************
+ *
+ * @brief This function performs MV prediction
+ *
+ * @par Description:
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns none
+ *
+ * @remarks none
+ * This function will update the MB availability since intra inter decision
+ * should be done before the call
+ *
+ *******************************************************************************
+ */
+void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_reflist);
/**
-*******************************************************************************
-*
-* @brief This function approximates Pred. MV
-*
-* @par Description:
-*
-* @param[in] ps_proc
-* Process context corresponding to the job
-*
-* @returns none
-*
-* @remarks none
-* Motion estimation happens at nmb level. For cost calculations, mv is appro
-* ximated using this function
-*
-*******************************************************************************
-*/
-void ih264e_mv_pred_me
- (
- process_ctxt_t *ps_proc
- );
+ *******************************************************************************
+ *
+ * @brief This function approximates Pred. MV
+ *
+ * @par Description:
+ *
+ * @param[in] ps_proc
+ * Process context corresponding to the job
+ *
+ * @returns none
+ *
+ * @remarks none
+ * Motion estimation happens at nmb level. For cost calculations, mv is
+ * approximated using this function
+ *
+ *******************************************************************************
+ */
+void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list);
#endif /* IH264E_ME_H_ */
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
index bc0e873..f1e6e61 100644
--- a/encoder/ih264e_modify_frm_rate.c
+++ b/encoder/ih264e_modify_frm_rate.c
@@ -57,14 +57,17 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ih264e_defs.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_rc_mem_interface.h"
#include "ih264e_time_stamp.h"
@@ -103,7 +106,7 @@ WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_ra
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static pd_frm_rate_t s_temp_pd_frm_rate_t;
+ pd_frm_rate_t s_temp_pd_frm_rate_t;
/* Hack for al alloc, during which we dont have any state memory.
Dereferencing can cause issues */
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
index 670428e..850cefc 100644
--- a/encoder/ih264e_process.c
+++ b/encoder/ih264e_process.c
@@ -68,8 +68,8 @@
#include "ih264_defs.h"
#include "ih264_debug.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -78,20 +78,21 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264_platform_macros.h"
#include "ih264_macros.h"
-#include "ih264_error.h"
#include "ih264_buf_mgr.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
-#include "ih264_structs.h"
#include "ih264_common_tables.h"
#include "ih264_list.h"
#include "ih264e_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_process.h"
#include "ithread.h"
#include "ih264e_intra_modes_eval.h"
@@ -105,15 +106,11 @@
#include "ih264e_deblk.h"
#include "ih264e_me.h"
#include "ih264e_debug.h"
-#include "ih264e_process.h"
#include "ih264e_master.h"
#include "ih264e_utils.h"
#include "irc_mem_req_and_acq.h"
-#include "irc_cntrl_param.h"
-#include "irc_frame_info_collector.h"
#include "irc_rate_control_api.h"
#include "ih264e_platform_macros.h"
-#include "ih264_padding.h"
#include "ime_statistics.h"
@@ -141,7 +138,7 @@
IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
{
/* choose between ping-pong process buffer set */
- WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* entropy ctxt */
entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
@@ -274,7 +271,6 @@ IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
*
*******************************************************************************
*/
-#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + WORD_SIZE - ps_bitstream->i4_bits_left_in_cw)
IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
{
@@ -284,6 +280,9 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
/* entropy context */
entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+ /* cabac context */
+ cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
+
/* sps */
sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
@@ -309,12 +308,12 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
UWORD8 *pu1_entropy_map_curr;
/* proc base idx */
- WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* temp var */
WORD32 i4_wd_mbs, i4_ht_mbs;
UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
-
+ WORD32 bitstream_start_offset, bitstream_end_offset;
/********************************************************************/
/* BEGIN INIT */
/********************************************************************/
@@ -391,6 +390,13 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
/* once start of frame / slice is done, you can reset it */
/* it is the responsibility of the caller to set this flag */
ps_entropy->i4_sof = 0;
+
+ if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
+ {
+ BITSTREAM_BYTE_ALIGN(ps_bitstrm);
+ BITSTREAM_FLUSH(ps_bitstrm);
+ ih264e_init_cabac_ctxt(ps_entropy);
+ }
}
/* begin entropy coding for the mb set */
@@ -399,7 +405,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
/* init ptrs/indices */
if (ps_entropy->i4_mb_x == i4_wd_mbs)
{
- ps_entropy->i4_mb_y ++;
+ ps_entropy->i4_mb_y++;
ps_entropy->i4_mb_x = 0;
/* packed mb coeff data */
@@ -411,7 +417,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
/* proc map */
- pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+ pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
/* entropy map */
pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
@@ -430,20 +436,31 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
volatile UWORD8 *pu1_buf1;
WORD32 idx = ps_entropy->i4_mb_x;
- pu1_buf1 = pu1_proc_map + idx;
- if(*pu1_buf1)
+ pu1_buf1 = pu1_proc_map + idx;
+ if (*pu1_buf1)
break;
ithread_yield();
}
+
/* write mb layer */
- ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy);
+ ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
/* set entropy map */
pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
- u4_mb_idx ++;
- ps_entropy->i4_mb_x ++;
+ u4_mb_idx++;
+ ps_entropy->i4_mb_x++;
+ /* check for eof */
+ if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
+ {
+ if (ps_entropy->i4_mb_x < i4_wd_mbs)
+ {
+ ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
+ }
+ }
if (ps_entropy->i4_mb_x == i4_wd_mbs)
{
@@ -459,39 +476,65 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
/* No need to open a slice at end of frame. The current slice can be closed at the time
* of signaling eof flag.
*/
- if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
+ if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
+ != pu1_slice_idx[u4_mb_idx]))
{
- /* mb skip run */
- if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
- {
- if (*ps_entropy->pi4_mb_skip_run)
+ if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
+ { /* mb skip run */
+ if ((i4_slice_type != ISLICE)
+ && *ps_entropy->pi4_mb_skip_run)
{
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
- *ps_entropy->pi4_mb_skip_run = 0;
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
}
+ /* put rbsp trailing bits for the previous slice */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+ }
+ else
+ {
+ ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
}
-
- /* put rbsp trailing bits for the previous slice */
- ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
/* update slice header pointer */
i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
- ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
+ ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
/* populate slice header */
ps_entropy->i4_mb_start_add = u4_mb_idx;
- ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+ ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
+ ps_sps);
/* generate slice header */
- ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
- ps_pps, ps_sps);
+ ps_entropy->i4_error_code |= ih264e_generate_slice_header(
+ ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
+ if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
+ {
+ BITSTREAM_BYTE_ALIGN(ps_bitstrm);
+ BITSTREAM_FLUSH(ps_bitstrm);
+ ih264e_init_cabac_ctxt(ps_entropy);
+ }
+ }
+ else
+ {
+ if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
+ && u4_mb_idx != u4_mb_cnt)
+ {
+ ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
+ }
}
}
-
/* Dont execute any further instructions until store synchronization took place */
DATA_SYNC();
}
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
+ ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
+ bitstream_end_offset - bitstream_start_offset;
}
/* check for eof */
@@ -500,30 +543,47 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
/* set end of frame flag */
ps_entropy->i4_eof = 1;
}
+ else
+ {
+ if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
+ && ps_codec->s_cfg.e_slice_mode
+ != IVE_SLICE_MODE_BLOCKS)
+ {
+ ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
+ }
+ }
if (ps_entropy->i4_eof)
{
- /* mb skip run */
- if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+ if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
{
- if (*ps_entropy->pi4_mb_skip_run)
+ /* mb skip run */
+ if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
{
- PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
- *ps_entropy->pi4_mb_skip_run = 0;
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
+ PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
+ ps_entropy->i4_error_code, "mb skip run");
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
}
+ /* put rbsp trailing bits */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+ }
+ else
+ {
+ ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
}
-
- /* put rbsp trailing bits */
- ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
/* update current frame stats to rc library */
- if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
{
/* number of bytes to stuff */
WORD32 i4_stuff_bytes;
/* update */
- i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt);
+ i4_stuff_bytes = ih264e_update_rc_post_enc(
+ ps_codec, ctxt_sel,
+ (ps_proc->ps_codec->i4_poc == 0));
/* cbr rc - house keeping */
if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
@@ -537,10 +597,21 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
}
}
+ /*
+ * Frame number is to be incremented only if the current frame is a
+ * reference frame. After each successful (non-skipped) frame encode, we
+ * increment the frame number by 1.
+ */
+ if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
+ && ps_codec->u4_is_curr_frm_ref)
+ {
+ ps_codec->i4_frame_num++;
+ }
/********************************************************************/
/* signal the output */
/********************************************************************/
- ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;
+ ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
+ ps_entropy->ps_bitstrm->u4_strm_buf_offset;
DEBUG("entropy status %x", ps_entropy->i4_error_code);
}
@@ -679,9 +750,9 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
i2_mv_ptr = (WORD16 *)pu1_ptr;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx;
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
- *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy;
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
/* end of mb layer */
ps_proc->pv_mb_header_data = i2_mv_ptr;
@@ -697,6 +768,79 @@ IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
/* end of mb layer */
ps_proc->pv_mb_header_data = pu1_ptr;
}
+ else if(u4_mb_type == B16x16)
+ {
+
+ /* pointer to mb header storage space */
+ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+ WORD16 *i2_mv_ptr;
+
+ UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
+
+ /* mb type plus mode */
+ *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
+
+ /* cbp */
+ *pu1_ptr++ = ps_proc->u4_cbp;
+
+ /* mb qp delta */
+ *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+ /* l0 & l1 me data */
+ i2_mv_ptr = (WORD16 *)pu1_ptr;
+
+ if (u4_pred_mode != PRED_L1)
+ {
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
+ - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
+
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
+ - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
+ }
+ if (u4_pred_mode != PRED_L0)
+ {
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
+ - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
+
+ *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
+ - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
+ }
+
+ /* end of mb layer */
+ ps_proc->pv_mb_header_data = i2_mv_ptr;
+
+ }
+ else if(u4_mb_type == BDIRECT)
+ {
+ /* pointer to mb header storage space */
+ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+ /* mb type plus mode */
+ *pu1_ptr++ = u4_mb_type;
+
+ /* cbp */
+ *pu1_ptr++ = ps_proc->u4_cbp;
+
+ /* mb qp delta */
+ *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+ ps_proc->pv_mb_header_data = pu1_ptr;
+
+ }
+ else if(u4_mb_type == BSKIP)
+ {
+ UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
+
+ /* pointer to mb header storage space */
+ UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+ /* mb type plus mode */
+ *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
+
+ /* end of mb layer */
+ ps_proc->pv_mb_header_data = pu1_ptr;
+ }
return IH264E_SUCCESS;
}
@@ -788,12 +932,11 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
/* mb type, mb class, csbp */
*ps_top_left_syn = *ps_top_syn;
- if (ps_proc->i4_slice_type == PSLICE)
+ if (ps_proc->i4_slice_type != ISLICE)
{
/*****************************************/
/* update top left with top info results */
/*****************************************/
-
/* mv */
*ps_top_left_mb_pu = *ps_top_row_pu;
}
@@ -832,17 +975,13 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
}
- if (ps_proc->i4_slice_type == PSLICE)
+ if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
{
/* mv */
*ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
-
-// /* reset ngbr mv's */
-// ps_top_row_pu->i1_l0_ref_idx = -1;
-// ps_top_row_pu->s_l0_mv = zero_mv;
-//
-// *ps_left_mb_pu = *ps_top_row_pu;
}
+
+ *ps_proc->pu4_mb_pu_cnt = 1;
}
else
{
@@ -898,7 +1037,7 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
s_job.i2_mb_y = ps_proc->i4_mb_y;
/* proc base idx */
- s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
+ s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
/* queue the job */
error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
@@ -929,7 +1068,8 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
/* update buffers pointers */
ps_proc->pu1_src_buf_luma += MB_SIZE;
ps_proc->pu1_rec_buf_luma += MB_SIZE;
- ps_proc->pu1_ref_buf_luma += MB_SIZE;
+ ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
+ ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
/*
* Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
@@ -937,7 +1077,9 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
*/
ps_proc->pu1_src_buf_chroma += MB_SIZE;
ps_proc->pu1_rec_buf_chroma += MB_SIZE;
- ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+ ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
+ ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
+
/* Reset cost, distortion params */
@@ -948,6 +1090,10 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
ps_proc->pu4_mb_pu_cnt += 1;
+ /* Update colocated pu */
+ if (ps_proc->i4_slice_type == BSLICE)
+ ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
+
/* deblk ctxts */
if (ps_proc->u4_disable_deblock_level != 1)
{
@@ -1004,6 +1150,7 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
/* strides */
WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
/* quant params */
@@ -1035,43 +1182,54 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
i4_mb_y = ps_proc->i4_mb_y;
/* Number of mbs processed in one loop of process function */
- ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
- ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
+ ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
+ ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
+ /* init buffer pointers */
convert_uv_only = 1;
- if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1))
+ if (u4_pad_bottom_sz || u4_pad_right_sz ||
+ ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
{
- u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
+ if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
+ u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
+ i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
convert_uv_only = 0;
-
}
else
+ {
+ i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
+ }
- /* init buffer pointers */
if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
- ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
+ ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
+ u4_pad_bottom_sz || u4_pad_right_sz)
{
if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
(ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
+ i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
}
else
{
- ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE);
+ i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
+ ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
}
ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
- ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
- ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+ /* Temporal backward and forward reference buffers */
+ ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+ ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+ ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+ ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
/*
* Do color space conversion
@@ -1084,9 +1242,9 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
/* In case of 420 semi-planar input, copy last few rows to intermediate
buffer as chroma trans functions access one extra byte due to interleaved input.
This data will be padded if required */
- if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
+ if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
{
- WORD32 num_rows = ps_codec->s_cfg.u4_disp_ht & 0xF;
+ WORD32 num_rows = MB_SIZE;
UWORD8 *pu1_src;
UWORD8 *pu1_dst;
WORD32 i;
@@ -1095,11 +1253,16 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
pu1_dst = ps_proc->pu1_src_buf_luma;
- for (i = 0; i < num_rows; i++)
- {
- memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
- pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
- pu1_dst += ps_proc->i4_src_strd;
+ /* If padding is required we always copy luma; if padding isn't required we never copy luma. */
+ if (u4_pad_bottom_sz || u4_pad_right_sz) {
+ if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
+ num_rows = MB_SIZE - u4_pad_bottom_sz;
+ for (i = 0; i < num_rows; i++)
+ {
+ memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
+ pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
+ pu1_dst += ps_proc->i4_src_strd;
+ }
}
pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
@@ -1108,12 +1271,15 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
/* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
* due to interleaved input
*/
- num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
+ if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
+ num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
+ else
+ num_rows = BLK8x8SIZE;
for (i = 0; i < num_rows; i++)
{
memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
- pu1_dst += ps_proc->i4_src_strd;
+ pu1_dst += ps_proc->i4_src_chroma_strd;
}
}
@@ -1137,7 +1303,7 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
- ps_proc->i4_src_strd, ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
convert_uv_only);
break;
@@ -1150,8 +1316,8 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
ps_proc->pu1_src_buf_chroma,
ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
- ps_proc->i4_src_strd, ps_proc->i4_src_strd,
- ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
+ ps_proc->i4_src_chroma_strd,
ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
break;
@@ -1159,8 +1325,7 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
break;
}
- if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) &&
- (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) )
+ if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
{
UWORD32 u4_pad_wd, u4_pad_ht;
u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
@@ -1175,7 +1340,7 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
ih264_pad_right_chroma(
ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
- ps_proc->i4_src_strd, u4_pad_ht / 2, u4_pad_wd);
+ ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
}
/* pad bottom edge */
@@ -1184,8 +1349,8 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
- ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
- ps_proc->i4_src_strd, ps_proc->i4_src_strd, (u4_pad_bottom_sz / 2));
+ ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
+ ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
}
@@ -1206,7 +1371,12 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
/*********************************************************************/
/* init mv buffer ptr */
- ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+ ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
+ ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
+
+ /* Init co-located mv buffer */
+ ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
+ ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
if (i4_mb_y == 0)
{
@@ -1214,7 +1384,8 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
}
else
{
- ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+ ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
+ ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
}
ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
@@ -1743,34 +1914,69 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
WORD32 luma_idx, chroma_idx, is_intra;
/* temp variables */
- WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
- /* list of modes for evaluation */
+ /*
+ * list of modes for evaluation
+ * -------------------------------------------------------------------------
+ * Note on enabling I4x4 and I16x16
+ * At very low QPs the Hadamard transform in I16x16 will push up the maximum
+ * coeff value very high. CAVLC may not be able to represent the value and
+ * hence the stream may not be decodable in some clips.
+ * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
+ */
if (ps_proc->i4_slice_type == ISLICE)
{
- /* enable intra 16x16 */
- u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+ if (ps_proc->u4_frame_qp > 10)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
- /* enable intra 8x8 */
- u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+ /* enable intra 8x8 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+ }
/* enable intra 4x4 */
u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
+
}
else if (ps_proc->i4_slice_type == PSLICE)
{
- /* enable intra 16x16 */
- u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+ if (ps_proc->u4_frame_qp > 10)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+ }
/* enable intra 4x4 */
if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
{
u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
}
+ u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
- /* enable inter 16x16 */
+ /* enable inter P16x16 */
u4_valid_modes |= (1 << P16x16);
}
+ else if (ps_proc->i4_slice_type == BSLICE)
+ {
+ if (ps_proc->u4_frame_qp > 10)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+ }
+
+ /* enable intra 4x4 */
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ }
+ u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
+
+ /* enable inter B16x16 */
+ u4_valid_modes |= (1 << B16x16);
+ }
/* init entropy */
@@ -1806,7 +2012,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
(ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
/* evaluate inter 16x16 modes */
- if (u4_valid_modes & (1 << P16x16))
+ if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
{
/* compute nmb me */
if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
@@ -1823,9 +2029,9 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
- ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
- ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
@@ -1889,7 +2095,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
{
/* intra gating in inter slices */
/* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
- if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
+ if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
{
/* distortion of neighboring blocks */
WORD32 i4_distortion[4];
@@ -1906,6 +2112,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
}
+
/* If we are going to force intra we need to evaluate intra irrespective of gating */
if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
{
@@ -1933,10 +2140,10 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
{
ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
}
- }
- }
+ }
}
+ }
/* is intra */
if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
@@ -1955,13 +2162,14 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
is_intra = 0;
}
ps_proc->u4_is_intra = is_intra;
+ ps_proc->ps_pu->b1_intra_flag = is_intra;
/* redo MV pred of neighbors in the case intra mb */
/* TODO : currently called unconditionally, needs to be called only in the case of intra
* to modify neighbors */
if (ps_proc->i4_slice_type != ISLICE)
{
- ih264e_mv_pred(ps_proc);
+ ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
}
/* Perform luma mb core coding */
@@ -1973,18 +2181,18 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
/* coded block pattern */
ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
- /* mb skip */
- if (is_intra == 0)
+ if (!ps_proc->u4_is_intra)
{
- if (ps_proc->u4_cbp == 0)
+ if (ps_proc->i4_slice_type == BSLICE)
{
- /* get skip mv */
- UWORD32 u4_for_me = 0;
- ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
-
- /* skip ? */
- if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
- ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
+ if (ih264e_find_bskip_params(ps_proc, PRED_L0))
+ {
+ ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
+ }
+ }
+ else if(!ps_proc->u4_cbp)
+ {
+ if (ih264e_find_pskip_params(ps_proc, PRED_L0))
{
ps_proc->u4_mb_type = PSKIP;
}
@@ -2090,106 +2298,6 @@ UPDATE_MB_INFO:
*******************************************************************************
*
* @brief
-* function to receive frame qp and pic type before encoding
-*
-* @par Description:
-* Before encoding the frame, this function calls the rc library for frame qp
-* and picture type
-*
-* @param[in] ps_codec
-* Pointer to codec context
-*
-* @param[in] pic_cnt
-* pic count
-*
-* @param[out] pi4_pic_type
-* pic type
-
-* @returns skip_src
-* if the source frame rate and target frame rate are not identical, the encoder
-* skips few source frames. skip_src is set when the source need not be encoded.
-*
-* @remarks none
-*
-*******************************************************************************
-*/
-WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type)
-{
- /* rate control context */
- rate_control_ctxt_t *ps_rate_control = &ps_codec->s_rate_control;
-
- /* frame qp */
- UWORD8 u1_frame_qp;
-
- /* pic type */
- PIC_TYPE_T pic_type = PIC_NA;
-
- /* should src be skipped */
- WORD32 skip_src = 0;
-
- /* temp var */
- WORD32 delta_time_stamp = 1;
-
- /* see if the app requires any specific frame */
- if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
- {
- irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api);
- }
-
- /* call rate control lib to get curr pic type and qp to be used */
- skip_src = ih264e_rc_pre_enc(ps_rate_control->pps_rate_control_api,
- ps_rate_control->pps_pd_frm_rate,
- ps_rate_control->pps_time_stamp,
- ps_rate_control->pps_frame_time,
- delta_time_stamp,
- (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs),
- &ps_rate_control->e_pic_type,
- &u1_frame_qp);
-
- switch (ps_rate_control->e_pic_type)
- {
- case I_PIC:
- pic_type = PIC_I;
- break;
-
- case P_PIC:
- pic_type = PIC_P;
- break;
-
- case B_PIC:
- pic_type = PIC_B;
- break;
-
- default:
- break;
- }
-
- /* is idr? */
- if ((0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) ||
- ps_codec->force_curr_frame_type == IV_IDR_FRAME)
- {
- pic_type = PIC_IDR;
- }
-
- /* force frame tag is not sticky */
- if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
- {
- ps_codec->force_curr_frame_type = IV_NA_FRAME;
- }
-
- /* qp */
- ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
-
- /* pic type */
- *pi4_pic_type = pic_type;
-
- return skip_src;
-}
-
-/**
-*******************************************************************************
-*
-* @brief
* Function to update rc context after encoding
*
* @par Description
@@ -2214,7 +2322,7 @@ WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *p
*
*******************************************************************************
*/
-WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt)
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
{
/* proc set base idx */
WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
@@ -2295,18 +2403,11 @@ WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_
ps_codec->s_rate_control.pps_frame_time,
(ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
&rc_pic_type,
- pic_cnt,
+ i4_is_first_frm,
&ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
u1_frame_qp,
&ps_codec->s_rate_control.num_intra_in_prev_frame,
&ps_codec->s_rate_control.i4_avg_activity);
-
- /* in case the frame needs to be skipped, the frame num should not be incremented */
- if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
- {
- ps_codec->i4_frame_num --;
- }
-
return i4_stuffing_byte;
}
@@ -2358,7 +2459,7 @@ WORD32 ih264e_process_thread(void *pv_proc)
int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
/* codec context selector */
- WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h
index 9715434..9cfdac8 100644
--- a/encoder/ih264e_process.h
+++ b/encoder/ih264e_process.h
@@ -284,36 +284,6 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc);
*******************************************************************************
*
* @brief
-* function to receive frame qp and pic type before encoding
-*
-* @par Description:
-* Before encoding the frame, this function calls the rc library for frame qp
-* and picture type
-*
-* @param[in] ps_codec
-* Pointer to codec context
-*
-* @param[in] pic_cnt
-* pic count
-*
-* @param[out] pi4_pic_type
-* pic type
-
-* @returns skip_src
-* if the source frame rate and target frame rate are not identical, the encoder
-* skips few source frames. skip_src is set when the source need not be encoded.
-*
-* @remarks none
-*
-*******************************************************************************
-*/
-WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type);
-
-
-/**
-*******************************************************************************
-*
-* @brief
* Function to update rc context after encoding
*
* @par Description
diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c
index 1e2fe4f..1da2f03 100644
--- a/encoder/ih264e_rate_control.c
+++ b/encoder/ih264e_rate_control.c
@@ -63,6 +63,7 @@
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "ih264_common_tables.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_defs.h"
#include "ih264e_globals.h"
#include "irc_mem_req_and_acq.h"
@@ -75,7 +76,9 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_utils.h"
#include "irc_trace_support.h"
@@ -186,6 +189,7 @@ void ih264e_rc_init(void *pv_rc_api,
UWORD32 u4_peak_bit_rate,
UWORD32 u4_max_delay,
UWORD32 u4_intra_frame_interval,
+ WORD32 i4_inter_frm_int,
UWORD8 *pu1_init_qp,
WORD32 i4_max_inter_frm_int,
UWORD8 *pu1_min_max_qp,
@@ -230,6 +234,9 @@ void ih264e_rc_init(void *pv_rc_api,
u4_src_ticks = ih264e_frame_time_get_src_ticks(pv_frame_time);
u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
+ /* Init max_inter_frame int */
+ i4_max_inter_frm_int = (i4_inter_frm_int == 1) ? 2 : (i4_inter_frm_int + 2);
+
/* Initialize the rate control */
irc_initialise_rate_control(pv_rc_api, /* RC handle */
e_rate_control_type, /* RC algo type */
@@ -240,6 +247,7 @@ void ih264e_rc_init(void *pv_rc_api,
u4_src_frm_rate, /* Src frame_rate */
u4_max_delay, /* Max buffer delay */
u4_intra_frame_interval, /* Intra frm_interval */
+ i4_inter_frm_int, /* Inter frame interval */
pu1_init_qp, /* Init QP array[3]:[I][P][B] */
u4_max_cpb_size, /* Max VBV/CPB Buffer Size */
i4_max_inter_frm_int, /* Max inter frm_interval */
@@ -268,13 +276,13 @@ void ih264e_rc_init(void *pv_rc_api,
*
*******************************************************************************
*/
-picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no)
{
- WORD32 i4_pic_id = 0;
- WORD32 i4_pic_disp_order_no = 0;
picture_type_e e_rc_pic_type = P_PIC;
- irc_get_picture_details(pv_rc_api, &i4_pic_id, &i4_pic_disp_order_no,
+ irc_get_picture_details(pv_rc_api, pi4_pic_id, pi4_pic_disp_order_no,
&e_rc_pic_type);
return (e_rc_pic_type);
@@ -286,8 +294,9 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
* @brief Function to get rate control output before encoding
*
* @par Description
-* This function is called before encoding the current frame and gets the qp
-* for the current frame from rate control module
+* This function is called before queuing the current frame. It decides if we should
+* skip the current input buffer due to frame rate mismatch. It also updates RC about
+* the achievable frame rate
*
* @param[in] ps_rate_control_api
* Handle to rate control api
@@ -314,138 +323,58 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
* QP for current frame
*
* @returns
-* Skip or encode the current frame
+* Skip or queue the current frame
*
* @remarks
*
*******************************************************************************
*/
-WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
- void * ps_pd_frm_rate,
- void * ps_time_stamp,
- void * ps_frame_time,
- WORD32 i4_delta_time_stamp,
- WORD32 i4_total_mb_in_frame,
- picture_type_e *pe_vop_coding_type,
- UWORD8 *pu1_frame_qp)
+WORD32 ih264e_update_rc_framerates(void *ps_rate_control_api,
+ void *ps_pd_frm_rate,
+ void *ps_time_stamp,
+ void *ps_frame_time)
{
- WORD8 i4_skip_src = 0, i4_num_app_skips = 0;
+ WORD8 i4_skip_src = 0;
UWORD32 u4_src_not_skipped_for_dts = 0;
- /* Variables for the update_frm_level_info */
- WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
- WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
- WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
- WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
- WORD32 i4_total_frame_bits = 0;
- WORD32 i4_total_hdr_bits = 0;
- WORD32 i4_avg_mb_activity = 0;
- WORD32 i4_intra_frm_cost = 0;
- UWORD8 u1_is_scd = 0;
-
- /* Set all the MBs to Intra */
- ai4_tot_mb_in_type[0] = i4_total_mb_in_frame;
- ai4_tot_mb_in_type[1] = 0;
-
- /* If delta time stamp is greater than 1, do rcupdate that many times */
- for (i4_num_app_skips = 0; (i4_num_app_skips < i4_delta_time_stamp - 1); i4_num_app_skips++)
- {
- /*update the missing frames frm_rate with 0 */
- ih264e_update_pd_frm_rate(ps_pd_frm_rate,0);
-
- /* Update the time stamp */
- ih264e_update_time_stamp(ps_time_stamp);
-
- /* Do a pre encode skip update */
-
- irc_update_frame_level_info(ps_rate_control_api,
- (*pe_vop_coding_type),
- ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */
- i4_total_frame_bits, /* Total frame bits actually consumed */
- i4_total_hdr_bits, /*header bits for model updation*/
- ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
- ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */
- ai4_tot_mb_in_type, /* total number of mbs in each mb type */
- i4_avg_mb_activity, /* Average mb activity in frame */
- u1_is_scd, /* Is a scene change detected at the current frame */
- 1, /* If it's a pre-encode skip */
- i4_intra_frm_cost, /* Sum of Intra cost for each frame */
- 0); /* Is pic handling [irc_update_pic_handling_state] done before update */
- }
-
/* Update the time stamp for the current frame */
ih264e_update_time_stamp(ps_time_stamp);
/* Check if a src not needs to be skipped */
i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time,
- i4_delta_time_stamp,
+ 1,
&u4_src_not_skipped_for_dts);
- /***********************************************************************
- Based on difference in source and target frame rate frames are skipped
- ***********************************************************************/
if (i4_skip_src)
{
+ /***********************************************************************
+ *Based on difference in source and target frame rate frames are skipped
+ ***********************************************************************/
/*update the missing frames frm_rate with 0 */
- ih264e_update_pd_frm_rate(ps_pd_frm_rate,0);
-
- /* Do a pre encode skip update */
- irc_update_frame_level_info(ps_rate_control_api,
- (*pe_vop_coding_type),
- ai4_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */
- i4_total_frame_bits, /* Total frame bits actually consumed */
- i4_total_hdr_bits, /*header bits for model updation*/
- ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
- ai4_tot_mb_type_qp, /* Total qp of all MBs based on mb type */
- ai4_tot_mb_in_type, /* total number of mbs in each mb type */
- i4_avg_mb_activity, /* Average mb activity in frame */
- u1_is_scd, /* Is a scene change detected at the current frame */
- 1, /* If it's a pre-encode skip */
- i4_intra_frm_cost, /* Sum of Intra cost for each frame */
- 0); /* Is pic handling [irc_update_pic_handling_state] done before update */
-
- /* Set the current frame type to NA */
- *pe_vop_coding_type = BUF_PIC;
+ ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
}
else
{
-#define MAX_FRAME_BITS 0x7FFFFFFF
-// WORD32 i4_pic_id;
-// WORD32 i4_pic_disp_order_no;
WORD32 i4_avg_frm_rate, i4_source_frame_rate;
- i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+ i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(
+ ps_frame_time);
/* Update the frame rate of the frame present with the tgt_frm_rate */
/* If the frm was not skipped due to delta_time_stamp, update the
- frame_rate with double the tgt_frame_rate value, so that it makes
- up for one of the frames skipped by the application */
- ih264e_update_pd_frm_rate(ps_pd_frm_rate,
- i4_source_frame_rate);
+ frame_rate with double the tgt_frame_rate value, so that it makes
+ up for one of the frames skipped by the application */
+ ih264e_update_pd_frm_rate(ps_pd_frm_rate, i4_source_frame_rate);
/* Based on the update get the average frame rate */
i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
/* Call the RC library function to change the frame_rate to the
- actually achieved frm_rate */
+ actually achieved frm_rate */
irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
-
- /* --------Rate control related things. Get pic type and frame Qp---------*/
- /* Add picture to the stack. For IPP encoder we push the variable
- into the stack and get back the variables by requesting RC.
- This interface is designed for IPB encoder */
- irc_add_picture_to_stack(ps_rate_control_api, 1);
-
- /* Query the picture_type */
- *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api);
-
- /* Get current frame Qp */
- pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api,
- (picture_type_e)(pe_vop_coding_type[0]),
- MAX_FRAME_BITS);
}
- return(i4_skip_src);
+ return (i4_skip_src);
}
/**
@@ -678,8 +607,8 @@ WORD32 ih264e_rc_post_enc(void * ps_rate_control_api,
&u1_enc_buf_overflow,&u1_enc_buf_underflow);
/* We skip the frame if decoder buffer is underflowing. But we never skip first I frame */
- // if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
- if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
+ if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
+ // if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
{
irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]);
// i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc);
diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h
index de9466a..cca9ad3 100644
--- a/encoder/ih264e_rate_control.h
+++ b/encoder/ih264e_rate_control.h
@@ -90,6 +90,9 @@
* @param[in] u4_intra_frame_interval
* Intra frame interval
*
+* @param[in] i4_inter_frm_int
+* Inter frame interval
+*
* @param[in] pu1_init_qp
* Initial qp
*
@@ -120,6 +123,7 @@ void ih264e_rc_init(void *pv_rc_api,
UWORD32 u4_peak_bit_rate,
UWORD32 u4_max_delay,
UWORD32 u4_intra_frame_interval,
+ WORD32 i4_inter_frm_int,
UWORD8 *pu1_init_qp,
WORD32 i4_max_inter_frm_int,
UWORD8 *pu1_min_max_qp,
@@ -143,13 +147,15 @@ void ih264e_rc_init(void *pv_rc_api,
*
*******************************************************************************
*/
-picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no);
/**
*******************************************************************************
*
-* @brief Function to get rate control output before encoding
+* @brief Function to set frame rate inside RC.
*
* @par Description
* This function is called before encoding the current frame and gets the qp
@@ -167,18 +173,6 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
* @param[in] ps_frame_time
* Handle to frame time context
*
-* @param[in] i4_delta_time_stamp
-* Time stamp difference between frames
-*
-* @param[in] i4_total_mb_in_frame
-* Total Macro Blocks in frame
-*
-* @param[in/out] pe_vop_coding_type
-* Picture coding type(I/P/B)
-*
-* @param[in/out] pu1_frame_qp
-* QP for current frame
-*
* @returns
* Skip or encode the current frame
*
@@ -186,14 +180,11 @@ picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
*
*******************************************************************************
*/
-WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
- void * ps_pd_frm_rate,
- void * ps_time_stamp,
- void * ps_frame_time,
- WORD32 i4_delta_time_stamp,
- WORD32 i4_total_mb_in_frame,
- picture_type_e *pe_vop_coding_type,
- UWORD8 *pu1_frame_qp);
+WORD32 ih264e_update_rc_framerates(void *ps_rate_control_api,
+ void *ps_pd_frm_rate,
+ void *ps_time_stamp,
+ void *ps_frame_time
+ );
/**
*******************************************************************************
diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c
index e4d5781..a74513a 100644
--- a/encoder/ih264e_rc_mem_interface.c
+++ b/encoder/ih264e_rc_mem_interface.c
@@ -62,10 +62,10 @@
#include "iv2.h"
#include "ive2.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "ih264e.h"
#include "ithread.h"
-#include "ih264e.h"
#include "ih264_defs.h"
#include "ih264_debug.h"
#include "ih264_macros.h"
@@ -80,12 +80,14 @@
#include "ih264_deblk_edge_filters.h"
#include "ih264_common_tables.h"
#include "ih264_list.h"
+#include "ih264_cabac_tables.h"
#include "ih264e_error.h"
#include "ih264e_defs.h"
#include "ih264e_bitstream.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_master.h"
#include "ih264_buf_mgr.h"
@@ -93,12 +95,8 @@
#include "ih264e_utils.h"
#include "ih264e_platform_macros.h"
#include "ih264_cavlc_tables.h"
-#include "ih264e_config.h"
#include "ih264e_statistics.h"
#include "ih264e_trace.h"
-#include "ih264e_statistics.h"
-#include "ih264e_error.h"
-#include "ih264e_utils.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_cavlc.h"
#include "ih264e_rc_mem_interface.h"
@@ -332,7 +330,7 @@ WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
iv_mem_rec_t *ps_mem,
ITT_FUNC_TYPE_E e_func_type)
{
- static itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS];
+ itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS];
WORD32 i4_num_memtab = 0, j = 0;
void *refptr2[4];
void **refptr1[4];
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
index 1043a53..fc61277 100644
--- a/encoder/ih264e_structs.h
+++ b/encoder/ih264e_structs.h
@@ -39,6 +39,15 @@
#define IH264E_STRUCTS_H_
/*****************************************************************************/
+/* Structure definitions */
+/*****************************************************************************/
+
+/* Early declaration of structs */
+typedef struct _codec_t codec_t;
+typedef struct _proc_t process_ctxt_t;
+
+
+/*****************************************************************************/
/* Extern Function type definitions */
/*****************************************************************************/
@@ -154,6 +163,22 @@ typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf
WORD32 u4_422i_stride);
+
+/**
+******************************************************************************
+ * @brief ME evaluation
+******************************************************************************
+ */
+typedef void ih264e_compute_me_ft(process_ctxt_t *);
+
+/**
+******************************************************************************
+ * @brief SKIP decision
+******************************************************************************
+ */
+typedef WORD32 ih264e_skip_params_ft(process_ctxt_t *, WORD32);
+
+
/*****************************************************************************/
/* Enums */
/*****************************************************************************/
@@ -196,11 +221,27 @@ typedef enum
*/
typedef struct
{
+ /**
+ * Motion Vector
+ */
+ mv_t s_mv;
/**
- * L0 Motion Vector
+ * Ref index
*/
- mv_t s_l0_mv;
+ WORD8 i1_ref_idx;
+
+} enc_pu_mv_t;
+
+
+/*
+ * Total Pu info for an MB
+ */
+typedef struct
+{
+
+ /* Array with ME info for all lists */
+ enc_pu_mv_t s_me_info[2];
/**
* PU X position in terms of min PU (4x4) units
@@ -223,13 +264,18 @@ typedef struct
UWORD32 b4_ht : 2;
/**
- * L0 Ref index
+ * Intra or Inter flag for each partition - 0 or 1
*/
- WORD8 i1_l0_ref_idx;
+ UWORD32 b1_intra_flag : 1;
+
+ /**
+ * PRED_L0, PRED_L1, PRED_BI
+ */
+ UWORD32 b2_pred_mode : 2;
+
} enc_pu_t;
-typedef struct _codec_t codec_t;
typedef struct
{
@@ -336,7 +382,7 @@ typedef struct
UWORD32 u4_max_bitrate;
/** Maximum number of consecutive B frames */
- UWORD32 u4_max_num_bframes;
+ UWORD32 u4_num_bframes;
/** Content type Interlaced/Progressive */
IV_CONTENT_TYPE_T e_content_type;
@@ -473,9 +519,6 @@ typedef struct
/** IDR frame interval */
UWORD32 u4_idr_frm_interval;
- /** consecutive B frames */
- UWORD32 u4_num_b_frames;
-
/** Disable deblock level (0: Enable completely, 3: Disable completely */
UWORD32 u4_disable_deblock_level;
@@ -859,6 +902,10 @@ typedef struct
*/
typedef struct
{
+ /**
+ * Pointer to the cabac context
+ */
+ cabac_ctxt_t *ps_cabac;
/**
* start of frame / start of slice flag
@@ -1142,9 +1189,9 @@ typedef struct
WORD32 i4_mb_cost;
WORD32 i4_mb_distortion;
+ enc_pu_mv_t as_skip_mv[4];
- mv_t s_skip_mv;
- mv_t s_pred_mv;
+ enc_pu_mv_t as_pred_mv[2];
block_neighbors_t s_ngbr_avbl;
@@ -1165,7 +1212,7 @@ typedef struct
* @brief Pixel processing thread context
******************************************************************************
*/
-typedef struct
+struct _proc_t
{
/**
* entropy context
@@ -1210,12 +1257,12 @@ typedef struct
/**
* Ref pointer to current MB luma
*/
- UWORD8 *pu1_ref_buf_luma;
+ UWORD8 *apu1_ref_buf_luma[MAX_REF_PIC_CNT];
/**
* Ref pointer to current MB chroma
*/
- UWORD8 *pu1_ref_buf_chroma;
+ UWORD8 *apu1_ref_buf_chroma[MAX_REF_PIC_CNT];
/**
* pointer to luma plane of input buffer (base :: mb (0,0))
@@ -1230,7 +1277,7 @@ typedef struct
/**
* pointer to luma plane of ref buffer (base :: mb (0,0))
*/
- UWORD8 *pu1_ref_buf_luma_base;
+ UWORD8 *apu1_ref_buf_luma_base[MAX_REF_PIC_CNT];
/**
* pointer to chroma plane of input buffer (base :: mb (0,0))
@@ -1256,7 +1303,7 @@ typedef struct
/**
* pointer to chroma plane of reconstructed buffer (base :: mb (0,0))
*/
- UWORD8 *pu1_ref_buf_chroma_base;
+ UWORD8 *apu1_ref_buf_chroma_base[MAX_REF_PIC_CNT];
/**
* Pointer to ME NMB info
@@ -1266,12 +1313,16 @@ typedef struct
mb_info_nmb_t *ps_cur_mb;
/**
- * source stride
- * (strides for luma and chroma are the same)
+ * source luma stride
*/
WORD32 i4_src_strd;
/**
+ * source chroma stride
+ */
+ WORD32 i4_src_chroma_strd;
+
+ /**
* recon stride & ref stride
* (strides for luma and chroma are the same)
*/
@@ -1504,9 +1555,19 @@ typedef struct
enc_pu_t *ps_pu;
/**
+ * Pointer to the pu of current co-located MB in list 1
+ */
+ enc_pu_t *ps_colpu;
+
+ /**
* predicted motion vector
*/
- mv_t *ps_pred_mv;
+ enc_pu_mv_t *ps_skip_mv;
+
+ /**
+ * predicted motion vector
+ */
+ enc_pu_mv_t *ps_pred_mv;
/**
* top row mb syntax information base
@@ -1554,7 +1615,6 @@ typedef struct
*/
enc_pu_t s_top_left_mb_pu_ME;
-
/**
* mb neighbor availability pointer
*/
@@ -1590,11 +1650,6 @@ typedef struct
UWORD8 *pu1_top_mb_intra_modes;
/**
- * skip motion vector info
- */
- mv_t *ps_skip_mv;
-
- /**
* left mb motion vector
*/
enc_pu_t s_left_mb_pu;
@@ -1802,9 +1857,14 @@ typedef struct
/**
* Reference picture for the current picture
- * TODO: Only 1 reference assumed currently
+ * TODO: Only 2 references assumed currently
*/
- pic_buf_t *ps_ref_pic;
+ pic_buf_t *aps_ref_pic[MAX_REF_PIC_CNT];
+
+ /**
+ * Reference MV buff for the current picture
+ */
+ mv_buf_t *aps_mv_buf[MAX_REF_PIC_CNT];
/**
* frame info used by RC
@@ -1834,27 +1894,10 @@ typedef struct
*/
UWORD32 u4_compute_recon;
- /*
- * Buffer for holding half_x (1/2,1 - interpolated)
- * values when halfpel generation
- * for the entire plane is not enabled
- */
- UWORD8 *pu1_half_x;
-
/*
- * Buffer for holding half_x (1,1/2 - interpolated)
- * values when halfpel generation
- * for the entire plane is not enabled
+ * Temporary buffers to be used for subpel computation
*/
- UWORD8 *pu1_half_y;
-
- /*
- * Buffer for holding half_x (1/2,1/2 - interpolated)
- * values when halfpel generation
- * for the entire plane is not enabled
- *
- */
- UWORD8 *pu1_half_xy;
+ UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT];
/*
* Buffer holding best sub pel values
@@ -1866,7 +1909,7 @@ typedef struct
*/
UWORD32 u4_bst_spel_buf_strd;
-} process_ctxt_t;
+};
/**
******************************************************************************
@@ -1921,12 +1964,13 @@ typedef struct
struct _codec_t
{
/**
- * Number of coded pictures
+ * Id of current pic (input order)
*/
- WORD32 i4_coded_pic_cnt;
+ WORD32 i4_poc;
/**
* Number of encode frame API calls made
+ * This variable must only be used for context selection [Read only]
*/
WORD32 i4_encode_api_call_cnt;
@@ -1961,12 +2005,6 @@ struct _codec_t
IV_COLOR_FORMAT_T e_codec_color_format;
/**
- * source stride
- * (strides for luma and chroma are the same)
- */
- WORD32 i4_src_strd;
-
- /**
* recon stride
* (strides for luma and chroma are the same)
*/
@@ -2305,6 +2343,7 @@ struct _codec_t
*/
ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS];
+
/*
* Air pic cnt
* Contains the number of pictures that have been encoded with air
@@ -2319,12 +2358,16 @@ struct _codec_t
UWORD16 *pu2_intr_rfrsh_map;
/*
- * Alternate reference frames
* Indicates if the current frame is used as a reference frame
*/
UWORD32 u4_is_curr_frm_ref;
/*
+ * Indicates if there can be non reference frames in the stream
+ */
+ WORD32 i4_non_ref_frames_in_stream;
+
+ /*
* Memory for color space conversion for luma plane
*/
UWORD8 *pu1_y_csc_buf_base;
@@ -2510,6 +2553,18 @@ struct _codec_t
ime_compute_sad_ft *apf_compute_sad_16x16[2];
ime_compute_sad_ft *pf_compute_sad_16x8;
+
+ /**
+ * Function pointer for computing ME
+ * 1 for PSLICE and 1 for BSLICE
+ */
+ ih264e_compute_me_ft *apf_compute_me[2];
+
+ /**
+ * Function pointers for computing SKIP parameters
+ */
+ ih264e_skip_params_ft *apf_find_skip_params_me[2];
+
/**
* fn ptrs for memory handling operations
*/
@@ -2545,8 +2600,7 @@ struct _codec_t
/**
* write mb layer for a given slice I, P, B
*/
- IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt );
-
+ IH264E_ERROR_T (*pf_write_mb_syntax_layer[2][3]) ( entropy_ctxt_t *ps_ent_ctxt );
/**
* Output buffer
@@ -2562,5 +2616,22 @@ struct _codec_t
* rate control context
*/
rate_control_ctxt_t s_rate_control;
+
+ /**
+ * input buffer queue
+ */
+ inp_buf_t as_inp_list[MAX_NUM_BFRAMES];
+
+ /**
+ * IDR flags for each input
+ */
+ WORD32 i4_idr_inp_list[MAX_NUM_BFRAMES];
+
+ /*
+ *Flag to indicate if we have received the last input frame
+ */
+ WORD32 i4_last_inp_buff_received;
+
};
+
#endif /* IH264E_STRUCTS_H_ */
diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c
index a6a7f3c..cd829b5 100644
--- a/encoder/ih264e_time_stamp.c
+++ b/encoder/ih264e_time_stamp.c
@@ -67,6 +67,7 @@
#include "ih264_defs.h"
#include "ih264e_defs.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
@@ -78,6 +79,8 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_rc_mem_interface.h"
#include "ih264e_time_stamp.h"
@@ -221,7 +224,7 @@ WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static frame_time_t s_temp_frame_time_t;
+ frame_time_t s_temp_frame_time_t;
/* Hack for al alloc, during which we dont have any state memory.
Dereferencing can cause issues */
@@ -404,7 +407,7 @@ WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static time_stamp_t s_temp_time_stamp_t;
+ time_stamp_t s_temp_time_stamp_t;
/* Hack for al alloc, during which we dont have any state memory.
Dereferencing can cause issues */
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
index 3657f33..b339143 100644
--- a/encoder/ih264e_utils.c
+++ b/encoder/ih264e_utils.c
@@ -68,8 +68,8 @@
#include "ih264_defs.h"
#include "ih264_size_defs.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -78,6 +78,7 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264_common_tables.h"
#include "ih264_debug.h"
@@ -91,7 +92,9 @@
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_utils.h"
#include "ih264e_config.h"
#include "ih264e_statistics.h"
@@ -99,9 +102,7 @@
#include "ih264_list.h"
#include "ih264e_encode_header.h"
#include "ih264e_me.h"
-#include "ime_defs.h"
#include "ime.h"
-#include "ih264e_rate_control.h"
#include "ih264e_core_coding.h"
#include "ih264e_rc_mem_interface.h"
#include "ih264e_time_stamp.h"
@@ -116,6 +117,246 @@
/*****************************************************************************/
/**
+ *******************************************************************************
+ *
+ * @brief
+ * Queues the current buffer, gets back another buffer for encoding with the correct
+ * picture type
+ *
+ * @par Description:
+ * This function performs 4 distinct but related functions.
+ * 1) Maintains an input queue [Note that the term queue does not imply
+ * first-in first-out logic here] that queues inputs and dequeues them so
+ * that input frames can be encoded in any predetermined encoding order
+ * 2) Uses RC library to decide which frame must be encoded in current pass
+ * and which picture type it must be encoded to.
+ * 3) Uses RC library to decide the QP at which current frame has to be
+ * encoded
+ * 4) Determines if the current picture must be encoded or not based on
+ * PRE-ENC skip
+ *
+ * Input queue is used for storing input buffers till they are used for
+ * encoding. This queue is maintained at ps_codec->as_inp_list. Whenever a
+ * valid input comes, it is added to the end of queue. This same input is
+ * added to RC queue using the identifier as ps_codec->i4_pic_cnt. Hence any
+ * pic from RC can be located in the input queue easily.
+ *
+ * The dequeue operation does not start till we have ps_codec->s_cfg.u4_num_bframes
+ * frames in the queue. This is done in order to ensure that once output starts
+ * we will have a constant stream of output with no gaps.
+ *
+ * The output frame order is governed by the RC library. Whenever we dequeue a
+ * buffer from the RC library, it ensures that we will get them in encoding order.
+ * With the output of the RC library, we can use the picture id to dequeue the
+ * corresponding buffer from the input queue and encode it.
+ *
+ * Condition at the end of stream.
+ * -------------------------------
+ * At the last valid buffer from the app, we will get ps_ive_ip->u4_is_last
+ * to be set. This will be given to the lib when the appropriate input buffer is
+ * given for encoding.
+ *
+ * Since the output is not in sync with the input, we will have frames to
+ * encode even after we receive the last valid input buffer. Hence we have to
+ * make sure that we do not queue any new buffers once we get the flag [It may
+ * mess up GOP ?]. This is achieved by setting ps_codec->i4_last_inp_buff_received
+ * to act as a permanent marker for last frame received [This may not be needed,
+ * because in our current app, all buffers after the last are marked as last.
+ * But can we rely on that?] . Hence after this flag is set no new buffers are
+ * queued.
+ *
+ * @param[in] ps_codec
+ * Pointer to codec descriptor
+ *
+ * @param[in] ps_ive_ip
+ * Current input buffer to the encoder
+ *
+ * @param[out] ps_enc_buff
+ * Buffer to be encoded in the current pass
+ *
+ * @returns
+ * Flag indicating if we have a pre-enc skip or not
+ *
+ * @remarks
+ * TODO (bpic)
+ * The check for null and is last is redundant.
+ * Need to see if we can remove it
+ *
+ *******************************************************************************
+ */
+WORD32 ih264e_input_queue_update(codec_t *ps_codec,
+ ive_video_encode_ip_t *ps_ive_ip,
+ inp_buf_t *ps_enc_buff)
+{
+
+ inp_buf_t *ps_inp_buf;
+ picture_type_e e_pictype;
+ WORD32 i4_skip;
+ UWORD32 ctxt_sel, u4_pic_id, u4_pic_disp_id;
+ UWORD8 u1_frame_qp;
+ UWORD32 max_frame_bits = 0x7FFFFFFF;
+
+ /* Mark that the last input frame has been received */
+ if (ps_ive_ip->u4_is_last == 1)
+ {
+ ps_codec->i4_last_inp_buff_received = 1;
+ }
+
+ if (ps_ive_ip->s_inp_buf.apv_bufs[0] == NULL
+ && !ps_codec->i4_last_inp_buff_received)
+ {
+ ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
+ return 0;
+ }
+
+ /***************************************************************************
+ * Check for pre enc skip
+ * When src and target frame rates do not match, we skip some frames to
+ * maintain the relationship between them
+ **************************************************************************/
+ {
+ WORD32 skip_src;
+
+ skip_src = ih264e_update_rc_framerates(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_rate_control.pps_pd_frm_rate,
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_rate_control.pps_frame_time);
+
+ if (skip_src) return 1;
+ }
+
+ /***************************************************************************
+ *Queue the input to the queue
+ **************************************************************************/
+ ps_inp_buf = &(ps_codec->as_inp_list[ps_codec->i4_pic_cnt
+ % MAX_NUM_BFRAMES]);
+
+ /* copy input info. to internal structure */
+ ps_inp_buf->s_raw_buf = ps_ive_ip->s_inp_buf;
+ ps_inp_buf->u4_timestamp_low = ps_ive_ip->u4_timestamp_low;
+ ps_inp_buf->u4_timestamp_high = ps_ive_ip->u4_timestamp_high;
+ ps_inp_buf->u4_is_last = ps_ive_ip->u4_is_last;
+ ps_inp_buf->pv_mb_info = ps_ive_ip->pv_mb_info;
+ ps_inp_buf->u4_mb_info_type = ps_ive_ip->u4_mb_info_type;
+ ps_inp_buf->pv_pic_info = ps_ive_ip->pv_pic_info;
+ ps_inp_buf->u4_pic_info_type = ps_ive_ip->u4_pic_info_type;
+
+ /***************************************************************************
+ * Now we should add the picture to RC stack here
+ **************************************************************************/
+ /*
+ * If an I frame has been requested, ask RC to force it
+ * For IDR requests, we have to ask RC to force I and set IDR ourselves
+ * since RC does not know about IDR. For forcing an IDR at dequeue stage we
+ * should record that an IDR has been requested somewhere. Hence we will
+ * store it in the i4_idr_inp_list at a position same as that of input frame
+ */
+ {
+ WORD32 i4_force_idr, i4_force_i;
+
+ i4_force_idr = (ps_codec->force_curr_frame_type == IV_IDR_FRAME);
+ i4_force_idr |= !(ps_codec->i4_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval);
+
+ i4_force_i = (ps_codec->force_curr_frame_type == IV_I_FRAME);
+
+ ps_codec->i4_idr_inp_list[ps_codec->i4_pic_cnt % MAX_NUM_BFRAMES] = i4_force_idr;
+
+ if ((ps_codec->i4_frame_num > 0) && (i4_force_idr || i4_force_i))
+ {
+ irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api);
+ }
+ ps_codec->force_curr_frame_type = IV_NA_FRAME;
+ }
+
+ irc_add_picture_to_stack(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->i4_pic_cnt);
+
+
+ /* Delay */
+ if (ps_codec->i4_encode_api_call_cnt
+ < (WORD32)(ps_codec->s_cfg.u4_num_bframes))
+ {
+ ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
+ return 0;
+ }
+
+ /***************************************************************************
+ * Get a new pic to encode
+ **************************************************************************/
+ /* Query the picture_type */
+ e_pictype = ih264e_rc_get_picture_details(
+ ps_codec->s_rate_control.pps_rate_control_api, (WORD32 *)(&u4_pic_id),
+ (WORD32 *)(&u4_pic_disp_id));
+
+ switch (e_pictype)
+ {
+ case I_PIC:
+ ps_codec->pic_type = PIC_I;
+ break;
+ case P_PIC:
+ ps_codec->pic_type = PIC_P;
+ break;
+ case B_PIC:
+ ps_codec->pic_type = PIC_B;
+ break;
+ default:
+ ps_codec->pic_type = PIC_NA;
+ ps_enc_buff->s_raw_buf.apv_bufs[0] = NULL;
+ return 0;
+ }
+
+ /* Set IDR if it has been requested or its the IDR interval */
+ ps_codec->pic_type = ps_codec->i4_idr_inp_list[u4_pic_id % MAX_NUM_BFRAMES] ?
+ PIC_IDR : ps_codec->pic_type;
+ ps_codec->i4_idr_inp_list[u4_pic_id % MAX_NUM_BFRAMES] = 0;
+
+
+
+ /* Get current frame Qp */
+ u1_frame_qp = (UWORD8)irc_get_frame_level_qp(
+ ps_codec->s_rate_control.pps_rate_control_api, e_pictype,
+ max_frame_bits);
+ ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
+
+ /*
+ * copy the pic id to poc because the display order is assumed to be same
+ * as input order
+ */
+ ps_codec->i4_poc = u4_pic_id;
+
+ /***************************************************************************
+ * Now retrieve the correct picture from the queue
+ **************************************************************************/
+
+ /* Mark the skip flag */
+ i4_skip = 0;
+ ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
+ ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip;
+
+ /* Get a buffer to encode */
+ ps_inp_buf = &(ps_codec->as_inp_list[u4_pic_id % MAX_NUM_BFRAMES]);
+
+ /* copy dequeued input to output */
+ ps_enc_buff->s_raw_buf = ps_inp_buf->s_raw_buf;
+ ps_enc_buff->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+ ps_enc_buff->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+ ps_enc_buff->u4_is_last = ps_inp_buf->u4_is_last;
+ ps_enc_buff->pv_mb_info = ps_inp_buf->pv_mb_info;
+ ps_enc_buff->u4_mb_info_type = ps_inp_buf->u4_mb_info_type;
+ ps_enc_buff->pv_pic_info = ps_inp_buf->pv_pic_info;
+ ps_enc_buff->u4_pic_info_type = ps_inp_buf->u4_pic_info_type;
+
+ if (ps_enc_buff->u4_is_last)
+ {
+ ps_codec->pic_type = PIC_NA;
+ }
+
+ /* Return the buffer status */
+ return (0);
+}
+
+/**
*******************************************************************************
*
* @brief
@@ -134,13 +375,15 @@
*
*******************************************************************************
*/
-WORD32 ih264e_get_min_level(WORD32 pic_size)
+WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht)
{
WORD32 lvl_idx = MAX_LEVEL, i;
-
+ WORD32 pic_size = wd * ht;
+ WORD32 max = MAX(wd, ht);
for (i = 0; i < MAX_LEVEL; i++)
{
- if (pic_size <= gai4_ih264_max_luma_pic_size[i])
+ if ((pic_size <= gai4_ih264_max_luma_pic_size[i]) &&
+ (max <= gai4_ih264_max_wd_ht[i]))
{
lvl_idx = i;
break;
@@ -331,7 +574,7 @@ WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
WORD32 num_samples;
WORD32 max_num_bufs;
WORD32 pad = MAX(horz_pad, vert_pad);
- UNUSED(pic_size);
+
/*
* If num_ref_frames and num_reorder_frmaes is specified
* Use minimum value
@@ -343,6 +586,7 @@ WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
/* Maximum number of luma samples in a picture at given level */
num_luma_samples = gai4_ih264_max_luma_pic_size[lvl_idx];
+ num_luma_samples = MAX(num_luma_samples, pic_size);
/* Account for chroma */
num_samples = num_luma_samples * 3 / 2;
@@ -403,7 +647,7 @@ WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
WORD32 mv_bank_size = 0;
/* number of sub mb partitions possible */
- WORD32 num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+ WORD32 num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
/* number of mbs */
WORD32 num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
@@ -413,10 +657,10 @@ WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
mv_bank_size += num_mb * sizeof(WORD32);
/* Size for pu_map */
- mv_bank_size += num_pu;
+ mv_bank_size += ALIGN4(num_pu);
/* Size for storing enc_pu_t for each PU */
- mv_bank_size += num_pu * sizeof(enc_pu_t);
+ mv_bank_size += ALIGN4(num_pu * sizeof(enc_pu_t));
return mv_bank_size;
}
@@ -547,7 +791,7 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
/* num of luma samples */
WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
- * ALIGN16(ps_codec->s_cfg.u4_ht);
+ * ALIGN16(ps_codec->s_cfg.u4_ht);
/* number of mb's & frame partitions */
WORD32 num_pu, num_mb;
@@ -573,7 +817,7 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
/* compute MV bank size per picture */
pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples);
- num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+ num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
i = 0;
ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
@@ -592,11 +836,13 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
}
ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf;
+ pu1_buf += num_mb * sizeof(WORD32);
- ps_mv_buf->pu1_pic_pu_map = (pu1_buf + num_mb * sizeof(WORD32));
+ ps_mv_buf->pu1_pic_pu_map = pu1_buf;
+ pu1_buf += ALIGN4(num_pu);
- ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf + num_mb * sizeof(WORD32)
- + num_pu);
+ ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf);
+ pu1_buf += ALIGN4(num_pu * sizeof(enc_pu_t));
ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
ps_mv_buf, i);
@@ -608,7 +854,6 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
return error_status;
}
- pu1_buf += pic_mv_bank_size;
ps_mv_buf++;
i++;
}
@@ -1002,14 +1247,12 @@ IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
ps_codec->s_cfg.u4_target_bitrate,
ps_codec->s_cfg.u4_max_bitrate,
ps_codec->s_cfg.u4_vbv_buffer_delay,
- ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
- H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_i_frm_interval,
+ ps_codec->s_cfg.u4_num_bframes + 1, au1_init_qp,
+ ps_codec->s_cfg.u4_num_bframes + 2 , au1_min_max_qp,
ps_codec->s_cfg.u4_max_level);
}
- /* src stride */
- ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
-
/* recon stride */
ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD;
@@ -1020,6 +1263,11 @@ IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
DEBUG_HISTOGRAM_INIT();
+
+ /* Init dependency vars */
+ ps_codec->i4_last_inp_buff_received = 0;
+
+
return IH264E_SUCCESS;
}
@@ -1067,7 +1315,8 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma;
/* ref buffer set */
- pic_buf_t *ps_ref_pic;
+ pic_buf_t *aps_ref_pic[MAX_REF_PIC_CNT] = {NULL, NULL};
+ mv_buf_t *aps_mv_buf[MAX_REF_PIC_CNT] = {NULL, NULL};
WORD32 ref_set_id;
/* pic time stamp */
@@ -1075,14 +1324,11 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
/* indices to access curr/prev frame info */
- WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* curr pic type */
PIC_TYPE_T *pic_type = &ps_codec->pic_type;
- /* should src be skipped */
- WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel];
-
/* Diamond search Iteration Max Cnt */
UWORD32 u4_num_layers =
(ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ?
@@ -1094,62 +1340,46 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
/********************************************************************/
/* INITIALIZE CODEC CONTEXT */
/********************************************************************/
-
- /* pre enc rc call */
- *skip_src = ih264e_set_rc_pic_params(ps_codec,
- ps_codec->i4_encode_api_call_cnt,
- (WORD32 *) pic_type);
- if (*skip_src == 1)
+ /* slice_type */
+ if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
{
- ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf =
- *ps_inp_buf;
-
- /* inform output bytes generated as zero */
- ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0;
-
- return error_status;
+ ps_codec->i4_slice_type = ISLICE;
}
-
- /********************************************************************/
- /* Alternate reference frame */
- /********************************************************************/
- if (ps_codec->s_cfg.u4_enable_alt_ref)
+ else if (PIC_P == *pic_type)
{
- if (PIC_IDR == *pic_type || PIC_I == *pic_type)
- {
- ps_codec->u4_is_curr_frm_ref = 1;
- }
- else
- {
- ps_codec->u4_is_curr_frm_ref = 1;
- if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1))
- ps_codec->u4_is_curr_frm_ref = 0;
- }
-
- if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0))
- {
- ps_codec->i4_frame_num++;
- }
+ ps_codec->i4_slice_type = PSLICE;
}
- else
+ else if(PIC_B == *pic_type)
{
- ps_codec->u4_is_curr_frm_ref = 1;
-
- ps_codec->i4_frame_num++;
+ ps_codec->i4_slice_type = BSLICE;
}
- /* slice_type */
- ps_codec->i4_slice_type = PSLICE;
- if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
+ /***************************************************************************
+ * Set up variables for sending frame number, poc and reference
+ * a) Set up alt ref too
+ **************************************************************************/
+
+ /* Check and set if the current frame is reference or not */
+ ps_codec->u4_is_curr_frm_ref = 0;
+
+ /* This frame is a reference if it's not a B pic, pending approval from alt ref */
+ ps_codec->u4_is_curr_frm_ref = (*pic_type != PIC_B);
+
+ /* In case it's a P pic, we will decide according to alt ref also */
+ if (ps_codec->s_cfg.u4_enable_alt_ref && (*pic_type == PIC_P)
+ && (ps_codec->i4_pic_cnt
+ % (ps_codec->s_cfg.u4_enable_alt_ref + 1)))
{
- ps_codec->i4_slice_type = ISLICE;
- }
- else if (PIC_P == *pic_type)
- {
- ps_codec->i4_slice_type = PSLICE;
+ ps_codec->u4_is_curr_frm_ref = 0;
}
+ /*
+ * Override everything in case of IDR
+ * Note that in case of IDR, at this point ps_codec->u4_is_curr_frm_ref must
+ * be 1
+ */
+
/* is this an IDR pic */
ps_codec->u4_is_idr = 0;
@@ -1165,6 +1395,10 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
ps_codec->i4_idr_pic_id++;
}
+ /***************************************************************************
+ * Set up Deblock
+ **************************************************************************/
+
/* set deblock disable flags based on disable deblock level */
ps_codec->i4_disable_deblk_pic = 1;
@@ -1235,93 +1469,132 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
ih264e_populate_pps(ps_codec, ps_pps);
}
- /* Reference and MV bank Buffer Manager */
+ /***************************************************************************
+ * Reference and MV bank Buffer Manager
+ * Here we will
+ * 1) Find the correct ref pics for the current frame
+ * 2) Free the ref pic that is not going to be used anywhere
+ * 3) Find a free buff from the list and assign it as the recon of
+ * current frame
+ *
+ * 1) Finding correct ref pic
+ * All pics needed for future are arranged in a picture list called
+ * ps_codec->as_ref_set. Each picture in this will have a pic buffer and
+ * MV buffer that is marked appropriately as BUF_MGR_REF, BUF_MGR_IO or
+ * BUF_MGR_CODEC. Also the pic_cnt and poc will also be present.
+ * Hence to find the ref pic we will loop through the list and find
+ * 2 pictures with maximum i4_pic_cnt .
+ *
+ * note that i4_pic_cnt == -1 is used to filter uninit ref pics.
+ * Now since we only have max two ref pics, we will always find max 2
+ * ref pics.
+
+ *
+ * 2) 3) Self explanatory
+ ***************************************************************************/
{
- /* min pic cnt among the list of pics stored in ref list */
- WORD32 min_pic_cnt;
+ /* Search for buffs with maximum pic cnt */
- /* max pic cnt among the list of pics stored in ref list */
- WORD32 max_pic_cnt;
+ WORD32 max_pic_cnt[] = { -1, -1 };
- /* temp var */
- WORD32 i;
+ mv_buf_t *ps_mv_buf_to_free[] = { NULL, NULL };
- ps_ref_pic = NULL;
+ /* temp var */
+ WORD32 i, buf_status;
- /* get reference picture when necessary */
- /* Only nearest picture encoded (max pic cnt) is used as reference */
- if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
- max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+ if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
+ continue;
+
+ buf_status = ih264_buf_mgr_get_status(
+ ps_codec->pv_ref_buf_mgr,
+ ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
+
+ /* Ideally we should check the buffer status of the MV buffer as well. But since
+ * the corresponding MV buffers will also be in the same state, it doesn't
+ * matter as of now. Adding the check would still make the logic better */
+ if ((max_pic_cnt[0] < ps_codec->as_ref_set[i].i4_pic_cnt)
+ && (buf_status & BUF_MGR_REF))
+ {
+ if (max_pic_cnt[1] < ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ max_pic_cnt[0] = max_pic_cnt[1];
+ aps_ref_pic[0] = aps_ref_pic[1];
+ aps_mv_buf[0] = aps_mv_buf[1];
- ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf;
+ ps_mv_buf_to_free[0] = ps_mv_buf_to_free[1];
- /* loop through to get the max pic cnt among the list of pics stored in ref list */
- for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
- {
- if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt)
+ max_pic_cnt[1] = ps_codec->as_ref_set[i].i4_pic_cnt;
+ aps_ref_pic[1] = ps_codec->as_ref_set[i].ps_pic_buf;
+ aps_mv_buf[1] = ps_codec->as_ref_set[i].ps_mv_buf;
+ ps_mv_buf_to_free[1] = ps_codec->as_ref_set[i].ps_mv_buf;
+
+ }
+ else
{
- max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
- ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+ max_pic_cnt[0] = ps_codec->as_ref_set[i].i4_pic_cnt;
+ aps_ref_pic[0] = ps_codec->as_ref_set[i].ps_pic_buf;
+ aps_mv_buf[0] = ps_codec->as_ref_set[i].ps_mv_buf;
+ ps_mv_buf_to_free[0] = ps_codec->as_ref_set[i].ps_mv_buf;
}
}
}
- /* get a location at which the curr pic info can be stored for future reference */
- ref_set_id = -1;
-
- for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ /*
+ * Now if the current picture is I or P, we discard the back ref pic and
+ * assign forward ref as backward ref
+ */
+ if (*pic_type != PIC_B)
{
- if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt)
+ if (ps_mv_buf_to_free[0])
{
- ref_set_id = i;
- break;
- }
- }
+ /* release this frame from reference list */
+ ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
+ ps_mv_buf_to_free[0]->i4_buf_id,
+ BUF_MGR_REF);
- /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */
- if (ref_set_id == -1)
- {
- /* pic info */
- pic_buf_t *ps_cur_pic;
-
- /* mv info */
- mv_buf_t *ps_cur_mv_buf;
-
- ref_set_id = 0;
- min_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
-
- /* loop through to get the min pic cnt among the list of pics stored in ref list */
- for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
- {
- if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt)
- {
- min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
- ref_set_id = i;
- }
+ ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
+ aps_ref_pic[0]->i4_buf_id, BUF_MGR_REF);
}
- ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf;
-
- ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf;
+ max_pic_cnt[0] = max_pic_cnt[1];
+ aps_ref_pic[0] = aps_ref_pic[1];
+ aps_mv_buf[0] = aps_mv_buf[1];
- /* release this frame from reference list */
- ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
- ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
-
- ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
- ps_cur_pic->i4_buf_id, BUF_MGR_REF);
+ /* Dummy */
+ max_pic_cnt[1] = -1;
}
- if (ps_codec->s_cfg.u4_enable_recon)
+ /*
+ * Mark all reference pics with unused buffers as free.
+ * We need this step since each user, i.e. ref, recon, io etc., only unsets its
+ * respective flag. Hence we need to combine them together and mark the ref set
+ * accordingly
+ */
+ ref_set_id = -1;
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
{
- ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr);
+ if (ps_codec->as_ref_set[i].i4_pic_cnt == -1)
+ {
+ ref_set_id = i;
+ continue;
+ }
- if (ret != IH264_SUCCESS)
+ buf_status = ih264_buf_mgr_get_status(
+ ps_codec->pv_ref_buf_mgr,
+ ps_codec->as_ref_set[i].ps_pic_buf->i4_buf_id);
+
+ if ((buf_status & (BUF_MGR_REF | BUF_MGR_CODEC | BUF_MGR_IO)) == 0)
{
- return IH264E_NO_FREE_RECONBUF;
+ ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+ ps_codec->as_ref_set[i].i4_poc = 32768;
+
+ ref_set_id = i;
}
}
+ /* An assert failure here means we do not have any free buffers */
+ ASSERT(ref_set_id >= 0);
}
{
@@ -1353,7 +1626,6 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
* and getting a buffer id to free
*/
ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
-
ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id;
}
@@ -1375,7 +1647,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
}
/* mark the buffer as needed for reference if the curr pic is available for ref */
- if (1 == ps_codec->u4_is_curr_frm_ref)
+ if (ps_codec->u4_is_curr_frm_ref)
{
ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
BUF_MGR_REF);
@@ -1392,7 +1664,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
- ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+ ps_cur_pic->i4_abs_poc = ps_codec->i4_poc;
ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb;
ps_cur_pic->i4_buf_id = cur_pic_buf_id;
@@ -1401,18 +1673,17 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma;
}
- /* in case the current picture is used for reference then add it to the reference set */
- if (ps_codec->u4_is_curr_frm_ref
- && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I)
- || (*pic_type == PIC_P)))
+ /*
+ * Add the current picture to ref list independent of the fact that it is used
+ * as reference or not. This is because, now recon is not in sync with output
+ * hence we may need the current recon after some delay. By adding it to ref list
+ * we can retrieve the recon any time we want. The information that it is used
+ * for ref can still be found by checking the buffer status of pic buf.
+ */
{
ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt;
-
- /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */
- ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt;
-
+ ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_poc;
ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf;
-
ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic;
}
@@ -1463,9 +1734,6 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
/* chroma rec buffer */
ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma;
- /* src stride */
- ps_proc->i4_src_strd = ps_codec->i4_src_strd;
-
/* rec stride */
ps_proc->i4_rec_strd = ps_codec->i4_rec_strd;
@@ -1592,16 +1860,37 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
/* Pointer to current pictures mv buffers */
ps_proc->ps_cur_mv_buf = ps_mv_buf;
- /* pointer to ref picture */
- ps_proc->ps_ref_pic = ps_ref_pic;
+ /*
+ * pointer to ref picture
+ * 0 : Temporal back reference
+ * 1 : Temporal forward reference
+ */
+ ps_proc->aps_ref_pic[PRED_L0] = aps_ref_pic[PRED_L0];
+ ps_proc->aps_ref_pic[PRED_L1] = aps_ref_pic[PRED_L1];
+ if (ps_codec->pic_type == PIC_B)
+ {
+ ps_proc->aps_mv_buf[PRED_L0] = aps_mv_buf[PRED_L0];
+ ps_proc->aps_mv_buf[PRED_L1] = aps_mv_buf[PRED_L1];
+ }
+ else
+ {
+ /*
+ * Else is dummy since for a non B pic we do not need this.
+ * But an assignment here will help in not having a segfault
+ * when we calculate colpic in P slices
+ */
+ ps_proc->aps_mv_buf[PRED_L0] = ps_mv_buf;
+ ps_proc->aps_mv_buf[PRED_L1] = ps_mv_buf;
+ }
if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
{
- /* ref pointer luma */
- ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma;
+ /* temporal back and forward ref pointer luma and chroma */
+ ps_proc->apu1_ref_buf_luma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_luma;
+ ps_proc->apu1_ref_buf_chroma_base[PRED_L0] = aps_ref_pic[PRED_L0]->pu1_chroma;
- /* ref pointer chroma */
- ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma;
+ ps_proc->apu1_ref_buf_luma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_luma;
+ ps_proc->apu1_ref_buf_chroma_base[PRED_L1] = aps_ref_pic[PRED_L1]->pu1_chroma;
}
/* Structure for current input buffer */
@@ -1649,6 +1938,9 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
/* slice hdr base */
ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base;
+ /* Abs poc */
+ ps_entropy->i4_abs_pic_order_cnt = ps_proc->ps_codec->i4_poc;
+
/* initialize entropy map */
if (i == j)
{
@@ -1656,6 +1948,9 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
/* row 0 to ht in mbs */
memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* initialize cabac tables */
+ ih264e_init_cabac_table(ps_entropy);
}
/* wd in mbs */
@@ -1720,9 +2015,6 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
ps_me_ctxt->ai2_srch_boundaries[1] =
ps_codec->s_cfg.u4_srch_rng_y;
- /* src stride */
- ps_me_ctxt->i4_src_strd = ps_codec->i4_src_strd;
-
/* rec stride */
ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd;
@@ -1751,7 +2043,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
/* qp */
ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp;
- if ((i == 0) && (0 == ps_codec->i4_pic_cnt))
+ if ((i == j) && (0 == ps_codec->i4_poc))
{
/* init mv bits tables */
ih264e_init_mv_bits(ps_me_ctxt);
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
index 651dad9..27e37e8 100644
--- a/encoder/ih264e_utils.h
+++ b/encoder/ih264e_utils.h
@@ -30,6 +30,7 @@
* Harish
*
* @par List of Functions:
+* -ih264e_input_queue_update()
* -ih264e_get_min_level()
* -ih264e_get_lvl_idx()
* -ih264e_get_dpb_size()
@@ -52,6 +53,35 @@
#define IH264E_UTILS_H_
/**
+ *******************************************************************************
+ *
+ * @brief
+ * Queues the current buffer, gets back another buffer for encoding with the correct
+ * picture type
+ *
+ * @par Description:
+ *
+ * @param[in] ps_codec
+ * Pointer to codec descriptor
+ *
+ * @param[in] ps_ive_ip
+ * Current input buffer to the encoder
+ *
+ * @param[out] ps_enc_buff
+ * Buffer to be encoded in the current pass
+ *
+ * @returns
+ * Flag indicating if we have a pre-enc skip or not
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+WORD32 ih264e_input_queue_update(codec_t *ps_codec,
+ ive_video_encode_ip_t *ps_ive_ip,
+ inp_buf_t *ps_enc_buff);
+
+/**
*******************************************************************************
*
* @brief
@@ -61,8 +91,11 @@
* Gets the minimum level index and then gets corresponding level.
* Also used to ignore invalid levels like 2.3, 3.3 etc
*
-* @param[in] level
-* Level of the stream
+* @param[in] wd
+* Width
+*
+* @param[in] ht
+* Height
*
* @returns Level index for a given level
*
@@ -70,7 +103,7 @@
*
*******************************************************************************
*/
-WORD32 ih264e_get_min_level(WORD32 pic_size);
+WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht);
/**
*******************************************************************************
diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c
index 3dcba8d..173f17c 100644
--- a/encoder/ih264e_version.c
+++ b/encoder/ih264e_version.c
@@ -86,18 +86,9 @@
*******************************************************************************
*/
#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \
- strncpy(version_string,"@(#)Id:", MAX_STRLEN); \
- strncat(version_string,codec_name, MAX_STRLEN); \
- strncat(version_string,"_", MAX_STRLEN); \
- strncat(version_string,codec_release_type, MAX_STRLEN); \
- strncat(version_string," Ver:", MAX_STRLEN); \
- strncat(version_string,codec_release_ver, MAX_STRLEN); \
- strncat(version_string," Released by ", MAX_STRLEN); \
- strncat(version_string,codec_vendor, MAX_STRLEN); \
- strncat(version_string," Build: ", MAX_STRLEN); \
- strncat(version_string,__DATE__, MAX_STRLEN); \
- strncat(version_string," @ ", MAX_STRLEN); \
- strncat(version_string,__TIME__, MAX_STRLEN);
+ snprintf(version_string, MAX_STRLEN, \
+ "@(#)Id:%s_%s Ver:%s Released by %s Build: %s @ %s", \
+ codec_name, codec_release_type, codec_release_ver, codec_vendor, __DATE__, __TIME__)
/*****************************************************************************/
/* Function Definitions */
@@ -131,9 +122,9 @@ IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize)
VERSION(ac_version_tmp, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
CODEC_VENDOR);
- if (u4_version_bufsize >= (strnlen(ac_version_tmp, MAX_STRLEN) + 1))
+ if (u4_version_bufsize >= (strlen(ac_version_tmp) + 1))
{
- memcpy(pc_version, ac_version_tmp, (strnlen(ac_version_tmp, MAX_STRLEN) + 1));
+ memcpy(pc_version, ac_version_tmp, (strlen(ac_version_tmp) + 1));
return IV_SUCCESS;
}
else
diff --git a/encoder/ime.c b/encoder/ime.c
index c89aaab..cfd6e81 100644
--- a/encoder/ime.c
+++ b/encoder/ime.c
@@ -50,10 +50,10 @@
/* User include files */
#include "ime_typedefs.h"
#include "ime_distortion_metrics.h"
-#include "ime_structs.h"
#include "ime_defs.h"
-#include "ime_macros.h"
+#include "ime_structs.h"
#include "ime.h"
+#include "ime_macros.h"
#include "ime_statistics.h"
/**
@@ -87,10 +87,10 @@
*
*******************************************************************************
*/
-void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
+void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
{
/* MB partition info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
/* lagrange parameter */
UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
@@ -106,7 +106,7 @@ void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
/* pointer to src macro block */
UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
- UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+ UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
/* strides */
WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
@@ -271,22 +271,24 @@ void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
*
*******************************************************************************
*/
+
void ime_evaluate_init_srchposn_16x16
(
- me_ctxt_t *ps_me_ctxt
+ me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_reflist
)
{
UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
/* candidate mv cnt */
- UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates;
+ UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
/* list of candidate mvs */
- ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search;
+ ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
/* pointer to src macro block */
UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
- UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+ UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
/* strides */
WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
@@ -302,46 +304,15 @@ void ime_evaluate_init_srchposn_16x16
WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
/* mb partitions info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
/* mv bits */
UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
/* temp var */
- UWORD32 i, j, u4_srch_pos_idx = 0;
+ UWORD32 i, j;
+ WORD32 i4_srch_pos_idx = 0;
UWORD8 *pu1_ref = NULL;
- WORD16 mv_x, mv_y;
-
- if (0)
- {
- /************************************************************/
- /* Compute SKIP Cost */
- /************************************************************/
- mv_x = ps_mv_list[SKIP_CAND].i2_mvx;
- mv_y = ps_mv_list[SKIP_CAND].i2_mvy;
-
- /* adjust ref pointer */
- pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd);
-
- /* compute distortion */
- ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
-
- /* for skip mode cost & distortion are identical
- * But we shall add a bias to favor skip mode.
- * Doc. JVT B118 Suggests SKIP_BIAS as 16.
- * TODO : Empirical analysis of SKIP_BIAS is necessary */
-
- i4_distortion_least = i4_mb_distortion;
-
- u4_srch_pos_idx = 0;
-
-#define SKIP_BIAS 8
-
- i4_mb_cost_least = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS);
-
-#undef SKIP_BIAS
- }
-
/* Carry out a search using each of the motion vector pairs identified above as predictors. */
/* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
@@ -366,6 +337,7 @@ void ime_evaluate_init_srchposn_16x16
/* compute distortion */
ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
+
DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
/* compute cost */
i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
@@ -377,22 +349,21 @@ void ime_evaluate_init_srchposn_16x16
i4_distortion_least = i4_mb_distortion;
- u4_srch_pos_idx = i;
+ i4_srch_pos_idx = i;
}
}
}
if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
{
- ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx;
+ ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
ps_mb_part->i4_mb_cost = i4_mb_cost_least;
ps_mb_part->i4_mb_distortion = i4_distortion_least;
- ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx;
- ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy;
+ ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
+ ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
}
}
-
/**
*******************************************************************************
*
@@ -419,11 +390,12 @@ void ime_evaluate_init_srchposn_16x16
*/
void ime_full_pel_motion_estimation_16x16
(
- me_ctxt_t *ps_me_ctxt
+ me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_ref_list
)
{
/* mb part info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
/******************************************************************/
/* Modify Search range about initial candidate instead of zero mv */
@@ -448,19 +420,14 @@ void ime_full_pel_motion_estimation_16x16
switch (ps_me_ctxt->u4_me_speed_preset)
{
case DMND_SRCH:
- ime_diamond_search_16x16(ps_me_ctxt);
+ ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
break;
default:
assert(0);
break;
}
-
- ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx << 2;
- ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy << 2;
-
}
-
/**
*******************************************************************************
*
@@ -487,13 +454,13 @@ void ime_full_pel_motion_estimation_16x16
*/
void ime_sub_pel_motion_estimation_16x16
(
- me_ctxt_t *ps_me_ctxt
+ me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_reflist
)
{
/* pointers to src & ref macro block */
UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
-
/* pointers to ref. half pel planes */
UWORD8 *pu1_ref_mb_half_x;
UWORD8 *pu1_ref_mb_half_y;
@@ -507,10 +474,10 @@ void ime_sub_pel_motion_estimation_16x16
/* strides */
WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
- WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd;
+ WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
/* mb partitions info */
- mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
/* SAD(distortion metric) of an mb */
WORD32 i4_mb_distortion;
@@ -523,7 +490,6 @@ void ime_sub_pel_motion_estimation_16x16
/*Best half pel buffer*/
UWORD8 *pu1_best_hpel_buf = NULL;
-
/* mv bits */
UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
@@ -550,6 +516,8 @@ void ime_sub_pel_motion_estimation_16x16
WORD32 i, j;
WORD32 ai4_sad[8];
+ WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
+
i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
@@ -575,10 +543,9 @@ void ime_sub_pel_motion_estimation_16x16
/* Hence corresponding adjustments made here */
/**************************************************************/
- pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1;
- pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd;
- pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd;
-
+ pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
+ pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
+ pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
pu1_ref_mb_half_y,
@@ -611,8 +578,10 @@ void ime_sub_pel_motion_estimation_16x16
i2_mv_u_y = mv_y_tmp;
#ifndef HP_PL /*choosing whether left or right half_x*/
- ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x_temp - i;
+ ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
+
+ i4_srch_pos_idx = 0;
#endif
}
@@ -643,8 +612,10 @@ void ime_sub_pel_motion_estimation_16x16
i2_mv_u_y = mv_y_tmp;
#ifndef HP_PL/*choosing whether top or bottom half_y*/
- ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
+ ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd);
+
+ i4_srch_pos_idx = 1;
#endif
}
@@ -678,23 +649,27 @@ void ime_sub_pel_motion_estimation_16x16
i2_mv_u_y = mv_y_tmp;
#ifndef HP_PL /*choosing between four half_xy */
- ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
+ ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i;
+
+ i4_srch_pos_idx = 2;
#endif
}
}
}
- ps_mb_part->i4_mb_cost = i4_mb_cost_least;
- ps_mb_part->i4_mb_distortion = i4_distortion_least;
- ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
- ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
- ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
-
+ if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+ {
+ ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+ ps_mb_part->i4_mb_distortion = i4_distortion_least;
+ ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
+ ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
+ ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
+ ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
+ }
}
-
/**
*******************************************************************************
*
@@ -705,132 +680,105 @@ void ime_sub_pel_motion_estimation_16x16
* @param[in] ps_me_ctxt
* pointer to me ctxt
*
-* @param[in] ps_skip_mv
-* pointer to skip mv
*
* @returns none
*
* @remarks
* NOTE: while computing the skip cost, do not enable early exit from compute
* sad function because, a negative bias gets added later
+* Note that the last ME candidate in me ctxt is taken as skip motion vector
*
*******************************************************************************
*/
void ime_compute_skip_cost
(
me_ctxt_t *ps_me_ctxt,
- void *pv_skip_mv,
+ ime_mv_t *ps_skip_mv,
mb_part_ctxt *ps_smb_part_info,
- UWORD32 u4_use_stat_sad
+ UWORD32 u4_use_stat_sad,
+ WORD32 i4_reflist,
+ WORD32 i4_is_slice_type_b
)
{
- /* pointers to src & ref macro block */
- UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
- UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
-
- /* strides */
- WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
- WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
-
- /* enabled fast sad computation */
- UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
-
/* SAD(distortion metric) of an mb */
WORD32 i4_mb_distortion;
/* cost = distortion + u4_lambda_motion * rate */
WORD32 i4_mb_cost;
- /* Motion vectors in full-pel units */
- WORD16 mv_x, mv_y;
-
- /* lambda - lagrange constant */
- UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
-
- /* skip mv */
- ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv;
-
/* temp var */
UWORD8 *pu1_ref = NULL;
- UWORD32 u4_is_nonzero;
- s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx);
- s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy);
+ ime_mv_t s_skip_mv;
- if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) ||
- (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy))
- {
- /* skip motion vector not with in bounds */
- /* it is possible that mv is already evaluated */
- return ;
- }
+ s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
+ s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
- mv_x = (ps_skip_mv->i2_mvx + 2) >> 2;
- mv_y = (ps_skip_mv->i2_mvy + 2) >> 2;
-
- if ((mv_x << 2) != ps_skip_mv->i2_mvx || (mv_y << 2) != ps_skip_mv->i2_mvy)
+ /* Check if the skip mv is out of bounds or subpel */
{
+ /* skip mv */
+ ime_mv_t s_clip_skip_mv;
+ s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
+ s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
- return ;
+ if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
+ (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
+ (ps_skip_mv->i2_mvx & 0x3) ||
+ (ps_skip_mv->i2_mvy & 0x3))
+ {
+ return ;
+ }
+ }
- }
- else
- {
- /* adjust ref pointer */
- pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd);
- }
+ /* adjust ref pointer */
+ pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
+ + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
if(u4_use_stat_sad == 1)
{
- ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd,
- ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,&u4_is_nonzero);
-
- /*
- *NOTE The check here is two fold
- * One is checking if the sad has been reached, ie min sad, which a configurable parameter
- * If that is reached,we need not do any mode evaluation
- * Similary if we find a distortion of zero there is no point of doing any further mode evaluation
- * as sad is a non negative quantity
- * hence in this case too, no further evaluation is necessary
- */
- /*
- *NOTE in case we need to disable the zero check using satdq,
- * we need only to set the u4_is_zero to a non zero value
- */
- if(u4_is_nonzero==0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+ UWORD32 u4_is_nonzero;
+
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
+ ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
+ ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
+ &i4_mb_distortion, &u4_is_nonzero);
+
+ if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
{
- ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/
- ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0)?0:i4_mb_distortion;
+ ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
+ ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
}
}
else
{
- ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion);
+ ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
+ ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
+ ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
{
ps_me_ctxt->i4_min_sad = i4_mb_distortion;
- ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/
+ ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
}
}
+
/* for skip mode cost & distortion are identical
* But we shall add a bias to favor skip mode.
* Doc. JVT B118 Suggests SKIP_BIAS as 16.
* TODO : Empirical analysis of SKIP_BIAS is necessary */
-#define SKIP_BIAS 8
- i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS);
-#undef SKIP_BIAS
+
+ i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
{
ps_smb_part_info->i4_mb_cost = i4_mb_cost;
ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
- ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx;
- ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy;
+ ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
+ ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
}
}
diff --git a/encoder/ime.h b/encoder/ime.h
index 5c039e8..17912d4 100644
--- a/encoder/ime.h
+++ b/encoder/ime.h
@@ -47,6 +47,19 @@
*/
#define NUM_LAYERS 16
+/**
+******************************************************************************
+ * @brief Skip Bias value for P slice
+******************************************************************************
+ */
+#define SKIP_BIAS_P 2
+
+/**
+******************************************************************************
+ * @brief Skip Bias value for B slice
+******************************************************************************
+ */
+#define SKIP_BIAS_B 16
/*****************************************************************************/
/* Extern Function Declarations */
@@ -84,8 +97,8 @@
* computational feasibility. This is only for quality eval purposes.
*
*******************************************************************************
-*/
-extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt);
+ */
+extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist);
/**
@@ -113,10 +126,8 @@ extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt);
*
*******************************************************************************
*/
-extern void ime_evaluate_init_srchposn_16x16
- (
- me_ctxt_t *ps_me_ctxt
- );
+extern void ime_evaluate_init_srchposn_16x16(me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_reflist);
/**
*******************************************************************************
@@ -142,10 +153,8 @@ extern void ime_evaluate_init_srchposn_16x16
*
*******************************************************************************
*/
-extern void ime_full_pel_motion_estimation_16x16
- (
- me_ctxt_t *ps_me_ctxt
- );
+extern void ime_full_pel_motion_estimation_16x16(me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_ref_list);
/**
*******************************************************************************
@@ -171,10 +180,8 @@ extern void ime_full_pel_motion_estimation_16x16
*
*******************************************************************************
*/
-extern void ime_sub_pel_motion_estimation_16x16
- (
- me_ctxt_t *ps_me_ctxt
- );
+extern void ime_sub_pel_motion_estimation_16x16(me_ctxt_t *ps_me_ctxt,
+ WORD32 i4_reflist);
/**
*******************************************************************************
@@ -189,6 +196,9 @@ extern void ime_sub_pel_motion_estimation_16x16
* @param[in] ps_skip_mv
* pointer to skip mv
*
+* @param[in] is_slice_type_b
+* Whether slice type is BSLICE or not
+*
* @returns none
*
* @remarks
@@ -197,13 +207,12 @@ extern void ime_sub_pel_motion_estimation_16x16
*
*******************************************************************************
*/
-extern void ime_compute_skip_cost
- (
- me_ctxt_t *ps_me_ctxt,
- void *pv_skip_mv,
- mb_part_ctxt *ps_smb_part_info,
- UWORD32 u4_use_stat_sad
- );
+extern void ime_compute_skip_cost(me_ctxt_t *ps_me_ctxt,
+ ime_mv_t *ps_skip_mv,
+ mb_part_ctxt *ps_smb_part_info,
+ UWORD32 u4_use_stat_sad,
+ WORD32 i4_reflist,
+ WORD32 is_slice_type_b);
#endif /* IME_H_ */
diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h
index 14d9c55..f82018d 100644
--- a/encoder/ime_defs.h
+++ b/encoder/ime_defs.h
@@ -55,5 +55,8 @@
#define NSTEP_SRCH 50
#define HEX_SRCH 75
+#define MAX_NUM_REFLIST 2
+#define SUBPEL_BUFF_CNT 4
+
#endif /*_IME_DEFS_H_*/
diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c
index 23a1fbc..f8c44df 100644
--- a/encoder/ime_distortion_metrics.c
+++ b/encoder/ime_distortion_metrics.c
@@ -1260,3 +1260,4 @@ void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
else *sig_sad_dc = 1;
}
+
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
index a30e1fc..5056ba0 100644
--- a/encoder/ime_distortion_metrics.h
+++ b/encoder/ime_distortion_metrics.h
@@ -130,6 +130,7 @@ ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter;
ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma;
ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra;
+
/*SSE4.2 Declarations*/
ime_compute_sad_ft ime_compute_sad_16x16_sse42;
ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42;
@@ -164,7 +165,6 @@ ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8;
ime_compute_sad_stat ime_compute_16x16_sad_stat_av8;
ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8;
-
#endif /* IME_DISTORTION_METRICS_H_ */
diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h
index 7819b91..9baacb3 100644
--- a/encoder/ime_structs.h
+++ b/encoder/ime_structs.h
@@ -90,7 +90,7 @@ typedef struct
/**
* Search position for least cost among the list of candidates
*/
- UWORD32 u4_srch_pos_idx;
+ WORD32 i4_srch_pos_idx;
/**
* Search position for least cost among the list of candidates
@@ -116,9 +116,9 @@ typedef struct
typedef struct
{
/**
- * Ref pointer to current MB luma
+ * Ref pointer to current MB luma for each ref list
*/
- UWORD8 *pu1_ref_buf_luma;
+ UWORD8 *apu1_ref_buf_luma[MAX_NUM_REFLIST];
/**
* Src pointer to current MB luma
@@ -190,13 +190,13 @@ typedef struct
/**
* Number of valid candidates for the Initial search position
*/
- UWORD32 u4_num_candidates;
+ UWORD32 u4_num_candidates[MAX_NUM_REFLIST + 1];
/**
- * Motion vector predictors derived from neighbouring
+ * Motion vector predictors derived from neighboring
* blocks for each of the six block partitions
*/
- ime_mv_t as_mv_init_search[5];
+ ime_mv_t as_mv_init_search[MAX_NUM_REFLIST + 1][6];
/**
* mv bits
@@ -247,10 +247,17 @@ typedef struct
UWORD32 u4_left_is_skip;
+ /* skip_type can be PREDL0, PREDL1 or BIPRED */
+ WORD32 i4_skip_type;
+
+ /* Biasing given for skip prediction */
+ WORD32 i4_skip_bias[2];
+
/**
* Structure to store the MB partition info
+ * We need 1(L0)+1(L1)+1(bi)
*/
- mb_part_ctxt s_mb_part;
+ mb_part_ctxt as_mb_part[MAX_NUM_REFLIST + 1];
/*
* Threshold to compare the sad with
*/
@@ -277,27 +284,17 @@ typedef struct
UWORD8 u1_mb_qp;
/*
- * Buffers for holding half_x , half_y and half_xy
- * values when halfpel generation
- * for the entire plane is not enabled
+ * Buffers for holding subpel and bipred temp buffers
*/
- UWORD8 *pu1_half_x;
- UWORD8 *pu1_half_y;
- UWORD8 *pu1_half_xy;
+ UWORD8 *apu1_subpel_buffs[SUBPEL_BUFF_CNT];
+ WORD32 u4_subpel_buf_strd;
/*
* Buffers to store the best halfpel plane*
*/
UWORD8 *pu1_hpel_buf;
- /*
- * Stride for hpel buffer
- */
- UWORD32 u4_hpel_buf_strd;
-
- WORD32 u4_hp_buf_strd;
-
} me_ctxt_t;
diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c
index 1dfd9de..6f52970 100644
--- a/encoder/irc_bit_allocation.c
+++ b/encoder/irc_bit_allocation.c
@@ -251,7 +251,7 @@ WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_t **pps_bit_allocation,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static bit_allocation_t s_bit_allocation_temp;
+ bit_allocation_t s_bit_allocation_temp;
/*
* Hack for all alloc, during which we don't have any state memory.
diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c
index c179a28..9febbc8 100644
--- a/encoder/irc_cbr_buffer_control.c
+++ b/encoder/irc_cbr_buffer_control.c
@@ -79,7 +79,7 @@ WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_t **pps_cbr_buffer,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0, i;
- static cbr_buffer_t s_cbr_buffer_temp;
+ cbr_buffer_t s_cbr_buffer_temp;
/*
* Hack for all alloc, during which we don't have any state memory.
diff --git a/encoder/irc_common.h b/encoder/irc_common.h
index c341de4..448fad3 100644
--- a/encoder/irc_common.h
+++ b/encoder/irc_common.h
@@ -97,7 +97,7 @@ typedef float number_t;
/* The ratios between I to P and P to B Qp is specified here */
#define K_Q 4
#define I_TO_P_RATIO (19) /* In K_Q Q factor */
-#define P_TO_B_RATIO (21) /* In K_Q Q factor */
+#define P_TO_B_RATIO (32) /* In K_Q Q factor */
#define P_TO_I_RATIO (13) /* In K_Q Q factor */
#endif /* _RC_COMMON_H_ */
diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c
index 0d8abc2..97a0b68 100644
--- a/encoder/irc_est_sad.c
+++ b/encoder/irc_est_sad.c
@@ -58,7 +58,7 @@ WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_t **pps_est_sad,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static est_sad_t s_est_sad;
+ est_sad_t s_est_sad;
/* Hack for al alloc, during which we don't have any state memory.
* Dereferencing can cause issues
diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c
index 42dcfc5..1c35685 100644
--- a/encoder/irc_fixed_point_error_bits.c
+++ b/encoder/irc_fixed_point_error_bits.c
@@ -62,7 +62,7 @@ WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_t **pps_error_bits,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static error_bits_t s_error_bits_temp;
+ error_bits_t s_error_bits_temp;
/*
* Hack for all alloc, during which we don't have any state memory.
diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c
index 880ee19..b8e3d1b 100644
--- a/encoder/irc_mb_model_based.c
+++ b/encoder/irc_mb_model_based.c
@@ -47,7 +47,7 @@ WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_t **pps_mb_rate_control
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static mb_rate_control_t s_mb_rate_control_temp;
+ mb_rate_control_t s_mb_rate_control_temp;
/*
* Hack for al alloc, during which we don't have any state memory.
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
index 186188c..2a91572 100644
--- a/encoder/irc_picture_type.c
+++ b/encoder/irc_picture_type.c
@@ -225,7 +225,7 @@ WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handli
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static pic_handling_t s_pic_handling_temp;
+ pic_handling_t s_pic_handling_temp;
/*
* Hack for al alloc, during which we dont have any state memory.
@@ -253,6 +253,7 @@ WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handli
*****************************************************************************/
void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
WORD32 i4_max_inter_frm_int,
WORD32 i4_is_gop_closed)
{
@@ -262,7 +263,7 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
/* Checks */
/* Codec Parameters */
ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
- ps_pic_handling->i4_inter_frm_int = i4_max_inter_frm_int;
+ ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
ps_pic_handling->i4_max_inter_frm_int = i4_max_inter_frm_int;
ps_pic_handling->i4_is_gop_closed = i4_is_gop_closed;
@@ -278,6 +279,10 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
/* Indices to the pic_stack */
ps_pic_handling->i4_ref_pic_idx = 0;
+ /*
+ * B frame index should be ref_frame_num,
+ * which is 2 in our case
+ */
ps_pic_handling->i4_b_pic_idx = 2;
ps_pic_handling->i4_prev_b_pic_idx = 2;
@@ -302,7 +307,7 @@ void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
/* Variables on which the bit allocation is dependent */
/* Get the pic distribution in the gop */
find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
- i4_max_inter_frm_int, i4_is_gop_closed,
+ i4_inter_frm_int, i4_is_gop_closed,
&ps_pic_handling->i4_b_in_incomp_subgop,
&ps_pic_handling->i4_extra_p);
@@ -528,8 +533,7 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
* 3)The new inter-frm-interval won't cross the intra_frm_interval
*/
if((ps_pic_handling->i4_change_in_inter_frm_int == 1)
- && ((i4_buf_pic_no % i4_inter_frm_int == 1)
- || (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1)))
+ && ((i4_buf_pic_no % i4_inter_frm_int == 1)|| (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1)))
{
/*
* Condition which checks if the new inter_frm_int will cross the
@@ -540,10 +544,31 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
if(i4_condn_for_change_in_inter_frm_int)
{
+ /*
+ * If there is a change in inter frame interval. We should set the b
+ * frame IDX to the (num ref frame - num ref frame in buf)+ i4_ref_pic_idx
+ * Since our case we have a structure of I B P or I B...B P only
+ * we have three cases
+ * 1) current incoming frame is I. Then we have to leave space for
+ * current I and next P hence write b idx as to ref idx + 2
+ * 2) Current incoming frame is B. In that case, we have I in buffer.
+ * Only one P needs space hence write b idx as ref idx +1
+ * 3) Current incoming frame is P. In that case we are at the end of
+ * gop [sub gop?] and we have to leave space for next gops I and P.
+ * Thus b idx = ref idx + 2
+ *
+ * In case of a closed GOP, the last frame has to be forced to be a P.
+ * Hence we may have problems in that case.
+ *
+ * Also this has the implicit assumption of only 2 ref frames
+ */
+ WORD32 i4_is_curr_frm_b = (i4_buf_pic_no % i4_new_inter_frm_int)&&
+ !(i4_is_gop_closed && (i4_b_count_in_gop == i4_b_frms_in_prd));
+
/*If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
if(i4_inter_frm_int == 1)
{
- ps_pic_handling->i4_b_pic_idx = (1
+ ps_pic_handling->i4_b_pic_idx = ((i4_is_curr_frm_b ? 1 : 2)
+ ps_pic_handling->i4_ref_pic_idx)
% (i4_max_inter_frm_int + 1);
}
@@ -811,7 +836,42 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
i4_pic_disp_order_no++;
i4_buf_pic_no++;
+#if 0
/* For any gop */
+ /* BUG FIX
+ * This piece of code resets the GOP upon an I frame(?)
+ * This introduces a problem of the GOP getting reset not at I frames, as it should be
+ * The reason, AFAIK, is that
+ * 1) This code uses i4_pic_disp_order_no to reset the GOP. I assume it computes
+ * whether we are at a GOP boundary and resets there, but I am not sure
+ * 2) The "frames remaining in GOP" update is done in post enc, as it should be.
+ *
+ * Also, ps_pic_handling->i4_pic_disp_order_no is incremented when a pic is added
+ * to the stack because the addition is in display order while popping is in encode order
+ *
+ * Suppose there is a delay of 1 frame between queue and encode.
+ * Then the timing will be as follows. Assume a GOP of IPPIPP
+ *
+ * Input buff Input to qu Output buf/encode buff remaining pic in gop
+ * 1 I I NA reset to 1 2
+ * 2 P P I 0 2
+ * 3 P P P 0 1
+ * 4 I I P reset to 1 2
+ * 5 P P I 1 1
+ * 6 P P P 1 0
+ * 7 NA NA P
+ *
+ * Hence our GOP gets reset at I(1) and I(4) in the RC. Thus the remaining pic in gop
+ * count will be as shown. We can clearly see that the GOP gets reset at I(4). Hence
+ * the corresponding QP for output buf p(4) will be that of an I frame.
+ *
+ * By hiding this I hope to fix this problem, but I am not sure exactly.
+ * This needs to be investigated further
+ *
+ * By hiding this, most likely we are in effect disabling the dynamic
+ * update of GOP params.
+ */
+
if(ps_pic_handling->i4_pic_disp_order_no
== (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed)
* ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
@@ -831,6 +891,7 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
- ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
}
}
+#endif
/* End of GOP updates */
if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
@@ -856,10 +917,8 @@ void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
/* Updating the vars which work on the encoded pics */
/* For the first gop */
- if(((ps_pic_handling->i4_is_first_gop)
- && (ps_pic_handling->i4_pic_disp_order_no
- == (i4_max_inter_frm_int - 1)))
- || (i4_intra_frm_int == 1))
+ if ((ps_pic_handling->i4_is_first_gop)
+ && (ps_pic_handling->i4_pic_disp_order_no == 0))
{
ps_pic_handling->i4_coded_pic_no = 0;
ps_pic_handling->i4_stack_count = 0;
diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h
index 1af5424..021ee33 100644
--- a/encoder/irc_picture_type.h
+++ b/encoder/irc_picture_type.h
@@ -34,6 +34,7 @@ WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_ha
void irc_init_pic_handling(pic_handling_handle ps_pic_handling,
WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
WORD32 i4_max_inter_frm_int,
WORD32 i4_is_gop_closed);
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
index 6c6586e..95befce 100644
--- a/encoder/irc_rate_control_api.c
+++ b/encoder/irc_rate_control_api.c
@@ -43,6 +43,10 @@
#include "irc_rate_control_api_structs.h"
#include "irc_trace_support.h"
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+
#define DEV_Q 4 /*Q format(Shift) for Deviation range factor */
#define HI_DEV_FCTR 22 /* 1.4*16 */
#define LO_DEV_FCTR 12 /* 0.75*16 */
@@ -73,7 +77,7 @@ WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_c
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0, i;
- static rate_control_api_t s_temp_rc_api;
+ rate_control_api_t s_temp_rc_api;
/*
* Hack for al alloc, during which we dont have any state memory.
@@ -148,6 +152,7 @@ void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
UWORD32 u4_frame_rate,
UWORD32 u4_max_delay,
UWORD32 u4_intra_frame_interval,
+ WORD32 i4_inter_frm_int,
UWORD8 *pu1_init_qp,
UWORD32 u4_max_vbv_buff_size,
WORD32 i4_max_inter_frm_int,
@@ -172,7 +177,8 @@ void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
/* Initialize the pic_handling module */
irc_init_pic_handling(ps_rate_control_api->ps_pic_handling,
- (WORD32)u4_intra_frame_interval, i4_max_inter_frm_int,
+ (WORD32)u4_intra_frame_interval,
+ i4_inter_frm_int, i4_max_inter_frm_int,
i4_is_gop_closed);
/*** Initialize the rate control modules ***/
@@ -597,20 +603,25 @@ UWORD8 irc_get_frame_level_qp(rate_control_api_t *ps_rate_control_api,
}
}
- hi_dev_qp = GET_HI_DEV_QP(prev_qp);
/*
- * For lower QPs due to scale factor and fixed point arithmetic,
- * the hi_dev_qp can be same as that of the prev qp and in which
- * case it gets stuck in the lower most qp and thus not allowing
- * QPs not to change. To avoid this,for lower qps the hi_dev_qp
- * should be made slightly more than prev_qp
+ * Due to the inexact nature of translation tables, QP may
+ * get locked at some values. This is because of the inexactness of
+ * the tables causing a change of +-1 in back and forth translations.
+ * In that case, if we restrict the QP swing to +-1, we will get
+ * the lock up condition. Hence we make it such that we will have
+ * a swing of at least +- 2 from prev_qp
*/
- if(prev_qp == hi_dev_qp)
- {
- hi_dev_qp += 1;
- }
+
lo_dev_qp = GET_LO_DEV_QP(prev_qp);
- u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp, lo_dev_qp);
+ lo_dev_qp = MIN(lo_dev_qp, prev_qp - 2);
+ lo_dev_qp = MAX(lo_dev_qp, ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1)]);
+
+ hi_dev_qp = GET_HI_DEV_QP(prev_qp);
+ hi_dev_qp = MAX(hi_dev_qp, prev_qp + 2);
+ hi_dev_qp = MIN(hi_dev_qp, ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1) + 1]);
+
+ u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp , lo_dev_qp);
+
}
else
{
diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h
index 0173037..4b24ece 100644
--- a/encoder/irc_rate_control_api.h
+++ b/encoder/irc_rate_control_api.h
@@ -42,6 +42,7 @@ void irc_initialise_rate_control(rate_control_handle ps_rate_control_api,
UWORD32 u4_frame_rate,
UWORD32 u4_max_delay,
UWORD32 u4_intra_frame_interval,
+ WORD32 i4_inter_frm_int,
UWORD8 *pu1_init_qp,
UWORD32 u4_max_vbv_buff_size,
WORD32 i4_max_inter_frm_int,
diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c
index f5c0737..62c7811 100644
--- a/encoder/irc_rd_model.c
+++ b/encoder/irc_rd_model.c
@@ -55,7 +55,7 @@ WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_t **pps_rc_rd_model,
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static rc_rd_model_t s_rc_rd_model_temp;
+ rc_rd_model_t s_rc_rd_model_temp;
/*
* Hack for al alloc, during which we don't have any state memory.
@@ -115,11 +115,6 @@ static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits,
UWORD8 u1_num_frms_used = 0;
UWORD8 u1_frm_indx;
-#if !(ENABLE_QUAD_RC_MODEL||ENABLE_LIN_MODEL_WITH_INTERCEPT)
- UNUSED(pu1_num_skips);
- UNUSED(pmc_model_coeff);
- UNUSED(pmc_model_coeff_lin);
-#endif
float sum_y = 0;
float sum_x_y = 0;
float sum_x2_y = 0;
@@ -131,6 +126,12 @@ static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits,
float x0, y0;
float model_coeff_a = 0.0, model_coeff_b = 0.0, model_coeff_c = 0.0;
+#if !(ENABLE_QUAD_RC_MODEL||ENABLE_LIN_MODEL_WITH_INTERCEPT)
+ UNUSED(pu1_num_skips);
+ UNUSED(pmc_model_coeff);
+ UNUSED(pmc_model_coeff_lin);
+#endif
+
for(i = 0; i < u1_num_frms; i++)
{
if(-1 == pi1_frame_index[i])
diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c
index 23e9959..aaf0d6e 100644
--- a/encoder/irc_vbr_storage_vbv.c
+++ b/encoder/irc_vbr_storage_vbv.c
@@ -73,7 +73,7 @@ WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_t **pps_vbr_storage_
ITT_FUNC_TYPE_E e_func_type)
{
WORD32 i4_mem_tab_idx = 0;
- static vbr_storage_vbv_t s_vbr_storage_vbv_temp;
+ vbr_storage_vbv_t s_vbr_storage_vbv_temp;
/*
* Hack for al alloc, during which we don't have any state memory.
diff --git a/encoder/ithread.h b/encoder/ithread.h
deleted file mode 100644
index 82170a5..0000000
--- a/encoder/ithread.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/******************************************************************************
- *
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- *****************************************************************************
- * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-*/
-/*****************************************************************************/
-/* */
-/* File Name : ithread.h */
-/* */
-/* Description : This file contains all the necessary structure and */
-/* enumeration definitions needed for the Application */
-/* Program Interface(API) of the */
-/* Thread Abstraction Layer */
-/* */
-/* List of Functions : ithread_get_handle_size() */
-/* ithread_get_mutex_lock_size() */
-/* ithread_create() */
-/* ithread_exit() */
-/* ithread_join() */
-/* ithread_get_mutex_struct_size() */
-/* ithread_mutex_init() */
-/* ithread_mutex_destroy() */
-/* ithread_mutex_lock() */
-/* ithread_mutex_unlock() */
-/* ithread_yield() */
-/* ithread_sleep() */
-/* ithread_msleep() */
-/* ithread_usleep() */
-/* ithread_get_sem_struct_size() */
-/* ithread_sem_init() */
-/* ithread_sem_post() */
-/* ithread_sem_wait() */
-/* ithread_sem_destroy() */
-/* ithread_set_affinity() */
-/* */
-/* Issues / Problems : None */
-/* */
-/* Revision History : */
-/* */
-/* DD MM YYYY Author(s) Changes */
-/* 06 09 2012 Harish Initial Version */
-/* */
-/*****************************************************************************/
-
-#ifndef _ITHREAD_H_
-#define _ITHREAD_H_
-
-UWORD32 ithread_get_handle_size(void);
-
-UWORD32 ithread_get_mutex_lock_size(void);
-
-WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument);
-
-void ithread_exit(void *val_ptr);
-
-WORD32 ithread_join(void *thread_id, void ** val_ptr);
-
-WORD32 ithread_get_mutex_struct_size(void);
-
-WORD32 ithread_mutex_init(void *mutex);
-
-WORD32 ithread_mutex_destroy(void *mutex);
-
-WORD32 ithread_mutex_lock(void *mutex);
-
-WORD32 ithread_mutex_unlock(void *mutex);
-
-void ithread_yield(void);
-
-void ithread_sleep(UWORD32 u4_time);
-
-void ithread_msleep(UWORD32 u4_time_ms);
-
-void ithread_usleep(UWORD32 u4_time_us);
-
-UWORD32 ithread_get_sem_struct_size(void);
-
-WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value);
-
-WORD32 ithread_sem_post(void *sem);
-
-WORD32 ithread_sem_wait(void *sem);
-
-WORD32 ithread_sem_destroy(void *sem);
-
-WORD32 ithread_set_affinity(WORD32 core_id);
-#endif /* _ITHREAD_H_ */
diff --git a/encoder/ive2.h b/encoder/ive2.h
index 8cb0fd1..7a543bb 100644
--- a/encoder/ive2.h
+++ b/encoder/ive2.h
@@ -293,7 +293,7 @@ typedef struct
UWORD32 u4_max_bitrate;
/** Maximum number of consecutive B frames */
- UWORD32 u4_max_num_bframes;
+ UWORD32 u4_num_bframes;
/** Content type Interlaced/Progressive */
IV_CONTENT_TYPE_T e_content_type;
@@ -394,6 +394,15 @@ typedef struct
/* encoded frame type */
UWORD32 u4_encoded_frame_type;
+ /** Flag to indicate if this is the last output from the encoder */
+ UWORD32 u4_is_last;
+
+ /** Lower 32bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
/** Descriptor for input raw buffer freed from codec */
iv_raw_buf_t s_inp_buf;
@@ -1339,9 +1348,6 @@ typedef struct
/** IDR frame interval */
UWORD32 u4_idr_frm_interval;
- /** consecutive B frames */
- UWORD32 u4_num_b_frames;
-
/** Lower 32bits of time stamp corresponding to input buffer,
* from which this command takes effect */
UWORD32 u4_timestamp_low;
@@ -1428,6 +1434,9 @@ typedef struct
* from which this command takes effect */
UWORD32 u4_timestamp_high;
+ /** Entropy coding mode flag: 0-CAVLC, 1-CABAC */
+ UWORD32 u4_entropy_coding_mode;
+
}ive_ctl_set_profile_params_ip_t;
/** Output structure : Set Profile Params */
diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c
index 58ec4d0..980a744 100644
--- a/encoder/mips/ih264e_function_selector.c
+++ b/encoder/mips/ih264e_function_selector.c
@@ -58,8 +58,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -68,14 +68,15 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
-#include "ih264e_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_platform_macros.h"
/**
diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c
index 429cdab..b0acb19 100644
--- a/encoder/x86/ih264e_function_selector.c
+++ b/encoder/x86/ih264e_function_selector.c
@@ -58,8 +58,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -68,14 +68,15 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "ih264_macros.h"
#include "ih264_platform_macros.h"
-#include "ih264e_defs.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_platform_macros.h"
/**
diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c
index d953c76..6888e5d 100644
--- a/encoder/x86/ih264e_function_selector_sse42.c
+++ b/encoder/x86/ih264e_function_selector_sse42.c
@@ -59,8 +59,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -69,23 +69,18 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_platform_macros.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264e_defs.h"
-#include "ih264e_structs.h"
-#include "ih264_deblk_edge_filters.h"
#include "ih264e_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_cavlc.h"
-#include "ih264_padding.h"
#include "ih264e_intra_modes_eval.h"
-#include "ih264_mem_fns.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c
index 4eb4c7b..4419112 100644
--- a/encoder/x86/ih264e_function_selector_ssse3.c
+++ b/encoder/x86/ih264e_function_selector_ssse3.c
@@ -59,8 +59,8 @@
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
#include "ime_distortion_metrics.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-#include "ih264_defs.h"
#include "ih264_error.h"
#include "ih264_structs.h"
#include "ih264_trans_quant_itrans_iquant.h"
@@ -69,23 +69,18 @@
#include "ih264_padding.h"
#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
-
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
#include "ih264e_platform_macros.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264e_defs.h"
-#include "ih264e_structs.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264e_cabac.h"
#include "ih264e_core_coding.h"
#include "ih264_cavlc_tables.h"
#include "ih264e_cavlc.h"
-#include "ih264_padding.h"
#include "ih264e_intra_modes_eval.h"
-#include "ih264_mem_fns.h"
#include "ih264e_fmt_conv.h"
#include "ih264e_half_pel.h"
diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c
index 42580fa..8da73b7 100644
--- a/encoder/x86/ih264e_half_pel_ssse3.c
+++ b/encoder/x86/ih264e_half_pel_ssse3.c
@@ -55,7 +55,6 @@
#include "ih264_defs.h"
#include "ih264e_half_pel.h"
#include "ih264_macros.h"
-#include "ih264e_half_pel.h"
#include "ih264e_debug.h"
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
index 0f4a9ad..c11d7f2 100644
--- a/encoder/x86/ih264e_intra_modes_eval_ssse3.c
+++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
@@ -67,18 +67,20 @@
#include "ih264_inter_pred_filters.h"
#include "ih264_mem_fns.h"
#include "ih264_padding.h"
-#include "ih264_intra_pred_filters.h"
#include "ih264_deblk_edge_filters.h"
#include "ime_distortion_metrics.h"
#include "ih264e_error.h"
#include "ih264e_bitstream.h"
+#include "ime_defs.h"
#include "ime_structs.h"
-
+#include "ih264_cabac_tables.h"
#include "irc_cntrl_param.h"
#include "irc_frame_info_collector.h"
#include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
#include "ih264e_structs.h"
+#include "ih264e_cabac.h"
#include "ih264e_intra_modes_eval.h"
#include "ih264e_globals.h"
#include "ime_platform_macros.h"
diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c
index baf18a4..0266916 100644
--- a/encoder/x86/ime_distortion_metrics_sse42.c
+++ b/encoder/x86/ime_distortion_metrics_sse42.c
@@ -249,12 +249,12 @@ void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src,
WORD32 i4_max_sad,
WORD32 *pi4_mb_distortion)
{
- UNUSED (i4_max_sad);
__m128i src_r0, src_r1, src_r2, src_r3;
__m128i est_r0, est_r1, est_r2, est_r3;
__m128i res_r0, res_r1, res_r2, res_r3;
__m128i sad_val;
int val1, val2;
+ UNUSED (i4_max_sad);
// Row 0-3 sad calculation
src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
@@ -500,7 +500,6 @@ void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
WORD32 i4_max_sad,
WORD32 *pi4_mb_distortion)
{
- UNUSED (i4_max_sad);
__m128i src_r0, src_r1, src_r2, src_r3;
__m128i est_r0, est_r1, est_r2, est_r3;
__m128i res_r0, res_r1, res_r2, res_r3;
@@ -509,6 +508,7 @@ void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
WORD32 i4_sad;
UWORD8 *pu1_src_temp = pu1_src + src_strd;
UWORD8 *pu1_est_temp = pu1_est + est_strd;
+ UNUSED (i4_max_sad);
// Row 0,2,4,6 sad calculation
src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
diff --git a/test/Android.mk b/test/Android.mk
index adb14f0..0085832 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -6,4 +6,3 @@ include $(LOCAL_PATH)/encoder.mk
# decoder
include $(LOCAL_PATH)/decoder.mk
-
diff --git a/test/decoder/main.c b/test/decoder/main.c
index 921c240..8c9e885 100644
--- a/test/decoder/main.c
+++ b/test/decoder/main.c
@@ -1612,9 +1612,6 @@ WORD32 display_thread(void *pv_ctx)
}
ps_app_ctx->disp_deinit(ps_app_ctx->pv_disp_ctx);
- /* destroy the display thread */
- ithread_exit(ps_app_ctx->display_thread_handle);
-
return 0;
}
@@ -1827,6 +1824,7 @@ int main(WORD32 argc, CHAR *argv[])
UWORD32 frm_cnt = 0;
WORD32 total_bytes_comsumed;
UWORD32 max_op_frm_ts;
+ UWORD32 u4_num_disp_bufs_with_dec;;
#ifdef PROFILE_ENABLE
UWORD32 u4_tot_cycles = 0;
@@ -2313,6 +2311,7 @@ int main(WORD32 argc, CHAR *argv[])
ivd_ctl_getbufinfo_ip_t s_ctl_ip;
ivd_ctl_getbufinfo_op_t s_ctl_op;
+ WORD32 outlen = 0;
s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
@@ -2342,7 +2341,6 @@ int main(WORD32 argc, CHAR *argv[])
/* Or if shared and output is 420P */
if((0 == s_app_ctx.u4_share_disp_buf) || (IV_YUV_420P == s_app_ctx.e_output_chroma_format))
{
- UWORD32 outlen;
ps_out_buf->u4_min_out_buf_size[0] =
s_ctl_op.u4_min_out_buf_size[0];
ps_out_buf->u4_min_out_buf_size[1] =
@@ -2377,6 +2375,56 @@ int main(WORD32 argc, CHAR *argv[])
ps_out_buf->u4_num_bufs = s_ctl_op.u4_min_num_out_bufs;
}
+#ifdef APP_EXTRA_BUFS
+ s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS;
+ s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS;
+#endif
+
+ /*****************************************************************************/
+ /* API Call: Allocate display buffers for display buffer shared case */
+ /*****************************************************************************/
+
+ for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
+ {
+
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] =
+ s_ctl_op.u4_min_out_buf_size[0];
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] =
+ s_ctl_op.u4_min_out_buf_size[1];
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] =
+ s_ctl_op.u4_min_out_buf_size[2];
+
+ outlen = s_ctl_op.u4_min_out_buf_size[0];
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ outlen += s_ctl_op.u4_min_out_buf_size[1];
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ outlen += s_ctl_op.u4_min_out_buf_size[2];
+
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen);
+
+ if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL)
+ {
+ sprintf(ac_error_str,
+ "\nAllocation failure for output buffer of i4_size %d",
+ outlen);
+ codec_exit(ac_error_str);
+ }
+
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[1] =
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[0]
+ + (s_ctl_op.u4_min_out_buf_size[0]);
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[2] =
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[1]
+ + (s_ctl_op.u4_min_out_buf_size[1]);
+
+ s_app_ctx.s_disp_buffers[i].u4_num_bufs =
+ s_ctl_op.u4_min_num_out_bufs;
+ }
+ s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
}
}
@@ -2556,73 +2604,6 @@ int main(WORD32 argc, CHAR *argv[])
/*************************************************************************/
//if(1 == s_app_ctx.u4_share_disp_buf)
{
- ivd_ctl_getbufinfo_ip_t s_ctl_ip;
- ivd_ctl_getbufinfo_op_t s_ctl_op;
- WORD32 outlen = 0;
-
- s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
- s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
- s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
- s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
- ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
- (void *)&s_ctl_op);
- if(ret != IV_SUCCESS)
- {
- sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code);
- codec_exit(ac_error_str);
- }
-
-#ifdef APP_EXTRA_BUFS
- s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS;
- s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS;
-#endif
-
- /*****************************************************************************/
- /* API Call: Allocate display buffers for display buffer shared case */
- /*****************************************************************************/
-
- for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
- {
-
- s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] =
- s_ctl_op.u4_min_out_buf_size[0];
- s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] =
- s_ctl_op.u4_min_out_buf_size[1];
- s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] =
- s_ctl_op.u4_min_out_buf_size[2];
-
- outlen = s_ctl_op.u4_min_out_buf_size[0];
- if(s_ctl_op.u4_min_num_out_bufs > 1)
- outlen += s_ctl_op.u4_min_out_buf_size[1];
-
- if(s_ctl_op.u4_min_num_out_bufs > 2)
- outlen += s_ctl_op.u4_min_out_buf_size[2];
-
- s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen);
-
- if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL)
- {
- sprintf(ac_error_str,
- "\nAllocation failure for output buffer of i4_size %d",
- outlen);
- codec_exit(ac_error_str);
- }
-
- if(s_ctl_op.u4_min_num_out_bufs > 1)
- s_app_ctx.s_disp_buffers[i].pu1_bufs[1] =
- s_app_ctx.s_disp_buffers[i].pu1_bufs[0]
- + (s_ctl_op.u4_min_out_buf_size[0]);
-
- if(s_ctl_op.u4_min_num_out_bufs > 2)
- s_app_ctx.s_disp_buffers[i].pu1_bufs[2] =
- s_app_ctx.s_disp_buffers[i].pu1_bufs[1]
- + (s_ctl_op.u4_min_out_buf_size[1]);
-
- s_app_ctx.s_disp_buffers[i].u4_num_bufs =
- s_ctl_op.u4_min_num_out_bufs;
- }
- s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
-
/*****************************************************************************/
/* API Call: Send the allocated display buffers to codec */
/*****************************************************************************/
@@ -2638,7 +2619,7 @@ int main(WORD32 argc, CHAR *argv[])
memcpy(&(s_set_display_frame_ip.s_disp_buffer),
&(s_app_ctx.s_disp_buffers),
- s_ctl_op.u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t));
+ s_app_ctx.num_disp_buf * sizeof(ivd_out_bufdesc_t));
ret = ivd_api_function((iv_obj_t *)codec_obj,
(void *)&s_set_display_frame_ip,
@@ -2732,7 +2713,17 @@ int main(WORD32 argc, CHAR *argv[])
#ifndef PRINT_PICSIZE
get_version(codec_obj);
#endif
- max_op_frm_ts = (s_app_ctx.u4_max_frm_ts > 0)? (s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay): 0xffffffff;
+
+
+ max_op_frm_ts = s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay;
+
+ if(max_op_frm_ts < s_app_ctx.disp_delay)
+ max_op_frm_ts = 0xffffffff;/* clip as overflow has occured*/
+
+ max_op_frm_ts = (s_app_ctx.u4_max_frm_ts > 0)? (max_op_frm_ts): 0xffffffff;
+
+ u4_num_disp_bufs_with_dec = 0;
+
while(u4_op_frm_ts < max_op_frm_ts)
{
@@ -2759,9 +2750,10 @@ int main(WORD32 argc, CHAR *argv[])
}
#endif
- if(u4_ip_frm_ts < s_app_ctx.num_disp_buf)
+ if(u4_num_disp_bufs_with_dec < s_app_ctx.num_disp_buf)
{
- release_disp_frame(codec_obj, u4_ip_frm_ts);
+ release_disp_frame(codec_obj, u4_num_disp_bufs_with_dec);
+ u4_num_disp_bufs_with_dec ++;
}
@@ -2991,6 +2983,9 @@ int main(WORD32 argc, CHAR *argv[])
sprintf(ac_error_str, "Error in Reset");
codec_exit(ac_error_str);
}
+
+ /* on reset, all buffers are released by the lib */
+ u4_num_disp_bufs_with_dec = 0;
/*************************************************************************/
/* set num of cores */
/*************************************************************************/
diff --git a/test/encoder/app.h b/test/encoder/app.h
index 7c16fcd..ad45f5a 100644
--- a/test/encoder/app.h
+++ b/test/encoder/app.h
@@ -42,6 +42,8 @@
#include "ive2.h"
#ifdef WINDOWS_TIMER
#include <windows.h>
+#else
+#include <sys/time.h>
#endif
/*****************************************************************************/
/* Function Macros */
@@ -55,13 +57,13 @@
/* Constant Macros */
/*****************************************************************************/
-#define DEFAULT_NUM_INPUT_BUFS 1
+#define DEFAULT_NUM_INPUT_BUFS 32
#define DEFAULT_MAX_INPUT_BUFS 32
-#define DEFAULT_NUM_OUTPUT_BUFS 1
+#define DEFAULT_NUM_OUTPUT_BUFS 32
#define DEFAULT_MAX_OUTPUT_BUFS 32
-#define DEFAULT_NUM_RECON_BUFS 1
+#define DEFAULT_NUM_RECON_BUFS 32
#define DEFAULT_MAX_RECON_BUFS DEFAULT_NUM_RECON_BUFS
@@ -69,11 +71,12 @@
#define MAX_VBV_BUFF_SIZE (120 * 16384)
#define MAX_NUM_IO_BUFS 3
-#define DEFAULT_MAX_REF_FRM 1
+#define DEFAULT_MAX_REF_FRM 2
#define DEFAULT_MAX_REORDER_FRM 0
-#define DEFAULT_QP_MIN 0
+#define DEFAULT_QP_MIN 4
#define DEFAULT_QP_MAX 51
#define DEFAULT_MAX_BITRATE 20000000
+#define DEFAULT_NUM_BFRAMES 0
#define DEFAULT_MAX_SRCH_RANGE_X 256
#define DEFAULT_MAX_SRCH_RANGE_Y 256
#define DEFAULT_MAX_FRAMERATE 120000
@@ -94,7 +97,7 @@
#define DEFAULT_TGT_FRAME_RATE 30
#define DEFAULT_MAX_WD 1920
#define DEFAULT_MAX_HT 1920
-#define DEFAULT_MAX_LEVEL 50
+#define DEFAULT_MAX_LEVEL 40
#define DEFAULT_STRIDE 0
#define DEFAULT_WD 0
#define DEFAULT_HT 0
@@ -127,6 +130,8 @@
#define DEFAULT_EPROFILE IV_PROFILE_BASE
#define DEFAULT_SLICE_MODE 0
#define DEFAULT_SLICE_PARAM 256
+#define DEFAULT_ENTROPY_CODING_MODE 0
+
#define STRLENGTH 500
@@ -281,6 +286,7 @@ typedef struct
UWORD32 u4_i_interval;
UWORD32 u4_idr_interval;
UWORD32 u4_b_frames;
+ UWORD32 u4_num_bframes;
UWORD32 u4_disable_deblk_level;
UWORD32 u4_hpel;
UWORD32 u4_qpel;
@@ -289,6 +295,7 @@ typedef struct
UWORD32 u4_slice_mode;
UWORD32 u4_slice_param;
+ UWORD32 u4_entropy_coding_mode;
void *pv_input_thread_handle;
void *pv_output_thread_handle;
diff --git a/test/encoder/input.c b/test/encoder/input.c
index c292612..1d40eb0 100644
--- a/test/encoder/input.c
+++ b/test/encoder/input.c
@@ -27,7 +27,6 @@
#include <stdio.h>
#include <assert.h>
#include <string.h>
-#include <sys/time.h>
/* User include files */
#include "ih264_typedefs.h"
diff --git a/test/encoder/main.c b/test/encoder/main.c
index 26420e2..4ff71af 100644
--- a/test/encoder/main.c
+++ b/test/encoder/main.c
@@ -28,7 +28,6 @@
#include <stddef.h>
#include <assert.h>
#include <string.h>
-#include <sys/time.h>
#ifndef IOS
#include <malloc.h>
@@ -36,6 +35,8 @@
#ifdef WINDOWS_TIMER
#include "windows.h"
+#else
+#include <sys/time.h>
#endif
/* User include files */
#include "ih264_typedefs.h"
@@ -91,6 +92,7 @@ typedef enum
I_QP_MIN,
P_QP_MIN,
B_QP_MIN,
+ ENTROPY,
AIR,
AIR_REFRESH_PERIOD,
ARCH,
@@ -105,6 +107,7 @@ typedef enum
I_INTERVAL,
IDR_INTERVAL,
B_FRMS,
+ NUM_B_FRMS,
DISABLE_DBLK,
PROFILE,
FAST_SAD,
@@ -153,7 +156,7 @@ static const argument_t argument_mapping[] =
{ "--", "--src_framerate", SRC_FRAMERATE, "Source frame rate \n" },
{ "--", "--i_interval", I_INTERVAL, "Intra frame interval \n" },
{ "--", "--idr_interval", IDR_INTERVAL, "IDR frame interval \n" },
- { "--", "--bframes", B_FRMS, "Consecutive B frames \n" },
+ { "--", "--bframes", NUM_B_FRMS, "Maximum number of consecutive B frames \n" },
{ "--", "--speed", ENC_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" },
{ "--", "--me_speed", ME_SPEED, "Encoder speed preset 0 (slowest) and 100 (fastest)\n" },
{ "--", "--fast_sad", FAST_SAD, " Flag for faster sad execution\n" },
@@ -193,6 +196,7 @@ static const argument_t argument_mapping[] =
{ "--", "--qp_i_min", I_QP_MIN, "Min QP for I frames\n"},
{ "--", "--qp_p_min", P_QP_MIN, "Min QP for P frames\n"},
{ "--", "--qp_b_min", B_QP_MIN, "Min QP for B frames\n"},
+ { "--", "--entropy", ENTROPY, "Entropy coding mode(0: CAVLC or 1: CABAC)\n"},
{ "--", "--vbv_delay", VBV_DELAY, "VBV buffer delay\n"},
{ "--", "--vbv_size", VBV_SIZE, "VBV buffer size\n"},
{ "-i4", "--intra_4x4_enable", INTRA_4x4_ENABLE, "Intra 4x4 enable \n" },
@@ -657,6 +661,10 @@ void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value)
sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_min);
break;
+ case ENTROPY:
+ sscanf(value, "%d", &ps_app_ctxt->u4_entropy_coding_mode);
+ break;
+
case AIR:
sscanf(value, "%d", &ps_app_ctxt->u4_air);
break;
@@ -742,8 +750,8 @@ void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value)
sscanf(value, "%d", &ps_app_ctxt->u4_idr_interval);
break;
- case B_FRMS:
- sscanf(value, "%d", &ps_app_ctxt->u4_b_frames);
+ case NUM_B_FRMS:
+ sscanf(value, "%d", &ps_app_ctxt->u4_num_bframes);
break;
case DISABLE_DEBLOCK_LEVEL:
@@ -886,7 +894,13 @@ void validate_params(app_ctxt_t *ps_app_ctxt)
sprintf(ac_error, "Invalid number of frames to be encoded: %d", ps_app_ctxt->u4_max_num_frms);
invalid_argument_exit(ac_error);
}
-
+ if ((0 != (WORD32)ps_app_ctxt->u4_entropy_coding_mode)
+ && (1 != (WORD32)ps_app_ctxt->u4_entropy_coding_mode))
+ {
+ sprintf(ac_error, "Invalid entropy codeing mode: %d",
+ ps_app_ctxt->u4_entropy_coding_mode);
+ invalid_argument_exit(ac_error);
+ }
return;
}
@@ -944,6 +958,7 @@ void init_default_params(app_ctxt_t *ps_app_ctxt)
ps_app_ctxt->u4_enable_alt_ref = DEFAULT_ENABLE_ALT_REF;
ps_app_ctxt->u4_rc = DEFAULT_RC;
ps_app_ctxt->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+ ps_app_ctxt->u4_num_bframes = DEFAULT_NUM_BFRAMES;
ps_app_ctxt->u4_bitrate = DEFAULT_BITRATE;
ps_app_ctxt->u4_i_qp = DEFAULT_I_QP;
ps_app_ctxt->u4_p_qp = DEFAULT_P_QP;
@@ -960,7 +975,6 @@ void init_default_params(app_ctxt_t *ps_app_ctxt)
ps_app_ctxt->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
ps_app_ctxt->u4_i_interval = DEFAULT_I_INTERVAL;
ps_app_ctxt->u4_idr_interval = DEFAULT_IDR_INTERVAL;
- ps_app_ctxt->u4_b_frames = DEFAULT_B_FRAMES;
ps_app_ctxt->u4_disable_deblk_level = DEFAULT_DISABLE_DEBLK_LEVEL;
ps_app_ctxt->u4_hpel = DEFAULT_HPEL;
ps_app_ctxt->u4_qpel = DEFAULT_QPEL;
@@ -979,6 +993,7 @@ void init_default_params(app_ctxt_t *ps_app_ctxt)
ps_app_ctxt->u4_psnr_cnt = 0;
ps_app_ctxt->pu1_psnr_buf = NULL;
ps_app_ctxt->u4_psnr_buf_size = 0;
+ ps_app_ctxt->u4_entropy_coding_mode = DEFAULT_ENTROPY_CODING_MODE;
return;
}
@@ -1334,7 +1349,6 @@ void set_gop_params(app_ctxt_t *ps_app_ctxt,
s_gop_params_ip.s_ive_ip.u4_i_frm_interval = ps_app_ctxt->u4_i_interval;
s_gop_params_ip.s_ive_ip.u4_idr_frm_interval = ps_app_ctxt->u4_idr_interval;
- s_gop_params_ip.s_ive_ip.u4_num_b_frames = ps_app_ctxt->u4_b_frames;
s_gop_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
s_gop_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
@@ -1368,6 +1382,8 @@ void set_profile_params(app_ctxt_t *ps_app_ctxt,
s_profile_params_ip.s_ive_ip.e_profile = ps_app_ctxt->e_profile;
+ s_profile_params_ip.s_ive_ip.u4_entropy_coding_mode = ps_app_ctxt->u4_entropy_coding_mode;
+
s_profile_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
s_profile_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
@@ -1433,7 +1449,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
IV_STATUS_T status = IV_SUCCESS;
- WORD32 i, read_failed = 0, is_last = 0, buff_size = 0, num_bytes = 0;
+ WORD32 i, is_last = 0, buff_size = 0, num_bytes = 0;
UWORD32 u4_total_time = 0;
UWORD8 *pu1_buf = NULL;
UWORD32 u4_timestamp_low, u4_timestamp_high;
@@ -1449,6 +1465,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
iv_raw_buf_t s_inp_buf, s_recon_buf;
CHAR ac_error[STRLENGTH];
WORD32 end_of_frames=0;
+ WORD32 i4_inp_done =0;
u4_timestamp_low = 0;
u4_timestamp_high = 0;
@@ -1498,18 +1515,6 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
}
}
-#if 0 //Input buffer dump
- //if(1 == ps_app_ctxt->u4_psnr_enable)
- {
- ps_app_ctxt->fp_dump_op = fopen("D:\\dump\\inp.yuv", "wb");
- if(NULL == ps_app_ctxt->fp_dump_op)
- {
- sprintf(ac_error, "Unable to open output file for input dump: %s", "D:\\dump\\inp.yuv");
- invalid_argument_exit(ac_error);
- }
- }
-#endif //Input buffer dump
-
/* If PSNR is enabled, open input file again and hold a different file pointer
* This makes it easy to compute PSNR without adding dependency between input and recon threads
*/
@@ -1548,10 +1553,6 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
while(1)
{
-
-
-
-
/******************************************************************************/
/****************** Input Initialization **************************************/
/******************************************************************************/
@@ -1568,6 +1569,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
}
}
+ if (i == DEFAULT_MAX_INPUT_BUFS)
+ {
+ printf("\n Unable to find a free input buffer!!");
+ exit(0);
+ }
+
ps_video_encode_ip->u4_size = sizeof(ih264e_video_encode_ip_t);
ps_video_encode_op->u4_size = sizeof(ih264e_video_encode_op_t);
@@ -1637,11 +1644,19 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd *2;
}
+ /*
+ * Here we read input and other associated buffers. Regardless of success
+ * we will proceed from here, as we will need extra calls to flush out the
+ * input queue in the encoder. Note that this is not necessary. You can just
+ * send encode calls with valid output and recon buffers till the
+ * queue is flushed.
+ */
while(1)
{
IV_STATUS_T mb_info_status = IV_SUCCESS, pic_info_status = IV_SUCCESS;
- read_failed = 0;
+
status = read_input(ps_app_ctxt->fp_ip, ps_inp_raw_buf);
+
if (ps_app_ctxt->u4_mb_info_type != 0)
{
mb_info_status = read_mb_info(ps_app_ctxt, pv_mb_info);
@@ -1656,15 +1671,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
if(0 == ps_app_ctxt->u4_loopback)
{
is_last = 1;
- read_failed = 1;
-
break;
}
else
fseek(ps_app_ctxt->fp_ip, 0, SEEK_SET);
}
- else
- break;
+ break;
}
/******************************************************************************/
@@ -1716,13 +1728,12 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
ps_inp_raw_buf->apv_bufs[0] = NULL;
ps_inp_raw_buf->apv_bufs[1] = NULL;
ps_inp_raw_buf->apv_bufs[2] = NULL;
- end_of_frames = 1;
}
ps_video_encode_ip->u4_is_last = is_last;
ps_video_encode_ip->u4_mb_info_type = ps_app_ctxt->u4_mb_info_type;
ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type;;
- ps_video_encode_op->s_out_buf.pv_buf= 0;
+ ps_video_encode_op->s_out_buf.pv_buf= NULL;
ps_video_encode_ip->u4_timestamp_high = u4_timestamp_high;
ps_video_encode_ip->u4_timestamp_low = u4_timestamp_low;
@@ -1766,11 +1777,7 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
/****************** Writing Output ********************************************/
/******************************************************************************/
num_bytes = 0;
- /* Break if all the encoded frames are taken from encoder */
- if(1 == end_of_frames && 0 == ps_video_encode_op->output_present)
- {
- break;
- }
+
if(1 == ps_video_encode_op->output_present)
{
num_bytes = ps_video_encode_op->s_out_buf.u4_bytes;
@@ -1783,7 +1790,11 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
printf("Error: Unable to write to output file\n");
break;
}
+ }
+ /* free input buffer if codec returns a valid input buffer */
+ if (ps_video_encode_op->s_inp_buf.apv_bufs[0])
+ {
/* Reuse of freed input buffer */
for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++)
{
@@ -1793,8 +1804,11 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
break;
}
}
+ }
- /* Reuse of freed output buffer */
+ /* free output buffer if codec returns a valid output buffer */
+ // if(ps_video_encode_op->s_out_buf.pv_buf)
+ {
for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++)
{
if(ps_app_ctxt->as_output_buf[i].pu1_buf == ps_video_encode_op->s_out_buf.pv_buf)
@@ -1805,132 +1819,169 @@ void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
}
}
- if (ps_video_encode_op->dump_recon == 1)
+ /**********************************************************************
+ * Print stats
+ **********************************************************************/
{
- ps_app_ctxt->u4_pics_cnt++;
+ UWORD8 u1_pic_type[][5] =
+ { "IDR", "I", "P", "B", "NA" };
+ WORD32 lookup_idx = 0;
- ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt;
- if (ps_app_ctxt->u4_psnr_enable == 0)
+ if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ == IV_IDR_FRAME)
{
- UWORD8 u1_pic_type[][5] = { "IDR", "I", "P","NA" };
- WORD32 lookup_idx = 0;
+ lookup_idx = 0;
+ }
+ else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ == IV_I_FRAME)
+ {
+ lookup_idx = 1;
+ }
+ else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ == IV_P_FRAME)
+ {
+ lookup_idx = 2;
+ }
+ else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ == IV_B_FRAME)
+ {
+ lookup_idx = 3;
+ }
+ else if(ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ == IV_NA_FRAME)
+ {
+ lookup_idx = 4;
+ }
- if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME)
- {
- lookup_idx = 0;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME)
- {
- lookup_idx = 1;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME)
- {
- lookup_idx = 2;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME)
- {
- lookup_idx = 3;
- }
+ if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type
+ != IV_NA_FRAME)
+ {
+ ps_app_ctxt->u4_pics_cnt++;
+ ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt;
+ ps_app_ctxt->u4_total_bytes += num_bytes;
+ }
- printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max);
+ if (ps_app_ctxt->u4_psnr_enable == 0)
+ {
+ printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n",
+ u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt,
+ num_bytes, timetaken, ps_app_ctxt->avg_time,
+ peak_avg_max);
}
+ }
+
- ps_app_ctxt->u4_total_bytes += num_bytes;
+ /* For psnr computation, we need to read the correct input frame and
+ * compare with recon. The difficulty with doing it is that we only know
+ * that the frame number of recon is monotonically increasing. There
+ * may be gaps in the recon if any pre or post enc skip happens. There are
+ * 3 scenarios:
+ * 1) A frame is encoded -> returns the pic type
+ * 2) A frame is not encoded -> Encoder is waiting, the frame may get
+ * encoded later
+ * 3) A frame is not encoded -> A post enc or pre enc skip happened. The
+ * frame is not going to be encoded
+ *
+ * The 1st and 2nd scenarios are easy, since we just need to increment
+ * recon cnt whenever we get a valid recon. This cnt can be used to
+ * sync the recon and input.
+ * The 3rd scenario in conjunction with the 2nd will pose problems. Even if
+ * the returned frame is NA, we do not know whether we should increment the
+ * recon cnt or not, because it can be case 2 or case 3.
+ *
+ * Solutions:
+ * -------------------------
+ * One way to overcome this would be to return more information along with
+ * the frame type. We could signal whether a frame was skipped as a part of
+ * the returned frame type.
+ * This will not work. Since the output and recon are not in sync, we
+ * cannot use the current output frame type to determine if a recon
+ * is present currently or not. We need some other way to achieve this.
+ *
+ * Another way to do this, which is cleaner and maintains the separation
+ * between recon and the output, is to set the width [& height] of the output
+ * recon buffer to zero. In effect we will be saying: "look, there
+ * is a recon, but because the frame was not encoded it has a width of 0".
+ * To be clearer, we need to set the height to zero as well.
+ *
+ * But are we using these variables for allocating and deallocating
+ * the buffers somewhere? No, we are not. The buffer gets re-initialized
+ * at every encode call.
+ *
+ * Fixes
+ * ------------------------
+ * Currently the recon buff width and height are set in the encoder.
+ * This will not work now because recon and input are not
+ * in sync. Hence a recon buff sent at time stamp x will get used to
+ * fill recon of input at time stamp y (x > y). If we reduced the
+ * frame dimensions in between, the recon buffer will not have enough
+ * space. Hence we need to set the width and height appropriately inside
+ * the lib itself.
+ */
- /******************************************************************************/
- /****************** Writing Recon ********************************************/
- /******************************************************************************/
- if(1 == ps_video_encode_op->output_present)
+ if (ps_app_ctxt->u4_recon_enable || ps_app_ctxt->u4_chksum_enable
+ || ps_app_ctxt->u4_psnr_enable)
+ {
+ if (ps_video_encode_op->dump_recon)
{
s_recon_buf = ps_video_encode_op->s_recon_buf;
- /* Dump recon when enabled, and output bytes != 0*/
- if(ps_app_ctxt->u4_recon_enable)
- {
- status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf);
- if(IV_SUCCESS != status)
- {
- printf("Error: Unable to write to recon file\n");
- break;
- }
- }
-
-
- if(ps_app_ctxt->u4_psnr_enable)
- {
+ /* Read input for psnr computation */
+ if (ps_app_ctxt->u4_psnr_enable)
read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf);
- compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf);
- }
-
- if(ps_app_ctxt->u4_chksum_enable)
+ /* if we have a valid recon buffer do the associated tasks */
+ if (s_recon_buf.au4_wd[0])
{
- WORD32 comp;
- WORD32 num_comp;
- num_comp = 2;
- if(IV_YUV_420P == s_recon_buf.e_color_fmt)
- num_comp = 3;
+ /* Dump recon when enabled, and output bytes != 0 */
+ if (ps_app_ctxt->u4_recon_enable)
+ {
+ status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf);
+ if (IV_SUCCESS != status)
+ {
+ printf("Error: Unable to write to recon file\n");
+ break;
+ }
+ }
- for(comp = 0; comp < num_comp; comp++ )
+ if (ps_app_ctxt->u4_psnr_enable)
{
- UWORD8 au1_chksum[16];
+ compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf);
+ }
- calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp],
- s_recon_buf.au4_strd[comp],
- s_recon_buf.au4_wd[comp],
- s_recon_buf.au4_ht[comp],
- au1_chksum);
- fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum);
+ if (ps_app_ctxt->u4_chksum_enable)
+ {
+ WORD32 comp, num_comp = 2;
+
+ if (IV_YUV_420P == s_recon_buf.e_color_fmt)
+ num_comp = 3;
+
+ for (comp = 0; comp < num_comp; comp++)
+ {
+ UWORD8 au1_chksum[16];
+ calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp],
+ s_recon_buf.au4_strd[comp],
+ s_recon_buf.au4_wd[comp],
+ s_recon_buf.au4_ht[comp],
+ au1_chksum);
+ fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum);
+ }
}
}
-
-
}
}
- else
- {
- if (ps_app_ctxt->u4_psnr_enable == 0)
- {
- UWORD8 u1_pic_type[][5] = { "IDR", "I", "P", "NA" };
- WORD32 lookup_idx = 0;
- if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME)
- {
- lookup_idx = 0;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME)
- {
- lookup_idx = 1;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME)
- {
- lookup_idx = 2;
- }
- else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME)
- {
- lookup_idx = 3;
- }
-
- printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max);
- }
- else
- {
- read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf);
- }
- }
-#if 0 //Input buffer dump
- /*Dump input buffers to a file*/
- dump_input(ps_app_ctxt->fp_dump_op, ps_inp_raw_buf);
-#endif //Input buffer dump
+ u4_timestamp_low++;
- if(is_last)
+ /* Break if all the encoded frames are taken from encoder */
+ if (1 == ps_video_encode_op->u4_is_last)
+ {
break;
-
- u4_timestamp_low++;
+ }
}
- /* Pic count is 1 more than actual num frames encoded, beacause last call is to just get the output */
+ /* Pic count is 1 more than actual num frames encoded, because last call is to just get the output */
ps_app_ctxt->u4_pics_cnt--;
if(ps_app_ctxt->u4_psnr_enable)
@@ -2008,9 +2059,10 @@ int main(int argc, char *argv[])
/* error status */
IV_STATUS_T status = IV_SUCCESS;
-
+#ifdef IOS
/* temp var */
CHAR filename_with_path[STRLENGTH];
+#endif
WORD32 num_mem_recs;
iv_obj_t *ps_enc;
WORD32 i;
@@ -2036,6 +2088,11 @@ int main(int argc, char *argv[])
}
else if(argc == 2)
{
+ if (!strcmp(argv[1], "--help"))
+ {
+ print_usage();
+ exit(-1);
+ }
strcpy(ac_cfg_fname, argv[1]);
}
@@ -2053,7 +2110,7 @@ int main(int argc, char *argv[])
/* Read command line arguments */
if(argc > 2)
{
- for(i = 1; i < argc; i += 2)
+ for(i = 1; i + 1 < argc; i += 2)
{
if(CONFIG == get_argument(argv[i]))
{
@@ -2247,7 +2304,7 @@ int main(int argc, char *argv[])
s_init_ip.s_ive_ip.e_rc_mode = s_app_ctxt.u4_rc;
s_init_ip.s_ive_ip.u4_max_framerate = s_app_ctxt.u4_max_frame_rate;
s_init_ip.s_ive_ip.u4_max_bitrate = s_app_ctxt.u4_max_bitrate;
- s_init_ip.s_ive_ip.u4_max_num_bframes = DEFAULT_B_FRAMES;
+ s_init_ip.s_ive_ip.u4_num_bframes = s_app_ctxt.u4_num_bframes;
s_init_ip.s_ive_ip.e_content_type = IV_PROGRESSIVE;
s_init_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
s_init_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
@@ -2443,10 +2500,13 @@ int main(int argc, char *argv[])
WORD32 achieved_bitrate;
if(s_app_ctxt.u4_pics_cnt != 0)
+ {
bytes_per_frame = (s_app_ctxt.u4_total_bytes) / (s_app_ctxt.u4_pics_cnt);
+ }
else
+ {
bytes_per_frame = 0;
-
+ }
bytes_per_second = (bytes_per_frame * s_app_ctxt.u4_tgt_frame_rate);
achieved_bitrate = bytes_per_second * 8;
diff --git a/test/encoder/output.c b/test/encoder/output.c
index e0f27dd..8438869 100644
--- a/test/encoder/output.c
+++ b/test/encoder/output.c
@@ -28,7 +28,6 @@
#include <stdio.h>
#include <assert.h>
#include <string.h>
-#include <sys/time.h>
/* User include files */
#include "ih264_typedefs.h"
diff --git a/test/encoder/psnr.c b/test/encoder/psnr.c
index c9bb6a1..6913cb3 100644
--- a/test/encoder/psnr.c
+++ b/test/encoder/psnr.c
@@ -26,7 +26,6 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>
-#include <sys/time.h>
/* User include files */
#include "ih264_typedefs.h"
diff --git a/test/encoder/recon.c b/test/encoder/recon.c
index ed63aac..d177a62 100644
--- a/test/encoder/recon.c
+++ b/test/encoder/recon.c
@@ -28,7 +28,6 @@
#include <stdio.h>
#include <assert.h>
#include <string.h>
-#include <sys/time.h>
/* User include files */
#include "ih264_typedefs.h"
@@ -54,7 +53,7 @@
IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf)
{
WORD32 bytes;
- WORD32 wd, ht, strd;
+ WORD32 wd, ht;
UWORD8 *pu1_buf;
WORD32 i;
WORD32 comp;
@@ -68,7 +67,6 @@ IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf)
{
wd = ps_raw_buf->au4_wd[comp];
ht = ps_raw_buf->au4_ht[comp];
- strd = ps_raw_buf->au4_strd[comp];
pu1_buf = ps_raw_buf->apv_bufs[comp];
for(i = 0; i < ht; i++)
{
@@ -163,7 +161,7 @@ void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD
/* All the pointers and dimensions are initialized here
* to support change in resolution from the application */
- luma_size = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht);
+ luma_size = ps_app_ctxt->u4_max_wd * ps_app_ctxt->u4_max_ht;
chroma_size = (luma_size) / 4;
ps_raw_buf->apv_bufs[0] = pu1_buf;