summaryrefslogtreecommitdiffstats
path: root/encoder
diff options
context:
space:
mode:
authorHamsalekha S <hamsalekha.s@ittiam.com>2015-03-13 21:24:58 +0530
committerHamsalekha S <hamsalekha.s@ittiam.com>2015-04-02 15:59:02 +0530
commit8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
treecc806c96794356996b13ba9970941d0aed74a97e /encoder
parent3956d913d37327dcb340f836e604b04bd478b158 (diff)
downloadandroid_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip
Initial version
Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
Diffstat (limited to 'encoder')
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s313
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s529
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s346
-rwxr-xr-xencoder/arm/ih264e_fmt_conv.s329
-rwxr-xr-xencoder/arm/ih264e_function_selector.c170
-rwxr-xr-xencoder/arm/ih264e_function_selector_a9q.c252
-rwxr-xr-xencoder/arm/ih264e_function_selector_av8.c259
-rwxr-xr-xencoder/arm/ih264e_half_pel.s951
-rwxr-xr-xencoder/arm/ih264e_platform_macros.h143
-rwxr-xr-xencoder/arm/ime_distortion_metrics_a9q.s1353
-rwxr-xr-xencoder/arm/ime_platform_macros.h51
-rwxr-xr-xencoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s592
-rwxr-xr-xencoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s467
-rwxr-xr-xencoder/armv8/ih264e_half_pel_av8.s1024
-rwxr-xr-xencoder/armv8/ih264e_platform_macros.h143
-rwxr-xr-xencoder/armv8/ime_distortion_metrics_av8.s978
-rwxr-xr-xencoder/armv8/ime_platform_macros.h51
-rwxr-xr-xencoder/ih264e.h620
-rwxr-xr-xencoder/ih264e_api.c5559
-rwxr-xr-xencoder/ih264e_bitstream.c472
-rwxr-xr-xencoder/ih264e_bitstream.h401
-rwxr-xr-xencoder/ih264e_cavlc.c1448
-rwxr-xr-xencoder/ih264e_cavlc.h112
-rwxr-xr-xencoder/ih264e_config.h52
-rwxr-xr-xencoder/ih264e_core_coding.c2365
-rwxr-xr-xencoder/ih264e_core_coding.h653
-rwxr-xr-xencoder/ih264e_deblk.c854
-rwxr-xr-xencoder/ih264e_deblk.h99
-rwxr-xr-xencoder/ih264e_debug.h65
-rwxr-xr-xencoder/ih264e_defs.h538
-rwxr-xr-xencoder/ih264e_encode.c580
-rwxr-xr-xencoder/ih264e_encode_header.c1187
-rwxr-xr-xencoder/ih264e_encode_header.h278
-rwxr-xr-xencoder/ih264e_error.h229
-rwxr-xr-xencoder/ih264e_fmt_conv.c864
-rwxr-xr-xencoder/ih264e_fmt_conv.h142
-rwxr-xr-xencoder/ih264e_function_selector_generic.c259
-rwxr-xr-xencoder/ih264e_globals.c261
-rwxr-xr-xencoder/ih264e_globals.h192
-rwxr-xr-xencoder/ih264e_half_pel.c226
-rwxr-xr-xencoder/ih264e_half_pel.h162
-rwxr-xr-xencoder/ih264e_intra_modes_eval.c2296
-rwxr-xr-xencoder/ih264e_intra_modes_eval.h418
-rwxr-xr-xencoder/ih264e_list.h42
-rwxr-xr-xencoder/ih264e_master.h132
-rwxr-xr-xencoder/ih264e_mc.c320
-rwxr-xr-xencoder/ih264e_mc.h104
-rwxr-xr-xencoder/ih264e_me.c1153
-rwxr-xr-xencoder/ih264e_me.h278
-rwxr-xr-xencoder/ih264e_modify_frm_rate.c240
-rwxr-xr-xencoder/ih264e_modify_frm_rate.h182
-rwxr-xr-xencoder/ih264e_process.c2369
-rwxr-xr-xencoder/ih264e_process.h364
-rwxr-xr-xencoder/ih264e_rate_control.c801
-rwxr-xr-xencoder/ih264e_rate_control.h351
-rwxr-xr-xencoder/ih264e_rc_mem_interface.c395
-rwxr-xr-xencoder/ih264e_rc_mem_interface.h179
-rwxr-xr-xencoder/ih264e_statistics.h141
-rwxr-xr-xencoder/ih264e_structs.h2566
-rwxr-xr-xencoder/ih264e_time_stamp.c748
-rwxr-xr-xencoder/ih264e_time_stamp.h498
-rwxr-xr-xencoder/ih264e_trace.h161
-rwxr-xr-xencoder/ih264e_trace_support.h61
-rwxr-xr-xencoder/ih264e_utils.c1804
-rwxr-xr-xencoder/ih264e_utils.h327
-rwxr-xr-xencoder/ih264e_version.c143
-rwxr-xr-xencoder/ih264e_version.h64
-rwxr-xr-xencoder/ime.c836
-rwxr-xr-xencoder/ime.h209
-rwxr-xr-xencoder/ime_defs.h59
-rwxr-xr-xencoder/ime_distortion_metrics.c1262
-rwxr-xr-xencoder/ime_distortion_metrics.h170
-rwxr-xr-xencoder/ime_macros.h44
-rwxr-xr-xencoder/ime_statistics.h86
-rwxr-xr-xencoder/ime_structs.h305
-rwxr-xr-xencoder/ime_typedefs.h50
-rwxr-xr-xencoder/irc_bit_allocation.c859
-rwxr-xr-xencoder/irc_bit_allocation.h99
-rwxr-xr-xencoder/irc_cbr_buffer_control.c653
-rwxr-xr-xencoder/irc_cbr_buffer_control.h104
-rwxr-xr-xencoder/irc_cntrl_param.h59
-rwxr-xr-xencoder/irc_common.h104
-rwxr-xr-xencoder/irc_datatypes.h64
-rwxr-xr-xencoder/irc_est_sad.c260
-rwxr-xr-xencoder/irc_est_sad.h64
-rwxr-xr-xencoder/irc_fixed_point_error_bits.c185
-rwxr-xr-xencoder/irc_fixed_point_error_bits.h64
-rwxr-xr-xencoder/irc_frame_info_collector.c177
-rwxr-xr-xencoder/irc_frame_info_collector.h109
-rwxr-xr-xencoder/irc_mb_model_based.c157
-rwxr-xr-xencoder/irc_mb_model_based.h57
-rwxr-xr-xencoder/irc_mem_req_and_acq.h179
-rwxr-xr-xencoder/irc_picture_type.c1585
-rwxr-xr-xencoder/irc_picture_type.h95
-rwxr-xr-xencoder/irc_rate_control_api.c1600
-rwxr-xr-xencoder/irc_rate_control_api.h188
-rwxr-xr-xencoder/irc_rate_control_api_structs.h93
-rwxr-xr-xencoder/irc_rd_model.c565
-rwxr-xr-xencoder/irc_rd_model.h98
-rwxr-xr-xencoder/irc_rd_model_struct.h75
-rwxr-xr-xencoder/irc_trace_support.h61
-rwxr-xr-xencoder/irc_vbr_storage_vbv.c368
-rwxr-xr-xencoder/irc_vbr_storage_vbv.h119
-rwxr-xr-xencoder/irc_vbr_str_prms.c199
-rwxr-xr-xencoder/irc_vbr_str_prms.h65
-rwxr-xr-xencoder/ithread.h101
-rwxr-xr-xencoder/iv2.h386
-rwxr-xr-xencoder/ive2.h1445
-rwxr-xr-xencoder/mips/ih264e_function_selector.c110
-rwxr-xr-xencoder/mips/ih264e_platform_macros.h135
-rwxr-xr-xencoder/mips/ime_platform_macros.h52
-rwxr-xr-xencoder/x86/ih264e_function_selector.c141
-rwxr-xr-xencoder/x86/ih264e_function_selector_sse42.c146
-rwxr-xr-xencoder/x86/ih264e_function_selector_ssse3.c190
-rwxr-xr-xencoder/x86/ih264e_half_pel_ssse3.c487
-rwxr-xr-xencoder/x86/ih264e_intra_modes_eval_ssse3.c1259
-rwxr-xr-xencoder/x86/ih264e_platform_macros.h154
-rwxr-xr-xencoder/x86/ime_distortion_metrics_sse42.c1940
-rwxr-xr-xencoder/x86/ime_platform_macros.h52
119 files changed, 59630 insertions, 0 deletions
diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
new file mode 100755
index 0000000..fe0ce17
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
@@ -0,0 +1,313 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates the first three 16x16 modes, computes the corresponding SADs
+@* and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] pu1_ngbr_pels_i16
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which minimum sad is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels_i16,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+ .global ih264e_evaluate_intra16x16_modes_a9q
+
+ih264e_evaluate_intra16x16_modes_a9q:
+@ Arguments as this routine reads them:
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd, (loaded from the stack further down)
+@r5 = u4_n_avblty, (loaded from the stack just below)
+@r6 = u4_intra_mode, (loaded from the stack further down)
+@r7 = pu4_sadmin (loaded from the stack further down)
+@r0 is reloaded with u4_valid_intra_modes once the source rows are consumed.
+@ Stack arguments start 40 bytes above sp after the stmfd below, and
+@ 104 bytes above sp once d8-d15 have been pushed as well.
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ ldr r5, [sp, #44] @r5 = u4_n_avblty
+
+
+ vpush {d8-d15} @save d8-d15 (restored at end_func)
+ vld1.32 {q4}, [r1]! @q4 = neighbour bytes 0..15, r1 advanced by 16
+ sub r6, r1, #1 @r6 -> neighbour byte 15 (horizontal predictor of row 0)
+ add r1, r1, #1 @skip neighbour byte 16
+ mov r10, #0 @r10 becomes 1 if left is available
+ vld1.32 {q5}, [r1]! @q5 = neighbour bytes 17..32 (vertical predictor row)
+ mov r11, #0 @r11 becomes 1 if top is available
+ mov r4, #0 @r4 = value added to the DC average (128 if nothing is available)
+ @/* Left available ????
+ ands r7, r5, #01
+ movne r10, #1
+
+ @/* Top available ????
+ ands r8, r5, #04
+ lsl r9, r10, #3 @r9 = 8 * left flag
+ movne r11, #1 @still conditional on the ands above
+ lsl r12, r11, #3 @r12 = 8 * top flag
+ adds r8, r9, r12 @r8 = rounding term; Z is set when neither side is available
+
+
+ @/* None available :(
+ moveq r4, #128
+ @ NOTE(review): the sum below always covers all 32 loaded neighbour
+ @ bytes, whatever u4_n_avblty says - presumably the caller zero-fills
+ @ the unavailable ones; confirm against the caller.
+@/* Finding DC val */
+ @----------------------
+ vaddl.u8 q15, d8, d9
+
+ vaddl.u8 q14, d10, d11
+
+ vadd.u16 q15, q14, q15
+ @ VLD1.32 {q2},[r0],r3;row 2
+ vadd.u16 d30, d31, d30
+ vpadd.u16 d30, d30
+ @ VLD1.32 {q3},[r0],r3 ;row 3
+ vpadd.u16 d30, d30 @d30[0] = sum of the 32 neighbour bytes
+ @---------------------
+
+
+ vmov.u16 r7, d30[0]
+ add r7, r7, r8 @add the rounding term
+ add r11, r11, #3
+ add r8, r10, r11 @shift amount = 3 + left flag + top flag
+
+ lsr r7, r8
+ add r7, r4, r7 @plus 128 when nothing is available
+ vld1.32 {q0}, [r0], r3 @ source row 0
+ vdup.8 q15, r7 @dc val
+
+@/* computing SADs for all three modes*/
+ ldrb r7, [r6]
+ vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=0;
+ @/vertical row 0;
+ vabdl.u8 q8, d0, d10
+ vabdl.u8 q9, d1, d11
+ sub r6, r6, #1 @next row's horizontal predictor is one byte lower
+ @/HORZ row 0;
+ vabdl.u8 q13, d0, d20
+ vabdl.u8 q14, d1, d21
+ mov r1, #15 @rows still to accumulate
+ @/dc row 0;
+ vabdl.u8 q11, d0, d30
+ vabdl.u8 q12, d1, d31
+
+ @ Accumulators: q8/q9 = VERT, q13/q14 = HORZ, q11/q12 = DC.
+loop:
+ vld1.32 {q1}, [r0], r3 @row i
+ @/dc row i;
+ vabal.u8 q11, d2, d30
+ ldrb r7, [r6]
+ vabal.u8 q12, d3, d31
+
+ @/vertical row i;
+ vabal.u8 q8, d2, d10
+ vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=i;
+ sub r6, r6, #1
+ vabal.u8 q9, d3, d11
+
+ subs r1, r1, #1
+ @/HORZ row i;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q14, d3, d21
+ bne loop
+
+ @------------------------------------------------------------------------------
+ @ Reduce each pair of accumulators to one 32-bit SAD: r8 = VERT, r9 = HORZ, r10 = DC.
+ vadd.i16 q9, q9, q8 @/VERT
+ vadd.i16 d18, d19, d18 @/VERT
+ vpaddl.u16 d18, d18 @/VERT
+ vadd.i16 q14, q13, q14 @/HORZ
+ vadd.i16 d28, d29, d28 @/HORZ
+ vpaddl.u32 d18, d18 @/VERT
+ vpaddl.u16 d28, d28 @/HORZ
+
+ vpaddl.u32 d28, d28 @/HORZ
+ vmov.u32 r8, d18[0] @ vert
+ vadd.i16 q12, q11, q12 @/DC
+ vmov.u32 r9, d28[0] @horz
+ mov r11, #1
+ vadd.i16 d24, d24, d25 @/DC
+ lsl r11 , #30 @r11 = 1 << 30, the SAD given to a mode that is not valid
+
+ @-----------------------
+ ldr r0, [sp, #120] @ u4_valid_intra_modes
+ @--------------------------------------------
+ ands r7, r0, #01 @ vert mode valid????????????
+ moveq r8, r11
+ vpaddl.u16 d24, d24 @/DC
+
+ ands r6, r0, #02 @ horz mode valid????????????
+ moveq r9, r11
+ vpaddl.u32 d24, d24 @/DC
+
+ vmov.u32 r10, d24[0] @dc
+@--------------------------------
+ ldr r4, [sp, #104] @r4 = dst_strd,
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+@----------------------------------------------
+ ands r6, r0, #04 @ dc mode valid????????????
+ moveq r10, r11
+
+ @---------------------------
+ ldr r6, [sp, #112] @ R6 =MODE
+ @--------------------------
+ @ Pick the smallest SAD; on ties VERT wins over HORZ and DC, and HORZ wins over DC.
+ cmp r8, r9
+ bgt not_vert
+ cmp r8, r10
+ bgt do_dc
+
+ @/----------------------
+ @DO VERTICAL PREDICTION
+ str r8 , [r7] @MIN SAD
+ mov r8, #0
+ str r8 , [r6] @ MODE
+ vmov q15, q5 @every output row = the vertical predictor row
+
+ b do_dc_vert
+ @-----------------------------
+not_vert:
+ cmp r9, r10
+ bgt do_dc
+
+ @/----------------------
+ @DO HORIZONTAL
+ vdup.8 q5, d9[7] @0
+ str r9 , [r7] @MIN SAD
+ vdup.8 q6, d9[6] @1
+ mov r9, #1
+ vdup.8 q7, d9[5] @2
+ vst1.32 {d10, d11} , [r2], r4 @0
+ vdup.8 q8, d9[4] @3
+ str r9 , [r6] @ MODE
+ vdup.8 q9, d9[3] @4
+ vst1.32 {d12, d13} , [r2], r4 @1
+ vdup.8 q10, d9[2] @5
+ vst1.32 {d14, d15} , [r2], r4 @2
+ vdup.8 q11, d9[1] @6
+ vst1.32 {d16, d17} , [r2], r4 @3
+ vdup.8 q12, d9[0] @7
+ vst1.32 {d18, d19} , [r2], r4 @4
+ vdup.8 q13, d8[7] @8
+ vst1.32 {d20, d21} , [r2], r4 @5
+ vdup.8 q14, d8[6] @9
+ vst1.32 {d22, d23} , [r2], r4 @6
+ vdup.8 q15, d8[5] @10
+ vst1.32 {d24, d25} , [r2], r4 @7
+ vdup.8 q1, d8[4] @11
+ vst1.32 {d26, d27} , [r2], r4 @8
+ vdup.8 q2, d8[3] @12
+ vst1.32 {d28, d29} , [r2], r4 @9
+ vdup.8 q3, d8[2] @13
+ vst1.32 {d30, d31}, [r2], r4 @10
+ vdup.8 q5, d8[1] @14
+ vst1.32 {d2, d3} , [r2], r4 @11
+ vdup.8 q6, d8[0] @15
+ vst1.32 {d4, d5} , [r2], r4 @12
+
+ vst1.32 {d6, d7} , [r2], r4 @13
+
+ vst1.32 {d10, d11} , [r2], r4 @14
+
+ vst1.32 {d12, d13} , [r2], r4 @15
+ b end_func
+
+
+ @/-----------------------------
+
+do_dc: @/---------------------------------
+ @DO DC
+ str r10 , [r7] @MIN SAD
+ mov r10, #2
+ str r10 , [r6] @ MODE
+do_dc_vert:
+ vst1.32 {d30, d31}, [r2], r4 @0
+ vst1.32 {d30, d31}, [r2], r4 @1
+ vst1.32 {d30, d31}, [r2], r4 @2
+ vst1.32 {d30, d31}, [r2], r4 @3
+ vst1.32 {d30, d31}, [r2], r4 @4
+ vst1.32 {d30, d31}, [r2], r4 @5
+ vst1.32 {d30, d31}, [r2], r4 @6
+ vst1.32 {d30, d31}, [r2], r4 @7
+ vst1.32 {d30, d31}, [r2], r4 @8
+ vst1.32 {d30, d31}, [r2], r4 @9
+ vst1.32 {d30, d31}, [r2], r4 @10
+ vst1.32 {d30, d31}, [r2], r4 @11
+ vst1.32 {d30, d31}, [r2], r4 @12
+ vst1.32 {d30, d31}, [r2], r4 @13
+ vst1.32 {d30, d31}, [r2], r4 @14
+ vst1.32 {d30, d31}, [r2], r4 @15
+ @/------------------
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
new file mode 100755
index 0000000..568e623
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
@@ -0,0 +1,529 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+
+.data
+.p2align 2
+@ Table of the nine prediction-copy labels, indexed by the selected mode number (0..8).
+scratch_intrapred_luma_4x4_prediction:
+ .long ver, hor, d_c, dia_dl
+ .long dia_dr, ver_r, hor_d, ver_l
+ .long hor_u
+
+
+.text
+.p2align 2
+@ Offset of the table from (scrintra_4x4 + 8); the code adds pc to it at scrintra_4x4.
+scratch_intrapred_luma_4x4_prediction_addr1:
+ .long scratch_intrapred_luma_4x4_prediction - scrintra_4x4 - 8
+
+
+
+@/**
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 4x4 mode
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates the 4x4 modes, computes the corresponding costs
+@* and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] pu1_ngbr_pels
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which minimum cost is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@* @param[in] u4_lambda
+@* Lambda value for computing cost from SAD
+@*
+@* @param[in] u4_predictd_mode
+@* Predicted mode for cost computation
+@*
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes,
+@ UWORD32 u4_lambda,
+@ UWORD32 u4_predictd_mode)
+
+
+
+ .global ih264e_evaluate_intra_4x4_modes_a9q
+
+ih264e_evaluate_intra_4x4_modes_a9q:
+@ Arguments as this routine reads them (r4-r8 and the later r0/r1 come from the stack):
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd, (loaded at store:)
+@r5 = u4_n_avblty,
+@r6 = u4_intra_mode, (loaded at pred:)
+@r7 = pu4_sadmin (loaded at pred:)
+@r8 = u4_valid_intra_modes
+@r0 =u4_lambda (reloaded once the source rows are read)
+@r1 = u4_predictd_mode (reloaded once the neighbours are read)
+@ r11 = lowest cost so far (starts at 1 << 30), r12 = mode that produced it.
+@ cost = SAD + u4_lambda if the mode equals u4_predictd_mode, else SAD + 4 * u4_lambda.
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+@--------------------
+ ldr r5, [sp, #44] @r5 = u4_n_avblty,
+@----------------------
+ vpush {d8-d15}
+@Loading neighbours
+ vld1.32 {q0}, [r1] @q0 = neighbour bytes 0..15
+ add r4, r1, #12
+ vld1.8 d1[5], [r4] @q0 byte 13 = neighbour byte 12
+ vld1.8 d1[7], [r1] @q0 byte 15 = neighbour byte 0
+ @--------------------------------
+ ldr r8, [sp, #120] @u4_valid_intra_modes
+@----------------------------------------------
+
+
+
+@ LOADING pu1_src
+ vld1.32 {d20[0]}, [r0], r3 @q10 (d20, d21) collects the four 4-byte source rows
+ vext.8 q1, q0, q0, #1 @q1 = q0 rotated down by one byte
+ vld1.32 {d20[1]}, [r0], r3
+ mov r11, #1
+ vld1.32 {d21[0]}, [r0], r3
+ lsl r11, r11, #30 @r11 = 1 << 30, initial minimum cost
+ vld1.32 {d21[1]}, [r0], r3
+
+
+
+@--------------------------------
+ ldr r0, [sp, #124] @r0 =u4_lambda
+ ldr r1, [sp, #128] @r1 = u4_predictd_mode
+@------
+
+
+vert:
+ ands r10, r8, #01 @VERT sad ??
+ beq horz
+ vdup.32 q2, d2[1] @every row = neighbour bytes 5..8
+ vabdl.u8 q14, d4, d20
+ vabal.u8 q14, d4, d21
+ vadd.i16 d28, d29, d28
+ subs r6, r1, #0 @Z set if u4_predictd_mode is this mode (0)
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2 @mode cost = 4 * lambda when not the predicted mode
+ vpaddl.u32 d28, d28 @/
+ moveq r6, r0 @mode cost = lambda when it is the predicted mode
+ vmov.u32 r9, d28[0] @ vert
+ add r9, r6, r9 @r9 = SAD + mode cost
+
+ subs r6, r11, r9
+ movgt r11, r9 @keep this mode only if its cost is strictly lower
+ movgt r12, #0
+ @ The remaining modes repeat the same cost / compare pattern with their own mode number.
+horz:
+ ands r10, r8, #02 @HORZ sad ??
+ beq dc
+ vdup.32 q3, d0[0] @start from neighbour bytes 0..3; q3 ends up holding the HORZ prediction
+ vmov.32 q4, q3
+ vtrn.8 q3, q4
+ vtrn.16 d7, d6
+ vtrn.16 d9, d8
+ vtrn.32 d9, d7
+ vtrn.32 d8, d6
+ vabdl.u8 q14, d6, d20
+ subs r6, r1, #1
+ vabal.u8 q14, d7, d21
+ vadd.i16 d28, d29, d28
+ lslne r6, r0, #2
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #1
+
+dc:
+ ands r10, r8, #04 @DC sad ??
+ beq diags
+ vext.8 q4, q0, q0, #5
+ vaddl.u8 q4, d0, d8 @lane i = neighbour byte i + neighbour byte i+5
+ vpaddl.u16 d8, d8 @
+ vpaddl.u32 d8, d8 @/
+ vmov.u32 r4, d8[0] @r4 = sum of neighbour bytes 0..3 and 5..8
+ mov r14, #1 @r14 = shift amount
+ ands r10, r5, #1
+ addne r4, r4, #2 @left available: +2 rounding, one more shift
+ addne r14, r14, #1
+ ands r10, r5, #4
+ addne r4, r4, #2 @top available: +2 rounding, one more shift
+ addne r14, r14, #1
+ ands r10, r5, #5
+ moveq r4, #128 @neither available: DC value is 128
+ moveq r14, #0
+ subs r6, r1, #2
+ lsr r4, r4, r14 @r4 = DC value (read again at d_c:)
+ vdup.8 q4, r4
+ lslne r6, r0, #2
+ vabdl.u8 q14, d8, d20
+ vabal.u8 q14, d9, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #2
+
+diags:
+ ands r10, r8, #504 @/* if modes other than VERT, HORZ and DC are valid ????*/
+ beq pred
+ @/* Performing FILT11 and FILT121 operation for all neighbour values*/
+ vext.8 q5, q0, q0, #2 @q5 = q0 rotated down by two bytes
+ vaddl.u8 q6, d0, d2 @x[i] + x[i+1]
+ vaddl.u8 q7, d1, d3
+ vaddl.u8 q8, d10, d2 @x[i+2] + x[i+1]
+ vaddl.u8 q9, d11, d3
+ vadd.u16 q12, q10, q11 @NOTE(review): q12 is rewritten two instructions below before any read - looks dead
+ vqrshrun.s16 d10, q6, #1 @q5 = (x[i] + x[i+1] + 1) >> 1 (FILT11)
+ vqrshrun.s16 d11, q7, #1
+ vadd.u16 q11, q6, q8
+ vadd.u16 q12, q7, q9
+ vqrshrun.s16 d12, q11, #2 @q6 = (x[i] + 2*x[i+1] + x[i+2] + 2) >> 2 (FILT121)
+ vqrshrun.s16 d13, q12, #2
+ mov r14, #0
+ vdup.32 q13 , r14
+ mov r14, #-1
+ vmov.i32 d26[0], r14 @d26 = mask selecting the low 32 bits, used with vbit below
+ @ Each mode below assembles its 16-byte prediction with vext + vbit, then takes the SAD against q10.
+diag_dl:
+ ands r10, r8, #0x08 @DIAG_DL sad ??
+ beq diag_dr
+ @ prediction is built in q7 (d14, d15)
+ vext.8 q15, q6, q6, #5
+ vbit.32 d14, d30, d26
+ vext.8 q15, q6, q6, #15
+ vbit.32 d15, d31, d26
+ vext.8 q15, q6, q6, #2
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the high 32 bits
+ vbit.32 d14, d30, d28
+ vext.8 q15, q6, q6, #4
+ vbit.32 d15, d30, d28
+ vabdl.u8 q14, d14, d20
+ subs r6, r1, #3
+ vabal.u8 q14, d15, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #3
+
+diag_dr:
+ ands r10, r8, #16 @DIAG_DR sad ??
+ beq vert_r
+ @ prediction is built in q8 (d16, d17)
+ vext.8 q15, q6, q6, #3
+ vbit.32 d16, d30, d26
+ vext.8 q15, q6, q6, #1
+ vbit.32 d17, d30, d26
+ vext.8 q15, q6, q6, #4
+ vext.32 q14, q13, q13, #3
+ vbit.32 d17, d31, d28
+ vext.8 q15, q6, q6, #6
+ vbit.32 d16, d31, d28
+ vabdl.u8 q14, d16, d20
+ subs r6, r1, #4
+ vabal.u8 q14, d17, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #4
+
+vert_r:
+ ands r10, r8, #32 @VERT_R sad ??
+ beq horz_d
+ vext.8 q15, q5, q5, #4 @prediction is built in q9 (d18, d19)
+ vbit.32 d18, d30, d26
+ vext.8 q15, q5, q5, #3
+ vbit.32 d19, d30, d26
+ vext.32 q14, q13, q13, #3
+ vext.8 q15, q6, q6, #15
+ vbit.32 d18, d30, d28
+ vext.8 q15, q6, q6, #14
+ vbit.32 d19, d30, d28
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14 @d28 = mask selecting the lowest byte
+ vext.8 q15, q6, q6, #2
+ vbit.32 d19, d30, d28
+ vext.32 q14, q14, q14, #3
+ subs r6, r1, #5
+ vext.8 q15, q6, q6, #13
+ vbit.32 d19, d30, d28
+ lslne r6, r0, #2
+ vabdl.u8 q14, d18, d20
+ vabal.u8 q14, d19, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #5
+
+horz_d:
+ vmov.8 q1, q5 @these three run even if HORZ_D is not valid: horz_u reads q1 too
+ vmov.8 q15, q6
+ vzip.8 q1, q15 @q1 = FILT11 and FILT121 bytes interleaved
+
+ ands r10, r8, #64 @HORZ_D sad ??
+ beq vert_l
+ vext.8 q15, q6, q6, #2 @prediction is built in q4 (d8, d9)
+ vbit.32 d8, d30, d26
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14
+ vext.8 q15, q5, q5, #3
+ vbit.32 d8, d30, d28
+ vext.8 q15, q1, q1, #2
+ vbit.32 d9, d30, d26
+ vext.32 q14, q13, q13, #3
+ vbit.32 d8, d2, d28
+ subs r6, r1, #6
+ vext.8 q15, q1, q1, #12
+ vbit.32 d9, d30, d28
+ vabdl.u8 q14, d8, d20
+ vabal.u8 q14, d9, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #6
+vert_l:
+ ands r10, r8, #128 @VERT_L sad ??
+ beq horz_u
+ vext.8 q15, q5, q5, #5 @prediction is built in q12 (d24, d25)
+ vbit.32 d24, d30, d26
+ vext.8 q15, q15, q15, #1
+ vbit.32 d25, d30, d26
+ vext.8 q15, q6, q6, #1
+ vext.32 q14, q13, q13, #3
+ vbit.32 d24, d30, d28
+ vext.8 q15, q15, q15, #1
+ subs r6, r1, #7
+ vbit.32 d25, d30, d28
+ vabdl.u8 q14, d24, d20
+ vabal.u8 q14, d25, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #7
+
+horz_u:
+ ands r10, r8, #256 @HORZ_U sad ??
+ beq pred
+ vrev64.8 q5, q1 @prediction is built in q1 (d2, d3)
+ vdup.8 q1, d0[0]
+ vext.8 q6, q6, #7
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14
+ vbit.32 d11, d13, d28
+ movw r14, #0xffff
+ vmov.i16 d28[0], r14 @d28 = mask selecting the lowest 16 bits
+ vext.8 q6, q5, q5, #7
+ subs r6, r1, #8
+ vbit.32 d3, d12, d28
+ vext.8 q6, q5, q5, #3
+ vbit.32 d2, d12, d26
+ vext.32 q14, q13, q13, #3
+ vext.8 q6, q5, q5, #1
+ vbit.32 d2, d12, d28
+ vabdl.u8 q14, d2, d20
+ vabal.u8 q14, d3, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #8
+
+pred: @/*dOING FINAL PREDICTION*/
+@---------------------------
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+ ldr r6, [sp, #112] @ R6 =MODE
+@--------------------------
+ str r11, [r7] @/STORING MIN SAD*/
+ str r12, [r6] @/FINAL MODE*/
+@ NOTE(review): r12 is written only when an evaluated mode's cost is below 1 << 30; if no mode
+@ is valid it is never set in this routine - presumably callers always enable at least one.
+ ldr r3, scratch_intrapred_luma_4x4_prediction_addr1
+scrintra_4x4:
+ add r3, r3, pc @r3 = address of the jump table
+ lsl r12, r12, #2 @4 bytes per table entry
+ add r3, r3, r12
+
+ ldr r5, [r3] @r5 = address of the label for the selected mode
+ and r5, r5, #0xfffffffe @clear bit 0 before the bx
+
+ bx r5
+
+@ Each target below puts the 16-byte prediction of its mode into q15, then goes to store.
+ver:
+ vext.8 q0, q0, q0, #1
+ vdup.32 q15, d0[1]
+ b store
+
+hor:
+ vmov.32 q15, q3
+ b store
+
+d_c:
+ vdup.8 q15, r4
+ b store
+
+dia_dl:
+ vmov.32 q15, q7
+ b store
+
+dia_dr:
+ vmov.32 q15, q8
+ b store
+
+ver_r:
+ vmov.32 q15, q9
+ b store
+
+hor_d:
+ vmov.32 q15, q4
+ b store
+
+ver_l:
+ vmov.32 q15, q12
+ b store
+
+hor_u:
+ vmov.32 q15, q1
+
+store: @/* storing to pu1_dst*/
+
+ ldr r4, [sp, #104] @r4 = dst_strd,
+ @ four rows of four bytes, dst_strd apart
+ vst1.32 {d30[0]}, [r2], r4
+ vst1.32 {d30[1]}, [r2], r4
+ vst1.32 {d31[0]}, [r2], r4
+ vst1.32 {d31[1]}, [r2], r4
+
+
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
new file mode 100755
index 0000000..e4dfca8
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
@@ -0,0 +1,346 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC )
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates the first three intra chroma modes, computes the corresponding SADs
+@* and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] pu1_ngbr_pels
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which minimum sad is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels_i16,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+ .global ih264e_evaluate_intra_chroma_modes_a9q
+
+ih264e_evaluate_intra_chroma_modes_a9q:
+@ Arguments as this routine reads them (r4-r7 are loaded from the stack):
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd, (loaded after the SAD loop)
+@r5 = u4_n_avblty,
+@r6 = u4_intra_mode, (loaded after the SAD loop)
+@r7 = pu4_sadmin (loaded after the SAD loop)
+@r0 is reloaded with u4_valid_intra_modes after the SAD loop.
+@ Each row is 16 bytes wide and eight rows are processed.
+@ d28/d29 hold the DC predictor for rows 0-3, d30/d31 for rows 4-7.
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ @-----------------------
+ ldr r5, [sp, #44] @r5 = u4_n_avblty,
+ @-------------------------
+ mov r12, r1 @keep the neighbour pointer for the reload at sad:
+ vpush {d8-d15}
+ vld1.32 {q4}, [r1]! @neighbour bytes 0..15
+ add r1, r1, #2 @skip neighbour bytes 16..17
+ vld1.32 {q5}, [r1]! @neighbour bytes 18..33
+
+ vuzp.u8 q4, q5 @q4 = even-indexed bytes, q5 = odd-indexed bytes (presumably the two interleaved chroma planes)
+
+ vpaddl.u8 d8, d8 @each d register is reduced to sums of four adjacent samples
+ vpadd.u16 d8, d8
+
+ vpaddl.u8 d9, d9
+ vpadd.u16 d9, d9
+
+ vpaddl.u8 d10, d10
+ vpadd.u16 d10, d10
+
+ vpaddl.u8 d11, d11
+
+ and r7, r5, #5 @bit 0 = left available, bit 2 = top available
+ vpadd.u16 d11, d11
+ subs r8, r7, #5
+ beq all_available
+ subs r8, r7, #4
+ beq top_available
+ subs r8, r7, #1
+ beq left_available
+ mov r10, #128 @nothing available: DC predictor is 128 everywhere
+ vdup.8 q14, r10
+ vdup.8 q15, r10
+ b sad
+
+all_available:
+ vzip.u16 q4, q5
+ vext.16 q6, q4, q4, #2
+ vadd.u16 q7, q5, q6
+ vqrshrn.u16 d14, q7, #3 @rounded >> 3 of the combined sums
+ vqrshrn.u16 d15, q4, #2 @rounded >> 2 of the single-side sums
+ vqrshrn.u16 d16, q5, #2
+ vdup.16 d28, d14[0]
+ vdup.16 d29, d16[1]
+ vdup.16 d30, d15[0]
+ vdup.16 d31, d14[1]
+ b sad
+top_available:
+ vzip.u16 q4, q5
+ vqrshrn.u16 d16, q5, #2
+ vdup.16 d28, d16[0]
+ vdup.16 d29, d16[1]
+ vdup.16 d30, d16[0]
+ vdup.16 d31, d16[1]
+ b sad
+left_available:
+ vzip.u16 q4, q5
+ vqrshrn.u16 d16, q4, #2
+ vdup.16 d28, d16[3]
+ vdup.16 d29, d16[3]
+ vdup.16 d30, d16[2]
+ vdup.16 d31, d16[2]
+
+ @ SAD stage: rows i and i+4 are handled together because their DC predictors differ.
+sad:
+ vld1.32 {q4}, [r12]! @reload raw neighbour bytes 0..15
+ sub r8, r12, #2 @r8 -> bytes 14..15, the horizontal pair for row 0
+ add r12, r12, #2
+ vld1.32 {q5}, [r12]! @raw neighbour bytes 18..33, the vertical predictor row
+ add r12, r0, r3, lsl #2 @r12 -> source row 4
+ sub r10, r8, #8 @r10 -> bytes 6..7, the horizontal pair for row 4
+ vld1.32 {q0}, [r0], r3
+ ldrh r9, [r8]
+ vdup.16 q10, r9 @ row 0
+
+ @/vertical row 0;
+ vabdl.u8 q8, d0, d10
+ vabdl.u8 q9, d1, d11
+ sub r8, r8, #2
+ vld1.32 {q1}, [r12], r3
+
+ @/HORZ row 0;
+ vabdl.u8 q13, d0, d20
+ vabdl.u8 q7, d1, d21
+ ldrh r9, [r10]
+ @/dc row 0;
+ vabdl.u8 q11, d0, d28
+ vabdl.u8 q12, d1, d29
+
+
+ vdup.16 q10, r9 @ row 4
+ @/vertical row 4;
+ vabal.u8 q8, d2, d10
+ vabal.u8 q9, d3, d11
+ sub r10, r10, #2
+
+ @/HORZ row 4;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q7, d3, d21
+ @/dc row 4;
+ vabal.u8 q11, d2, d30
+ vabal.u8 q12, d3, d31
+
+ mov r11, #3 @three more passes, each covering rows i and i+4
+
+loop:
+ vld1.32 {q0}, [r0], r3
+ ldrh r9, [r8]
+
+
+ @/vertical row i;
+ vabal.u8 q8, d0, d10
+ vabal.u8 q9, d1, d11
+
+ vdup.16 q10, r9 @ row i
+ vld1.32 {q1}, [r12], r3
+ sub r8, r8, #2
+ @/HORZ row i;
+ vabal.u8 q13, d0, d20
+ vabal.u8 q7, d1, d21
+ ldrh r9, [r10]
+ @/dc row i;
+ vabal.u8 q11, d0, d28
+ vabal.u8 q12, d1, d29
+ sub r10, r10, #2
+
+ vdup.16 q10, r9 @ row i+4
+ @/vertical row i+4;
+ vabal.u8 q8, d2, d10
+ vabal.u8 q9, d3, d11
+ subs r11, r11, #1
+
+ @/HORZ row i+4;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q7, d3, d21
+ @/dc row i+4;
+ vabal.u8 q11, d2, d30
+ vabal.u8 q12, d3, d31
+ bne loop
+
+
+ @ Reduce each pair of accumulators to one 32-bit SAD.
+@-------------------------------------------
+
+ vadd.i16 q9, q9, q8 @/VERT
+ vadd.i16 q7, q13, q7 @/HORZ
+ vadd.i16 q12, q11, q12 @/DC
+ vadd.i16 d18, d19, d18 @/VERT
+ vadd.i16 d14, d15, d14 @/HORZ
+ vadd.i16 d24, d24, d25 @/DC
+ vpaddl.u16 d18, d18 @/VERT
+ vpaddl.u16 d14, d14 @/HORZ
+ vpaddl.u16 d24, d24 @/DC
+ vpaddl.u32 d18, d18 @/VERT
+ vpaddl.u32 d14, d14 @/HORZ
+ vpaddl.u32 d24, d24 @/DC
+
+
+
+ vmov.u32 r8, d18[0] @ vert
+ vmov.u32 r9, d14[0] @horz
+ vmov.u32 r10, d24[0] @dc
+
+ mov r11, #1
+@-----------------------
+ ldr r0, [sp, #120] @ u4_valid_intra_modes
+@--------------------------------------------
+
+
+ lsl r11 , #30 @r11 = 1 << 30, the SAD given to a mode that is not valid
+
+ ands r7, r0, #04 @ vert mode valid????????????
+ moveq r8, r11
+
+ ands r6, r0, #02 @ horz mode valid????????????
+ moveq r9, r11
+
+ ands r6, r0, #01 @ dc mode valid????????????
+ moveq r10, r11
+
+
+ @---------------------------
+ ldr r4, [sp, #104] @r4 = dst_strd,
+ ldr r6, [sp, #112] @ R6 =MODE
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+
+ @--------------------------
+ @ Pick the smallest SAD; on ties DC wins over HORZ and VERT, and HORZ wins over VERT.
+ cmp r10, r9
+ bgt not_dc
+ cmp r10, r8
+ bgt do_vert
+
+ @/----------------------
+ @DO DC PREDICTION
+ str r10 , [r7] @MIN SAD
+ mov r10, #0
+ str r10 , [r6] @ MODE
+ b do_dc_vert
+ @-----------------------------
+
+not_dc:
+ cmp r9, r8
+ bgt do_vert
+ @/----------------------
+ @DO HORIZONTAL
+ @ each row repeats one 16-bit neighbour pair, taken from the highest lane of q4 downwards
+ vdup.16 q10, d9[3] @/HORIZONTAL VALUE ROW=0;
+ str r9 , [r7] @MIN SAD
+ mov r9, #1
+ vdup.16 q11, d9[2] @/HORIZONTAL VALUE ROW=1;
+ str r9 , [r6] @ MODE
+ vdup.16 q12, d9[1] @/HORIZONTAL VALUE ROW=2;
+ vst1.32 {d20, d21} , [r2], r4 @0
+ vdup.16 q13, d9[0] @/HORIZONTAL VALUE ROW=3;
+ vst1.32 {d22, d23} , [r2], r4 @1
+ vdup.16 q14, d8[3] @/HORIZONTAL VALUE ROW=4;
+ vst1.32 {d24, d25} , [r2], r4 @2
+ vdup.16 q15, d8[2] @/HORIZONTAL VALUE ROW=5;
+ vst1.32 {d26, d27} , [r2], r4 @3
+ vdup.16 q1, d8[1] @/HORIZONTAL VALUE ROW=6;
+ vst1.32 {d28, d29} , [r2], r4 @4
+ vdup.16 q2, d8[0] @/HORIZONTAL VALUE ROW=7;
+ vst1.32 {d30, d31} , [r2], r4 @5
+ vst1.32 {d2, d3} , [r2], r4 @6
+ vst1.32 {d4, d5} , [r2], r4 @7
+ b end_func
+
+do_vert:
+ @DO VERTICAL PREDICTION
+ str r8 , [r7] @MIN SAD
+ mov r8, #2
+ str r8 , [r6] @ MODE
+ vmov q15, q5 @both halves of the block use the vertical predictor row
+ vmov q14, q5
+
+do_dc_vert:
+ vst1.32 {d28, d29} , [r2], r4 @0
+ vst1.32 {d28, d29} , [r2], r4 @1
+ vst1.32 {d28, d29} , [r2], r4 @2
+ vst1.32 {d28, d29} , [r2], r4 @3
+ vst1.32 {d30, d31} , [r2], r4 @4
+ vst1.32 {d30, d31} , [r2], r4 @5
+ vst1.32 {d30, d31} , [r2], r4 @6
+ vst1.32 {d30, d31} , [r2], r4 @7
+
+
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
new file mode 100755
index 0000000..2bf1479
--- /dev/null
+++ b/encoder/arm/ih264e_fmt_conv.s
@@ -0,0 +1,329 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+.text
+.p2align 2
+@/**
+
+@/*****************************************************************************
+@* *
+@* Function Name : ih264e_fmt_conv_420p_to_420sp_a9q() *
+@* *
+@* Description : This function converts the image from YUV420P color *
+@* space to 420SP color space(UV interleaved). *
+@* *
+@* Arguments : R0 pu1_y *
+@* R1 pu1_u *
+@* R2 pu1_v *
+@* R3 pu1_dest_y *
+@* [R13 #40] pu1_dest_uv *
+@* [R13 #44] u2_height *
+@* [R13 #48] u2_width *
+@* [R13 #52] u2_stridey *
+@* [R13 #56] u2_strideu *
+@* [R13 #60] u2_stridev *
+@* [R13 #64] u2_dest_stride_y *
+@* [R13 #68] u2_dest_stride_uv *
+@* [R13 #72] convert_uv_only *
+@* *
+@* Values Returned : None *
+@* *
+@* Register Usage : R0 - R14 *
+@* *
+@* Stack Usage : 40 Bytes *
+@* *
+@* Interruptibility : Interruptible *
+@* *
+@* Known Limitations *
+@* Assumptions: Image Width: Assumed to be multiple of 16 and *
+@* greater than or equal to 16 *
+@* Image Height: Assumed to be even. *
+@* *
+@* Revision History : *
+@* DD MM YYYY Author(s) Changes (Describe the changes made) *
+@* 07 06 2010 Varshita Draft *
+@* 07 06 2010 Naveen Kr T Completed *
+@* *
+@*****************************************************************************/
+ .global ih264e_fmt_conv_420p_to_420sp_a9q
+
+ih264e_fmt_conv_420p_to_420sp_a9q:
+
+    @// Copies the Y plane (unless convert_uv_only == 1) and then interleaves the
+    @// separate U and V planes into a single UV plane.
+    @//
+    @// Register arguments: r0 = pu1_y, r1 = pu1_u, r2 = pu1_v, r3 = pu1_dest_y.
+    @// Stack arguments are addressed relative to sp AFTER the 40-byte stmfd below
+    @// ([sp, #40] = pu1_dest_uv ... [sp, #72] = convert_uv_only). sp must therefore
+    @// not move again before the last argument has been loaded.
+    @//
+    @// Only d0/d1 are written by this routine, so the callee-saved d8-d15 do not
+    @// need to be preserved. The earlier version pushed d8-d15 once before the luma
+    @// loop and once more before the chroma loop but popped them only once, which
+    @// shifted every chroma argument load by 64 bytes and left sp unbalanced at the
+    @// final ldmfd whenever the luma path was taken.
+
+    @// push the registers on the stack
+    stmfd         sp!, {r4-r12, lr}
+
+    ldr           r4, [sp, #72]         @// Load convert_uv_only
+
+    cmp           r4, #1
+    beq           yuv420sp_uv_chroma    @// convert_uv_only == 1: skip the luma copy
+
+    @/* Luma: plain row-by-row copy, 16 bytes per iteration */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+    ldr           r7, [sp, #52]         @// Load u2_stridey from stack
+    ldr           r8, [sp, #64]         @// Load u2_dest_stride_y from stack
+    sub           r7, r7, r5            @// Source increment (stride - width)
+    sub           r8, r8, r5            @// Destination increment (stride - width)
+
+yuv420sp_uv_row_loop_y:
+    mov           r6, r5                @// r6 = bytes left in this row
+
+yuv420sp_uv_col_loop_y:
+    pld           [r0, #128]
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+    sub           r6, r6, #16
+    cmp           r6, #15
+    bgt           yuv420sp_uv_col_loop_y
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_y
+    @// Width is not a multiple of 16: step both pointers back so that one more
+    @// full 16-byte copy ends exactly at the end of the row (it overlaps bytes
+    @// that were already copied).
+    @// Ex: for width 162 the loop above handles 160 pixels, both pointers are
+    @// moved back to pixel 146 and the last 16 bytes are copied again.
+    rsb           r6, r6, #16
+    sub           r0, r0, r6
+    sub           r3, r3, r6
+
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add           r0, r0, r7            @// next source row
+    add           r3, r3, r8            @// next destination row
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+
+    @/* Chroma: interleave 8 U bytes and 8 V bytes into 16 UV bytes per iteration */
+    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+    ldr           r7, [sp, #56]         @// Load u2_strideu from stack
+    ldr           r8, [sp, #68]         @// Load u2_dest_stride_uv from stack
+
+    @// Note: u2_stridev ([sp, #60]) is never read; the U increment in r7 is applied
+    @// to both the U and the V source pointers.
+    sub           r7, r7, r5, lsr #1    @// Source increment (strideu - width / 2)
+    sub           r8, r8, r5            @// Destination increment (dest_stride_uv - width)
+
+    mov           r5, r5, lsr #1        @// chroma width  = width  / 2
+    mov           r4, r4, lsr #1        @// chroma height = height / 2
+
+yuv420sp_uv_row_loop_uv:
+    mov           r6, r5                @// r6 = U (and V) bytes left in this row
+
+yuv420sp_uv_col_loop_uv:
+    pld           [r1, #128]
+    pld           [r2, #128]
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!       @// writes U0 V0 U1 V1 ... (16 bytes)
+    sub           r6, r6, #8
+    cmp           r6, #7
+    bgt           yuv420sp_uv_col_loop_uv
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_uv
+    @// Chroma width is not a multiple of 8: step the U and V pointers back so one
+    @// more 8-byte load ends exactly at the end of the row. The destination holds
+    @// two bytes per chroma sample, so it steps back by twice as much.
+    rsb           r6, r6, #8
+    sub           r1, r1, r6
+    sub           r2, r2, r6
+    sub           r3, r3, r6, lsl #1
+
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add           r1, r1, r7            @// next U row
+    add           r2, r2, r7            @// next V row (same increment as U)
+    add           r3, r3, r8            @// next UV destination row
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_uv
+
+    @// Restore the registers saved on entry and return
+    ldmfd         sp!, {r4-r12, pc}
+
+
+
+
+
+@ /**
+@ *******************************************************************************
+@ *
+@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
+@ * Function used from format conversion or frame copy
+@ *
+@ *
+@ *
+@ *Inputs : r0 - pu1_y - UWORD8 pointer to y plane.
+@ * r1 - pu1_u - UWORD8 pointer to u plane.
+@ * r2 - pu1_v - UWORD8 pointer to v plane.
+@ * r3 - pu2_yuv422i - UWORD16 pointer to yuv422iimage.
+@ * stack + 40 - u4_width - Width of the Y plane.
+@ * 44 - u4_height - Height of the Y plane.
+@ * 48 - u4_stride_y - Stride in pixels of Y plane.
+@ * 52 - u4_stride_u - Stride in pixels of U plane.
+@ * 56 - u4_stride_v - Stride in pixels of V plane.
+@ * 60 - u4_stride_yuv422i- Stride in pixels of yuv422i image.
+@ *
+@ * @par Description
+@ * Function used from copying or converting a reference frame to display buffer
+@ * in non shared mode
+@ *
+@ * @param[in] pu1_y_dst
+@ * Output Y pointer
+@ *
+@ * @param[in] pu1_u_dst
+@ * Output U/UV pointer ( UV is interleaved in the same format as that of input)
+@ *
+@ * @param[in] pu1_v_dst
+@ * Output V pointer ( used in 420P output case)
+@ *
+@ * @param[in] u4_dst_y_strd
+@ * Stride of destination Y buffer
+@ *
+@ * @param[in] u4_dst_u_strd
+@ * Stride of destination U/V buffer
+@ *
+@ *
+@ * @param[in] blocking
+@ * To indicate whether format conversion should wait till frame is reconstructed
+@ * and then return after complete copy is done. To be set to 1 when called at the
+@ * end of frame processing and set to 0 when called between frame processing modules
+@ * in order to utilize available MCPS
+@ *
+@ * @returns Error from IH264E_ERROR_T
+@ *
+@ * @remarks
+@ * Assumes that the stride of U and V buffers are same.
+@ * This is correct in most cases
+@ * If a case comes where this is not true we need to modify the fmt conversion functions called inside also
+@ * Since we read 4 pixels at a time the width should be aligned to 4
+@ * In assembly width should be aligned to 16 and height to 2.
+@ *
+@ *
+@ * Revision History :
+@ * DD MM YYYY Author(s) Changes (Describe the changes made)
+@ * 07 06 2010 Harinarayanan K K Adapeted to 422p
+@ *
+@ *******************************************************************************
+@ */
+
+@//`
+@*/
+ .global ih264e_fmt_conv_422i_to_420sp_a9q
+ih264e_fmt_conv_422i_to_420sp_a9q:
+ @// Converts an interleaved 4:2:2 image (read through r3) into a Y plane (r0)
+ @// and an interleaved UV plane (r1), two source rows per outer iteration.
+ @// In every 4-byte source group, bytes 1 and 3 are stored as luma and bytes
+ @// 0 and 2 are treated as the two chroma components; the chroma of the two
+ @// rows is combined with a rounding average (vrhadd).
+ @// r2 (pu1_v) and the u4_stride_v stack argument are not referenced.
+ stmfd sp!, {r4-r12, lr} @// Save the registers used below (40 bytes, so stack args start at [sp, #40])
+
+
+
+ @/* Do the preprocessing before the main loops start */
+ @// Load the parameters from stack
+ ldr r4, [sp, #48] @// Load u4_stride_y from stack
+
+ ldr r5, [sp, #60] @// Load u4_stride_yuv422i from stack
+ add r6, r0, r4 @// pu1_y_nxt_row = pu1_y + u4_stride_y
+
+ ldr r7, [sp, #40] @// Load u4_width from stack
+ add r8, r3, r5, lsl #1 @// pu2_yuv422i_nxt_row = pu2_yuv422i_y + u4_stride_yuv422i(2 Bytes for each pixel)
+
+ ldr r9, [sp, #52] @// Load u4_stride_u from stack
+ sub r12, r4, r7 @// u2_offset1 = u4_stride_y - u4_width
+
+@LDR r10,[sp,#56] ;// Load u4_stride_v from stack
+ sub r14, r5, r7 @// u2_offset_yuv422i = u4_stride_yuv422i - u4_width
+
+ ldr r11, [sp, #44] @// Load u4_height from stack
+ sub r9, r9, r7 @// u2_offset2 = u4_stride_u - u4_width (full width: each output chroma row is written as interleaved U/V bytes)
+
+@ SUB r10,r10,r7,ASR #1 ;// u2_offset3 = u4_stride_v - u4_width >> 1
+ mov r14, r14, lsl #1 @// u2_offset_yuv422i = u2_offset_yuv422i * 2 (pixels -> bytes)
+
+ mov r7, r7, asr #4 @// u4_width = u4_width / 16 (u4_width >> 4)
+ mov r11, r11, asr #1 @// u4_height = u4_height / 2 (u4_height >> 1), two rows are handled per pass
+
+ add r4, r12, r4 @// u2_offset1 = u2_offset1 + u4_stride_y (skip the row handled through r6)
+ add r5, r14, r5, lsl #1 @// u2_offset_yuv422i = u2_offset_yuv422i + 2 * u4_stride_yuv422i (bytes, skip the row handled through r8)
+
+ vpush {d8-d15} @// d8-d15 are saved here and restored before return; the loop itself only writes d0-d7
+
+@// Register Assignment
+@// pu1_y - r0
+@// pu1_y_nxt_row - r6
+@// pu1_u - r1
+@// pu1_v - r2 (not referenced by the code below)
+@// pu2_yuv422i - r3
+@// pu2_yuv422i_nxt_row - r8
+@// u2_offset1 - r4
+@// u2_offset2 - r9
+@// u2_offset3 - r10 (never loaded: the V-stride code is commented out)
+@// u2_offset_yuv422i - r5
+@// u4_width / 16 - r7
+@// u4_height / 2 - r11
+@// inner loop count - r12
+yuv420_to_yuv422i_hight_loop:
+
+ mov r12, r7 @// Inner loop count = u4_width / 16
+
+yuv420_to_yuv422i_width_loop:
+ vld4.8 {d0, d1, d2, d3}, [r3]! @// Load 32 bytes (16 pixels) of row 1, de-interleaved by byte position within each 4-byte group
+ vld4.8 {d4, d5, d6, d7}, [r8]! @// Load 32 bytes (16 pixels) of row 2, de-interleaved the same way
+ subs r12, r12, #1 @// flags are consumed by the bgt at the end of the loop body
+
+ vrhadd.u8 d0, d0, d4 @// rounding average of byte-0 samples of both rows (stored first in each UV pair)
+ vrhadd.u8 d2, d2, d6 @// rounding average of byte-2 samples of both rows (stored second in each UV pair)
+
+ vst2.8 {d1, d3}, [r0]! @// Store the 16 elements of row1 Y
+ vst2.8 {d5, d7}, [r6]! @// Store the 16 elements of row2 Y
+
+ vst2.8 {d0, d2}, [r1]! @// Store 8 averaged chroma pairs (16 bytes, interleaved) shared by row1/row2
+
+ bgt yuv420_to_yuv422i_width_loop
+
+ @// Update the buffer pointer so that they will refer to next pair of rows
+ add r0, r0, r4 @// pu1_y = pu1_y + u2_offset1
+ add r6, r6, r4 @// pu1_y_nxt_row = pu1_y_nxt_row + u2_offset1
+
+ add r1, r1, r9 @// pu1_u = pu1_u + u2_offset2
+ subs r11, r11, #1 @// one row pair done; flags are consumed by the bgt below
+
+ add r3, r3, r5 @// pu2_yuv422i = pu2_yuv422i + u2_offset_yuv422i
+
+ add r8, r8, r5 @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row + u2_offset_yuv422i
+ bgt yuv420_to_yuv422i_hight_loop
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @// Restore the saved registers and return
+
+
+
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
new file mode 100755
index 0000000..bb181c1
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector.c
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+#ifdef ARMV8
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* The generic initializer always runs first */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    /* ARCH_ARM_NONEON keeps whatever the generic initializer installed;
+     * every other architecture value (A53, A57, V8 NEON and any value not
+     * listed explicitly) additionally gets the ARMv8 NEON initializer */
+    if(ARCH_ARM_NONEON != ps_codec->s_cfg.e_arch)
+    {
+        ih264e_init_function_ptr_neon_av8(ps_codec);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the default architecture of the
+* environment for which the current encoder is built
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+    /* Fixed answer for the ARMV8 build of this file */
+    const IV_ARCH_T e_default_arch = ARCH_ARM_V8_NEON;
+
+    return e_default_arch;
+}
+
+#else
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* The generic initializer always runs first */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    /* ARCH_ARM_NONEON keeps whatever the generic initializer installed;
+     * every other architecture value (A9Q, A9A, A9, A7, A5, A15 and any
+     * value not listed explicitly) additionally gets the a9q NEON
+     * initializer */
+    if(ARCH_ARM_NONEON != ps_codec->s_cfg.e_arch)
+    {
+        ih264e_init_function_ptr_neon_a9q(ps_codec);
+    }
+}
+
+/* Returns the architecture assumed by default when ARMV8 is not defined */
+IV_ARCH_T ih264e_default_arch(void)
+{
+    const IV_ARCH_T e_default_arch = ARCH_ARM_A9Q;
+
+    return e_default_arch;
+}
+
+#endif
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
new file mode 100755
index 0000000..8b2879b
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_a9q.c
@@ -0,0 +1,252 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_a9q.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_neon_a9q
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec)
+{
+    /* index over the per-process contexts */
+    WORD32 proc_idx;
+
+    /* ---- intra prediction, luma 16x16 (vert, horz, dc, plane) ---- */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+    /* ---- intra prediction, luma 4x4 (all nine entries) ---- */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+    /* ---- intra prediction, luma 8x8 ----
+     * entry [1] is not assigned by this routine */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+    /* ---- intra prediction, chroma 8x8 (dc, horz, vert, plane) ---- */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+    /* ---- forward transform / quantisation ----
+     * the 8x8 entry is given the function without an _a9 suffix */
+    ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9;
+
+    /* ---- inverse quantisation / inverse transform / reconstruction ----
+     * the 8x8 entry is given the function without an _a9 suffix */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+    ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
+    ps_codec->pf_interleave_copy = ih264_interleave_copy_a9;
+
+    /* ---- luma core coding ----
+     * entry [2] is not assigned by this routine */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* ---- chroma core coding ---- */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* ---- deblocking, luma edges ---- */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+    /* ---- deblocking, chroma edges ---- */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+    /* ---- macroblock syntax writers, indexed by slice type ---- */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* ---- padding ----
+     * the bottom entry is given the function without an _a9q suffix */
+    ps_codec->pf_pad_top = ih264_pad_top_a9q;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+    /* ---- inter prediction ---- */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+    /* ---- SAD functions held in the codec context ---- */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+
+    /* ---- memory copy / set ---- */
+    ps_codec->pf_mem_cpy = ih264_memcpy_a9q;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
+    ps_codec->pf_mem_set = ih264_memset_a9q;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
+
+    /* ---- SAD functions held in the motion-estimation context of every
+     *      process context ---- */
+    for(proc_idx = 0; proc_idx < (MAX_PROCESS_CTXT); proc_idx++)
+    {
+        me_ctxt_t *ps_me = &ps_codec->as_process[proc_idx].s_me_ctxt;
+
+        ps_me->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+        ps_me->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+        ps_me->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+        ps_me->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
+        ps_me->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
+        ps_me->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
+        ps_me->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
+        ps_me->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
+    }
+
+    /* ---- intra mode evaluation (encoder level) ---- */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
+
+    /* ---- colour format conversion ---- */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q;
+
+    /* ---- half-pel generation (encoder level) ---- */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q;
+}
+
diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c
new file mode 100755
index 0000000..173c2d5
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_av8.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_av8.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_neon_av8
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec)
+{
+
+ WORD32 i= 0;
+
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+ me_ctxt_t *ps_me_ctxt = NULL;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+ ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+ ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+ ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+ ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+ ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+ ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+ ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
+ ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+ ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+
+ /* Init forward transform fn ptr */
+ ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+ ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_av8;
+ ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_av8;
+ ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_av8;
+ ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_av8;
+
+ /* Init inverse transform fn ptr */
+ ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_av8;
+ ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+ ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
+ ps_codec->pf_interleave_copy = ih264_interleave_copy_av8;
+
+ /* Init fn ptr luma core coding */
+ ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+ ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+ ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+ /* Init fn ptr chroma core coding */
+ ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+ ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+ /* write mb syntax layer */
+ ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+ ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_top = ih264_pad_top_av8;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom;
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+ /* Inter pred leaf level functions */
+ ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
+ ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
+ ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
+ ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+ /* sad me level functions */
+ ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+ ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+ ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+
+ /* memory handling operations */
+ ps_codec->pf_mem_cpy = ih264_memcpy_av8;
+ ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
+ ps_codec->pf_mem_set = ih264_memset_av8;
+ ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
+
+ /* sad me level functions */
+ for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+ ps_me_ctxt = &ps_proc->s_me_ctxt;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+ ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+ ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
+ ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
+ ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
+ ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
+ }
+
+ /* intra mode eval -encoder level function */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+ /* csc */
+ ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+ ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+ /* Half pel generation function - encoder level */
+ ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
+
+ return ;
+ }
+
diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s
new file mode 100755
index 0000000..1b9a87a
--- /dev/null
+++ b/encoder/arm/ih264e_half_pel.s
@@ -0,0 +1,951 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264e_half_pel.s
+@ *
+@ * @brief
+@ *
+@ *
+@ * @author
+@ * Ittiam
+@ *
+@ * @par List of Functions:
+@ * ih264e_sixtapfilter_horz
+@ * ih264e_sixtap_filter_2dvh_vert
+@
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+
+
+.text
+.p2align 2
+
+@ /**
+@/*******************************************************************************
+@*
+@* @brief
+@* Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16)
+@*
+@* @par Description:
+@* Applies a 6 tap horizontal filter. The output is clipped to 8 bits
+@* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd);
+
+
+.equ HALFPEL_WIDTH , 17 + 1 @( make it even, two rows are processed at a time)
+@ Horizontal 6-tap half-pel filter: out = clip8((a0 - 5*a1 + 20*a2 + 20*a3 - 5*a4 + a5 + 16) >> 5)
+@ Per the prototype above (AAPCS argument registers): r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+ .global ih264e_sixtapfilter_horz_a9q
+ih264e_sixtapfilter_horz_a9q:
+ stmfd sp!, {lr} @ r14 (lr) is reused as the row counter below, so save the return address
+
+ vmov.i8 d0, #5 @ d0 = 5 in every byte lane (weight applied to a1 and a4)
+ sub r0, r0, #2 @ start reading 2 bytes before pu1_src so each window covers a0..a5
+
+ vmov.i8 d1, #20 @ d1 = 20 in every byte lane (weight applied to a2 and a3)
+ mov r14, #HALFPEL_WIDTH @ r14 = number of rows to filter (18)
+ vpush {d8-d15} @ q4-q7 (d8-d15) are used as accumulators below; preserve the caller's values
+
+filter_horz_loop:
+@ Each iteration filters two rows. A row is 24 source bytes handled as three
+@ 8-byte columns (d2,d3,d4 for row0; d5,d6,d7 for row1), each giving 8 output bytes.
+ vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0
+ vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
+
+ @// Processing row0 and row1
+ @// NOTE: column3 extracts pair d4 (or d7) with itself, so lanes whose taps fall past the 24 loaded bytes use wrapped-around data; only its first 3 lanes are true filter outputs
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
+ vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0)
+
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0)
+ vext.8 d29, d4, d4, #5 @//extract a[5] (column3,row0)
+ vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0)
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q6, d29, d4 @// a0 + a5 (column3,row0)
+ vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1)
+ vext.8 d26, d7, d7, #5 @//extract a[5] (column3,row1)
+
+ vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1)
+ vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0)
+ vaddl.u8 q9, d26, d7 @// a0 + a5 (column3,row1)
+ vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vext.8 d29, d4, d4, #2 @//extract a[2] (column3,row0)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1)
+ vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1)
+ vext.8 d26, d7, d7, #2 @//extract a[2] (column3,row1)
+
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1)
+ vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0)
+ vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 (column3,row1)
+ vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vext.8 d29, d4, d4, #3 @//extract a[3] (column3,row0)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ vext.8 d26, d7, d7, #3 @//extract a[3] (column3,row1)
+
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0)
+ vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row1)
+ vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vext.8 d29, d4, d4, #1 @//extract a[1] (column3,row0)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1)
+ vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vext.8 d26, d7, d7, #1 @//extract a[1] (column3,row1)
+
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0)
+ vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1)
+ vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d29, d4, d4, #4 @//extract a[4] (column3,row0)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1)
+ vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vext.8 d26, d7, d7, #4 @//extract a[4] (column3,row1)
+
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1)
+
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vqrshrun.s16 d22, q6, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vqrshrun.s16 d24, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ vqrshrun.s16 d25, q9, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1)
+
+ vst1.8 {d20, d21, d22}, [r1], r3 @//Store dest row0
+ vst1.8 {d23, d24, d25}, [r1], r3 @//Store dest row1
+
+ subs r14, r14, #2 @ two rows were written this iteration; decrement the row counter
+
+ bne filter_horz_loop @ repeat until all HALFPEL_WIDTH rows are done
+
+ vpop {d8-d15} @ restore the NEON registers saved on entry
+ ldmfd sp!, {pc} @ return by popping the saved lr straight into pc
+
+
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements a two stage cascaded six tap filter. It
+@* applies the six tap filter in the vertical direction on the
+@* predictor values, followed by applying the same filter in the
+@* horizontal direction on the output of the first stage. The six tap
+@* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+@* interpolation process"
+@* (Filter run for width = 17 and height =17)
+@* @par Description:
+@* The function interpolates
+@* the predictors first in the vertical direction and then in the
+@* horizontal direction to output the (1/2,1/2). The output of the first
+@* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+@* in 16 bit precision.
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst1
+@* UWORD8 pointer to the destination(vertical filtered output)
+@*
+@* @param[out] pu1_dst2
+@* UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride of pu1_dst
+@*
+@* @param[in] pi16_pred1
+@* Pointer to 16bit intermediate buffer(used only in c)
+@*
+@* @param[in] pi16_pred1_strd
+@* integer destination stride of pi16_pred1
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst1,
+@ UWORD8 *pu1_dst2,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 *pi16_pred1,/* Pointer to 16bit intermediate buffer (used only in c)*/
+@ WORD32 pi16_pred1_strd)
+
+
+
+
+ .global ih264e_sixtap_filter_2dvh_vert_a9q
+
+ih264e_sixtap_filter_2dvh_vert_a9q:
+ stmfd sp!, {r10, r11, r12, lr}
+
+@//r0 - pu1_ref
+@//r3 - u4_ref_width
+ vpush {d8-d15}
+ @// Load six rows for vertical interpolation
+ lsl r12, r3, #1
+ sub r0, r0, r12
+ sub r0, r0, #2
+ vld1.8 {d2, d3, d4}, [r0], r3
+ vld1.8 {d5, d6, d7}, [r0], r3
+ vld1.8 {d8, d9, d10}, [r0], r3
+ mov r12, #5
+ vld1.8 {d11, d12, d13}, [r0], r3
+ mov r14, #20
+ vld1.8 {d14, d15, d16}, [r0], r3
+ vmov.16 d0[0], r12
+ vmov.16 d0[1], r14
+ vld1.8 {d17, d18, d19}, [r0], r3
+ vmov.i8 d1, #20
+
+@// r12 - u2_buff1_width
+@// r14 - u2_buff2_width
+ ldr r12, [sp, #80]
+ add r11, r1, #6
+
+ mov r14, r12
+
+ mov r10, #3 @loop counter
+
+
+filter_2dvh_loop:
+
+ @// ////////////// ROW 1 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d2, d17 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+
+ vaddl.u8 q11, d3, d18 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d4, d19 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vqrshrun.s16 d2, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d3, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d4, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d2, d2, d3, #2
+ vst1.8 {d3, d4}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d2}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q1, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q1, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q1, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q1, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q1, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q1, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d2, d3, d4}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 2 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d5, d2 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d6, d3 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d7, d4 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d5, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d6, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d7, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d5, d5, d6, #2
+ vst1.8 {d6, d7}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d5}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q3, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q3, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q3, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q3, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q3, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q3, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d5, d6, d7}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 3 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d8, d5 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d9, d6 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d10, d7 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d13, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d8, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d9, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d10, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d8, d8, d9, #2
+ vst1.8 {d9, d10}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d8}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q4, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q4, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q4, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q4, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q4, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q4, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d8, d9, d10}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 4 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d11, d8 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d12, d9 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d13, d10 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d11, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d12, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d13, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d11, d11, d12, #2
+ vst1.8 {d12, d13}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d11}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q6, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q6, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q6, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q6, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q6, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q6, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d11, d12, d13}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 5 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d14, d11 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d15, d12 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d16, d13 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d14, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d15, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d16, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d14, d14, d15, #2
+ vst1.8 {d15, d16}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d14}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q7, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q7, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q7, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q7, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q7, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q7, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d14, d15, d16}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 6 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+
+ cmp r10, #1 @// if it 17 rows are complete skip
+ beq filter_2dvh_skip_row
+ vaddl.u8 q10, d17, d14 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d18, d15 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d19, d16 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d13, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d17, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d18, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d19, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d17, d17, d18, #2
+ vst1.8 {d18, d19}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d17}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q9, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q9, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q9, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q9, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q9, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q9, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d17, d18, d19}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ subs r10, r10, #1 @//decrement loop counter
+
+ bne filter_2dvh_loop
+
+
+@// Process first vertical interpolated row
+@// each column is
+ @// ////////////// ROW 13 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vpop {d8-d15}
+ ldmfd sp!, {r10, r11, r12, pc}
+
+filter_2dvh_skip_row:
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+ vpop {d8-d15}
+ ldmfd sp!, {r10, r11, r12, pc}
+
+
+
+
diff --git a/encoder/arm/ih264e_platform_macros.h b/encoder/arm/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/arm/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_platform_macros.h
+*
+* @brief
+* Contains platform specific routines used for codec context initialization
+*
+* @author
+* ittiam
+*
+* @remarks
+* Includes nothing itself: codec_t and IV_ARCH_T must already be declared
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context for the neon_a9q variant (presumably ARMv7-A NEON - confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context for the neon_av8 variant (presumably ARMv8 NEON - confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context for the generic variant (presumably plain C - confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as an untyped (void *) pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/arm/ime_distortion_metrics_a9q.s b/encoder/arm/ime_distortion_metrics_a9q.s
new file mode 100755
index 0000000..b58911e
--- /dev/null
+++ b/encoder/arm/ime_distortion_metrics_a9q.s
@@ -0,0 +1,1353 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@*
+@* @brief
+@* This file contains definitions of routines that compute distortion
+@* between two macro/sub blocks of identical dimensions
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* - ime_compute_sad_16x16_a9q()
+@* - ime_compute_sad_16x16_fast_a9q()
+@* - ime_compute_sad_16x8_a9q()
+@* - ime_compute_sad_16x16_ea8_a9q()
+@* - ime_calculate_sad2_prog_a9q()
+@* - ime_calculate_sad3_prog_a9q()
+@* - ime_calculate_sad4_prog_a9q()
+@* - ime_sub_pel_compute_sad_16x16_a9q()
+@* - ime_compute_satqd_16x16_lumainter_a9q()
+@* -
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+@*
+@* @par Description
+@* Accumulates the absolute differences of 8 alternate rows (both strides
+@* are doubled on entry) and stores twice that sum as the SAD estimate.
+@* There is no early exit; i4_max_sad is never read by this routine.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source (r0)
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the block compared against; only read (r1)
+@*
+@* @param[in] src_strd
+@* integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@* integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@* integer maximum allowed distortion (stack, ignored here)
+@*
+@* @param[out] pi4_mb_distortion
+@* address that receives the 32-bit SAD estimate (stack)
+@*
+@* @remarks
+@* Only d0-d7 and d16-d19 are written, so callee-saved d8-d15 are preserved.
+@******************************************************************************
+@*/
+.text
+.p2align 2
+ .global ime_compute_sad_16x16_fast_a9q
+ime_compute_sad_16x16_fast_a9q:
+
+ stmfd sp!, {r12, lr}
+ lsl r2, r2, #1 @ step two source rows per load
+ lsl r3, r3, #1 @ step two reference rows per load
+
+ @ second row pair lives in d16-d19: caller-saved scratch registers, unlike
+ @ d8-d11 which the procedure call standard requires a callee to preserve
+
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+ mov r12, #6 @ 6 more rows after the 2 preloaded ones
+ vld1.8 {d16, d17}, [r0], r2
+ vabdl.u8 q0, d6, d4
+ vabdl.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+loop_sad_16x16_fast:
+
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+ vld1.8 {d6, d7}, [r1], r3
+ subs r12, #2
+ vld1.8 {d16, d17}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+ bne loop_sad_16x16_fast
+
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+
+ ldr r12, [sp, #12] @ pi4_mb_distortion (stack arg 2, past the 8 pushed bytes)
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vshl.u32 d0, d0, #1 @ scale up for the skipped rows
+ vst1.32 {d0[0]}, [r12]
+
+ ldmfd sp!, {r12, pc}
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x8 blocks
+@*
+@*
+@* @par Description
+@* Sums the absolute differences of all 8 rows of 16 pixels and stores the
+@* total. There is no early exit in this routine: the maximum-SAD stack
+@* argument is never read.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source (r0)
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the block compared against; only read (r1)
+@*
+@* @param[in] src_strd
+@* integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@* integer destination stride (r3)
+@*
+@* @param[in] u4_max_sad
+@* integer maximum allowed distortion (stack, ignored here)
+@*
+@* @param[out] pi4_mb_distortion
+@* address that receives the 32-bit SAD (stack)
+@*
+@* @remarks
+@* Only d0-d7 and d16-d19 are written, so callee-saved d8-d15 are preserved.
+@******************************************************************************
+@*/
+@
+ .global ime_compute_sad_16x8_a9q
+ime_compute_sad_16x8_a9q:
+
+ stmfd sp!, {r12, lr}
+
+ @ second row of each pair lives in d16-d19: caller-saved scratch registers,
+ @ unlike d8-d11 which the procedure call standard makes callee-saved
+
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+ mov r12, #6 @ 6 more rows after the 2 preloaded ones
+ vld1.8 {d16, d17}, [r0], r2
+ vabdl.u8 q0, d6, d4
+ vabdl.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+loop_sad_16x8:
+
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+ vld1.8 {d6, d7}, [r1], r3
+ subs r12, #2
+ vld1.8 {d16, d17}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+ bne loop_sad_16x8
+
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+
+ ldr r12, [sp, #12] @ pi4_mb_distortion (stack arg 2, past the 8 pushed bytes)
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vst1.32 {d0[0]}, [r12]
+
+ ldmfd sp!, {r12, pc}
+
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+@*
+@* @par Description
+@* The 8 even rows are summed first and that partial SAD is stored. If it is
+@* greater than i4_max_sad the routine returns with only the partial value;
+@* otherwise the 8 odd rows are added and the full 16x16 SAD is stored.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source (r0)
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the block compared against; only read (r1)
+@*
+@* @param[in] src_strd
+@* integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@* integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@* integer maximum allowed distortion, compared as signed (stack)
+@*
+@* @param[out] pi4_mb_distortion
+@* address that receives the partial or full 32-bit SAD (stack)
+@*
+@* @remarks
+@* Only d0-d7 and d16-d21 are written, so callee-saved d8-d15 are preserved.
+@******************************************************************************
+@*/
+ .global ime_compute_sad_16x16_ea8_a9q
+
+ime_compute_sad_16x16_ea8_a9q:
+
+ stmfd sp!, {r5-r7, lr}
+ lsl r2, r2, #1 @ step two source rows per load
+ lsl r3, r3, #1 @ step two reference rows per load
+
+ @ second row pair lives in d16-d19 and the partial sum in q10: all are
+ @ caller-saved scratch registers, unlike d8-d13 which are callee-saved
+
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+ mov r5, #6 @ 6 more even rows after the 2 preloaded ones
+ vld1.8 {d16, d17}, [r0], r2
+ vabdl.u8 q0, d6, d4
+ vabdl.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+ ldrd r6, r7, [sp, #16] @ stack args sit just above the 16 pushed bytes
+ @r6 = i4_max_sad, r7 = pi4_mb_distortion
+
+loop_sad_16x16_ea8_1:
+
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+ vld1.8 {d6, d7}, [r1], r3
+ subs r5, #2
+ vld1.8 {d16, d17}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+ bne loop_sad_16x16_ea8_1
+
+ vabal.u8 q0, d18, d16
+ sub r0, r0, r2, lsl #3 @ rewind source by the 8 doubled strides consumed
+ vabal.u8 q1, d19, d17
+ sub r1, r1, r3, lsl #3 @ rewind reference likewise
+
+ vadd.i16 q10, q0, q1 @ partial sum in q10; q0/q1 keep accumulating
+ add r0, r0, r2, asr #1 @ one original stride down: first odd source row
+ vadd.i16 d20, d20, d21
+ add r1, r1, r3, asr #1 @ first odd reference row
+
+ vpaddl.u16 d20, d20
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+ vpaddl.u32 d20, d20
+ vld1.8 {d16, d17}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+
+ vst1.32 {d20[0]}, [r7] @ publish the even-row SAD
+ ldr r5, [r7]
+ cmp r5, r6
+ bgt end_func_16x16_ea8 @ partial SAD already above i4_max_sad
+
+ vld1.8 {d18, d19}, [r1], r3
+ mov r5, #6 @ 6 more odd rows after the 2 preloaded ones
+
+loop_sad_16x16_ea8_2:
+
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+ vld1.8 {d6, d7}, [r1], r3
+ subs r5, #2
+ vld1.8 {d16, d17}, [r0], r2
+ vabal.u8 q0, d6, d4
+ vabal.u8 q1, d7, d5
+ vld1.8 {d18, d19}, [r1], r3
+
+ bne loop_sad_16x16_ea8_2
+
+ vabal.u8 q0, d18, d16
+ vabal.u8 q1, d19, d17
+
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+
+ vst1.32 {d0[0]}, [r7] @ overwrite with the full 16-row SAD
+
+end_func_16x16_ea8:
+
+ ldmfd sp!, {r5-r7, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : ime_calculate_sad2_prog_a9q()
+@//
+@// Detail Description : Computes, in one pass, the SADs of one 16x16 source
+@// block against 2 reference blocks; writes psad[0..1]
+@//
+@// Platform : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+ .global ime_calculate_sad2_prog_a9q
+
+ime_calculate_sad2_prog_a9q:
+
+ @ r0 = ref1 <UWORD8 *>
+ @ r1 = ref2 <UWORD8 *>
+ @ r2 = src <UWORD8 *>
+ @ r3 = RefBufferWidth <UWORD32>
+ @ stack = CurBufferWidth <UWORD32>, psad <UWORD32 *>
+
+ stmfd sp!, {r4-r5, lr}
+
+ ldr r4, [sp, #12] @ src stride: first stack arg, above the 12 pushed bytes ([sp, #8] is saved lr)
+ mov r5, #14 @ 14 more rows after the 2 preloaded ones
+
+ @Row 1 (row 2 and the ref1 sums use d20-d27, not callee-saved d8-d15)
+ vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
+ vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
+
+ @Row 2
+ vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
+ vabdl.u8 q12, d2, d0
+ vabdl.u8 q13, d3, d1
+ vld1.8 {d20, d21}, [r0], r3 @ load ref1 Row 2
+ vabdl.u8 q8, d4, d0
+ vabdl.u8 q9, d5, d1
+ vld1.8 {d22, d23}, [r1], r3 @ load ref2 Row 2
+
+loop_sad2_prog:
+
+ subs r5, #2 @ flags survive to the bne: NEON ops below leave them alone
+ @Row 1
+ vld1.8 {d0, d1}, [r2], r4 @ load src Row 1
+ vabal.u8 q12, d20, d6
+ vabal.u8 q13, d21, d7
+ vld1.8 {d2, d3}, [r0], r3 @ load ref1 Row 1
+ vabal.u8 q8, d22, d6
+ vabal.u8 q9, d23, d7
+ vld1.8 {d4, d5}, [r1], r3 @ load ref2 Row 1
+
+ @Row 2
+ vld1.8 {d6, d7}, [r2], r4 @ load src Row 2
+ vabal.u8 q12, d2, d0
+ vabal.u8 q13, d3, d1
+ vld1.8 {d20, d21}, [r0], r3 @ load ref1 Row 2
+ vabal.u8 q8, d4, d0
+ vabal.u8 q9, d5, d1
+ vld1.8 {d22, d23}, [r1], r3 @ load ref2 Row 2
+
+ bne loop_sad2_prog
+
+ vabal.u8 q12, d20, d6
+ vabal.u8 q13, d21, d7
+ vabal.u8 q8, d22, d6
+ vabal.u8 q9, d23, d7
+
+ @ Compute SAD
+
+ vadd.u16 q12, q12, q13 @ Q12 : sad_ref1
+ vadd.u16 q8, q8, q9 @ Q8 : sad_ref2
+
+ vadd.u16 d24, d24, d25
+ ldr r5, [sp, #16] @ loading pi4_sad to r5 (second stack arg)
+ vadd.u16 d16, d16, d17
+
+ vpadd.u16 d24, d24, d16 @ low half: ref1 partials, high half: ref2 partials
+ vpaddl.u16 d24, d24 @ two 32-bit totals: {sad_ref1, sad_ref2}
+
+ vst1.64 {d24}, [r5]!
+
+ ldmfd sp!, {r4-r5, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : ime_calculate_sad3_prog_a9q()
+@//
+@// Detail Description : Computes, in one pass, the SADs of one 16x16 source
+@// block against 3 reference blocks; writes psad[0..2]
+@//
+@// Platform : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+ .global ime_calculate_sad3_prog_a9q
+
+ime_calculate_sad3_prog_a9q:
+
+ @ r0 = ref1 <UWORD8 *>
+ @ r1 = ref2 <UWORD8 *>
+ @ r2 = ref3 <UWORD8 *>
+ @ r3 = src <UWORD8 *>
+ @ stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
+
+ stmfd sp!, {r4-r6, lr}
+ vpush {d8-d15} @ callee-saved NEON registers, all overwritten below
+
+ ldrd r4, r5, [sp, #80] @ load ref stride to r4, src stride to r5 (16 + 64 bytes pushed)
+ mov r6, #14 @ 14 more rows after the 2 preloaded ones
+
+ @ Row 1
+ vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
+ vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
+ vabdl.u8 q8, d2, d0
+ vabdl.u8 q9, d3, d1
+ vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
+ vabdl.u8 q10, d4, d0
+ vabdl.u8 q11, d5, d1
+
+ @ Row 2
+ vld1.8 {d8, d9}, [r3], r5 @ load src Row 2
+ vabdl.u8 q12, d6, d0
+ vabdl.u8 q13, d7, d1
+ vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 2
+ vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 2
+ vabal.u8 q8, d10, d8
+ vabal.u8 q9, d11, d9
+ vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 2
+ vabal.u8 q10, d12, d8
+ vabal.u8 q11, d13, d9
+
+loop_sad3_prog:
+
+ @Row 1
+ vld1.8 {d0, d1}, [r3], r5 @ load src Row 1
+ vabal.u8 q12, d14, d8
+ vabal.u8 q13, d15, d9
+ vld1.8 {d2, d3}, [r0], r4 @ load ref1 Row 1
+ vld1.8 {d4, d5}, [r1], r4 @ load ref2 Row 1
+ vabal.u8 q8, d2, d0
+ vabal.u8 q9, d3, d1
+ vld1.8 {d6, d7}, [r2], r4 @ load ref3 Row 1
+ vabal.u8 q10, d4, d0
+ vabal.u8 q11, d5, d1
+
+ @Row 2
+ vld1.8 {d8, d9}, [r3], r5 @ load src Row 2
+ vabal.u8 q12, d6, d0
+ vabal.u8 q13, d7, d1
+ vld1.8 {d10, d11}, [r0], r4 @ load ref1 Row 2
+ subs r6, #2
+ vld1.8 {d12, d13}, [r1], r4 @ load ref2 Row 2
+ vabal.u8 q8, d10, d8
+ vabal.u8 q9, d11, d9
+ vld1.8 {d14, d15}, [r2], r4 @ load ref3 Row 2
+ vabal.u8 q10, d12, d8
+ vabal.u8 q11, d13, d9
+
+ bne loop_sad3_prog
+
+ vabal.u8 q12, d14, d8
+ vabal.u8 q13, d15, d9
+
+ @ Compute SAD
+
+ vadd.u16 q8, q8, q9 @ Q8 : sad_ref1
+ vadd.u16 q10, q10, q11 @ Q10 : sad_ref2
+ vadd.u16 q12, q12, q13 @ Q12 : sad_ref3
+
+ vadd.u16 d16, d16, d17
+ vadd.u16 d20, d20, d21
+ vadd.u16 d24, d24, d25
+
+ vpadd.u16 d16, d16, d20 @ low half: ref1 partials, high half: ref2 partials
+ vpadd.u16 d24, d24, d24
+
+ ldr r6, [sp, #88] @ loading pi4_sad to r6 (third stack arg, 16 + 64 bytes pushed)
+ vpaddl.u16 d16, d16
+ vpaddl.u16 d24, d24
+
+ vst1.64 {d16}, [r6]! @ psad[0], psad[1]
+ vst1.32 {d24[0]}, [r6] @ psad[2]
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r6, pc}
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) for sub-pel motion estimation
+@*
+@* @par Description
+@* This function computes the 16x16 SAD of the source block against all 8
+@* half pel candidates in a single pass over the rows. The "top" candidates
+@* of row n reuse the reference rows that were loaded for row n-1, so the
+@* three top pointers are only dereferenced once, before the loop.
+@*
+@* @param[in] r0
+@* pointer to the source block
+@*
+@* @param[in] r1
+@* pointer to the half x reference block
+@*
+@* @param[in] r2
+@* pointer to the half y reference block
+@*
+@* @param[in] r3
+@* pointer to the half xy reference block
+@*
+@* @param[in] stack argument 1
+@* source stride (byte step applied to r0 after every row)
+@*
+@* @param[in] stack argument 2
+@* reference stride (byte step applied to every reference pointer)
+@*
+@* @param[out] pi4_sad (stack argument 3)
+@* integer evaluated sad
+@* pi4_sad[0] - half x
+@* pi4_sad[1] - half x - 1
+@* pi4_sad[2] - half y
+@* pi4_sad[3] - half y - strd
+@* pi4_sad[4] - half xy
+@* pi4_sad[5] - half xy - 1
+@* pi4_sad[6] - half xy - strd
+@* pi4_sad[7] - half xy - 1 - strd
+@*
+@* @remarks
+@* d8-d15 are used as scratch and accumulators. They are callee-saved under
+@* the AAPCS, so they are pushed on entry and popped before returning.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+ .global ime_sub_pel_compute_sad_16x16_a9q
+
+ime_sub_pel_compute_sad_16x16_a9q:
+
+ stmfd sp!, {r4-r11, lr} @store register values to stack (36 bytes)
+ vpush {d8-d15} @save callee-saved NEON registers (64 bytes)
+
+ ldr r9, [sp, #100] @ source stride: 1st stack argument (36 + 64 bytes pushed)
+ ldr r10, [sp, #104] @ reference stride: 2nd stack argument
+
+ sub r4, r1, #1 @ x left
+ sub r5, r2, r10 @ y top
+
+ sub r6, r3, #1 @ xy left
+ sub r7, r3, r10 @ xy top
+
+ sub r8, r7, #1 @ xy top-left
+ mov r11, #15 @ rows remaining after the first one
+
+ @ Row 0: the only row for which the three "top" pointers are read
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vld1.8 {d2, d3}, [r5], r10 @ y top LOAD
+ vld1.8 {d4, d5}, [r7], r10 @ xy top LOAD
+ vld1.8 {d6, d7}, [r8], r10 @ xy top-left LOAD
+
+ vabdl.u8 q6, d2, d0 @ y top ABS1
+ vabdl.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabdl.u8 q8, d6, d0 @ xy top-left ABS1
+ vabdl.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabdl.u8 q10, d10, d0 @ x left ABS1
+ vabdl.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabdl.u8 q12, d4, d0 @ xy ABS1
+ vabdl.u8 q13, d6, d0 @ xy left ABS1
+
+loop_sub_pel_16x16:
+
+ @ Finish the previous row (upper 8 pixels) before src is overwritten
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ subs r11, #1
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ @ d2-d7 still hold the previous row of y / xy / xy left, which is the
+ @ "top" row of the source row loaded here
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vabal.u8 q6, d2, d0 @ y top ABS1
+ vabal.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabal.u8 q8, d6, d0 @ xy top-left ABS1
+ vabal.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabal.u8 q10, d10, d0 @ x left ABS1
+ vabal.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabal.u8 q12, d4, d0 @ xy ABS1
+ vabal.u8 q13, d6, d0 @ xy left ABS1
+
+ bne loop_sub_pel_16x16
+
+ @ Finish the last row
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ @ Fold every 8 x u16 accumulator to 4 x u16, placed so that d0..d7
+ @ already follow the pi4_sad[0..7] order
+ vadd.i16 d0, d18, d19 @ x
+ vadd.i16 d3, d12, d13 @ y top
+ vadd.i16 d6, d14, d15 @ xy top
+ vadd.i16 d5, d26, d27 @ xy left
+ vadd.i16 d1, d20, d21 @ x left
+ vadd.i16 d2, d22, d23 @ y
+ vadd.i16 d4, d24, d25 @ xy
+ vadd.i16 d7, d16, d17 @ xy top left
+
+ @ Pair up neighbouring candidates, then widen to one u32 per candidate
+ vpadd.i16 d0, d0, d1
+ vpadd.i16 d2, d2, d3
+ vpadd.i16 d4, d4, d5
+ vpadd.i16 d6, d6, d7
+
+ vpaddl.u16 d0, d0
+ vpaddl.u16 d2, d2
+ ldr r11, [sp, #108] @ pi4_sad: 3rd stack argument
+ vpaddl.u16 d4, d4
+ vpaddl.u16 d6, d6
+
+ vst1.32 {d0}, [r11]! @ pi4_sad[0], pi4_sad[1]
+ vst1.32 {d2}, [r11]! @ pi4_sad[2], pi4_sad[3]
+ vst1.32 {d4}, [r11]! @ pi4_sad[4], pi4_sad[5]
+ vst1.32 {d6}, [r11]! @ pi4_sad[6], pi4_sad[7]
+
+ vpop {d8-d15} @restore callee-saved NEON registers
+ ldmfd sp!, {r4-r11, pc} @Restoring registers from stack
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks
+@*
+@* @par Description
+@* This function computes the SAD between 2 16x16 blocks. All 16 rows are
+@* always processed: this implementation has no early exit and never reads
+@* i4_max_sad.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source (r0)
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the block the source is compared against (r1); it is
+@* only read
+@*
+@* @param[in] src_strd
+@* integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@* integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@* integer maximum allowed distortion (1st stack argument, unused here)
+@*
+@* @param[out] pi4_mb_distortion
+@* integer evaluated sad (2nd stack argument)
+@*
+@* @remarks
+@* d8-d11 are used as row buffers. They are callee-saved under the AAPCS,
+@* so they are pushed on entry and popped before returning.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+ .global ime_compute_sad_16x16_a9q
+
+ime_compute_sad_16x16_a9q:
+
+ stmfd sp!, {r12, r14} @store register values to stack (8 bytes)
+ vpush {d8-d11} @save callee-saved NEON registers (32 bytes)
+
+ vld1.8 {d4, d5}, [r0], r2 @ src row 0
+ vld1.8 {d6, d7}, [r1], r3 @ dst row 0
+
+ mov r12, #14 @ rows still to be loaded inside the loop
+ vld1.8 {d8, d9}, [r0], r2 @ src row 1
+ vabdl.u8 q0, d4, d6 @ row 0, pixels 0-7
+ vld1.8 {d10, d11}, [r1], r3 @ dst row 1
+ vabdl.u8 q1, d5, d7 @ row 0, pixels 8-15
+
+loop_sad_16x16:
+
+ @ Two rows per iteration; each load is issued one row ahead of its use
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d8, d10
+ vld1.8 {d6, d7}, [r1], r3
+ vabal.u8 q1, d9, d11
+
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d4, d6
+ subs r12, #2
+ vld1.8 {d10, d11}, [r1], r3
+ vabal.u8 q1, d5, d7
+
+ bne loop_sad_16x16
+
+ @ Last row was loaded by the final iteration
+ vabal.u8 q0, d8, d10
+ vabal.u8 q1, d9, d11
+
+ @ Reduce the 16 x u16 partial sums to a single value
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+ ldr r12, [sp, #44] @ pi4_mb_distortion: 2nd stack argument (4 + 8 + 32)
+
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vst1.32 {d0[0]}, [r12]
+
+ vpop {d8-d11} @restore callee-saved NEON registers
+ ldmfd sp!, {r12, pc} @Restoring registers from stack
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : ime_calculate_sad4_prog_a9q()
+@//
+@// Detail Description : This function finds the 16x16 SAD of the current
+@// block against the 4 reference blocks that lie one
+@// pixel left, right, above and below temp_frame, in
+@// one pass.
+@//
+@// Output : psad[0] = left, psad[1] = right,
+@// psad[2] = top, psad[3] = bottom
+@//
+@// Platform : CortexA8/NEON .
+@//
+@// Remarks : d8-d15 are callee-saved under the AAPCS, so they
+@// are pushed on entry and popped before returning.
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+ .global ime_calculate_sad4_prog_a9q
+
+ime_calculate_sad4_prog_a9q:
+ @ r0 = temp_frame <UWORD8 *>
+ @ r1 = buffer_ptr <UWORD8 *>
+ @ r2 = RefBufferWidth <UWORD32>
+ @ r3 = CurBufferWidth <UWORD32>
+ @ stack = psad <UWORD32 *> (1st stack argument)
+
+ stmfd sp!, {r4-r7, lr} @ 20 bytes
+ vpush {d8-d15} @ save callee-saved NEON registers (64 bytes)
+
+ @UWORD8 *left_ptr = temp_frame - 1;
+ @UWORD8 *right_ptr = temp_frame + 1;
+ @UWORD8 *top_ptr = temp_frame - RefBufferWidth;
+ @UWORD8 *bot_ptr = temp_frame + RefBufferWidth;
+
+ mov r7, #14 @ rows handled inside the loop, 2 per iteration
+ sub r4, r0, #0x01 @r4 = left_ptr
+ add r5, r0, #0x1 @r5 = right_ptr
+ sub r6, r0, r2 @r6 = top_ptr
+ add r0, r0, r2 @r0 = bot_ptr
+ @r1 = buffer_ptr
+
+ @D0:D1 : buffer
+ @D2:D3 : top
+ @D4:D5 : left
+ @D6:D7 : right
+ @D8:D9 : bottom
+
+ @Row 1
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+
+ vabdl.u8 q5, d2, d0
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabdl.u8 q6, d3, d1
+
+ vabdl.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabdl.u8 q8, d1, d5
+
+ @Row 2
+ vabdl.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabdl.u8 q10, d1, d7
+
+ vabdl.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabdl.u8 q12, d1, d9
+
+loop_sad4_prog:
+
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
+
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
+
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
+
+ @Row 1
+ vabal.u8 q11, d26, d8
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vabal.u8 q12, d27, d9
+
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ subs r7, #2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+
+ vabal.u8 q5, d0, d2
+
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabal.u8 q6, d1, d3
+
+ vabal.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabal.u8 q8, d1, d5
+
+ @Row 2
+ vabal.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabal.u8 q10, d1, d7
+
+ vabal.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabal.u8 q12, d1, d9
+
+ bne loop_sad4_prog
+
+ @ Last row: src and top were already loaded by the final iteration
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
+
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
+
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
+
+ vabal.u8 q11, d26, d8
+ vabal.u8 q12, d27, d9
+
+ @;Q5:Q6 : sad_top
+ @;Q7:Q8 : sad_left
+ @;Q9:Q10 : sad_right
+ @;Q11:Q12 : sad_bot
+
+ vadd.u16 q5, q5, q6
+ vadd.u16 q7, q7, q8
+ vadd.u16 q9, q9, q10
+ vadd.u16 q11, q11, q12
+
+ @; Free :-
+ @; Q6,Q8,Q10,Q12
+
+ @;Q5 -> D10:D11
+ @;Q7 -> D14:D15
+ @;Q9 -> D18:D19
+ @;Q11 -> D22:D23
+
+ vadd.u16 d10, d10, d11
+ vadd.u16 d14, d14, d15
+ vadd.u16 d18, d18, d19
+ vadd.u16 d22, d22, d23
+
+ @;D10 : sad_top
+ @;D14 : sad_left
+ @;D18 : sad_right
+ @;D22 : sad_bot
+
+
+ vpaddl.u16 d11, d10
+ vpaddl.u16 d15, d14
+ vpaddl.u16 d19, d18
+ vpaddl.u16 d23, d22
+
+ @;D11 : sad_top
+ @;D15 : sad_left
+ @;D19 : sad_right
+ @;D23 : sad_bot
+
+ vpaddl.u32 d10, d11
+ vpaddl.u32 d22, d23
+ vpaddl.u32 d14, d15
+ vpaddl.u32 d18, d19
+
+ @;D10 : sad_top
+ @;D14 : sad_left
+ @;D18 : sad_right
+ @;D22 : sad_bot
+
+ ldr r4, [sp, #84] @ psad: 1st stack argument (20 + 64 bytes pushed)
+
+ vsli.64 d10, d22, #32 @ d10 = top (low word) | bottom (high word)
+ vsli.64 d14, d18, #32 @ d14 = left (low word) | right (high word)
+
+ vst1.64 {d14}, [r4]! @ psad[0] = left, psad[1] = right
+ vst1.64 {d10}, [r4]! @ psad[2] = top, psad[3] = bottom
+
+ vpop {d8-d15} @ restore callee-saved NEON registers
+ ldmfd sp!, {r4-r7, pc}
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ime_compute_satqd_16x16_lumainter_a9q
+@* Description : This function computes SAD for a 16x16 block.
+@ : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+@ : As soon as one block is flagged nonzero, the remaining blocks only have their SAD computed
+@
+@ Arguments : R0 :pointer to src buffer
+@ R1 :pointer to est buffer
+@ R2 :source stride
+@ R3 :est stride
+@ STACK :threshold pointer, distortion pointer, is_nonzero pointer
+@ : The threshold pointer is read as 8 halfwords (vector thresholds)
+@ : followed by 1 halfword at byte offset 16 (per block SAD threshold)
+@*
+@* Values Returned : NONE (SAD is stored to *distortion, the flag to *is_nonzero)
+@*
+@* Register Usage : R0-R11, Q0-Q15
+@* Stack Usage : 104 bytes (R4-R12, LR and D8-D15)
+@* Cycles : Around
+@* Interruptibility : Interruptible
+@*
+@* Known Limitations
+@* \Assumptions :
+@*
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 14 04 2014 Harinarayanan K K First version
+@*
+@*****************************************************************************
+ .global ime_compute_satqd_16x16_lumainter_a9q
+ime_compute_satqd_16x16_lumainter_a9q:
+ @R0 :pointer to src buffer
+ @R1 :pointer to est buffer
+ @R2 :Source stride
+ @R3 :Pred stride
+ @R4 :Threshold pointer
+ @R5 :Distortion,ie SAD
+ @R6 :is nonzero
+
+ push {r4-r12, lr} @save the core registers (40 bytes)
+ vpush {d8-d15} @q4-q7 are callee-saved under the AAPCS (64 bytes)
+ ldr r4, [sp, #104] @load the threshold address (1st stack argument, 40 + 64)
+
+ mov r8, #8 @Number of 8x4 pixel blocks (two 4x4 blocks each) to be processed
+ mov r10, #0 @Sad
+ mov r7, #0 @Nonzero info
+ @----------------------------------------------------
+
+ vld1.u8 d30, [r0], r2 @I load 8 pix src row 1
+
+ vld1.u8 d31, [r1], r3 @I load 8 pix pred row 1
+
+ vld1.u8 d28, [r0], r2 @I load 8 pix src row 2
+
+ vld1.u8 d29, [r1], r3 @I load 8 pix pred row 2
+
+ vld1.u8 d26, [r0], r2 @I load 8 pix src row 3
+ vabdl.u8 q0, d30, d31 @I Abs diff r1 blk 12
+
+ vld1.u8 d27, [r1], r3 @I load 8 pix pred row 3
+
+ vld1.u8 d24, [r0], r2 @I load 8 pix src row 4
+
+ vld1.u8 d25, [r1], r3 @I load 8 pix pred row 4
+ vabdl.u8 q1, d28, d29 @I Abs diff r2 blk 12
+
+ vld1.u16 {q11}, [r4] @I load the threshold
+ vabdl.u8 q2, d26, d27 @I Abs diff r3 blk 12
+
+ vabdl.u8 q3, d24, d25 @I Abs diff r4 blk 12
+
+
+
+core_loop:
+ @Instructions tagged I work on the current block, those tagged II
+ @fetch and pre-process the next block
+ @S1 S2 S3 S4 A1 A2 A3 A4
+ @S5 S6 S7 S8 A5 A6 A7 A8
+ @S9 S10 S11 S12 A9 A10 A11 A12
+ @S13 S14 S15 S16 A13 A14 A15 A16
+ ands r11, r8, #1 @II See if we are at even or odd block
+ vadd.u16 q4 , q0, q3 @I Add r1 r4
+ lsl r11, r2, #2 @II Move back src 4 rows
+
+ subeq r0, r0, r11 @II Move back src 4 rows if we are at even block
+ vadd.u16 q5 , q1, q2 @I Add r2 r3
+ addeq r0, r0, #8 @II Move src 8 cols forward if we are at even block
+
+ lsl r11, r3, #2 @II Move back pred 4 rows
+ vtrn.16 d8 , d10 @I trnspse 1
+ subeq r1, r1, r11 @II Move back pred 4 rows if we are at even block
+
+ addeq r1, r1, #8 @II Move pred 8 cols forward if we are at even block
+ vtrn.16 d9 , d11 @I trnspse 2
+ subne r0, r0, #8 @II Src 8 cols back for odd rows
+
+ subne r1, r1, #8 @II Pred 8 cols back for odd rows
+ vtrn.32 d10, d11 @I trnspse 4
+
+
+ vtrn.32 d8 , d9 @I trnspse 3
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ @D8 S1 S4 A1 A4
+ @D9 S2 S3 A2 A3
+ @D11 S1 S4 A1 A4
+ @D10 S2 S3 A2 A3
+
+ vadd.s16 q6, q4, q5 @I Get s1 s4
+ vld1.u8 d30, [r0], r2 @II load first 8 pix src row 1
+
+ vtrn.s16 d12, d13 @I Get s2 s3
+ @D12 S1 S4 A1 A4
+ @D13 S2 S3 A2 A3
+
+ vshl.s16 q7, q6 , #1 @I si = si<<1
+ vld1.u8 d31, [r1], r3 @II load first 8 pix pred row 1
+
+ vpadd.s16 d16, d12, d13 @I (s1 + s4) (s2 + s3)
+ vld1.u8 d28, [r0], r2 @II load first 8 pix src row 2
+ @ D16 S14 A14 S23 A23
+ vrev32.16 d0, d16 @I
+ vuzp.s16 d16, d0 @I
+ @D16 S14 S23 A14 A23
+ vadd.s16 d17, d12, d13 @I (s1 + s2) (s3 + s4)
+ vld1.u8 d29, [r1], r3 @II load first 8 pix pred row 2
+ @D17 S12 S34 A12 A34
+
+ vrev32.16 q9, q7 @I Rearrange si's
+ @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+ @D12 S1 S4 A1 A4
+ @D19 Z3 Z2 Y3 Y2
+ vsub.s16 d8, d12, d19 @I (s1 - (s3<<1)) (s4 - (s2<<1))
+ vld1.u8 d26, [r0], r2 @II load first 8 pix src row 3
+ @D13 S2 S3 A2 A3
+ @D18 Z4 Z1 Y4 Y1
+ vsub.s16 d9, d13, d18 @I (s2 - (s4<<1)) (s3 - (s1<<1))
+ vld1.u8 d27, [r1], r3 @II load first 8 pix pred row 3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+ @D16 S14 S23 A14 A23
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ vld1.u8 d24, [r0], r2 @II load first 8 pix src row 4
+ @D22 SAD1 SAD2 junk junk
+
+
+ @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+ vtrn.32 q8, q4 @I Rearrange to make ls of each block together
+ @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+ @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+
+ ldrh r11, [r4, #16] @I Load the threshold for DC val blk 1
+ vdup.s16 q6, d10[0] @I Get the sad blk 1
+ vabdl.u8 q0, d30, d31 @II Abs diff r1 blk 12
+
+ vshl.s16 q7, q6, #1 @I sad_2 = sad_1<<1
+ vmov.s16 r9, d10[0] @I Get the sad for block 1
+
+ vsub.s16 q9, q7, q8 @I (sad<<1) - ls, blk 1
+ vmov.s16 r5, d10[1] @I Get the sad for block 2
+
+ vcle.s16 q7, q11, q9 @I Flag the lanes where threshold <= value, blk 1
+ vld1.u8 d25, [r1], r3 @II load first 8 pix pred row 4
+
+ vdup.s16 q15, d10[1] @I Get the sad blk 2
+ vabdl.u8 q1, d28, d29 @II Abs diff r2 blk 12
+
+
+ vshl.s16 q14, q15, #1 @I sad_2 = sad_1<<1
+ vsub.s16 q3, q14, q4 @I (sad<<1) - ls, blk 2
+ vcle.s16 q15, q11, q3 @I Flag the lanes where threshold <= value, blk 2
+
+ ADD R10, R10, R9 @I Add to the global sad blk 1
+ vtrn.u8 q15, q7 @I get all comparison bits to one reg
+ vabdl.u8 q2, d26, d27 @II Abs diff r3 blk 12
+
+ ADD R10, R10, R5 @I Add to the global sad blk 2
+ vshr.u8 q14, q15, #7 @I Shift the bits so that no overflow occurs
+ cmp r11, r9 @I Compare with threshold blk 1
+
+ movle r7, #0xf @I If not met mark it by moving non zero val to R7 blk 1
+ vadd.u8 d28, d28, d29 @I Add the bits
+ cmp r11, r5 @I Compare with threshold blk 2
+
+ movle r7, #0xf @I If not met mark it by moving non zero val to R7 blk 2
+ vpadd.u8 d28, d28, d29 @I Add the bits
+
+ vmov.u32 r11, d28[0] @I A set bit now represents an unsatisfied condition, move it to r11
+ vabdl.u8 q3, d24, d25 @II Abs diff r4 blk 12
+
+ orr r7, r7, r11 @I merge the vector result into the nonzero flag
+
+
+ sub r8, r8, #1 @I Decrement block count
+
+ cmp r7, #0 @I If we have at least one non zero block
+ bne compute_sad_only @I if a non zero block is there, from now on compute sad only
+
+ cmp r8, #1 @I See if we are at the last block
+ bne core_loop @I If the blocks are zero, lets continue the satqd
+
+
+ @EPILOGUE for core loop: the last block has already been loaded and
+ @differenced into q0-q3 by the final loop iteration
+ @S1 S2 S3 S4 A1 A2 A3 A4
+ @S5 S6 S7 S8 A5 A6 A7 A8
+ @S9 S10 S11 S12 A9 A10 A11 A12
+ @S13 S14 S15 S16 A13 A14 A15 A16
+ vadd.u16 q4 , q0, q3 @Add r1 r4
+ vadd.u16 q5 , q1, q2 @Add r2 r3
+ @D8 S1 S2 S2 S1
+ @D10 S4 S3 S3 S4
+ @D9 A1 A2 A2 A1
+ @D11 A4 A3 A3 A4
+ vtrn.16 d8 , d10 @I trnspse 1
+ vtrn.16 d9 , d11 @I trnspse 2
+ vtrn.32 d8 , d9 @I trnspse 3
+ vtrn.32 d10, d11 @I trnspse 4
+
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ @D8 S1 S4 A1 A4
+ @D9 S2 S3 A2 A3
+ @D11 S1 S4 A1 A4
+ @D10 S2 S3 A2 A3
+ vadd.s16 q6, q4, q5 @Get s1 s4
+ vtrn.s16 d12, d13 @Get s2 s3
+ @D12 S1 S4 A1 A4
+ @D13 S2 S3 A2 A3
+
+ vshl.s16 q7, q6 , #1 @si = si<<1
+
+ vpadd.s16 d16, d12, d13 @(s1 + s4) (s2 + s3)
+ @D16 S14 A14 S23 A23
+ vrev32.16 d30, d16 @
+ vuzp.s16 d16, d30 @
+ @D16 S14 S23 A14 A23
+ vadd.s16 d17, d12, d13 @(s1 + s2) (s3 + s4)
+ @D17 S12 S34 A12 A34
+
+ vrev32.16 q9, q7 @Rearrange si's
+ @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+ @D12 S1 S4 A1 A4
+ @D19 Z3 Z2 Y3 Y2
+ vsub.s16 d8, d12, d19 @(s1 - (s3<<1)) (s4 - (s2<<1))
+ @D13 S2 S3 A2 A3
+ @D18 Z4 Z1 Y4 Y1
+ vsub.s16 d9, d13, d18 @(s2 - (s4<<1)) (s3 - (s1<<1))
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+ @D16 S14 S23 A14 A23
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ @D22 SAD1 SAD2 junk junk
+ vmov.u16 r9, d10[0] @Get the sad for block 1
+ vmov.u16 r5, d10[1] @Get the sad for block 2
+
+ @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+ ldrh r11, [r4, #16] @Load the threshold for DC val blk 1
+ vtrn.32 q8, q4 @Rearrange to make ls of each block together
+ ADD R10, R10, R9 @Add to the global sad blk 1
+
+ @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+ @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+ vld1.u16 {q11}, [r4] @load the threshold
+ ADD R10, R10, R5 @Add to the global sad blk 2
+
+ vdup.u16 q6, d10[0] @Get the sad blk 1
+
+ cmp r11, r9 @Compare with threshold blk 1
+ vshl.u16 q7, q6, #1 @sad_2 = sad_1<<1
+
+ vsub.s16 q9, q7, q8 @(sad<<1) - ls, blk 1
+
+ vcle.s16 q15, q11, q9 @Flag the lanes where threshold <= value, blk 1
+ movle r7, #0xf @If not met mark it by moving non zero val to R7 blk 1
+
+ cmp r11, r5 @Compare with threshold blk 2
+ vdup.u16 q14, d10[1] @Get the sad blk 2
+
+ vshl.u16 q13, q14, #1 @sad_2 = sad_1<<1
+ vsub.s16 q12, q13, q4 @(sad<<1) - ls, blk 2
+ vcle.s16 q14, q11, q12 @Flag the lanes where threshold <= value, blk 2
+ movle r7, #0xf @If not met mark it by moving non zero val to R7 blk 2
+
+ vtrn.u8 q14, q15 @get all comparison bits to one reg
+ vshr.u8 q14, q14, #7 @Shift the bits so that no overflow occurs
+ vadd.u8 d28, d28, d29 @Add the bits
+ vpadd.u8 d28, d28, d29 @Add the bits
+ vmov.u32 r11, d28[0] @A set bit now represents an unsatisfied condition, move it to r11
+ orr r7, r7, r11 @merge the vector result into the nonzero flag
+
+ b funcend_sad_16x16 @Since all blocks are processed now, go to end
+
+compute_sad_only: @This block computes SAD only, so will be lighter
+ @It may start processing at an odd block
+ @It will compute sad for the odd block,
+ @and then for two blocks at a time
+ @The counter is r8, hence r8 blocks will be processed
+
+ and r11, r8, #1 @Get the last bit of counter
+ cmp r11, #0 @See if we are at even or odd block
+ @if the blk is even we just have to set the pointer to the
+ @start of current row
+
+ lsleq r11, r2, #2 @I Move back src 4 rows
+ subeq r0, r0, r11 @I Move back src 4 rows if we are at even block
+
+ lsleq r11, r3, #2 @I Move back pred 4 rows
+ subeq r1, r1, r11 @I Move back pred 4 rows if we are at even block
+ beq skip_odd_blk @If the blk is odd we have to compute sad
+
+
+ vadd.u16 q4, q0, q1 @Add SAD of row1 and row2
+ vadd.u16 q5, q2, q3 @Add SAD of row3 and row4
+ vadd.u16 q6, q4, q5 @Add SAD of row 1-4
+ vadd.u16 d14, d12, d13 @Add Blk1 and blk2
+ vpadd.u16 d16, d14, d15 @Add col 1-2 and 3-4
+ vpadd.u16 d18, d16, d17 @Add col 12-34
+
+ vmov.u16 r9, d18[0] @Move sad to arm
+ ADD R10, R10, R9 @Add to the global sad
+
+ sub r8, r8, #1 @Dec counter
+ cmp r8, #0 @See if we processed last block
+ beq funcend_sad_16x16 @if processed last block goto end of func
+
+ sub r0, r0, #8 @Since we processed odd block move back src by 8 cols
+ sub r1, r1, #8 @Since we processed odd block move back pred by 8 cols
+
+skip_odd_blk:
+
+ vmov.s16 q0, #0 @Initialize the accumulator
+ vmov.s16 q1, #0 @Initialize the accumulator
+
+ vld1.u8 {q15}, [r0], r2 @load src r1
+ vld1.u8 {q14}, [r1], r3 @load pred r1
+
+ vld1.u8 {q13}, [r0], r2 @load src r2
+ vld1.u8 {q12}, [r1], r3 @load pred r2
+
+ vld1.u8 {q11}, [r0], r2 @load src r3
+ vld1.u8 {q10}, [r1], r3 @load pred r3
+
+ vld1.u8 {q9}, [r0], r2 @load src r4
+ vld1.u8 {q8}, [r1], r3 @load pred r4
+
+ cmp r8, #2
+ beq sad_epilouge
+
+sad_loop:
+
+ vabal.u8 q0, d30, d28 @I accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @I accumulate Abs diff R1
+
+ vld1.u8 {q15}, [r0], r2 @II load r1 src
+ vabal.u8 q0, d26, d24 @I accumulate Abs diff R2
+
+ vld1.u8 {q14}, [r1], r3 @II load r1 pred
+ vabal.u8 q1, d27, d25 @I accumulate Abs diff R2
+
+ vld1.u8 {q13}, [r0], r2 @II load r2 src
+ vabal.u8 q0, d22, d20 @I accumulate Abs diff R3
+
+ vld1.u8 {q12}, [r1], r3 @II load r2 pred
+ vabal.u8 q1, d23, d21 @I accumulate Abs diff R3
+
+ vld1.u8 {q11}, [r0], r2 @II load r3 src
+ vabal.u8 q0, d18, d16 @I accumulate Abs diff R4
+
+
+ sub r8, r8, #2 @Since we process 16 pix at a time, dec by 2
+ vld1.u8 {q10}, [r1], r3 @II load r3 pred
+ vabal.u8 q1, d19, d17 @I accumulate Abs diff R4
+
+ cmp r8, #2 @Check if last loop
+ vld1.u8 {q9}, [r0], r2 @II load r4 src
+ vld1.u8 {q8}, [r1], r3 @II load r4 pred
+
+ bne sad_loop @Go back to SAD computation
+
+sad_epilouge:
+ vabal.u8 q0, d30, d28 @Accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @Accumulate Abs diff R1
+
+ vabal.u8 q0, d26, d24 @Accumulate Abs diff R2
+ vabal.u8 q1, d27, d25 @Accumulate Abs diff R2
+
+ vabal.u8 q0, d22, d20 @Accumulate Abs diff R3
+ vabal.u8 q1, d23, d21 @Accumulate Abs diff R3
+
+ vabal.u8 q0, d18, d16 @Accumulate Abs diff R4
+ vabal.u8 q1, d19, d17 @Accumulate Abs diff R4
+
+ vadd.u16 q2, q0, q1 @ADD two accumulators
+ vadd.u16 d6, d4, d5 @Add two blk sad
+ vpadd.u16 d8, d6, d7 @Add col 1-2 and 3-4 sad
+ vpadd.u16 d10, d8, d9 @Add col 12-34 sad
+
+ vmov.u16 r9, d10[0] @move SAD to ARM
+ ADD R10, R10, R9 @Add to the global sad
+
+funcend_sad_16x16: @End of function process
+ ldr r5, [sp, #108] @distortion pointer (2nd stack argument)
+ ldr r6, [sp, #112] @is_nonzero pointer (3rd stack argument)
+
+ str r7, [r6] @Store the nonzero flag
+ str r10, [r5] @Store sad
+
+ vpop {d8-d15} @restore callee-saved NEON registers
+ pop {r4-r12, pc}
+
+
diff --git a/encoder/arm/ime_platform_macros.h b/encoder/arm/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/arm/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+
+/**
+ * Adds to sad the sum of absolute differences of the first four elements of
+ * src and est, and yields the updated value of sad.
+ *
+ * src and est are each expanded four times, so they must not have side
+ * effects. sad must be a modifiable lvalue. ABS is not defined in this
+ * header; it has to be visible wherever the macro is expanded.
+ */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
new file mode 100755
index 0000000..c442077
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -0,0 +1,592 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates first three 16x16 modes and compute corresponding sad
+//* and return the buffer predicted with best mode.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels_i16
+//* UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.globl ih264e_evaluate_intra16x16_modes_av8
+
+ih264e_evaluate_intra16x16_modes_av8:
+
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+
+
+
+ // STMFD sp!, {x4-x12, x14} //store register values to stack
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ ldr x16, [sp, #80]
+ mov x17, x4
+ mov x18, x5
+ mov x14, x6
+ mov x15, x7
+
+
+ sub v0.16b, v0.16b, v0.16b
+ sub v1.16b, v1.16b, v1.16b
+ mov w10, #0
+ mov w11 , #3
+
+ ands x6, x5, #0x01
+ beq top_available //LEFT NOT AVAILABLE
+ ld1 {v0.16b}, [x1]
+ add w10, w10, #8
+ add w11, w11, #1
+top_available:
+ ands x6, x5, #0x04
+ beq none_available
+ add x6, x1, #17
+ ld1 {v1.16b}, [x6]
+ add w10, w10, #8
+ add w11, w11, #1
+ b summation
+none_available:
+ cmp x5, #0
+ bne summation
+ mov w6, #128
+ dup v30.16b, w6
+ dup v31.16b, w6
+ b sad_comp
+summation:
+ uaddl v2.8h, v0.8b, v1.8b
+ uaddl2 v3.8h, v0.16b, v1.16b
+ dup v10.8h, w10
+ neg w11, w11
+ dup v20.8h, w11
+ add v0.8h, v2.8h, v3.8h
+ mov v1.d[0], v0.d[1]
+ add v0.4h, v0.4h, v1.4h
+ addp v0.4h, v0.4h , v0.4h
+ addp v0.4h, v0.4h , v0.4h
+ add v0.4h, v0.4h, v10.4h
+ uqshl v0.8h, v0.8h, v20.8h
+ sqxtun v0.8b, v0.8h
+
+ dup v30.16b, v0.b[0]
+ dup v31.16b, v0.b[0]
+
+
+sad_comp:
+ ld1 { v0.2s, v1.2s }, [x0], x3 // source x0w 0
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 1
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 2
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row 3
+
+ //---------------------
+
+ //values for vertical prediction
+ add x6, x1, #17
+ ld1 {v10.8b}, [x6], #8
+ ld1 {v11.8b}, [x6], #8
+ ld1 {v9.16b}, [x1]
+
+
+
+ dup v20.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+
+
+///* computing SADs for all three modes*/
+ ///vertical row 0@
+ uabdl v16.8h, v0.8b, v10.8b
+ uabdl v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabdl v26.8h, v0.8b, v20.8b
+ uabdl v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabdl v22.8h, v0.8b, v30.8b
+ uabdl v24.8h, v1.8b, v31.8b
+
+
+
+
+
+ dup v20.8b, v9.b[14] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[14]
+
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row 4
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[13] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[13]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 5
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[12] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[12]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 6
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+//----------------------------------------------------------------------------------------------
+
+ dup v20.8b, v9.b[11] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[11]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row 7
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[10] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[10]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row 8
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[9] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[9]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 9
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[8] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[8]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 10
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+ dup v20.8b, v9.b[7] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[7]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row11
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[6] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[6]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row12
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[5] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[5]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row13
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[4] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[4]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row14
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+ //-----------------------------------------------------------------
+
+ dup v20.8b, v9.b[3] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[3]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row15
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[2] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[2]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[1] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[1]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[0] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[0]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+ //------------------------------------------------------------------------------
+
+
+ //vert sum
+
+ add v16.8h, v16.8h , v18.8h
+ mov v18.d[0], v16.d[1]
+ add v16.4h, v16.4h , v18.4h
+ uaddlp v16.2s, v16.4h
+ addp v16.2s, v16.2s, v16.2s
+ smov x8, v16.s[0] //dc
+
+
+ //horz sum
+
+ add v26.8h, v26.8h , v28.8h
+ mov v28.d[0], v26.d[1]
+ add v26.4h, v26.4h , v28.4h
+ uaddlp v26.2s, v26.4h
+ addp v26.2s, v26.2s, v26.2s
+ smov x9, v26.s[0]
+
+ //dc sum
+
+ add v24.8h, v22.8h , v24.8h ///DC
+ mov v25.d[0], v24.d[1]
+ add v24.4h, v24.4h , v25.4h ///DC
+ uaddlp v24.2s, v24.4h ///DC
+ addp v24.2s, v24.2s, v24.2s ///DC
+ smov x10, v24.s[0] //dc
+
+
+ //-----------------------
+ mov x11, #1
+ lsl x11, x11, #30
+
+ mov x0, x16
+ //--------------------------------------------
+ ands x7, x0, #01 // vert mode valid????????????
+ csel x8, x11, x8, eq
+
+
+ ands x6, x0, #02 // horz mode valid????????????
+ csel x9, x11, x9, eq
+
+ ands x6, x0, #04 // dc mode valid????????????
+ csel x10, x11, x10, eq
+
+
+
+
+//--------------------------------
+
+ mov x4, x17
+ mov x7, x15
+ mov x6, x14
+
+ //---------------------------
+
+ //--------------------------
+
+ cmp x8, x9
+ bgt not_vert
+ cmp x8, x10
+ bgt do_dc
+
+ ///----------------------
+ //DO VERTICAL PREDICTION
+ str x8 , [x7] //MIN SAD
+ mov x8, #0
+ str x8 , [x6] // MODE
+ add x6, x1, #17
+ ld1 {v30.16b}, [x6]
+ b do_dc_vert
+ //-----------------------------
+not_vert: cmp x9, x10
+ bgt do_dc
+
+ ///----------------------
+ //DO HORIZONTAL
+ str x9 , [x7] //MIN SAD
+ mov x9, #1
+ str x9 , [x6] // MODE
+
+ ld1 {v0.16b}, [x1]
+ dup v10.16b, v0.b[15]
+ dup v11.16b, v0.b[14]
+ dup v12.16b, v0.b[13]
+ dup v13.16b, v0.b[12]
+ st1 {v10.16b}, [x2], x4
+ dup v14.16b, v0.b[11]
+ st1 {v11.16b}, [x2], x4
+ dup v15.16b, v0.b[10]
+ st1 {v12.16b}, [x2], x4
+ dup v16.16b, v0.b[9]
+ st1 {v13.16b}, [x2], x4
+ dup v17.16b, v0.b[8]
+ st1 {v14.16b}, [x2], x4
+ dup v18.16b, v0.b[7]
+ st1 {v15.16b}, [x2], x4
+ dup v19.16b, v0.b[6]
+ st1 {v16.16b}, [x2], x4
+ dup v20.16b, v0.b[5]
+ st1 {v17.16b}, [x2], x4
+ dup v21.16b, v0.b[4]
+ st1 {v18.16b}, [x2], x4
+ dup v22.16b, v0.b[3]
+ st1 {v19.16b}, [x2], x4
+ dup v23.16b, v0.b[2]
+ st1 {v20.16b}, [x2], x4
+ dup v24.16b, v0.b[1]
+ st1 {v21.16b}, [x2], x4
+ dup v25.16b, v0.b[0]
+ st1 {v22.16b}, [x2], x4
+ st1 {v23.16b}, [x2], x4
+ st1 {v24.16b}, [x2], x4
+ st1 {v25.16b}, [x2], x4
+
+
+
+ b end_func
+
+
+ ///-----------------------------
+
+do_dc: ///---------------------------------
+ //DO DC
+ str x10 , [x7] //MIN SAD
+ mov x10, #2
+ str x10 , [x6] // MODE
+do_dc_vert:
+ st1 {v30.4s}, [x2], x4 //0
+ st1 {v30.4s}, [x2], x4 //1
+ st1 {v30.4s}, [x2], x4 //2
+ st1 {v30.4s}, [x2], x4 //3
+ st1 {v30.4s}, [x2], x4 //4
+ st1 {v30.4s}, [x2], x4 //5
+ st1 {v30.4s}, [x2], x4 //6
+ st1 {v30.4s}, [x2], x4 //7
+ st1 {v30.4s}, [x2], x4 //8
+ st1 {v30.4s}, [x2], x4 //9
+ st1 {v30.4s}, [x2], x4 //10
+ st1 {v30.4s}, [x2], x4 //11
+ st1 {v30.4s}, [x2], x4 //12
+ st1 {v30.4s}, [x2], x4 //13
+ st1 {v30.4s}, [x2], x4 //14
+ st1 {v30.4s}, [x2], x4 //15
+ ///------------------
+end_func:
+ // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
new file mode 100755
index 0000000..b02afd1
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -0,0 +1,467 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC)
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates first three intra chroma modes and compute corresponding sad
+//* and return the buffer predicted with best mode.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//** @param[in] pu1_ngbr_pels
+//* UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.global ih264e_evaluate_intra_chroma_modes_av8
+
+ih264e_evaluate_intra_chroma_modes_av8:
+
+// Evaluates three intra predictions (DC, horizontal, vertical) for a block of
+// 8 rows x 16 bytes read from pu1_src, accumulates one SAD per prediction,
+// replaces the SAD of every mode whose bit is clear in u4_valid_intra_modes
+// (bit 0 = DC, bit 1 = HORZ, bit 2 = VERT) by 1 << 30, and then
+//   - stores the smallest SAD as a 32-bit word to *pu4_sadmin,
+//   - stores the winning mode (0 = DC, 1 = HORZ, 2 = VERT) as a 32-bit word
+//     to *u4_intra_mode,
+//   - writes the winning prediction (8 rows x 16 bytes) to pu1_dst.
+// Ties are resolved in the order DC, then HORZ, then VERT.
+// The neighbour buffer is read as 16 bytes at pu1_ngbr_pels (one 2-byte pair
+// per row, last row first) and 16 bytes at pu1_ngbr_pels + 18 (the row above).
+// NOTE(review): the 2-byte pairs are presumably interleaved U/V samples -
+// confirm against the caller.
+//
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+//[sp] = u4_valid_intra_modes (ninth argument, read from the caller's stack)
+
+
+
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ //-----------------------
+ // The #80 offset assumes push_v_regs lowered sp by 64 bytes (plus the 16
+ // bytes pushed just above) - TODO confirm in ih264_neon_macros.s.
+ ldr x16, [sp, #80]
+ // src_strd / dst_strd are 32-bit (UWORD32) arguments; the upper halves of
+ // x3 / x4 are unspecified on entry, so clear them before they are used as
+ // 64-bit post-index offsets. Writing a W register zeroes bits 63:32.
+ mov w3, w3
+ mov w4, w4
+ mov x17, x4
+ // (x5 is not copied to x18 any more: x18 is the platform register and the
+ // copy was never read.)
+ mov x14, x6
+ mov x15, x7
+
+ // Keep only bits 0 and 2 of u4_n_avblty: 0 -> nothing available,
+ // 1 -> left only, 4 -> top only, 5 -> both.
+ mov x19, #5
+ ands x6, x5, x19
+ beq none_available
+ cmp x6, #1
+ beq left_only_available
+ cmp x6, #4
+ beq top_only_available
+
+ // DC values are built per 4x4-pair quadrant. Bytes are widened to
+ // halfwords, then two addp passes on 32-bit lanes sum four 2-halfword
+ // pairs, so even and odd bytes accumulate separately in h[0] and h[1].
+ // rshrn narrows back to bytes with rounding, leaving one 2-byte pair in
+ // h[0] that is then broadcast.
+ // Result: v28/v29 = DC for rows 0-3 (left/right half),
+ //         v30/v31 = DC for rows 4-7 (left/right half).
+all_available:
+ ld1 {v0.8b, v1.8b}, [x1]
+ add x6, x1, #18
+ ld1 {v2.8b, v3.8b}, [x6]
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ uxtl v2.8h, v2.8b
+ uxtl v3.8h, v3.8b
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ rshrn v5.8b, v0.8h, #2
+ dup v21.8h, v5.h[0]
+ rshrn v6.8b, v3.8h, #2
+ dup v20.8h, v6.h[0]
+ add v1.8h, v1.8h, v2.8h
+ rshrn v1.8b, v1.8h, #3
+ dup v23.8h, v1.h[0]
+ mov v20.d[0], v23.d[0]
+ add v0.8h, v0.8h, v3.8h
+ rshrn v0.8b, v0.8h, #3
+ dup v23.8h, v0.h[0]
+ mov v31.d[0], v23.d[0]
+ mov v28.d[0], v20.d[0]
+ mov v29.d[0], v20.d[1]
+ mov v30.d[0], v21.d[0]
+ b sad_comp
+
+ // Only the 16 bytes at pu1_ngbr_pels are used: the second 8 bytes give the
+ // DC for rows 0-3 (v28/v29), the first 8 bytes the DC for rows 4-7
+ // (v30/v31).
+left_only_available:
+ ld1 {v0.8b, v1.8b}, [x1]
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+
+ dup v28.8h , v1.h[0]
+ dup v29.8h , v1.h[0]
+ dup v30.8h, v0.h[0]
+ dup v31.8h, v0.h[0]
+ b sad_comp
+
+ // Only the 16 bytes at pu1_ngbr_pels + 18 are used: the first 8 bytes give
+ // the DC of the left half (v28 and v30), the second 8 bytes the DC of the
+ // right half (v29 and v31).
+top_only_available:
+ add x6, x1, #18
+ ld1 {v0.8b, v1.8b}, [x6]
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+ dup v28.8h , v0.h[0]
+ dup v30.8h, v1.h[0]
+ mov v29.d[0], v30.d[1]
+ mov v30.d[0], v28.d[0]
+ mov v31.d[0], v30.d[1]
+ b sad_comp
+ // No neighbours usable: every DC byte is 128.
+none_available:
+ mov w20, #128
+ dup v28.16b, w20
+ dup v29.16b, w20
+ dup v30.16b, w20
+ dup v31.16b, w20
+
+
+
+ // SAD accumulation. Accumulators (8 halfword lanes each):
+ //   v16/v18 = vertical, v26/v14 = horizontal, v22/v24 = DC.
+ // The first row uses uabdl to initialise them, later rows use uabal.
+sad_comp:
+ add x6, x1, #18
+ ld1 {v10.8b, v11.8b}, [x6] // vertical values
+
+ // v27.h[7] is the horizontal predictor pair for row 0, v27.h[0] for row 7.
+ ld1 {v27.8h}, [x1]
+
+ dup v20.8h, v27.h[7] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8h, v27.h[7]
+
+ ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+ ///vertical row 0@
+ uabdl v16.8h, v0.8b, v10.8b
+ uabdl v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabdl v26.8h, v0.8b, v20.8b
+ uabdl v14.8h, v1.8b, v21.8b
+
+ ld1 {v2.8b, v3.8b}, [x0], x3
+
+
+
+ ///dc row 0@
+ uabdl v22.8h, v0.8b, v28.8b
+ uabdl v24.8h, v1.8b, v29.8b
+
+
+ dup v20.8h, v27.h[6]
+ dup v21.8h, v27.h[6] ///HORIZONTAL VALUE ROW=1//
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v4.8b, v5.8b}, [x0], x3
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v14.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v28.8b
+ uabal v24.8h, v3.8b, v29.8b
+
+ dup v20.8h, v27.h[5]
+ dup v21.8h, v27.h[5] ///HORIZONTAL VALUE ROW=2//
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v6.8b, v7.8b}, [x0], x3
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v14.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v28.8b
+ uabal v24.8h, v5.8b, v29.8b
+
+ dup v20.8h, v27.h[4]
+ dup v21.8h, v27.h[4] ///HORIZONTAL VALUE ROW=3//
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v14.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v28.8b
+ uabal v24.8h, v7.8b, v29.8b
+
+ //----------------------------------------------------------------------------------------------
+ // Rows 4-7: same pattern, but the DC reference switches to v30/v31.
+ ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+ dup v20.8h, v27.h[3]
+ dup v21.8h, v27.h[3] ///HORIZONTAL VALUE ROW=0//
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v14.8h, v1.8b, v21.8b
+
+ ld1 { v2.8b, v3.8b}, [x0], x3
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8h, v27.h[2]
+ dup v21.8h, v27.h[2] ///HORIZONTAL VALUE ROW=1//
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v14.8h, v3.8b, v21.8b
+
+ ld1 { v4.8b, v5.8b}, [x0], x3
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8h, v27.h[1]
+ dup v21.8h, v27.h[1] ///HORIZONTAL VALUE ROW=2//
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v14.8h, v5.8b, v21.8b
+
+ ld1 {v6.8b, v7.8b}, [x0], x3
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8h, v27.h[0]
+ dup v21.8h, v27.h[0] ///HORIZONTAL VALUE ROW=3//
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v14.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+// Horizontal reduction of each accumulator pair to a scalar:
+// x8 = vertical SAD, x9 = horizontal SAD, x10 = DC SAD.
+
+
+//vert sum
+
+ add v16.8h, v16.8h , v18.8h
+ mov v18.d[0], v16.d[1]
+ add v16.4h, v16.4h , v18.4h
+ uaddlp v16.2s, v16.4h
+ addp v16.2s, v16.2s, v16.2s
+ smov x8, v16.s[0]
+
+
+ //horz sum
+
+ add v26.8h, v26.8h , v14.8h
+ mov v14.d[0], v26.d[1]
+ add v26.4h, v26.4h , v14.4h
+ uaddlp v26.2s, v26.4h
+ addp v26.2s, v26.2s, v26.2s
+ smov x9, v26.s[0]
+
+ //dc sum
+
+ add v24.8h, v22.8h , v24.8h ///DC
+ mov v25.d[0], v24.d[1]
+ add v24.4h, v24.4h , v25.4h ///DC
+ uaddlp v24.2s, v24.4h ///DC
+ addp v24.2s, v24.2s, v24.2s ///DC
+ smov x10, v24.s[0] //dc
+
+
+
+
+ mov x11, #1
+//-----------------------
+ mov x0, x16 // u4_valid_intra_modes
+
+//--------------------------------------------
+
+
+ // x11 = 1 << 30: sentinel SAD given to every disabled mode so that it
+ // cannot win the comparisons below against an enabled mode.
+ lsl x11, x11, #30
+
+ ands x7, x0, #04 // vert mode valid????????????
+ csel x8, x11, x8, eq
+
+ ands x6, x0, #02 // horz mode valid????????????
+ csel x9, x11, x9, eq
+
+ ands x6, x0, #01 // dc mode valid????????????
+ csel x10, x11, x10, eq
+
+
+ //---------------------------
+ // Restore the arguments that were parked in scratch registers:
+ // x4 = dst_strd, x6 = u4_intra_mode, x7 = pu4_sadmin.
+
+ mov x4, x17
+ mov x6, x14
+ mov x7, x15
+
+ //--------------------------
+
+ cmp x10, x9
+ bgt not_dc
+ cmp x10, x8
+ bgt do_vert
+
+ ///----------------------
+ //DO DC PREDICTION
+ // Results are stored as 32-bit words: both output pointers refer to 32-bit
+ // objects (WORD32 * / UWORD32 * in the prototype comment), so a 64-bit
+ // store would overwrite the 4 bytes that follow them.
+ str w10 , [x7] //MIN SAD
+
+ mov x10, #0
+ str w10 , [x6] // MODE
+
+ b do_dc_vert
+ //-----------------------------
+
+not_dc:
+ cmp x9, x8
+ bgt do_vert
+ ///----------------------
+ //DO HORIZONTAL
+ str w9 , [x7] //MIN SAD
+
+ mov x10, #1
+ str w10 , [x6] // MODE
+ ld1 {v0.8h}, [x1]
+
+ // Each output row is one 2-byte pair broadcast across 16 bytes; pair h[7]
+ // goes to the first row and h[0] to the last.
+ dup v10.8h, v0.h[7]
+ dup v11.8h, v0.h[6]
+ dup v12.8h, v0.h[5]
+ dup v13.8h, v0.h[4]
+ st1 {v10.8h}, [x2], x4
+ dup v14.8h, v0.h[3]
+ st1 {v11.8h}, [x2], x4
+ dup v15.8h, v0.h[2]
+ st1 {v12.8h}, [x2], x4
+ dup v16.8h, v0.h[1]
+ st1 {v13.8h}, [x2], x4
+ dup v17.8h, v0.h[0]
+ st1 {v14.8h}, [x2], x4
+ st1 {v15.8h}, [x2], x4
+ st1 {v16.8h}, [x2], x4
+ st1 {v17.8h}, [x2], x4
+
+ b end_func
+
+do_vert:
+ //DO VERTICAL PREDICTION
+ str w8 , [x7] //MIN SAD
+ mov x8, #2
+ str w8 , [x6] // MODE
+ // Reload the row above into both register pairs so that the shared store
+ // sequence below writes it to all eight rows.
+ add x6, x1, #18
+ ld1 {v28.8b, v29.8b}, [x6] // vertical values
+ ld1 {v30.8b, v31.8b}, [x6] // vertical values
+
+ // Shared by DC and vertical: rows 0-3 come from v28/v29, rows 4-7 from
+ // v30/v31.
+do_dc_vert:
+ st1 {v28.2s, v29.2s} , [x2], x4 //0
+ st1 {v28.2s, v29.2s} , [x2], x4 //1
+ st1 {v28.2s, v29.2s} , [x2], x4 //2
+ st1 {v28.2s, v29.2s} , [x2], x4 //3
+ st1 {v30.2s, v31.2s} , [x2], x4 //4
+ st1 {v30.2s, v31.2s} , [x2], x4 //5
+ st1 {v30.2s, v31.2s} , [x2], x4 //6
+ st1 {v30.2s, v31.2s} , [x2], x4 //7
+
+end_func:
+ // Undo the prologue in reverse order.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
new file mode 100755
index 0000000..6dbd8f8
--- /dev/null
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -0,0 +1,1024 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264e_half_pel.s
+// *
+// * @brief
+// *
+// *
+// * @author
+// * Ittiam
+// *
+// * @par List of Functions:
+// * ih264e_sixtapfilter_horz
+// * ih264e_sixtap_filter_2dvh_vert
+//
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+// */
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+// /**
+///*******************************************************************************
+//*
+//* @brief
+//* Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16)
+//*
+//* @par Description:
+//* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+//* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd);
+
+
+.equ halfpel_width , 17 + 1 //( make it even, two rows are processed at a time)
+
+
+ .global ih264e_sixtapfilter_horz_av8
+// Horizontal six-tap filter (a0 - 5*a1 + 20*a2 + 20*a3 - 5*a4 + a5, rounded,
+// shifted right by 5 and saturated to 8 bits).
+//   x0 = pu1_src  : 24 bytes are read per row, starting at pu1_src - 2
+//   x1 = pu1_dst  : 18 bytes are written per row (16 + 2)
+//   x2 = src_strd : byte step between source rows
+//   x3 = dst_strd : byte step between destination rows
+// 16 rows are produced, two per loop iteration.
+ih264e_sixtapfilter_horz_av8:
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // Both strides are 32-bit (WORD32) arguments; the upper halves of x2 / x3
+ // are unspecified on entry, so sign-extend them before they are used in
+ // 64-bit address arithmetic.
+ sxtw x2, w2
+ sxtw x3, w3
+
+ movi v0.8b, #5
+ sub x0, x0, #2
+ // Each row is stored as a 16-byte post-incremented store followed by a
+ // 2-byte store whose post-increment is x3, so x3 holds dst_strd - 16.
+ sub x3, x3, #16
+ movi v1.8b, #20
+ mov x14, #16
+
+filter_horz_loop:
+
+
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+
+ //// Processing row0 and row1
+ // ext #k builds the input shifted by k bytes (tap a_k). Accumulators:
+ // v8/v10/v12 = row0 columns 1-3, v14/v16/v18 = row1 columns 1-3.
+
+ ext v31.8b, v2.8b , v3.8b , #5
+ ext v30.8b, v3.8b , v4.8b , #5
+
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #5
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #5
+ uaddl v12.8h, v29.8b, v4.8b //// a0 + a5 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #5
+
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v18.8h, v26.8b, v7.8b //// a0 + a5 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #2
+
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #3
+
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #1
+
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #4
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #4
+
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1)
+
+ // sqrshrun reads the accumulators as signed halfwords, so sums that went
+ // negative saturate to 0 and large sums saturate to 255.
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ sqrshrun v22.8b, v12.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ sqrshrun v25.8b, v18.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1)
+
+ // Only the first two bytes of the third column are stored per row.
+ st1 {v20.8b, v21.8b}, [x1], #16 ////Store dest row0
+ st1 {v22.h}[0], [x1], x3
+ st1 {v23.8b, v24.8b}, [x1], #16 ////Store dest row1
+ st1 {v25.h}[0], [x1], x3
+
+ subs x14, x14, #2 // decrement counter
+
+ bne filter_horz_loop
+
+
+ // Undo the prologue in reverse order.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements a two stage cascaded six tap filter. It
+//* applies the six tap filter in the vertical direction on the
+//* predictor values, followed by applying the same filter in the
+//* horizontal direction on the output of the first stage. The six tap
+//* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+//* interpolation process"
+//* (Filter run for width = 17 and height =17)
+//* @par Description:
+//* The function interpolates
+//* the predictors first in the vertical direction and then in the
+//* horizontal direction to output the (1/2,1/2). The output of the first
+//* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+//* in 16 bit precision.
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst1
+//* UWORD8 pointer to the destination(vertical filtered output)
+//*
+//* @param[out] pu1_dst2
+//* UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride of pu1_dst
+//*
+//* @param[in]pi16_pred1
+//* Pointer to 16bit intermediate buffer(used only in c)
+//*
+//* @param[in] pi16_pred1_strd
+//* integer destination stride of pi16_pred1
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst1,
+// UWORD8 *pu1_dst2,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 *pi16_pred1,/* Pointer to 16bit intermmediate buffer (used only in c)*/
+// WORD32 pi16_pred1_strd)
+
+
+
+
+ .global ih264e_sixtap_filter_2dvh_vert_av8
+
+ih264e_sixtap_filter_2dvh_vert_av8:
+ // STMFD sp!,{x10,x11,x12,x14}
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+////x0 - pu1_ref
+////x3 - u4_ref_width
+
+ //// Load six rows for vertical interpolation
+ lsl x12, x3, #1
+ sub x0, x0, x12
+ sub x0, x0, #2
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3
+ ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3
+ mov x12, #5
+ ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3
+ mov x14, #20
+ ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3
+ mov v0.4h[0], w12
+ mov v0.4h[1], w14
+ ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3
+ movi v1.8b, #20
+
+//// x12 - u2_buff1_width
+//// x14 - u2_buff2_width
+ mov x12, x4
+ add x11, x1, #16
+
+ mov x14, x12
+
+ mov x10, #3 //loop counter
+ sub x16 , x12, #8
+ sub x19, x14, #16
+filter_2dvh_loop:
+
+ //// ////////////// ROW 1 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v2.8b, v17.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ uaddl v22.8h, v3.8b, v18.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v30.8b, v20.8b , v21.8b , #4
+ mov v23.d[0], v22.d[1]
+
+
+ uaddl v24.8h, v4.8b, v19.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ sqrshrun v2.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v3.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v4.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+ mov v21.d[0], v20.d[1]
+ ext v2.8b, v2.8b , v3.8b , #2
+ ext v3.8b, v3.8b , v4.8b , #2
+ ext v4.8b, v4.8b , v4.8b , #2
+
+ st1 {v2.8b, v3.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v4.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v2.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v2.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v2.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v2.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v2.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v2.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 2 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v5.8b, v2.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v6.8b, v3.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v7.8b, v4.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v5.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v6.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v7.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v5.8b, v5.8b , v6.8b , #2
+ ext v6.8b, v6.8b , v7.8b , #2
+ ext v7.8b, v7.8b , v7.8b , #2
+
+ st1 {v5.8b, v6.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v7.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v6.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v6.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v6.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v6.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v6.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v6.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 3 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v8.8b, v5.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v9.8b, v6.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v10.8b, v7.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 { v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v8.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v9.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v10.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v8.8b, v8.8b , v9.8b , #2
+ ext v9.8b, v9.8b , v10.8b , #2
+ ext v10.8b, v10.8b , v10.8b , #2
+
+ st1 {v8.8b, v9.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v10.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v8.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v8.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v8.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v8.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v8.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v8.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 4 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v11.8b, v8.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v12.8b, v9.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v13.8b, v10.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v11.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v12.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v13.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v11.8b, v11.8b , v12.8b , #2
+ ext v12.8b, v12.8b , v13.8b , #2
+ ext v13.8b, v13.8b , v13.8b , #2
+
+ st1 {v11.8b, v12.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v13.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v12.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v12.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v12.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v12.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v12.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v12.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 5 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v14.8b, v11.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v15.8b, v12.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v16.8b, v13.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v14.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v15.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v16.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v14.8b, v14.8b , v15.8b , #2
+ ext v15.8b, v15.8b , v16.8b , #2
+ ext v16.8b, v16.8b , v16.8b , #2
+
+ st1 {v14.8b, v15.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v16.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v14.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v14.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v14.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v14.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v14.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v14.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 6 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+ cmp x10, #1 //// if it 17 rows are complete skip
+ beq filter_2dvh_skip_row
+ uaddl v20.8h, v17.8b, v14.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v18.8b, v15.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v19.8b, v16.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v17.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v18.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v19.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v17.8b, v17.8b , v18.8b , #2
+ ext v18.8b, v18.8b , v19.8b , #2
+ ext v19.8b, v19.8b , v19.8b , #2
+
+ st1 {v17.8b, v18.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v19.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v18.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v18.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v18.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v18.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v18.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v18.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ subs x10, x10, #1 ////decrement loop counter
+
+ bne filter_2dvh_loop
+
+
+//// Process first vertical interpolated row
+//// each column is
+ //// ////////////// ROW 13 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+ // LDMFD sp!,{x10,x11,x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+filter_2dvh_skip_row: //// early-exit tail (taken when x10 == 1): flush pending half,half results and return
+ mov v28.d[1], v29.d[0] //// pack set3 (v28 low half) and set4 (v29 low half) into one 8h vector
+ sqrshrun v27.8b, v28.8h, #2 //// half,half grid set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half grid set5
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store half,half grid values for set1..set4 (16 bytes)
+ st1 {v28.h}[0], [x2], x19 //// store the first halfword of set5, then advance x2 by x19
+ // LDMFD sp!,{x10,x11,x12,pc} (old ARMv7 epilogue, kept for reference)
+ ldp x19, x20, [sp], #16 //// restore x19/x20 from the stack
+ pop_v_regs //// macro defined outside this view; presumably restores the saved SIMD registers - confirm
+ ret
+
+
+///*****************************************
+
+
+
+
+
+
+ .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ih264e_platform_macros.h b/encoder/armv8/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/armv8/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_platform_macros.h
+*
+* @brief
+* Contains platform specific routines used for codec context initialization
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (neon_a9q variant)
+*
+* @par Description: this routine initializes the function pointers of the
+* codec context; by its name it selects the ARM NEON a9q kernels (TODO confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks codec_t is not declared by this header; declare it before including
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (neon_av8 variant)
+*
+* @par Description: this routine initializes the function pointers of the
+* codec context; by its name it selects the ARMv8 NEON kernels (TODO confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks codec_t is not declared by this header; declare it before including
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (generic variant)
+*
+* @par Description: this routine initializes the function pointers of the
+* codec context; by its name it selects the generic kernels (TODO confirm)
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks codec_t is not declared by this header; declare it before including
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context
+*
+* @par Description: this routine initializes the function pointers of the
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void * (presumably a codec_t * - confirm)
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the environment
+* in which the current encoder is being run
+*
+* @param none
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks IV_ARCH_T is not declared by this header; declare it before including
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s
new file mode 100755
index 0000000..99ebc8a
--- /dev/null
+++ b/encoder/armv8/ime_distortion_metrics_av8.s
@@ -0,0 +1,978 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+//**
+
+///**
+//******************************************************************************
+//*
+//*
+//* @brief
+//* This file contains definitions of routines that compute distortion
+//* between two macro/sub blocks of identical dimensions
+//*
+//* @author
+//* Ittiam
+//*
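+//* @par List of Functions:
+//* - ime_compute_sad_16x16_fast_av8()
+//* - ime_compute_sad_16x8_av8()
+//* - ime_compute_sad_16x16_ea8_av8()
+//* - ime_calculate_sad2_prog_av8()
+//* - ime_calculate_sad3_prog_av8()
+//* - ime_sub_pel_compute_sad_16x16_av8()
+//* - ime_compute_sad_16x16_av8()
+//* - ime_calculate_sad4_prog_av8()
+//* - ime_compute_satqd_16x16_lumainter_av8()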
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+//*
+//* @par Description
+//* This function computes an approximate SAD between 2 16x16 blocks: only
+//* every alternate row (8 of the 16) is compared and the sum is doubled.
+//* There is no early exit in this routine; i4_max_sad is not read.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//* integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+.text
+.p2align 2
+
+// push_v_regs: stores d8-d15 on the stack with four pre-decrementing pair
+// stores (64 bytes in total). Every use must be matched by pop_v_regs.
+.macro push_v_regs
+ stp d8, d9, [sp, #-16]!
+ stp d10, d11, [sp, #-16]!
+ stp d12, d13, [sp, #-16]!
+ stp d14, d15, [sp, #-16]!
+.endm
+// pop_v_regs: reloads d8-d15 in the reverse order of push_v_regs and
+// releases the 64 bytes of stack again.
+.macro pop_v_regs
+ ldp d14, d15, [sp], #16
+ ldp d12, d13, [sp], #16
+ ldp d10, d11, [sp], #16
+ ldp d8, d9, [sp], #16
+.endm
+
+ .global ime_compute_sad_16x16_fast_av8
+ime_compute_sad_16x16_fast_av8:
+ // Register use in this routine:
+ // x0, x1 : base addresses of the two blocks; 16 bytes are read per row
+ // x2, x3 : row strides applied to x0 and x1 respectively
+ // x4 : not read
+ // x5 : address that receives the 32-bit result
+ // NOTE(review): x2/x3 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ // double both strides so that only every alternate row is read
+ lsl x2, x2, #1
+ lsl x3, x3, #1
+
+ // 2 iterations x 4 rows = 8 rows
+ mov x6, #2
+ // v30 holds eight 16-bit partial sums
+ movi v30.8h, #0
+
+core_loop_ime_compute_sad_16x16_fast_av8:
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ // accumulate |a - b| of the low and high 8 bytes of each row
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x16_fast_av8
+
+
+ // horizontal reduction of the eight partial sums into one 32-bit value
+ addp v30.8h, v30.8h, v30.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.2s, v30.2s, v30.2s
+ // double the 8-row sum
+ shl v30.2s, v30.2s, #1
+
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x8 blocks
+//*
+//*
+//* @par Description
+//* This function computes the SAD between 2 16x8 blocks (16 bytes wide,
+//* 8 rows). There is no early exit in this routine; u4_max_sad is not
+//* read.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_max_sad
+//* integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//* integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+//
+ .global ime_compute_sad_16x8_av8
+ime_compute_sad_16x8_av8:
+
+ // Register use in this routine:
+ // x0, x1 : base addresses of the two blocks; 16 bytes are read per row
+ // x2, x3 : row strides applied to x0 and x1 respectively (used as
+ // passed; no shift is applied to them here)
+ // x4 : not read
+ // x5 : address that receives the 32-bit result
+ // NOTE(review): x2/x3 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ // 2 iterations x 4 rows = 8 rows
+ mov x6, #2
+ // v30 holds eight 16-bit partial sums
+ movi v30.8h, #0
+
+core_loop_ime_compute_sad_16x8_av8:
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ // accumulate |a - b| of the low and high 8 bytes of each row
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x8_av8
+
+
+ // horizontal reduction of the eight partial sums into one 32-bit value
+ addp v30.8h, v30.8h, v30.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.2s, v30.2s, v30.2s
+
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+//*
+//* @par Description
+//* This functions computes SAD between 2 16x16 blocks. There is a provision
+//* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//* integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+ .global ime_compute_sad_16x16_ea8_av8
+ime_compute_sad_16x16_ea8_av8:
+
+ // Register use in this routine:
+ // x0, x1 : base addresses of the two blocks; 16 bytes are read per row
+ // x2, x3 : row strides applied to x0 and x1 respectively
+ // w4 : limit compared (signed) against the SAD of the first pass
+ // x5 : address that receives the 32-bit result
+ // The first pass covers rows 0,2,..,14; if its sum is greater than w4 the
+ // routine stores that partial sum and returns. Otherwise rows 1,3,..,15
+ // are added and the full sum is stored.
+ // NOTE(review): x2/x3 are used as full 64-bit offsets; if the caller
+ // passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ movi v30.8h, #0
+
+ // x7/x8 point at row 1 of each block (used by the second pass)
+ add x7, x0, x2
+ add x8, x1, x3
+
+ // double both strides: each pass steps two rows at a time
+ lsl x2, x2, #1
+ lsl x3, x3, #1
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+ ld1 {v8.16b}, [x0], x2
+ ld1 {v9.16b}, [x1], x3
+ ld1 {v10.16b}, [x0], x2
+ ld1 {v11.16b}, [x1], x3
+ ld1 {v12.16b}, [x0], x2
+ ld1 {v13.16b}, [x1], x3
+ ld1 {v14.16b}, [x0], x2
+ ld1 {v15.16b}, [x1], x3
+ ld1 {v16.16b}, [x0], x2
+ ld1 {v17.16b}, [x1], x3
+ ld1 {v18.16b}, [x0], x2
+ ld1 {v19.16b}, [x1], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ uabal v30.8h, v8.8b, v9.8b
+ uabal2 v30.8h, v8.16b, v9.16b
+
+ uabal v30.8h, v10.8b, v11.8b
+ uabal2 v30.8h, v10.16b, v11.16b
+
+ uabal v30.8h, v12.8b, v13.8b
+ uabal2 v30.8h, v12.16b, v13.16b
+
+ uabal v30.8h, v14.8b, v15.8b
+ uabal2 v30.8h, v14.16b, v15.16b
+
+ uabal v30.8h, v16.8b, v17.8b
+ uabal2 v30.8h, v16.16b, v17.16b
+
+ uabal v30.8h, v18.8b, v19.8b
+ uabal2 v30.8h, v18.16b, v19.16b
+
+ // reduce into v31 so that v30 keeps the per-lane sums for the second pass
+ addp v31.8h, v30.8h, v30.8h
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+ mov w6, v31.s[0]
+ // early exit when the 8-row sum already exceeds the limit
+ cmp w6, w4
+ bgt end_func_16x16
+
+ // second pass: the remaining (odd) rows, starting from x7/x8
+ ld1 {v0.16b}, [x7], x2
+ ld1 {v1.16b}, [x8], x3
+ ld1 {v2.16b}, [x7], x2
+ ld1 {v3.16b}, [x8], x3
+ ld1 {v8.16b}, [x7], x2
+ ld1 {v9.16b}, [x8], x3
+ ld1 {v10.16b}, [x7], x2
+ ld1 {v11.16b}, [x8], x3
+ ld1 {v12.16b}, [x7], x2
+ ld1 {v13.16b}, [x8], x3
+ ld1 {v14.16b}, [x7], x2
+ ld1 {v15.16b}, [x8], x3
+ ld1 {v16.16b}, [x7], x2
+ ld1 {v17.16b}, [x8], x3
+ ld1 {v18.16b}, [x7], x2
+ ld1 {v19.16b}, [x8], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ uabal v30.8h, v8.8b, v9.8b
+ uabal2 v30.8h, v8.16b, v9.16b
+
+ uabal v30.8h, v10.8b, v11.8b
+ uabal2 v30.8h, v10.16b, v11.16b
+
+ uabal v30.8h, v12.8b, v13.8b
+ uabal2 v30.8h, v12.16b, v13.16b
+
+ uabal v30.8h, v14.8b, v15.8b
+ uabal2 v30.8h, v14.16b, v15.16b
+
+ uabal v30.8h, v16.8b, v17.8b
+ uabal2 v30.8h, v16.16b, v17.16b
+
+ uabal v30.8h, v18.8b, v19.8b
+ uabal2 v30.8h, v18.16b, v19.16b
+
+ // v31 now becomes the sum over all 16 rows
+ addp v31.8h, v30.8h, v30.8h
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+
+end_func_16x16:
+ // stores either the 8-row partial sum (early exit) or the 16-row sum
+ st1 {v31.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad2_prog_av8()
+////
+//// Detail Description : This function finds the SAD values of one 16x16
+//// source block against two reference blocks at one
+//// shot
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad2_prog_av8
+ime_calculate_sad2_prog_av8:
+
+ // x0 = ref1 <UWORD8 *>
+ // x1 = ref2 <UWORD8 *>
+ // x2 = src <UWORD8 *>
+ // x3 = RefBufferWidth (row stride of ref1 and ref2)
+ // x4 = CurBufferWidth (row stride of src)
+ // x5 = psad <UWORD32 *>; psad[0] = SAD(src, ref1), psad[1] = SAD(src, ref2)
+ // NOTE(review): x3/x4 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs // v8-v11 are used below
+ mov x6, #4 // 4 iterations x 4 rows = 16 rows
+ movi v30.8h, #0 // partial sums against ref1
+ movi v31.8h, #0 // partial sums against ref2
+
+core_loop_ime_calculate_sad2_prog_av8:
+
+ ld1 {v0.16b}, [x0], x3
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x2], x4
+
+ ld1 {v3.16b}, [x0], x3
+ ld1 {v4.16b}, [x1], x3
+ ld1 {v5.16b}, [x2], x4
+
+
+ uabal v30.8h, v0.8b, v2.8b
+ uabal2 v30.8h, v0.16b, v2.16b
+ uabal v31.8h, v1.8b, v2.8b
+ uabal2 v31.8h, v1.16b, v2.16b
+
+ uabal v30.8h, v3.8b, v5.8b
+ uabal2 v30.8h, v3.16b, v5.16b
+ uabal v31.8h, v4.8b, v5.8b
+ uabal2 v31.8h, v4.16b, v5.16b
+
+
+ ld1 {v6.16b}, [x0], x3
+ ld1 {v7.16b}, [x1], x3
+ ld1 {v8.16b}, [x2], x4
+
+ ld1 {v9.16b}, [x0], x3
+ ld1 {v10.16b}, [x1], x3
+ ld1 {v11.16b}, [x2], x4
+
+ uabal v30.8h, v6.8b, v8.8b
+ uabal2 v30.8h, v6.16b, v8.16b
+ uabal v31.8h, v7.8b, v8.8b
+ uabal2 v31.8h, v7.16b, v8.16b
+
+ uabal v30.8h, v9.8b, v11.8b
+ uabal2 v30.8h, v9.16b, v11.16b
+ uabal v31.8h, v10.8b, v11.8b
+ uabal2 v31.8h, v10.16b, v11.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_calculate_sad2_prog_av8
+
+ // low four lanes: ref1 pair sums, high four lanes: ref2 pair sums
+ addp v30.8h, v30.8h, v31.8h
+ // widen to 32 bit: {ref1a, ref1b, ref2a, ref2b}
+ uaddlp v30.4s, v30.8h
+ // full-width pairwise add so that lane 0 = SAD(ref1), lane 1 = SAD(ref2)
+ addp v30.4s, v30.4s, v30.4s
+
+ st1 {v30.2s}, [x5]
+ pop_v_regs
+ ret
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad3_prog_av8()
+////
+//// Detail Description : This function finds the SAD values of one 16x16
+//// source block against three reference blocks at one
+//// shot
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad3_prog_av8
+ime_calculate_sad3_prog_av8:
+
+ // x0 = ref1 <UWORD8 *>
+ // x1 = ref2 <UWORD8 *>
+ // x2 = ref3 <UWORD8 *>
+ // x3 = src <UWORD8 *>
+ // x4 = RefBufferWidth (row stride of ref1, ref2 and ref3)
+ // x5 = CurBufferWidth (row stride of src)
+ // x6 = psad <UWORD32 *>; psad[0..2] = SAD(src, ref1/ref2/ref3)
+ // x7 = loop counter (x6 must stay intact until the final store)
+ // NOTE(review): x4/x5 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ mov x7, #8 // 8 iterations x 2 rows = 16 rows
+ movi v29.8h, #0 // partial sums against ref1
+ movi v30.8h, #0 // partial sums against ref2
+ movi v31.8h, #0 // partial sums against ref3
+
+core_loop_ime_calculate_sad3_prog_av8:
+
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ ld1 {v2.16b}, [x2], x4
+ ld1 {v3.16b}, [x3], x5
+
+ uabal v29.8h, v0.8b, v3.8b
+ uabal2 v29.8h, v0.16b, v3.16b
+ uabal v30.8h, v1.8b, v3.8b
+ uabal2 v30.8h, v1.16b, v3.16b
+ uabal v31.8h, v2.8b, v3.8b
+ uabal2 v31.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x4
+ ld1 {v5.16b}, [x1], x4
+ ld1 {v6.16b}, [x2], x4
+ ld1 {v7.16b}, [x3], x5
+
+ uabal v29.8h, v4.8b, v7.8b
+ uabal2 v29.8h, v4.16b, v7.16b
+ uabal v30.8h, v5.8b, v7.8b
+ uabal2 v30.8h, v5.16b, v7.16b
+ uabal v31.8h, v6.8b, v7.8b
+ uabal2 v31.8h, v6.16b, v7.16b
+
+ subs x7, x7, #1
+ bne core_loop_ime_calculate_sad3_prog_av8
+
+ // v29: low four lanes ref1 pair sums, high four lanes ref2 pair sums
+ addp v29.8h, v29.8h, v30.8h
+ // v31: ref3 pair sums (duplicated in both halves)
+ addp v31.8h, v31.8h, v31.8h
+
+ // widen to 32 bit
+ uaddlp v29.4s, v29.8h
+ uaddlp v31.4s, v31.8h
+
+ // v29 = {SAD(ref1), SAD(ref2), SAD(ref3), SAD(ref3)}
+ addp v29.4s, v29.4s, v31.4s
+
+ // store exactly three 32-bit results
+ st1 {v29.2s}, [x6], #8
+ st1 {v29.s}[2], [x6]
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) for sub-pel motion estimation
+//*
+//* @par Description
+//* This functions computes SAD for all the 8 half pel points
+//*
+//* @param[out] pi4_sad
+//* integer evaluated sad
+//* pi4_sad[0] - half x
+//* pi4_sad[1] - half x - 1
+//* pi4_sad[2] - half y
+//* pi4_sad[3] - half y - 1
+//* pi4_sad[4] - half xy
+//* pi4_sad[5] - half xy - 1
+//* pi4_sad[6] - half xy - strd
+//* pi4_sad[7] - half xy - 1 - strd
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+.text
+.p2align 2
+
+ .global ime_sub_pel_compute_sad_16x16_av8
+ime_sub_pel_compute_sad_16x16_av8:
+ // Register use in this routine:
+ // x0 : src rows, stepped by x4
+ // x1 : "x" plane, x2 : "y" plane, x3 : "xy" plane, all stepped by x5
+ // x6 : address that receives eight 32-bit sums, in the order
+ // x, x left, y, y top, xy, xy left, xy top, xy top left
+ // x7-x11: derived pointers (one byte to the left and/or one x5 stride
+ // above the planes in x1-x3)
+ // NOTE(review): x4/x5 are used as full 64-bit offsets; if the caller
+ // passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ sub x7, x1, #1 //x left
+ sub x8, x2, x5 //y top
+ sub x9, x3, #1 //xy left
+ sub x10, x3, x5 //xy top
+ sub x11, x10, #1 //xy top left
+
+ // one 8x16-bit accumulator per candidate position
+ movi v24.8h, #0
+ movi v25.8h, #0
+ movi v26.8h, #0
+ movi v27.8h, #0
+ movi v28.8h, #0
+ movi v29.8h, #0
+ movi v30.8h, #0
+ movi v31.8h, #0
+
+ // one row per iteration, 16 rows
+ mov x12, #16
+core_loop_ime_sub_pel_compute_sad_16x16_av8:
+
+ ld1 {v0.16b}, [x0], x4 //src
+ ld1 {v1.16b}, [x1], x5 //x
+ ld1 {v2.16b}, [x7], x5 //x left
+ ld1 {v3.16b}, [x2], x5 //y
+ ld1 {v9.16b}, [x8], x5 //y top
+ ld1 {v10.16b}, [x3], x5 //xy
+ ld1 {v11.16b}, [x9], x5 //xy left
+ ld1 {v12.16b}, [x10], x5 //xy top
+ ld1 {v13.16b}, [x11], x5 //xy top left
+
+ uabal v24.8h, v0.8b, v1.8b
+ uabal2 v24.8h, v0.16b, v1.16b
+ uabal v25.8h, v0.8b, v2.8b
+ uabal2 v25.8h, v0.16b, v2.16b
+ uabal v26.8h, v0.8b, v3.8b
+ uabal2 v26.8h, v0.16b, v3.16b
+ uabal v27.8h, v0.8b, v9.8b
+ uabal2 v27.8h, v0.16b, v9.16b
+ uabal v28.8h, v0.8b, v10.8b
+ uabal2 v28.8h, v0.16b, v10.16b
+ uabal v29.8h, v0.8b, v11.8b
+ uabal2 v29.8h, v0.16b, v11.16b
+ uabal v30.8h, v0.8b, v12.8b
+ uabal2 v30.8h, v0.16b, v12.16b
+ uabal v31.8h, v0.8b, v13.8b
+ uabal2 v31.8h, v0.16b, v13.16b
+
+ subs x12, x12, #1
+ bne core_loop_ime_sub_pel_compute_sad_16x16_av8
+
+ // pack two accumulators per register (low half / high half)
+ addp v24.8h, v24.8h, v25.8h
+ addp v26.8h, v26.8h, v27.8h
+ addp v28.8h, v28.8h, v29.8h
+ addp v30.8h, v30.8h, v31.8h
+
+ // widen to 32 bit
+ uaddlp v24.4s, v24.8h
+ uaddlp v26.4s, v26.8h
+ uaddlp v28.4s, v28.8h
+ uaddlp v30.4s, v30.8h
+
+ // v24 = sums 0..3, v25 = sums 4..7
+ addp v24.4s, v24.4s, v26.4s
+ addp v25.4s, v28.4s, v30.4s
+
+ st1 {v24.4s-v25.4s}, [x6]
+
+
+ pop_v_regs
+ ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks
+//*
+//* @par Description
+//* This function computes the SAD between 2 16x16 blocks over all 16 rows.
+//* There is no early exit in this routine; i4_max_sad is not read.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//* integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+ .global ime_compute_sad_16x16_av8
+ime_compute_sad_16x16_av8:
+ // Register use in this routine:
+ // x0, x1 : base addresses of the two blocks; 16 bytes are read per row
+ // x2, x3 : row strides applied to x0 and x1 respectively
+ // x4 : not read
+ // x5 : address that receives the 32-bit result
+ // NOTE(review): x2/x3 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ // 4 iterations x 4 rows = 16 rows
+ mov x6, #4
+ // v30 holds eight 16-bit partial sums
+ movi v30.8h, #0
+
+core_loop_ime_compute_sad_16x16_av8:
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ // accumulate |a - b| of the low and high 8 bytes of each row
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x16_av8
+
+
+ // horizontal reduction of the eight partial sums into one 32-bit value
+ addp v30.8h, v30.8h, v30.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.2s, v30.2s, v30.2s
+
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad4_prog_av8()
+////
+//// Detail Description : This function finds, at one shot, the SAD values of
+//// one 16x16 block (x1) against the four blocks located
+//// one byte left, one byte right, one stride above and
+//// one stride below the address in x0
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad4_prog_av8
+ime_calculate_sad4_prog_av8:
+ // Register use in this routine:
+ // x0 : centre address from which the four candidates are derived
+ // x1 : block compared against every candidate, stepped by x3
+ // x2 : row stride of the candidates
+ // x3 : row stride of the x1 block
+ // x4 : address that receives four 32-bit sums, in the order
+ // left, right, top, bottom
+ // NOTE(review): x2/x3 are used as full 64-bit offsets; if the caller
+ // passes 32-bit strides, confirm the upper halves are valid.
+ push_v_regs
+ sub x5, x0, #1 //left
+ add x6, x0, #1 //right
+ sub x7, x0, x2 //top
+ add x8, x0, x2 //bottom
+
+ // one 8x16-bit accumulator per candidate
+ movi v28.8h, #0
+ movi v29.8h, #0
+ movi v30.8h, #0
+ movi v31.8h, #0
+
+ // one row per iteration, 16 rows
+ mov x9, #16
+core_loop_ime_calculate_sad4_prog_av8:
+
+ ld1 {v0.16b}, [x1], x3
+ ld1 {v1.16b}, [x5], x2
+ ld1 {v2.16b}, [x6], x2
+ ld1 {v3.16b}, [x7], x2
+ ld1 {v9.16b}, [x8], x2
+
+ uabal v28.8h, v0.8b, v1.8b
+ uabal2 v28.8h, v0.16b, v1.16b
+ uabal v29.8h, v0.8b, v2.8b
+ uabal2 v29.8h, v0.16b, v2.16b
+ uabal v30.8h, v0.8b, v3.8b
+ uabal2 v30.8h, v0.16b, v3.16b
+ uabal v31.8h, v0.8b, v9.8b
+ uabal2 v31.8h, v0.16b, v9.16b
+
+ subs x9, x9, #1
+ bne core_loop_ime_calculate_sad4_prog_av8
+
+ // pack two accumulators per register (low half / high half)
+ addp v28.8h, v28.8h, v29.8h
+ addp v30.8h, v30.8h, v31.8h
+
+ // widen to 32 bit
+ uaddlp v28.4s, v28.8h
+ uaddlp v30.4s, v30.8h
+
+ // v28 = {left, right, top, bottom}
+ addp v28.4s, v28.4s, v30.4s
+ st1 {v28.4s}, [x4]
+ pop_v_regs
+ ret
+
+
+
+//*****************************************************************************
+//*
+//* Function Name : ime_compute_satqd_16x16_lumainter_av8
+//* Description : This function computes SAD for a 16x16 block.
+// : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+//
+// Arguments : x0 :pointer to src buffer
+// x1 :pointer to est buffer
+// x2 :source stride
+// x3 :est stride
+// x4 :pointer to thresholds, x5 :pointer to distortion, x6 :pointer to is_nonzero
+//*
+//* Values Returned : NONE
+//*
+//* Register Usage : x0-x11
+//* Stack Usage :
+//* Cycles : Around
+//* Interruptiaility : Interruptable
+//*
+//* Known Limitations
+//* \Assumptions :
+//*
+//* Revision History :
+//* DD MM YYYY Author(s) Changes
+//* 14 04 2014 Harinarayanan K K First version
+//*
+//*****************************************************************************
+ .global ime_compute_satqd_16x16_lumainter_av8
+ime_compute_satqd_16x16_lumainter_av8:
+ //x0 :pointer to src buffer
+ //x1 :pointer to est buffer
+ //x2 :Source stride
+ //x3 :Pred stride
+ //x4 :Threshold pointer (nine 16-bit values are read from it)
+ //x5 :Distortion,ie SAD (address that receives the 32-bit sum)
+ //x6 :is nonzero (address that receives a 32-bit 0/1 flag)
+ //x7 :loop counter
+ // NOTE(review): x2/x3 are used as full 64-bit post-index offsets; if the
+ // caller passes 32-bit strides, confirm the upper halves are valid.
+
+ // push_v_regs already saves d8-d15; no additional stp/ldp pairs are
+ // needed around the body.
+ push_v_regs
+
+ ld1 {v30.8h}, [x4]
+
+ dup v20.4h, v30.h[1] //ls1
+ dup v24.4h, v30.h[0] //ls2
+ dup v21.4h, v30.h[5] //ls3
+ dup v25.4h, v30.h[7] //ls4
+ dup v22.4h, v30.h[3] //ls5
+ dup v26.4h, v30.h[4] //ls6
+ dup v23.4h, v30.h[6] //ls7
+ dup v27.4h, v30.h[2] //ls8
+
+ // pack two thresholds per 128-bit register (low half / high half)
+ mov v20.d[1], v24.d[0]
+ mov v21.d[1], v25.d[0]
+ mov v22.d[1], v26.d[0]
+ mov v23.d[1], v27.d[0]
+
+ // ninth threshold, compared against the per-4x4 sad below
+ add x4, x4, #16
+ ld1 {v29.h}[0], [x4]
+ dup v29.4h, v29.h[0]
+
+ // v31 accumulates the distortion
+ movi v31.8h, #0
+
+ // 4 iterations x 4 rows = 16 rows
+ mov x7, #4
+core_loop_satqd_ime_compute_satqd_16x16_lumainter:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabdl v10.8h, v0.8b, v1.8b
+ uabdl2 v15.8h, v0.16b, v1.16b
+ uabdl v11.8h, v2.8b, v3.8b
+ uabdl2 v16.8h, v2.16b, v3.16b
+ uabdl v12.8h, v4.8b, v5.8b
+ uabdl2 v17.8h, v4.16b, v5.16b
+ uabdl v13.8h, v6.8b, v7.8b
+ uabdl2 v18.8h, v6.16b, v7.16b
+
+ add v0.8h, v10.8h, v13.8h
+ add v1.8h, v11.8h, v12.8h
+ add v2.8h, v15.8h, v18.8h
+ add v3.8h, v16.8h, v17.8h
+
+ //v0 : S1 S4 S4 S1 A1 A4 A4 A1
+ //v1 : S2 S3 S3 S2 A2 A3 A3 A2
+ //v2 : B1 B4 B4 B1 X1 X4 X4 X1
+ //v3 : B3 B2 B2 B3 X3 X2 X2 X3
+
+ trn1 v4.8h, v0.8h, v1.8h
+ trn2 v5.8h, v0.8h, v1.8h
+ trn1 v6.8h, v2.8h, v3.8h
+ trn2 v7.8h, v2.8h, v3.8h
+
+ trn1 v0.4s, v4.4s, v6.4s
+ trn2 v2.4s, v4.4s, v6.4s
+ trn1 v1.4s, v5.4s, v7.4s
+ trn2 v3.4s, v5.4s, v7.4s
+
+ add v4.8h, v0.8h, v3.8h
+ add v5.8h, v1.8h, v2.8h
+ //v4 : S1 S2 B1 B2 A1 A2 X1 X2
+ //v5 : S4 S3 B4 B3 A4 A3 X4 X3
+
+ //compute sad for each 4x4 block
+ add v6.8h, v4.8h, v5.8h
+ addp v19.8h, v6.8h, v6.8h
+ //duplicate the sad into 128 bit so that we can compare using 128bit
+ add v31.4h, v31.4h, v19.4h
+
+ //sad_2 = sad_1<<1;
+ shl v28.8h, v19.8h, #1
+
+ //sad_2 - pu2_thrsh
+ sub v24.8h, v28.8h, v20.8h
+ sub v25.8h, v28.8h, v21.8h
+ sub v26.8h, v28.8h, v22.8h
+ sub v27.8h, v28.8h, v23.8h
+
+ trn1 v0.4s, v4.4s, v5.4s
+ trn2 v1.4s, v4.4s, v5.4s
+ //v0 : S1 S2 S4 S3 A1 A2 A4 A3
+ //v1 : B1 B2 B4 B3 X1 X2 X4 X3
+
+ trn1 v4.8h, v0.8h, v1.8h
+ trn2 v5.8h, v0.8h, v1.8h
+ //v4 : S1 B1 S4 B4 A1 X1 A4 X4
+ //v5 : S2 B2 S3 B3 A2 X2 A3 X3
+
+ mov v7.s[0], v4.s[1]
+ mov v7.s[1], v4.s[3]
+ mov v6.s[0], v5.s[1] // V4 //S1 B1 A1 X1
+ mov v6.s[1], v5.s[3] // V5 //S2 B2 A2 X2
+ mov v4.s[1], v4.s[2] // V6 //S3 B3 A3 X3
+ mov v5.s[1], v5.s[2] // V7 //S4 B4 A4 X4
+
+ shl v0.4h, v4.4h, #1 //S1<<1
+ shl v1.4h, v5.4h, #1 //S2<<1
+ shl v2.4h, v6.4h, #1 //S3<<1
+ shl v3.4h, v7.4h, #1 //S4<<1
+
+ add v8.4h, v5.4h, v6.4h //(s2[j] + s3[j]))
+ add v9.4h, v4.4h, v7.4h //(s1[j] + s4[j]))
+ add v10.4h, v6.4h, v7.4h //(s3[j] + s4[j]))
+ sub v11.4h, v6.4h, v0.4h //(s3[j] - (s1[j]<<1))
+ sub v12.4h, v7.4h, v1.4h //(s4[j] - (s2[j]<<1))
+ add v13.4h, v4.4h, v5.4h //(s1[j] + s2[j]))
+ sub v14.4h, v5.4h, v3.4h //(s2[j] - (s4[j]<<1)))
+ sub v15.4h, v4.4h, v2.4h //(s1[j] - (s3[j]<<1)))
+
+ mov v8.d[1], v9.d[0]
+ mov v10.d[1], v11.d[0]
+ mov v12.d[1], v13.d[0]
+ mov v14.d[1], v15.d[0]
+
+ cmge v0.8h, v24.8h, v8.8h //ls1 ls2
+ cmge v1.8h, v25.8h, v10.8h //ls3 ls4
+ cmge v2.8h, v26.8h, v12.8h //ls5 ls6
+ cmge v3.8h, v27.8h, v14.8h //ls7 ls8
+ cmge v4.4h, v19.4h, v29.4h //sad
+
+ // OR all comparison masks together into the low 64 bits of v2
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v2.16b, v0.16b, v2.16b
+ xtn v2.8b, v2.8h
+ orr v2.8b, v2.8b, v4.8b
+
+ //if the comparison is non zero, out
+ mov x4, v2.d[0]
+ cmp x4, #0
+ bne core_loop_compute_sad_pre
+
+ subs x7, x7, #1
+ bne core_loop_satqd_ime_compute_satqd_16x16_lumainter
+ b satdq_end_func
+
+
+ // plain SAD over the remaining rows once a comparison has fired; the
+ // rows of the iteration that fired are already included in v31
+core_loop_compute_sad:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ uabal v31.8h, v0.8b, v1.8b
+ uabal2 v31.8h, v0.16b, v1.16b
+
+ uabal v31.8h, v2.8b, v3.8b
+ uabal2 v31.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v31.8h, v4.8b, v5.8b
+ uabal2 v31.8h, v4.16b, v5.16b
+
+ uabal v31.8h, v6.8b, v7.8b
+ uabal2 v31.8h, v6.16b, v7.16b
+
+core_loop_compute_sad_pre:
+ subs x7, x7, #1
+ bne core_loop_compute_sad
+
+satdq_end_func:
+
+ // flag = 0 when x4 (the last comparison mask) is zero, otherwise 1
+ mov x7, #1
+ cmp x4, #0
+ csel x7, x4, x7, eq
+ str w7, [x6]
+
+ // horizontal reduction of v31 into one 32-bit sum
+ addp v31.8h, v31.8h, v31.8h
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+ st1 {v31.s}[0], [x5]
+
+
+ pop_v_regs
+ ret
+ .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ime_platform_macros.h b/encoder/armv8/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/armv8/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+
+/*
+ * Adds to sad the sum of absolute differences of elements 0..3 of src and
+ * est. Every argument and the whole expansion are parenthesized so that the
+ * macro expands safely for any argument expression. src and est are each
+ * evaluated more than once, so they must not have side effects.
+ */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/ih264e.h b/encoder/ih264e.h
new file mode 100755
index 0000000..15a9d8f
--- /dev/null
+++ b/encoder/ih264e.h
@@ -0,0 +1,620 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264e.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the Ittiam MPEG4 */
+/* Encoder on Cortex A8 - Neon platform */
+/* */
+/* List of Functions : ih264e_api_function */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 08 2010 100239(RCY) Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264E_H_
+#define _IH264E_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "iv2.h"
+#include "ive2.h"
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* API Function Prototype */
+/*****************************************************************************/
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op);
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* IH264E control sub-command identifiers; currently a single enumerator */
+/* with the implicit value 0. */
+typedef enum
+{
+ IH264E_CMD_CTL_SET_ME_INFO_ENABLE,
+}IH264E_CMD_CTL_SUB_CMDS;
+
+
+/*****************************************************************************/
+/* Extended Structures */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+
+/* Input structure: wraps a single iv_num_mem_rec_ip_t member, s_ive_ip. */
+typedef struct
+{
+ iv_num_mem_rec_ip_t s_ive_ip;
+}ih264e_num_mem_rec_ip_t;
+
+
+/* Output structure: wraps a single iv_num_mem_rec_op_t member, s_ive_op. */
+typedef struct
+{
+ iv_num_mem_rec_op_t s_ive_op;
+}ih264e_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+
+/* Input structure: wraps a single iv_fill_mem_rec_ip_t member, s_ive_ip. */
+typedef struct
+{
+ iv_fill_mem_rec_ip_t s_ive_ip;
+}ih264e_fill_mem_rec_ip_t;
+
+
+/* Output structure: wraps a single iv_fill_mem_rec_op_t member, s_ive_op. */
+typedef struct
+{
+ iv_fill_mem_rec_op_t s_ive_op;
+}ih264e_fill_mem_rec_op_t;
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+
+/* Input structure: wraps a single iv_retrieve_mem_rec_ip_t member, s_ive_ip. */
+typedef struct
+{
+ iv_retrieve_mem_rec_ip_t s_ive_ip;
+}ih264e_retrieve_mem_rec_ip_t;
+
+
+/* Output structure: wraps a single iv_retrieve_mem_rec_op_t member, s_ive_op. */
+typedef struct
+{
+ iv_retrieve_mem_rec_op_t s_ive_op;
+}ih264e_retrieve_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Initialize encoder */
+/*****************************************************************************/
+
+/* Input structure: wraps a single ive_init_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_init_ip_t s_ive_ip;
+}ih264e_init_ip_t;
+
+
+/* Output structure: wraps a single ive_init_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_init_op_t s_ive_op;
+}ih264e_init_op_t;
+
+
+/*****************************************************************************/
+/* Queue Input raw buffer - Send the YUV buffer to be encoded */
+/*****************************************************************************/
+/* Input structure: wraps a single ive_queue_inp_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_queue_inp_ip_t s_ive_ip;
+}ih264e_queue_inp_ip_t;
+
+/* Output structure: wraps a single ive_queue_inp_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_queue_inp_op_t s_ive_op;
+}ih264e_queue_inp_op_t;
+
+/*****************************************************************************/
+/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */
+/*****************************************************************************/
+/* Input structure: wraps a single ive_dequeue_inp_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_dequeue_inp_ip_t s_ive_ip;
+}ih264e_dequeue_inp_ip_t;
+
+/* Output structure: wraps a single ive_dequeue_inp_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_dequeue_inp_op_t s_ive_op;
+}ih264e_dequeue_inp_op_t;
+
+
+/*****************************************************************************/
+/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */
+/*****************************************************************************/
+/* Input structure: wraps a single ive_queue_out_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_queue_out_ip_t s_ive_ip;
+}ih264e_queue_out_ip_t;
+
+/* Output structure: wraps a single ive_queue_out_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_queue_out_op_t s_ive_op;
+}ih264e_queue_out_op_t;
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the bitstream buffer filled */
+/*****************************************************************************/
+/* Input structure: wraps a single ive_dequeue_out_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_dequeue_out_ip_t s_ive_ip;
+}ih264e_dequeue_out_ip_t;
+
+/* Output structure: wraps a single ive_dequeue_out_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_dequeue_out_op_t s_ive_op;
+}ih264e_dequeue_out_op_t;
+
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder */
+/*****************************************************************************/
+/* Input structure: wraps a single ive_get_recon_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_get_recon_ip_t s_ive_ip;
+}ih264e_get_recon_ip_t;
+
+/* Output structure: wraps a single ive_get_recon_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_get_recon_op_t s_ive_op;
+}ih264e_get_recon_op_t;
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+
+
+/* Input structure: wraps a single ive_ctl_flush_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_ctl_flush_ip_t s_ive_ip;
+}ih264e_ctl_flush_ip_t;
+
+
+/* Output structure: wraps a single ive_ctl_flush_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_ctl_flush_op_t s_ive_op;
+}ih264e_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+
+
+/* Input structure: wraps a single ive_ctl_reset_ip_t member, s_ive_ip. */
+typedef struct
+{
+ ive_ctl_reset_ip_t s_ive_ip;
+}ih264e_ctl_reset_ip_t;
+
+
+/* Output structure: wraps a single ive_ctl_reset_op_t member, s_ive_op. */
+typedef struct
+{
+ ive_ctl_reset_op_t s_ive_op;
+}ih264e_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/* Video control:Get Buf Info */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ive_ctl_getbufinfo_ip_t s_ive_ip; /* sole member; api_check_struct_sanity() requires s_ive_ip.u4_size == sizeof(ih264e_ctl_getbufinfo_ip_t) and range-checks u4_max_wd / u4_max_ht */
+}ih264e_ctl_getbufinfo_ip_t;
+
+
+
+typedef struct
+{
+ ive_ctl_getbufinfo_op_t s_ive_op; /* sole member; api_check_struct_sanity() requires s_ive_op.u4_size == sizeof(ih264e_ctl_getbufinfo_op_t) */
+}ih264e_ctl_getbufinfo_op_t;
+
+
+
+/*****************************************************************************/
+/* Video control:Get Version Info */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ive_ctl_getversioninfo_ip_t s_ive_ip; /* sole member: ive-level input arguments for the get-version-info control call */
+}ih264e_ctl_getversioninfo_ip_t;
+
+
+
+typedef struct
+{
+ ive_ctl_getversioninfo_op_t s_ive_op; /* sole member: ive-level output arguments of the get-version-info control call */
+}ih264e_ctl_getversioninfo_op_t;
+
+/*****************************************************************************/
+/* Video control:Set default params */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ive_ctl_setdefault_ip_t s_ive_ip; /* sole member; api_check_struct_sanity() requires s_ive_ip.u4_size == sizeof(ih264e_ctl_setdefault_ip_t) */
+}ih264e_ctl_setdefault_ip_t;
+
+
+
+typedef struct
+{
+ ive_ctl_setdefault_op_t s_ive_op; /* sole member; api_check_struct_sanity() requires s_ive_op.u4_size == sizeof(ih264e_ctl_setdefault_op_t) */
+}ih264e_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/* Video control Set IPE params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_ipe_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-IPE-params control call */
+}ih264e_ctl_set_ipe_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_ipe_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-IPE-params control call */
+}ih264e_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame dimensions */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_dimensions_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-frame-dimensions control call */
+}ih264e_ctl_set_dimensions_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_dimensions_op_t s_ive_op; /* sole member: ive-level output arguments of the set-frame-dimensions control call */
+}ih264e_ctl_set_dimensions_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame rates */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_frame_rate_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-frame-rate control call */
+}ih264e_ctl_set_frame_rate_ip_t;
+typedef struct
+{
+ ive_ctl_set_frame_rate_op_t s_ive_op; /* sole member: ive-level output arguments of the set-frame-rate control call */
+}ih264e_ctl_set_frame_rate_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Bitrate */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_bitrate_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-bitrate control call */
+}ih264e_ctl_set_bitrate_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_bitrate_op_t s_ive_op; /* sole member: ive-level output arguments of the set-bitrate control call */
+}ih264e_ctl_set_bitrate_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Frame type */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_frame_type_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-frame-type control call */
+}ih264e_ctl_set_frame_type_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_frame_type_op_t s_ive_op; /* sole member: ive-level output arguments of the set-frame-type control call */
+}ih264e_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/* Video control Set Encode mode */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_enc_mode_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-encode-mode control call */
+}ih264e_ctl_set_enc_mode_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_enc_mode_op_t s_ive_op; /* sole member: ive-level output arguments of the set-encode-mode control call */
+}ih264e_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/* Video control Set QP */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_qp_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-QP control call */
+}ih264e_ctl_set_qp_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_qp_op_t s_ive_op; /* sole member: ive-level output arguments of the set-QP control call */
+}ih264e_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/* Video control Set AIR params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_air_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-AIR-params control call */
+}ih264e_ctl_set_air_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_air_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-AIR-params control call */
+}ih264e_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set VBV params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_vbv_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-VBV-params control call */
+}ih264e_ctl_set_vbv_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_vbv_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-VBV-params control call */
+}ih264e_ctl_set_vbv_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Processor Details */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_num_cores_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-num-cores (processor details) control call */
+}ih264e_ctl_set_num_cores_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_num_cores_op_t s_ive_op; /* sole member: ive-level output arguments of the set-num-cores control call */
+}ih264e_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/* Video control Set Motion estimation params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_me_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-motion-estimation-params control call */
+}ih264e_ctl_set_me_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_me_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-motion-estimation-params control call */
+}ih264e_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set GOP params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_gop_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-GOP-params control call */
+}ih264e_ctl_set_gop_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_gop_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-GOP-params control call */
+}ih264e_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Deblock params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_deblock_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-deblock-params control call */
+}ih264e_ctl_set_deblock_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_deblock_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-deblock-params control call */
+}ih264e_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Profile params */
+/*****************************************************************************/
+typedef struct
+{
+ ive_ctl_set_profile_params_ip_t s_ive_ip; /* sole member: ive-level input arguments for the set-profile-params control call */
+}ih264e_ctl_set_profile_params_ip_t;
+
+typedef struct
+{
+ ive_ctl_set_profile_params_op_t s_ive_op; /* sole member: ive-level output arguments of the set-profile-params control call */
+}ih264e_ctl_set_profile_params_op_t;
+
+/*****************************************************************************/
+/* Synchronous video encode call */
+/*****************************************************************************/
+typedef struct
+{
+ ive_video_encode_ip_t s_ive_ip; /* sole member; api_check_struct_sanity() requires s_ive_ip.u4_size == sizeof(ih264e_video_encode_ip_t) */
+}ih264e_video_encode_ip_t;
+
+typedef struct
+{
+ ive_video_encode_op_t s_ive_op; /* sole member; api_check_struct_sanity() requires s_ive_op.u4_size == sizeof(ih264e_video_encode_op_t) */
+}ih264e_video_encode_op_t;
+
+
+/* Macroblock type codes. Every value must fit in 8 bits because it is assigned to a WORD8 (presumably the i1_mb_type fields of the mb-info structs - confirm). */
+typedef enum
+{
+ INTRA16x16 = 0, /* explicit 0; the following enumerators count up from here */
+ INTRA4x4, /* implicitly 1 */
+ INTER16x16 /* implicitly 2 */
+}IV_MB_TYPE_T;
+
+/*****************************************************************************/
+/* Pic info structures */
+/*****************************************************************************/
+typedef struct
+{
+ /** Quantization parameter (QP) of the picture */
+ UWORD32 u4_qp;
+
+ /** Picture coding type (an IV_PICTURE_CODING_TYPE_T value) */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+}ih264e_pic_info1_t;
+
+typedef struct
+{
+ /** Quantization parameter (QP) of the picture */
+ UWORD32 u4_qp;
+
+ /** Picture coding type (an IV_PICTURE_CODING_TYPE_T value) */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+ UWORD32 u4_disable_deblock_level;
+
+}ih264e_pic_info2_t;
+
+
+/*****************************************************************************/
+/* MB info structures */
+/*****************************************************************************/
+typedef struct
+{
+ /** Motion vector, x component (signed 16-bit) */
+ WORD16 i2_mv_x;
+
+ /** Motion vector, y component (signed 16-bit) */
+ WORD16 i2_mv_y;
+}ih264e_mv_t;
+
+typedef struct
+{
+ /** MB type: intra / inter (presumably an IV_MB_TYPE_T value - confirm) */
+ WORD8 i1_mb_type;
+ union /* anonymous union: as_mv and ai1_intra_mode share the same storage */
+ {
+ ih264e_mv_t as_mv[1]; /* one motion vector; presumably meaningful only for inter MBs - confirm */
+
+ /** Intra mode (one entry); presumably meaningful only for intra MBs - confirm */
+ WORD8 ai1_intra_mode[1];
+ };
+}ih264e_mb_info1_t;
+
+typedef struct
+{
+ /** MB type: intra / inter (presumably an IV_MB_TYPE_T value - confirm) */
+ WORD8 i1_mb_type;
+
+
+ /** SAD (sum of absolute differences), unsigned 16-bit */
+ UWORD16 u2_sad;
+
+ union /* anonymous union: as_mv and ai1_intra_mode share the same storage */
+ {
+ ih264e_mv_t as_mv[1]; /* one motion vector; presumably meaningful only for inter MBs - confirm */
+
+ /** Intra mode (one entry); presumably meaningful only for intra MBs - confirm */
+ WORD8 ai1_intra_mode[1];
+ };
+
+
+}ih264e_mb_info2_t;
+
+typedef struct
+{
+ /** MB type: intra / inter (presumably an IV_MB_TYPE_T value - confirm) */
+ WORD8 i1_mb_type;
+
+ union /* anonymous union: as_mv and ai1_intra_mode share the same storage */
+ {
+ ih264e_mv_t as_mv[4]; /* four motion vectors; NOTE(review): partition layout is not visible here - confirm */
+
+ /** Intra modes, 16 entries; NOTE(review): presumably one per 4x4 block - confirm */
+ WORD8 ai1_intra_mode[16];
+ };
+
+}ih264e_mb_info3_t;
+
+typedef struct
+{
+ /** MB type: intra / inter (presumably an IV_MB_TYPE_T value - confirm) */
+ WORD8 i1_mb_type;
+
+ /** Intra mode (single value); NOTE(review): relationship to ai1_intra_mode[] below is not visible here - confirm */
+ WORD8 i1_intra_mode;
+
+ /** SAD (sum of absolute differences), unsigned 16-bit */
+ UWORD16 u2_sad;
+
+ union /* anonymous union: as_mv and ai1_intra_mode share the same storage */
+ {
+ ih264e_mv_t as_mv[16]; /* sixteen motion vectors; NOTE(review): presumably one per 4x4 block - confirm */
+
+ /** Intra modes, 16 entries; NOTE(review): presumably one per 4x4 block - confirm */
+ WORD8 ai1_intra_mode[16];
+ };
+
+
+
+}ih264e_mb_info4_t;
+
+/* Add every new mb-info structure to the union below: the size of ih264e_mb_info_t is used to calculate the maximum memory needed for allocation. */
+typedef struct
+{
+ union /* anonymous union: its size is that of the largest member */
+ {
+ ih264e_mb_info1_t s_mb_info1;
+ ih264e_mb_info2_t s_mb_info2;
+ ih264e_mb_info3_t s_mb_info3;
+ ih264e_mb_info4_t s_mb_info4;
+ };
+}ih264e_mb_info_t;
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+#endif /* _IH264E_H_ */
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
new file mode 100755
index 0000000..e5c66ea
--- /dev/null
+++ b/encoder/ih264e_api.c
@@ -0,0 +1,5559 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_api.c
+*
+* @brief
+* Contains api function definitions for H264 encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - api_check_struct_sanity()
+* - ih264e_codec_update_config()
+* - ih264e_set_default_params()
+* - ih264e_init()
+* - ih264e_get_num_rec()
+* - ih264e_fill_num_mem_rec()
+* - ih264e_init_mem_rec()
+* - ih264e_retrieve_memrec()
+* - ih264e_set_flush_mode()
+* - ih264e_get_buf_info()
+* - ih264e_set_dimensions()
+* - ih264e_set_frame_rate()
+* - ih264e_set_bit_rate()
+* - ih264e_set_frame_type()
+* - ih264e_set_qp()
+* - ih264e_set_enc_mode()
+* - ih264e_set_vbv_params()
+* - ih264_set_air_params()
+* - ih264_set_me_params()
+* - ih264_set_ipe_params()
+* - ih264_set_gop_params()
+* - ih264_set_profile_params()
+* - ih264_set_deblock_params()
+* - ih264e_set_num_cores()
+* - ih264e_reset()
+* - ih264e_ctl()
+* - ih264e_api_function()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_core_coding.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_list.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_common_tables.h"
+#include "ih264e_master.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
+ iv_mem_rec_t *ps_mem,
+ ITT_FUNC_TYPE_E e_func_type); /* NOTE(review): local forward declaration; the definition is not in the visible part of this file - confirm whether an already-included header (e.g. ih264e_rate_control.h) declares it, making this redundant */
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to test arguments for corresponding API call
+*
+* @par Description:
+* For each command the arguments are validated
+*
+* @param[in] ps_handle
+* Codec handle at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input structure
+*
+* @param[out] pv_api_op
+* Pointer to output structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
+ void *pv_api_ip,
+ void *pv_api_op)
+{
+ /* api call */
+ WORD32 command = IV_CMD_NA;
+
+ /* input structure expected by the api call */
+ UWORD32 *pu4_api_ip = pv_api_ip;
+
+ /* output structure expected by the api call */
+ UWORD32 *pu4_api_op = pv_api_op;
+
+ /* temp var */
+ WORD32 i, j;
+
+ if (NULL == pv_api_op || NULL == pv_api_ip)
+ {
+ return (IV_FAIL);
+ }
+
+ /* get command */
+ command = pu4_api_ip[1];
+
+ /* set error code */
+ pu4_api_op[1] = 0;
+
+ /* error checks on handle */
+ switch (command)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ case IV_CMD_FILL_NUM_MEM_REC:
+ break;
+
+ case IV_CMD_INIT:
+ if (ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ break;
+
+ case IVE_CMD_QUEUE_INPUT:
+ case IVE_CMD_QUEUE_OUTPUT:
+ case IVE_CMD_DEQUEUE_OUTPUT:
+ case IVE_CMD_GET_RECON:
+ case IV_CMD_RETRIEVE_MEMREC:
+ case IVE_CMD_VIDEO_CTL:
+ case IVE_CMD_VIDEO_ENCODE:
+
+ if (ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->pv_fxns != ih264e_api_function)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_API_FUNCTION_PTR_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->pv_codec_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_CODEC_HANDLE;
+ return IV_FAIL;
+ }
+ break;
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+ return IV_FAIL;
+ }
+
+ /* error checks on input output structures */
+ switch (command)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ {
+ ih264e_num_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_num_mem_rec_op_t *ps_op = pv_api_op;
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_num_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_num_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IV_CMD_FILL_NUM_MEM_REC:
+ {
+ ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_fill_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_fill_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (max_wd < MIN_WD || max_wd > MAX_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (max_ht < MIN_HT || max_ht > MAX_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem rec ptr */
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem records */
+ if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+ return IV_FAIL;
+ }
+
+ /* check mem records sizes are correct */
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+ }
+
+ case IV_CMD_INIT:
+ {
+ ih264e_init_ip_t *ps_ip = pv_api_ip;
+ ih264e_init_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_init_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_init_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (max_wd < MIN_WD || max_wd > MAX_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (max_ht < MIN_HT || max_ht > MAX_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_reorder_cnt != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_10)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_1B)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_11)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_12)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_13)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_20)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_21)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_22)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_30)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_31)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_32)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_40)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_41)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_42)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_50)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_51))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_rc_mode != IVE_RC_NONE)
+ && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_STORAGE)
+ && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_CBR_NON_LOW_DELAY))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_framerate > DEFAULT_MAX_FRAMERATE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_bitrate > DEFAULT_MAX_BITRATE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_num_bframes != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.e_content_type != IV_PROGRESSIVE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_CONTENT_TYPE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_srch_rng_x > DEFAULT_MAX_SRCH_RANGE_X)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_srch_rng_y > DEFAULT_MAX_SRCH_RANGE_Y)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_NONE)
+ && (ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_BLOCKS))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_SLICE_TYPE_INPUT_INVALID;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ if (ps_ip->s_ive_ip.u4_slice_param == 0
+ || ps_ip->s_ive_ip.u4_slice_param > ((UWORD32)max_ht >> 4))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_SLICE_PARAM_INPUT_INVALID;
+ return (IV_FAIL);
+ }
+ }
+
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem records */
+ if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+ return (IV_FAIL);
+ }
+
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* check memrecords sizes are correct */
+ for (i = 0; i <((WORD32)ps_ip->s_ive_ip.u4_num_mem_rec); i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ /* check memrecords pointers are not NULL */
+ if (ps_mem_rec[i].pv_base == NULL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_BASE_POINTER_NULL;
+ return IV_FAIL;
+ }
+ }
+
+ /* verify memtabs for overlapping regions */
+ {
+ void *start[MEM_REC_CNT];
+ void *end[MEM_REC_CNT];
+
+ start[0] = (ps_mem_rec[0].pv_base);
+ end[0] = ((UWORD8 *) ps_mem_rec[0].pv_base)
+ + ps_mem_rec[0].u4_mem_size - 1;
+
+ for (i = 1; i < MEM_REC_CNT; i++)
+ {
+ /* This array is populated to check memtab overlap */
+ start[i] = (ps_mem_rec[i].pv_base);
+ end[i] = ((UWORD8 *) ps_mem_rec[i].pv_base)
+ + ps_mem_rec[i].u4_mem_size - 1;
+
+ for (j = 0; j < i; j++)
+ {
+ if ((start[i] >= start[j]) && (start[i] <= end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+
+ if ((end[i] >= start[j]) && (end[i] <= end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+
+ if ((start[i] < start[j]) && (end[i] > end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+
+ /* re-validate mem records with init config */
+ {
+ /* mem records */
+ iv_mem_rec_t s_mem_rec_ittiam_api[MEM_REC_CNT];
+
+ /* api interface structs */
+ ih264e_fill_mem_rec_ip_t s_ip;
+ ih264e_fill_mem_rec_op_t s_op;
+
+ /* error status */
+ IV_STATUS_T e_status;
+
+ /* temp var */
+ WORD32 i;
+
+ s_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t);
+ s_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t);
+
+ s_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC;
+ s_ip.s_ive_ip.ps_mem_rec = s_mem_rec_ittiam_api;
+ s_ip.s_ive_ip.u4_max_wd = max_wd;
+ s_ip.s_ive_ip.u4_max_ht = max_ht;
+ s_ip.s_ive_ip.u4_num_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+ s_ip.s_ive_ip.u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+ s_ip.s_ive_ip.u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+ s_ip.s_ive_ip.u4_max_reorder_cnt =
+ ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ s_ip.s_ive_ip.e_color_format = ps_ip->s_ive_ip.e_inp_color_fmt;
+ s_ip.s_ive_ip.u4_max_srch_rng_x =
+ ps_ip->s_ive_ip.u4_max_srch_rng_x;
+ s_ip.s_ive_ip.u4_max_srch_rng_y =
+ ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ s_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t);
+ }
+
+ /* fill mem records */
+ e_status = ih264e_api_function(NULL, (void *) &s_ip,
+ (void *) &s_op);
+
+ if (IV_FAIL == e_status)
+ {
+ ps_op->s_ive_op.u4_error_code = s_op.s_ive_op.u4_error_code;
+ return (IV_FAIL);
+ }
+
+ /* verify mem records */
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_mem_size
+ < s_mem_rec_ittiam_api[i].u4_mem_size)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_INSUFFICIENT_SIZE;
+
+ return IV_FAIL;
+ }
+
+ if (ps_mem_rec[i].u4_mem_alignment
+ != s_mem_rec_ittiam_api[i].u4_mem_alignment)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_ALIGNMENT_ERR;
+
+ return IV_FAIL;
+ }
+
+ if (ps_mem_rec[i].e_mem_type
+ != s_mem_rec_ittiam_api[i].e_mem_type)
+ {
+ UWORD32 check = IV_SUCCESS;
+ UWORD32 diff = s_mem_rec_ittiam_api[i].e_mem_type
+ - ps_mem_rec[i].e_mem_type;
+
+ if ((ps_mem_rec[i].e_mem_type
+ <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM)
+ && (s_mem_rec_ittiam_api[i].e_mem_type
+ >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM))
+ {
+ check = IV_FAIL;
+ }
+
+ if (3 != (s_mem_rec_ittiam_api[i].e_mem_type % 4))
+ {
+ /* It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or
+ * IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM */
+
+ if ((diff < 1) || (diff > 3))
+ {
+ /* Difference between 1 and 3 is okay for all cases other than the
+ * two filtered with the MOD condition above */
+ check = IV_FAIL;
+ }
+ }
+ else
+ {
+ if (diff == 1)
+ {
+ /* This particular case is when codec asked for External Persistent,
+ * but got Internal Scratch */
+ check = IV_FAIL;
+ }
+ if ((diff != 2) && (diff != 3))
+ {
+ check = IV_FAIL;
+ }
+ }
+
+ if (check == IV_FAIL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_INCORRECT_TYPE;
+
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case IVE_CMD_QUEUE_INPUT:
+ case IVE_CMD_QUEUE_OUTPUT:
+ case IVE_CMD_DEQUEUE_OUTPUT:
+ case IVE_CMD_GET_RECON:
+ break;
+
+ case IV_CMD_RETRIEVE_MEMREC:
+ {
+ ih264e_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_retrieve_mem_rec_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_retrieve_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_retrieve_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* check memrecords sizes are correct */
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+ }
+
+ case IVE_CMD_VIDEO_ENCODE:
+ {
+ ih264e_video_encode_ip_t *ps_ip = pv_api_ip;
+ ih264e_video_encode_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_video_encode_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_video_encode_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IVE_CMD_VIDEO_CTL:
+ {
+ /* ptr to input structure */
+ WORD32 *pu4_ptr_cmd = pv_api_ip;
+
+ /* sub command */
+ WORD32 sub_command = pu4_ptr_cmd[2];
+
+ switch (sub_command)
+ {
+ case IVE_CMD_CTL_SETDEFAULT:
+ {
+ ih264e_ctl_setdefault_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_setdefault_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_setdefault_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_GETBUFINFO:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_getbufinfo_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_getbufinfo_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_wd < MIN_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_wd > ps_codec->s_cfg.u4_max_wd)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ht < MIN_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ht > ps_codec->s_cfg.u4_max_ht)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_GETVERSION:
+ {
+ ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_getversioninfo_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_getversioninfo_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.pu1_version == NULL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_FLUSH:
+ {
+ ih264e_ctl_flush_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_flush_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_flush_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_flush_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_RESET:
+ {
+ ih264e_ctl_reset_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_reset_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_reset_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_reset_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_NUM_CORES:
+ {
+ ih264e_ctl_set_num_cores_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_num_cores_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_num_cores_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_num_cores_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_num_cores < 1)
+ || (ps_ip->s_ive_ip.u4_num_cores > MAX_NUM_CORES))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_NUM_CORES;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_DIMENSIONS:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_dimensions_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_dimensions_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_dimensions_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_wd < MIN_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_wd > ps_codec->s_cfg.u4_max_wd)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_ht < MIN_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_ht > ps_codec->s_cfg.u4_max_ht)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_FRAMERATE:
+ {
+ ih264e_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_frame_rate_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_frame_rate_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_frame_rate_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (((ps_ip->s_ive_ip.u4_src_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE)
+ || ((ps_ip->s_ive_ip.u4_tgt_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.u4_src_frame_rate == 0)
+ || (ps_ip->s_ive_ip.u4_tgt_frame_rate == 0))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_tgt_frame_rate
+ > ps_ip->s_ive_ip.u4_src_frame_rate)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_BITRATE:
+ {
+ ih264e_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_bitrate_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_bitrate_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_bitrate_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_target_bitrate > DEFAULT_MAX_BITRATE)
+ || (ps_ip->s_ive_ip.u4_target_bitrate == 0))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_BITRATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_FRAMETYPE:
+ {
+ ih264e_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_frame_type_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_frame_type_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_frame_type_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_frame_type != IV_NA_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_I_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_P_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_IDR_FRAME))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_FORCE_FRAME_INPUT;
+ return IV_FAIL;
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_ME_PARAMS:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_set_me_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_me_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_me_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_me_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_me_speed_preset != FULL_SRCH)
+ && (ps_ip->s_ive_ip.u4_me_speed_preset != DMND_SRCH)
+ && (ps_ip->s_ive_ip.u4_me_speed_preset != HEX_SRCH))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ME_SPEED_PRESET;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_hpel != 0)
+ && (ps_ip->s_ive_ip.u4_enable_hpel != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_HALFPEL_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_qpel != 0)
+ && (ps_ip->s_ive_ip.u4_enable_qpel != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_QPEL_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_fast_sad != 0)
+ && (ps_ip->s_ive_ip.u4_enable_fast_sad != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_FAST_SAD_OPTION;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_enable_alt_ref > 255)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ALT_REF_OPTION;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_srch_rng_x
+ > ps_codec->s_cfg.u4_max_srch_rng_x)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_srch_rng_y
+ > ps_codec->s_cfg.u4_max_srch_rng_y)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_IPE_PARAMS:
+ {
+ ih264e_ctl_set_ipe_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_ipe_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_ipe_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_ipe_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_intra_4x4 != 0)
+ && (ps_ip->s_ive_ip.u4_enable_intra_4x4 != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_INTRA4x4_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_CONFIG)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_SLOWEST)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_NORMAL)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FAST)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_HIGH_SPEED)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FASTEST))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ENC_SPEED_PRESET;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_GOP_PARAMS:
+ {
+ ih264e_ctl_set_gop_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_gop_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_gop_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_gop_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+ || (ps_ip->s_ive_ip.u4_i_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_INTRA_FRAME_INTERVAL;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_idr_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+ || (ps_ip->s_ive_ip.u4_idr_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_IDR_FRAME_INTERVAL;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_num_b_frames != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_BFRAMES_NOT_SUPPORTED;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+ {
+ ih264e_ctl_set_deblock_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_deblock_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_deblock_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_deblock_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_0)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_2)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_3)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_4))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_DEBLOCKING_TYPE_INPUT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_QP:
+ {
+ ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_qp_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_qp_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_qp_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp_max > MAX_H264_QP)
+ || (ps_ip->s_ive_ip.u4_p_qp_max > MAX_H264_QP)
+ || (ps_ip->s_ive_ip.u4_b_qp_max > MAX_H264_QP))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_MAX_FRAME_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max)
+ || (ps_ip->s_ive_ip.u4_p_qp_min > ps_ip->s_ive_ip.u4_p_qp_max)
+ || (ps_ip->s_ive_ip.u4_b_qp_min > ps_ip->s_ive_ip.u4_b_qp_max))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_MIN_FRAME_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp > ps_ip->s_ive_ip.u4_i_qp_max)
+ || (ps_ip->s_ive_ip.u4_p_qp > ps_ip->s_ive_ip.u4_p_qp_max)
+ || (ps_ip->s_ive_ip.u4_b_qp > ps_ip->s_ive_ip.u4_b_qp_max))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp < ps_ip->s_ive_ip.u4_i_qp_min)
+ || (ps_ip->s_ive_ip.u4_p_qp < ps_ip->s_ive_ip.u4_p_qp_min)
+ || (ps_ip->s_ive_ip.u4_b_qp < ps_ip->s_ive_ip.u4_b_qp_min))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_ENC_MODE:
+ {
+ ih264e_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_enc_mode_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_enc_mode_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_enc_mode_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_HEADER)
+ && (ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_PICTURE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ENC_OPERATION_MODE;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_VBV_PARAMS:
+ {
+ ih264e_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_vbv_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_vbv_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_vbv_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_vbv_buffer_delay < DEFAULT_MIN_BUFFER_DELAY)
+ || (ps_ip->s_ive_ip.u4_vbv_buffer_delay > DEFAULT_MAX_BUFFER_DELAY))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_BUFFER_DELAY;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_AIR_PARAMS:
+ {
+ ih264e_ctl_set_air_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_air_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_air_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_air_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_NONE)
+ && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_CYCLIC)
+ && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_RANDOM))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_AIR_MODE;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_air_refresh_period == 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_AIR_REFRESH_PERIOD;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+ {
+ ih264e_ctl_set_profile_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_profile_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_profile_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_profile_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_PROFILE_NOT_SUPPORTED;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_SUB_CMD;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+ return IV_FAIL;
+ }
+
+ return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Update encoder configuration parameters
+*
+* @par Description:
+* Updates the encoder configuration parameters from the given config set and
+* initializes/reinitializes the codec parameters affected by the new configuration.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_cfg
+* Pointer to config param set
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
+ cfg_params_t *ps_cfg)
+{
+ /* config params */
+ cfg_params_t *ps_curr_cfg = &ps_codec->s_cfg;
+
+ /* error status */
+ IH264E_ERROR_T err = IH264E_SUCCESS;
+
+ /* temp var */
+ UWORD32 u4_init_rc = 0;
+
+ /***********************/
+ /* UPDATE CODEC CONFIG */
+ /***********************/
+ if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DIMENSIONS)
+ {
+ UWORD32 wd_aln = ALIGN16(ps_cfg->u4_wd);
+ UWORD32 ht_aln = ALIGN16(ps_cfg->u4_ht);
+
+ if (ps_curr_cfg->u4_wd != wd_aln || ps_curr_cfg->u4_ht != ht_aln
+ || ps_curr_cfg->u4_strd != ps_cfg->u4_strd
+ || ps_curr_cfg->u4_disp_wd != ps_cfg->u4_disp_wd
+ || ps_curr_cfg->u4_disp_ht != ps_cfg->u4_disp_ht)
+ {
+ ps_curr_cfg->u4_wd = wd_aln;
+ ps_curr_cfg->u4_ht = ht_aln;
+ ps_curr_cfg->u4_strd = ps_cfg->u4_strd;
+
+ if (ps_curr_cfg->u4_strd == 0)
+ {
+ ps_curr_cfg->u4_strd = ps_curr_cfg->u4_wd;
+ }
+
+ ps_curr_cfg->u4_disp_wd = ps_cfg->u4_disp_wd;
+ ps_curr_cfg->u4_disp_ht = ps_cfg->u4_disp_ht;
+
+ ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4;
+ ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4;
+
+ ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+ ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD;
+
+ /* If number of MBs in a frame changes the air map also changes.
+ * Hence recompute air map also reset air pic cnt */
+ if (ps_codec->s_cfg.e_air_mode != IVE_AIR_MODE_NONE)
+ {
+ /* re-init the air map */
+ ih264e_init_air_map(ps_codec);
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+
+ /* initialize mv bank buffer manager */
+ err = ih264e_mv_buf_mgr_add_bufs(ps_codec);
+ if (err != IH264E_SUCCESS)
+ return err;
+
+ /* initialize ref bank buffer manager */
+ err = ih264e_pic_buf_mgr_add_bufs(ps_codec);
+ if (err != IH264E_SUCCESS)
+ return err;
+
+ /* since dimension changed, start new sequence by forcing IDR */
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+ /* in case dimension changes, we need to reinitialize RC as the
+ * old model shall not fit further */
+ u4_init_rc = 1;
+
+ /* when the dimension changes, the header needs to be regenerated */
+ ps_codec->i4_header_mode = 1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMERATE)
+ {
+ /* temp var */
+ UWORD32 u4_src_ticks, u4_tgt_ticks;
+
+ u4_src_ticks = ih264e_frame_time_get_src_ticks(
+ ps_codec->s_rate_control.pps_frame_time);
+
+ u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(
+ ps_codec->s_rate_control.pps_frame_time);
+
+ /* Change frame rate */
+ if (ps_codec->s_cfg.u4_src_frame_rate
+ != ps_cfg->u4_src_frame_rate * 1000)
+ {
+ ps_codec->s_cfg.u4_src_frame_rate = ps_cfg->u4_src_frame_rate
+ * 1000;
+
+ ih264e_frame_time_update_src_frame_rate(
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_cfg.u4_src_frame_rate);
+
+ ih264_time_stamp_update_frame_rate(
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_cfg.u4_src_frame_rate);
+
+ irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ u4_src_ticks, u4_tgt_ticks);
+ }
+
+ if (ps_codec->s_cfg.u4_tgt_frame_rate
+ != ps_cfg->u4_tgt_frame_rate * 1000)
+ {
+ ps_codec->s_cfg.u4_tgt_frame_rate = ps_cfg->u4_tgt_frame_rate
+ * 1000;
+
+ ih264e_frame_time_update_tgt_frame_rate(
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_cfg.u4_tgt_frame_rate);
+
+ irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ u4_src_ticks, u4_tgt_ticks);
+
+ irc_change_frm_rate_for_bit_alloc(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_tgt_frame_rate);
+ }
+
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_BITRATE)
+ {
+ if (ps_curr_cfg->u4_target_bitrate != ps_cfg->u4_target_bitrate)
+ {
+ if (IVE_RC_NONE != ps_curr_cfg->e_rc_mode)
+ irc_change_avg_bit_rate(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_cfg->u4_target_bitrate);
+
+ ps_curr_cfg->u4_target_bitrate = ps_cfg->u4_target_bitrate;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMETYPE)
+ {
+ switch (ps_cfg->e_frame_type)
+ {
+ case IV_I_FRAME:
+ ps_codec->force_curr_frame_type = IV_I_FRAME;
+ break;
+
+ case IV_IDR_FRAME:
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+ break;
+
+ case IV_P_FRAME:
+ default:
+ break;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ME_PARAMS)
+ {
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_codec->s_cfg.u4_enable_hpel = ps_cfg->u4_enable_hpel;
+ ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+ ps_codec->s_cfg.u4_me_speed_preset = ps_cfg->u4_me_speed_preset;
+ ps_codec->s_cfg.u4_enable_qpel = ps_cfg->u4_enable_qpel;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+ {
+ ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+ }
+ ps_codec->s_cfg.u4_srch_rng_x = ps_cfg->u4_srch_rng_x;
+ ps_codec->s_cfg.u4_srch_rng_y = ps_cfg->u4_srch_rng_y;
+
+ if (ps_codec->s_cfg.u4_enable_alt_ref != ps_cfg->u4_enable_alt_ref)
+ {
+ ps_codec->s_cfg.u4_enable_alt_ref = ps_cfg->u4_enable_alt_ref;
+ ps_codec->u4_is_curr_frm_ref = 1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_IPE_PARAMS)
+ {
+ ps_curr_cfg->u4_enc_speed_preset = ps_cfg->u4_enc_speed_preset;
+
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_SLOWEST)
+ {/* high quality */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 1;
+ ps_codec->luma_energy_compaction[1] =
+ ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+ /* half pel on */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_NORMAL)
+ {/* normal */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 1;
+
+ /* half pel on */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FAST)
+ {/* normal */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* half pel on */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* enabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED)
+ {/* fast */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_curr_cfg->u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+ {/* fastest */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ //u4_num_layers = 4;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_curr_cfg->u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* enabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_curr_cfg->u4_enable_intra_4x4 = ps_cfg->u4_enable_intra_4x4;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_GOP_PARAMS)
+ {
+ if (ps_curr_cfg->u4_i_frm_interval != ps_cfg->u4_i_frm_interval)
+ {
+ ps_curr_cfg->u4_i_frm_interval = ps_cfg->u4_i_frm_interval;
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+
+ /* re-init air map */
+ ih264e_init_air_map(ps_codec);
+
+ /*Effect intra frame interval change*/
+
+ irc_change_intra_frm_int_call(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_curr_cfg->u4_i_frm_interval);
+ }
+
+ ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval;
+
+ ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS)
+ {
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_curr_cfg->u4_disable_deblock_level =
+ ps_cfg->u4_disable_deblock_level;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_QP)
+ {
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ ps_codec->s_cfg.u4_i_qp_max = ps_cfg->u4_i_qp_max;
+ ps_codec->s_cfg.u4_i_qp_min = ps_cfg->u4_i_qp_min;
+ ps_codec->s_cfg.u4_i_qp = ps_cfg->u4_i_qp;
+
+ ps_codec->s_cfg.u4_p_qp_max = ps_cfg->u4_p_qp_max;
+ ps_codec->s_cfg.u4_p_qp_min = ps_cfg->u4_p_qp_min;
+ ps_codec->s_cfg.u4_p_qp = ps_cfg->u4_p_qp;
+
+ ps_codec->s_cfg.u4_b_qp_max = ps_cfg->u4_b_qp_max;
+ ps_codec->s_cfg.u4_b_qp_min = ps_cfg->u4_b_qp_min;
+ ps_codec->s_cfg.u4_b_qp = ps_cfg->u4_b_qp;
+
+ /* update rc lib with modified qp */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ irc_change_init_qp(ps_codec->s_rate_control.pps_rate_control_api,
+ au1_init_qp);
+
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ irc_change_min_max_qp(ps_codec->s_rate_control.pps_rate_control_api,
+ au1_min_max_qp);
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ENC_MODE)
+ {
+ ps_codec->s_cfg.e_enc_mode = ps_cfg->e_enc_mode;
+
+ if (ps_codec->s_cfg.e_enc_mode == IVE_ENC_MODE_HEADER)
+ {
+ ps_codec->i4_header_mode = 1;
+ ps_codec->s_cfg.e_enc_mode = IVE_ENC_MODE_PICTURE;
+ }
+ else
+ {
+ ps_codec->i4_header_mode = 0;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_VBV_PARAMS
+ && IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+ {
+ ps_codec->s_cfg.u4_vbv_buf_size = ps_cfg->u4_vbv_buf_size;
+ ps_codec->s_cfg.u4_vbv_buffer_delay = ps_cfg->u4_vbv_buffer_delay;
+
+ // irc_change_buffer_delay(ps_codec->s_rate_control.pps_rate_control_api, ps_codec->s_cfg.u4_vbv_buffer_delay);
+
+ // TODO: remove this when the support for changing buffer dynamically
+ // is yet to be added.
+ u4_init_rc = 1;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_AIR_PARAMS)
+ {
+ if (ps_curr_cfg->e_air_mode != ps_cfg->e_air_mode
+ || ps_curr_cfg->u4_air_refresh_period
+ != ps_cfg->u4_air_refresh_period)
+ {
+ ps_curr_cfg->e_air_mode = ps_cfg->e_air_mode;
+ ps_curr_cfg->u4_air_refresh_period = ps_cfg->u4_air_refresh_period;
+
+ ih264e_init_air_map(ps_codec);
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS)
+ {
+ ps_codec->s_cfg.e_profile = ps_cfg->e_profile;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES)
+ {
+ ps_codec->s_cfg.u4_num_cores = ps_cfg->u4_num_cores;
+ }
+
+ /* reset RC model */
+ if (u4_init_rc)
+ {
+ /* init qp */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* min max qp */
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ /* init i,p,b qp */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ /* init min max qp */
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ /* get rc mode */
+ switch (ps_codec->s_cfg.e_rc_mode)
+ {
+ case IVE_RC_STORAGE:
+ ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+ break;
+
+ case IVE_RC_CBR_NON_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+ break;
+
+ case IVE_RC_CBR_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+ break;
+
+ case IVE_RC_NONE:
+ ps_codec->s_rate_control.e_rc_type = CONST_QP;
+ break;
+
+ default:
+ break;
+ }
+
+ /* init rate control */
+ ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_rate_control.pps_pd_frm_rate,
+ ps_codec->s_cfg.u4_max_framerate,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ ps_codec->s_cfg.u4_tgt_frame_rate,
+ ps_codec->s_rate_control.e_rc_type,
+ ps_codec->s_cfg.u4_target_bitrate,
+ ps_codec->s_cfg.u4_max_bitrate,
+ ps_codec->s_cfg.u4_vbv_buffer_delay,
+ ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+ H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_max_level);
+ }
+
+ return err;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Populates an encoder configuration structure with built-in defaults
+*
+* @par Description:
+*  Writes a default value into every field of the configuration that this
+*  function knows about, so that the codec has a usable configuration even
+*  if the application never issues a set_params control call. All maximum
+*  and actual dimensions default to MAX_WD x MAX_HT.
+*
+* @param[in,out] ps_cfg
+*  Pointer to the encoder config params to be filled in
+*
+* @returns IV_SUCCESS (this function has no failure path)
+*
+* @remarks
+*  i4_wd_mbs / i4_ht_mbs are derived from u4_max_wd / u4_max_ht, and
+*  i4_min_sad is derived from u4_enable_satqd, so those source fields are
+*  assigned first.
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
+{
+    /* create time capability limits */
+    ps_cfg->u4_max_wd = MAX_WD;
+    ps_cfg->u4_max_ht = MAX_HT;
+    ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+    ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE;
+    ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+    ps_cfg->u4_max_num_bframes = 0;
+    ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+    ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+
+    /* max dimensions shifted right by 4, i.e. divided by 16 */
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+
+    /* input / recon formats and content type */
+    ps_cfg->e_inp_color_fmt = IV_YUV_420SP_UV;
+    ps_cfg->u4_enable_recon = DEFAULT_RECON_ENABLE;
+    ps_cfg->e_recon_color_fmt = IV_YUV_420P;
+    ps_cfg->e_content_type = IV_PROGRESSIVE;
+
+    /* target platform */
+    ps_cfg->e_arch = ih264e_default_arch();
+    ps_cfg->e_soc = SOC_GENERIC;
+    ps_cfg->u4_num_cores = DEFAULT_NUM_CORES;
+
+    /* source and display dimensions */
+    ps_cfg->u4_wd = MAX_WD;
+    ps_cfg->u4_ht = MAX_HT;
+    ps_cfg->u4_disp_wd = MAX_WD;
+    ps_cfg->u4_disp_ht = MAX_HT;
+    ps_cfg->u4_strd = ALIGN16(MAX_WD);
+
+    /* rate control, frame rates, bit rate and vbv */
+    ps_cfg->e_rc_mode = DEFAULT_RC;
+    ps_cfg->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+    ps_cfg->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+    ps_cfg->u4_target_bitrate = DEFAULT_BITRATE;
+    ps_cfg->u4_vbv_buffer_delay = DEFAULT_VBV_DELAY;
+    ps_cfg->u4_vbv_buf_size = DEFAULT_VBV_SIZE;
+
+    /* initial qp for i, p, b pictures */
+    ps_cfg->u4_i_qp = DEFAULT_I_QP;
+    ps_cfg->u4_p_qp = DEFAULT_P_QP;
+    ps_cfg->u4_b_qp = DEFAULT_B_QP;
+
+    /* qp range for i, p, b pictures */
+    ps_cfg->u4_i_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_i_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_p_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_p_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_b_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_b_qp_max = DEFAULT_QP_MAX;
+
+    /* gop structure */
+    ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL;
+    ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL;
+    ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES;
+
+    /* adaptive intra refresh */
+    ps_cfg->e_air_mode = DEFAULT_AIR_MODE;
+    ps_cfg->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD;
+
+    /* slice partitioning */
+    ps_cfg->e_slice_mode = IVE_SLICE_MODE_NONE;
+    ps_cfg->u4_slice_param = DEFAULT_SLICE_PARAM;
+
+    /* speed preset, motion estimation and coding tools */
+    ps_cfg->u4_enc_speed_preset = IVE_FASTEST;
+    ps_cfg->u4_me_speed_preset = DEFAULT_ME_SPEED_PRESET;
+    ps_cfg->u4_srch_rng_x = DEFAULT_SRCH_RNG_X;
+    ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
+    ps_cfg->u4_enable_hpel = DEFAULT_HPEL;
+    ps_cfg->u4_enable_qpel = DEFAULT_QPEL;
+    ps_cfg->u4_enable_intra_4x4 = DEFAULT_I4;
+    ps_cfg->u4_enable_intra_8x8 = DEFAULT_I8;
+    ps_cfg->u4_enable_intra_16x16 = DEFAULT_I16;
+    ps_cfg->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD;
+    ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL;
+
+    /* satqd, and the min sad value that is chosen based on it */
+    ps_cfg->u4_enable_satqd = DEFAULT_ENABLE_SATQD;
+    if (ps_cfg->u4_enable_satqd == DEFAULT_ENABLE_SATQD)
+    {
+        ps_cfg->i4_min_sad = DEFAULT_MIN_SAD_ENABLE;
+    }
+    else
+    {
+        ps_cfg->i4_min_sad = DEFAULT_MIN_SAD_DISABLE;
+    }
+
+    /* profile and bitstream syntax options */
+    ps_cfg->e_profile = DEFAULT_PROFILE;
+    ps_cfg->u4_entropy_coding_mode = CAVLC;
+    ps_cfg->u4_weighted_prediction = 0;
+    ps_cfg->u4_constrained_intra_pred = 0;
+
+    /* pic / mb info type */
+    ps_cfg->u4_pic_info_type = 0;
+    ps_cfg->u4_mb_info_type = 0;
+
+    /* frame type, encode mode and control call bookkeeping */
+    ps_cfg->e_frame_type = IV_NA_FRAME;
+    ps_cfg->e_enc_mode = IVE_ENC_MODE_DEFAULT;
+    ps_cfg->u4_timestamp_low = 0;
+    ps_cfg->u4_timestamp_high = 0;
+    ps_cfg->u4_is_valid = 1;
+    ps_cfg->e_cmd = IVE_CMD_CT_NA;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize encoder context. This will be called by init_mem_rec and during
+*  codec reset
+*
+* @par Description:
+*  Resets the counters, flags and ids held in the codec context, initializes
+*  the control and entropy mutexes, invalidates all SPS / PPS entries, sets up
+*  the process and entropy job queues, hands the queue handles to every
+*  process context, initializes the mv / reference / input / output buffer
+*  managers and the dpb manager, clears the reference set, installs the
+*  function pointers and resets the per context-set rate control flags.
+*
+* @param[in] ps_codec
+*  Codec context pointer. The memory bases used here (job queue buffers,
+*  buffer manager bases, pic buf base, sps / pps bases, mutexes) are read
+*  from the context and must already be populated by the caller.
+*
+* @returns IV_SUCCESS on success; IV_FAIL (through RETURN_IF) when
+*  ih264_list_init() returns NULL for either job queue
+*
+* @remarks
+*  NOTE(review): i4_init_done is set to 1 before the job queues are created,
+*  so it stays set when this function returns IV_FAIL - confirm that callers
+*  check the return value rather than the flag.
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init(codec_t *ps_codec)
+{
+    /* enc config param set */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* temp var */
+    WORD32 i;
+
+    /* coded pic count */
+    ps_codec->i4_coded_pic_cnt = 0;
+
+    /* Number of API calls to encode are made */
+    ps_codec->i4_encode_api_call_cnt = -1;
+
+    /* Indicates no header has been generated yet */
+    ps_codec->u4_header_generated = 0;
+
+    /* Number of pictures encoded */
+    ps_codec->i4_pic_cnt = -1;
+
+    /* Number of threads created */
+    ps_codec->i4_proc_thread_cnt = 0;
+
+    /* ctl mutex init */
+    ithread_mutex_init(ps_codec->pv_ctl_mutex);
+
+    /* Set encoder chroma format: VU interleaved only when the input is VU
+     * interleaved, UV interleaved for every other input format */
+    ps_codec->e_codec_color_format =
+                    (ps_cfg->e_inp_color_fmt == IV_YUV_420SP_VU) ?
+                                    IV_YUV_420SP_VU : IV_YUV_420SP_UV;
+
+    /* Number of continuous frames where deblocking was disabled */
+    ps_codec->i4_disable_deblk_pic_cnt = 0;
+
+    /* frame num */
+    ps_codec->i4_frame_num = -1;
+
+    /* no frame type is being forced at start up (IV_NA_FRAME) */
+    ps_codec->force_curr_frame_type = IV_NA_FRAME;
+
+    /* idr_pic_id */
+    ps_codec->i4_idr_pic_id = -1;
+
+    /* Flush mode */
+    ps_codec->i4_flush_mode = 0;
+
+    /* Encode header mode */
+    ps_codec->i4_header_mode = 0;
+
+    /* Encode generate header */
+    ps_codec->i4_gen_header = 0;
+
+    /* To signal successful completion of init */
+    ps_codec->i4_init_done = 1;
+
+    /* To signal that at least one picture was encoded */
+    ps_codec->i4_first_pic_done = 0;
+
+    /* Reset Codec */
+    ps_codec->i4_reset_flag = 0;
+
+    /* Current error code */
+    ps_codec->i4_error_code = IH264E_SUCCESS;
+
+    /* threshold residue */
+    ps_codec->u4_thres_resi = 1;
+
+    /* inter gating enable */
+    ps_codec->u4_inter_gate = 0;
+
+    /* entropy mutex init */
+    ithread_mutex_init(ps_codec->pv_entropy_mutex);
+
+    /* sps id */
+    ps_codec->i4_sps_id = 0;
+
+    /* pps id */
+    ps_codec->i4_pps_id = 0;
+
+    /* Process thread created status. memset() takes a size in bytes, so use
+     * the size of the whole array; passing the element count alone would
+     * leave most of the entries untouched. */
+    memset(ps_codec->ai4_process_thread_created, 0,
+           sizeof(ps_codec->ai4_process_thread_created));
+
+    /* Number of MBs processed together */
+    ps_codec->i4_proc_nmb = 8;
+
+    /* Previous POC msb */
+    ps_codec->i4_prev_poc_msb = 0;
+
+    /* Previous POC lsb */
+    ps_codec->i4_prev_poc_lsb = -1;
+
+    /* max Previous POC lsb */
+    ps_codec->i4_max_prev_poc_lsb = -1;
+
+    /* sps, pps status: mark every entry as not valid */
+    {
+        sps_t *ps_sps = ps_codec->ps_sps_base;
+        pps_t *ps_pps = ps_codec->ps_pps_base;
+
+        for (i = 0; i < MAX_SPS_CNT; i++)
+        {
+            ps_sps->i1_sps_valid = 0;
+            ps_sps++;
+        }
+
+        for (i = 0; i < MAX_PPS_CNT; i++)
+        {
+            ps_pps->i1_pps_valid = 0;
+            ps_pps++;
+        }
+    }
+
+    {
+        WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
+
+        /* two jobs per row of MBs */
+        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 clz;
+
+        /* Use next power of two number of entries */
+        clz = CLZ(num_jobs);
+        num_jobs = 1 << (32 - clz);
+
+        /* init process jobq */
+        ps_codec->pv_proc_jobq = ih264_list_init(
+                        ps_codec->pv_proc_jobq_buf,
+                        ps_codec->i4_proc_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_proc_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_proc_jobq);
+
+        /* init entropy jobq */
+        ps_codec->pv_entropy_jobq = ih264_list_init(
+                        ps_codec->pv_entropy_jobq_buf,
+                        ps_codec->i4_entropy_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_entropy_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_entropy_jobq);
+    }
+
+    /* Update the jobq context to all the threads */
+    for (i = 0; i < MAX_PROCESS_CTXT; i++)
+    {
+        ps_codec->as_process[i].pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].pv_entropy_jobq = ps_codec->pv_entropy_jobq;
+
+        /* i4_id always stays between 0 and MAX_PROCESS_THREADS */
+        ps_codec->as_process[i].i4_id =
+                        (i >= MAX_PROCESS_THREADS) ?
+                                        (i - MAX_PROCESS_THREADS) : i;
+        ps_codec->as_process[i].ps_codec = ps_codec;
+
+        ps_codec->as_process[i].s_entropy.pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].s_entropy.pv_entropy_jobq =
+                        ps_codec->pv_entropy_jobq;
+        ps_codec->as_process[i].s_entropy.i4_abs_pic_order_cnt = -1;
+    }
+
+    /* Initialize MV Bank buffer manager */
+    ps_codec->pv_mv_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_mv_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for reference buffers */
+    ps_codec->pv_ref_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_ref_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for input buffers */
+    ps_codec->pv_inp_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_inp_buf_mgr_base);
+
+    /* Initialize buffer manager for output buffers */
+    ps_codec->pv_out_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_out_buf_mgr_base);
+
+    /* buffer cnt in buffer manager */
+    ps_codec->i4_inp_buf_cnt = 0;
+    ps_codec->i4_out_buf_cnt = 0;
+    ps_codec->i4_ref_buf_cnt = 0;
+
+    /* picture buffer descriptors live at the pic buf base; clear them all */
+    ps_codec->ps_pic_buf = (pic_buf_t *) ps_codec->pv_pic_buf_base;
+    memset(ps_codec->ps_pic_buf, 0, BUF_MGR_MAX_CNT * sizeof(pic_buf_t));
+
+    /* Initialize dpb manager */
+    ih264_dpb_mgr_init((dpb_mgr_t*) ps_codec->pv_dpb_mgr);
+
+    /* clear the reference set and tag every entry with pic cnt -1 */
+    memset(ps_codec->as_ref_set, 0,
+           sizeof(ref_set_t) * (MAX_DPB_SIZE + MAX_CTXT_SETS));
+    for (i = 0; i < (MAX_DPB_SIZE + MAX_CTXT_SETS); i++)
+    {
+        ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+    }
+
+    /* fn ptr init */
+    ih264e_init_function_ptr(ps_codec);
+
+    /* reset status flags */
+    for (i = 0; i < MAX_CTXT_SETS; i++)
+    {
+        ps_codec->au4_entropy_thread_active[i] = 0;
+        ps_codec->ai4_pic_cnt[i] = -1;
+
+        ps_codec->s_rate_control.pre_encode_skip[i] = 0;
+        ps_codec->s_rate_control.post_encode_skip[i] = 0;
+    }
+
+    ps_codec->s_rate_control.num_intra_in_prev_frame = 0;
+    ps_codec->s_rate_control.i4_avg_activity = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Reports how many memory records the codec needs
+*
+* @par Description:
+*  Writes the constant MEM_REC_CNT into the u4_num_mem_rec field of the
+*  output structure. The input structure is not examined.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (unused)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure, accessed as ih264e_num_mem_rec_op_t
+*
+* @returns IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_num_rec(void *pv_api_ip, void *pv_api_op)
+{
+    /* typed view of the caller supplied output structure */
+    ih264e_num_mem_rec_op_t *ps_num_rec_op =
+                    (ih264e_num_mem_rec_op_t *) pv_api_op;
+
+    /* the input structure carries nothing this call needs */
+    UNUSED(pv_api_ip);
+
+    ps_num_rec_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Fills memory records of the codec
+*
+* @par Description:
+* Fills codec memory requirements
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
+{
+ /* api call I/O structures */
+ ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+ /* profile / level info */
+ WORD32 level;
+ WORD32 num_reorder_frames;
+ WORD32 num_ref_frames;
+
+ /* mem records */
+ WORD32 no_of_mem_rec;
+ iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+ /* frame dimensions */
+ WORD32 max_wd_luma, max_ht_luma;
+ WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+ /* temp var */
+ WORD32 i;
+
+ /* error status */
+ IV_STATUS_T status = IV_SUCCESS;
+
+ /* profile / level info */
+ level = ps_ip->s_ive_ip.u4_max_level;
+ num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
+
+ /* mem records */
+ ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+ no_of_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+
+ /* frame dimensions */
+ max_ht_luma = ps_ip->s_ive_ip.u4_max_ht;
+ max_wd_luma = ps_ip->s_ive_ip.u4_max_wd;
+ max_ht_luma = ALIGN16(max_ht_luma);
+ max_wd_luma = ALIGN16(max_wd_luma);
+ max_mb_rows = max_ht_luma / MB_SIZE;
+ max_mb_cols = max_wd_luma / MB_SIZE;
+ max_mb_cnt = max_mb_rows * max_mb_cols;
+
+ /* validate params */
+ if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ level = MAX_LEVEL;
+ }
+
+ if (num_ref_frames > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ num_ref_frames = MAX_REF_CNT;
+ }
+
+ if (num_reorder_frames > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ num_reorder_frames = MAX_REF_CNT;
+ }
+
+ /* Set all memory records as persistent and alignment as 128 by default */
+ ps_mem_rec = ps_mem_rec_base;
+ for (i = 0; i < no_of_mem_rec; i++)
+ {
+ ps_mem_rec->u4_mem_alignment = 128;
+ ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ ps_mem_rec++;
+ }
+
+ /************************************************************************
+ * Request memory for h264 encoder handle *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_IV_OBJ];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(iv_obj_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_IV_OBJ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for h264 encoder context *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(codec_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for entropy context *
+ * In multi core encoding, each row is assumed to be launched on a *
+ * thread. The rows below can only start after its neighbors are coded *
+ * The status of an mb coded/uncoded is signaled via entropy map. *
+ * 1. One word32 to store skip run cnt *
+ * 2. mb entropy map (mb status entropy coded/uncoded). The size*
+ * of the entropy map is max mb cols. Further allocate one *
+ * more additional row to evade checking for row -1. *
+ * 3. size of bit stream buffer to store bit stream ctxt. *
+ * 4. Entropy coding is dependent on nnz coefficient count for *
+ * the neighbor blocks. It is sufficient to maintain one row *
+ * worth of nnz as entropy for lower row waits on entropy map*
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size of skip mb run */
+ total_size += sizeof(WORD32);
+ total_size = ALIGN8(total_size);
+
+ /* size in bytes to store entropy status of an entire frame */
+ total_size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+ total_size = ALIGN128(total_size);
+
+ /* size of bit stream buffer */
+ total_size += sizeof(bitstrm_t);
+ total_size = ALIGN128(total_size);
+
+ /* top nnz luma */
+ total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+ total_size = ALIGN128(total_size);
+
+ /* top nnz cbcr */
+ total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+ total_size = ALIGN128(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * The residue coefficients that needs to be entropy coded are packed *
+ * at a buffer space by the proc threads. The entropy thread shall *
+ * read from the buffer space, unpack them and encode the same. The *
+ * buffer space required to pack a row of mbs are as follows. *
+ * Assuming transform_8x8_flag is disabled, *
+ * In the worst case, 1 mb contains 1 dc 4x4 luma sub block, followed *
+ * by 16 ac 4x4 luma sub blocks, 2 dc chroma 2x2 sub blocks, followed *
+ * by 8 ac 4x4 chroma sub blocks. *
+ * For the sake of simplicity we assume that all sub blocks are of *
+ * type 4x4. The packing of each 4x4 is depicted by the structure *
+ * tu_sblk_coeff_data_t *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+ {
+ /* temp var */
+ WORD32 size = 0;
+
+ /* size of coeff data of 1 mb */
+ size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+ /* size of coeff data of 1 row of mb's */
+ size *= max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ /* size of each proc buffer set (ping, pong) */
+ size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_COEFF_DATA, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * while encoding an mb, the mb header data is signaled to the entropy*
+ * thread by writing to a buffer space. the size of header data per mb *
+ * is assumed to be 40 bytes *
+ * TODO: revisit this inference *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+ {
+ /* temp var */
+ WORD32 size;
+
+ /* size per MB */
+ size = 40;
+
+ /* size for 1 row of mbs */
+ size = size * max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ /* size of each proc buffer set (ping, pong) */
+ size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_HEADER_DATA, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for holding mv_buf_t for each MV Bank. *
+ * Note this allocation is done for BUF_MGR_MAX_CNT instead of *
+ * MAX_DPB_SIZE or max_dpb_size for following reasons *
+ * max_dpb_size will be based on max_wd and max_ht *
+ * For higher max_wd and max_ht this number will be smaller than *
+ * MAX_DPB_SIZE But during actual initialization number of buffers *
+ * allocated can be more. *
+ * *
+ * One extra MV Bank is needed to hold current pics MV bank. *
+ * Since this is only a structure allocation and not actual buffer *
+ * allocation, it is allocated for BUF_MGR_MAX_CNT entries *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+ {
+ /* max luma samples */
+ WORD32 max_luma_samples = 0;
+
+ /* determine max luma samples */
+ for (i = 0; i < 16; i++)
+ if (level ==(WORD32)gas_ih264_lvl_tbl[i].u4_level_idc)
+ max_luma_samples = gas_ih264_lvl_tbl[i].u4_max_fs
+ << (BLK_SIZE + BLK_SIZE);
+
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+
+ /************************************************************************
+ * Allocate for pu_map, enc_pu_t and pic_pu_idx for each MV bank *
+ * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+ * is set to maximum number of luma samples allowed at the given level. *
+ * This is done to ensure that any stream with width and height lesser *
+ * than max_wd and max_ht is supported. Number of buffers required can *
+ * be greater for lower width and heights at a given level and this *
+ * increased number of buffers might require more memory than what *
+ * max_wd and max_ht buffer would have required Also note one extra *
+ * buffer is allocated to store current pictures MV bank. *
+ ***********************************************************************/
+
+ ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+ ps_mem_rec->u4_mem_size += (num_ref_frames + num_reorder_frames
+ + MAX_CTXT_SETS)
+ * ih264e_get_pic_mv_bank_size(max_luma_samples);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBANK, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * While encoding inter slices, to compute the cost of encoding an mb *
+ * with the mv's at hand, we employ the expression cost = sad + lambda *
+ * x mv_bits. Here mv_bits is the total number of bits taken to represe*
+ * nt the mv in the stream. The mv bits for all the possible mv are *
+ * stored in the look up table. The mem record for this look up table *
+ * is given below. *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+ {
+ /* max srch range x */
+ UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+ /* max srch range y */
+ UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ /* max srch range */
+ UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+ /* due to subpel */
+ u4_max_srch_range <<= 2;
+
+ /* due to mv on either direction */
+ u4_max_srch_range = (u4_max_srch_range << 1);
+
+ /* due to pred mv + zero */
+ u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+ u4_max_srch_range = ALIGN128(u4_max_srch_range);
+
+ ps_mem_rec->u4_mem_size = u4_max_srch_range;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBITS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for SPS *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+ {
+ ps_mem_rec->u4_mem_size = MAX_SPS_CNT * sizeof(sps_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SPS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for PPS *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+ {
+ ps_mem_rec->u4_mem_size = MAX_PPS_CNT * sizeof(pps_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PPS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for Slice Header *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+ {
+ ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * MAX_SLICE_HDR_CNT
+ * sizeof(slice_header_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_HDR, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for Adaptive Intra Refresh *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* intra coded map */
+ total_size += max_mb_cnt;
+ total_size *= MAX_CTXT_SETS;
+
+ /* mb refresh map */
+ total_size += sizeof(UWORD16) * max_mb_cnt;
+
+ /* alignment */
+ total_size = ALIGN128(total_size);
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_AIR_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * In multi slice encoding, this memory record helps tracking the start*
+ * of slice with reference to mb. *
+ * MEM RECORD for holding *
+ * 1. mb slice map *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to slice index of all mbs of a frame */
+ total_size = ALIGN64(max_mb_cnt);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold thread handles for each processing thread *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+ {
+ WORD32 handle_size = ithread_get_handle_size();
+
+ ps_mem_rec->u4_mem_size = MAX_PROCESS_THREADS * handle_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_THREAD_HANDLE, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mutex for control calls *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+ {
+ ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CTL_MUTEX, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mutex for entropy calls *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+ {
+ ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_MUTEX, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold process jobs *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+ {
+ /* One process job per row of MBs */
+ /* Allocate for two pictures, so that wrap around can be handled easily */
+ WORD32 num_jobs = max_mb_rows * 2;
+
+ WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+ ps_mem_rec->u4_mem_size = job_queue_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_JOBQ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold entropy jobs *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+ {
+ /* One entropy job per row of MBs */
+ /* Allocate for two pictures, so that wrap around can be handled easily */
+ WORD32 num_jobs = max_mb_rows * 2;
+
+ WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+ ps_mem_rec->u4_mem_size = job_queue_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_JOBQ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * In multi core encoding, each row is assumed to be launched on a *
+ * thread. The rows below can only start after its neighbors are coded *
+ * The status of an mb coded/uncoded is signaled via proc map. *
+ * MEM RECORD for holding *
+ * 1. mb proc map (mb status core coded/uncoded) *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * mem record for holding a particular MB is deblocked or not *
+ * 1. mb deblk map (mb status deblocked/not deblocked) *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ total_size = ALIGN64(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DBLK_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * mem record for holding a particular MB's me is done or not *
+ * 1. mb me map *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ME_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding dpb manager context *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(dpb_mgr_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DPB_MGR, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * luma or chroma core coding involves mb estimation, error computation*
+ * between the estimated signal and the actual signal, transform the  *
+ * error, quantize the error, then inverse transform and inverse quant *
+ * ize the residue and add the result back to estimated signal. *
+ * To perform all these, a set of temporary buffers are needed. *
+ * MEM RECORD for holding scratch buffers *
+ * 1. prediction buffer used during mb mode analysis *
+ * 2 temp. reference buffer when intra 4x4 with rdopt on is *
+ * enabled *
+ * - when intra 4x4 is enabled, rdopt is on, to store the *
+ * reconstructed values and use them later this temp. buffer *
+ * is used. *
+ * 3. prediction buffer used during intra mode analysis *
+ * 4. prediction buffer used during intra 16x16 plane mode *
+ * analysis
+ * 5. prediction buffer used during intra chroma mode analysis *
+ * 6. prediction buffer used during intra chroma 16x16 plane *
+ * mode analysis
+ * 7. forward transform output buffer *
+ * - to store the error between estimated and the actual inp *
+ * ut and to store the fwd transformed quantized output *
+ * 8. forward transform output buffer *
+ * - when intra 4x4 is enabled, rdopt is on, to store the *
+ * fwd transform values and use them later this temp. buffer *
+ * is used. *
+ * 9. temporary buffer for inverse transform *
+ * - temporary buffer used in inverse transform and inverse *
+ * quantization *
+ * A. Buffers for holding half_x , half_y and half_xy planes *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+ {
+ WORD32 total_size = 0;
+
+ /* size to hold prediction buffer */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold recon for intra 4x4 buffer */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra 16x16 */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra 16x16 plane*/
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra chroma*/
+ total_size += sizeof(UWORD8) * 16 * 8;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra chroma plane*/
+ total_size += sizeof(UWORD8) * 16 * 8;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold fwd transform output */
+ total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold fwd transform output */
+ total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold temporary data during inverse transform */
+ total_size += sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+ total_size = ALIGN64(total_size);
+
+ /* Buffers for holding half_x , half_y and half_xy planes */
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ /* Allocate for each process thread */
+ total_size *= MAX_PROCESS_CTXT;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_SCRATCH, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * When transform_8x8_flag is disabled, the size of a sub block is *
+ * 4x4 and when the transform_8x8_flag is enabled the size of the sub *
+ * block is 8x8. The threshold matrix and the forward scaling list *
+ * is of the size of the sub block. *
+ * MEM RECORD for holding *
+ * 1. quantization parameters for plane y, cb, cr *
+ * - threshold matrix for quantization *
+ * - forward weight matrix *
+ * - satqd threshold matrix *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* quantization parameter list for planes y,cb and cr */
+ total_size += ALIGN64(sizeof(quant_params_t)) * 3;
+
+ /* size of threshold matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for all 3 planes */
+ total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+ /* size of forward weight matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for all 3 planes */
+ total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+ /* Size for SATDQ threshold matrix for planes y, cb and cr */
+ total_size += ALIGN64(sizeof(UWORD16) * 9) * 3;
+
+ /* total size per each proc thread */
+ total_size *= MAX_PROCESS_CTXT;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_QUANT_PARAM, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * While computing blocking strength for the current mb, the csbp, mb *
+ * type for the neighboring mbs are necessary. memtab for storing top *
+ * row mbtype and csbp is evaluated here. *
+ * *
+ * when encoding intra 4x4 or intra 8x8 the submb types are estimated *
+ * and sent. The estimation is dependent on neighbor mbs. For this *
+ * store the top row sub mb types for intra mbs *
+ * *
+ * During motion vector prediction, the curr mb mv is predicted from *
+ * neighbors left, top, top right and sometimes top left depending on *
+ * the availability. The top and top right content is accessed from *
+ * the memtab specified below. *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store 1 row of mb_info_t */
+ /* one additional mb, to avoid checking end of row condition */
+ total_size += (max_mb_cols + 1) * sizeof(mb_info_t);
+
+ /* size in bytes to store 1 row of intra macroblock sub modes */
+ total_size += max_mb_cols * sizeof(UWORD8) * 16;
+
+ /* size in bytes to store 1 row + 1 of enc_pu_t */
+ /* one additional mb, to avoid checking end of row condition */
+ total_size += (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+ /* total size per proc ctxt */
+ total_size = ALIGN128(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_TOP_ROW_SYN_INFO, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * When transform_8x8_flag is enabled, the mb is partitioned into *
+ * 4 sub blocks. This corresponds to 1 vertical left edge and 1 *
+ * vertical inner edge, 1 horizontal top edge and 1 horizontal *
+ * inner edge per mb. Further, when transform_8x8_flag is disabled, *
+ * the mb is partitioned in to 16 sub blocks. This corresponds to *
+ * 1 vertical left edge and 3 vertical inner edges, 1 horizontal top *
+ * edge and 3 horizontal inner edges per mb. *
+ * MEM RECORD for holding *
+ * 1. vertical edge blocking strength *
+ * 2. horizontal edge blocking strength *
+ * 3. mb qp *
+ * all are frame level *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+ WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+ /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+ /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+ /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* qp of each mb requires 1 byte */
+ qp_size = ALIGN64(max_mb_cnt);
+
+ /* total size */
+ total_size = vert_bs_size + horz_bs_size + qp_size;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BS_QP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding the buffer manager for input pictures *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_INP_PIC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding the buffer manager for output buffers *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_OUT, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for color space conversion *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+ {
+ /* We need a total a memory for a single frame of 420 sp, ie
+ * (wd * ht) for luma and (wd * ht / 2) for chroma*/
+ ps_mem_rec->u4_mem_size = MAX_CTXT_SETS
+ * ((3 * max_ht_luma * max_wd_luma) >> 1);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CSC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for holding pic_buf_t for each reference picture *
+ * Note this allocation is done for BUF_MGR_MAX_CNT instead of *
+ * MAX_DPB_SIZE or max_dpb_size for following reasons *
+ * max_dpb_size will be based on max_wd and max_ht *
+ * For higher max_wd and max_ht this number will be smaller than *
+ * MAX_DPB_SIZE But during actual initialization number of buffers *
+ * allocated can be more. *
+ * *
+ * Also to handle display depth application can allocate more than *
+ * what codec asks for in case of non-shared mode *
+ * Since this is only a structure allocation and not actual buffer *
+ * allocation, it is allocated for BUF_MGR_MAX_CNT entries *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+ /************************************************************************
+ * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+ * is set to maximum number of luma samples allowed at the given level. *
+ * This is done to ensure that any stream with width and height lesser *
+ * than max_wd and max_ht is supported. Number of buffers required can *
+ * be greater for lower width and heights at a given level and this *
+ * increased number of buffers might require more memory than what *
+ * max_wd and max_ht buffer would have required. Number of buffers is *
+ * doubled in order to return one frame at a time instead of sending *
+ * multiple outputs during dpb full case. Also note one extra buffer is *
+ * allocated to store current picture. *
+ * *
+ * Half-pel planes for each reference buffer are allocated along with *
+ * the reference buffer. So each reference buffer is 4 times the *
+ * required size. This way buffer management for the half-pel planes is *
+ * easier and while using the half-pel planes in MC, an offset can be *
+ * used from a single pointer *
+ ***********************************************************************/
+ ps_mem_rec->u4_mem_size += HPEL_PLANES_CNT
+ * ih264e_get_total_pic_buf_size(
+ max_wd_luma * max_ht_luma, level,
+ PAD_WD, PAD_HT, num_ref_frames,
+ num_reorder_frames);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_REF_PIC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mem recs to be returned during retrieve call *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+ {
+ ps_mem_rec->u4_mem_size = MEM_REC_CNT * sizeof(iv_mem_rec_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BACKUP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for memory required by NMB info structs and buffer for storing *
+ * half pel plane *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+ {
+ ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
+ * (sizeof(mb_info_nmb_t)
+ + MB_SIZE * MB_SIZE * sizeof(UWORD8));
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * RC mem records *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+ {
+ ih264e_get_rate_control_mem_tab(NULL, ps_mem_rec, FILL_MEMTAB);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_RC, ps_mem_rec->u4_mem_size);
+
+ /* Each memtab size is aligned to next multiple of 128 bytes */
+ /* This is to ensure all the memtabs start at different cache lines */
+ ps_mem_rec = ps_mem_rec_base;
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ ps_mem_rec->u4_mem_size = ALIGN128(ps_mem_rec->u4_mem_size);
+ ps_mem_rec++;
+ }
+
+ ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+ DEBUG("Num mem recs in fill call : %d\n", ps_op->s_ive_op.u4_num_mem_rec);
+
+ return (status);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Initializes from mem records passed to the codec
+*
+* @par Description:
+* Initializes pointers based on mem records passed
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
+ void *pv_api_ip,
+ void *pv_api_op)
+{
+ /* api call I/O structures */
+ ih264e_init_ip_t *ps_ip = pv_api_ip;
+ ih264e_init_op_t *ps_op = pv_api_op;
+
+ /* mem records */
+ iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+ /* codec variables */
+ codec_t * ps_codec;
+ cfg_params_t *ps_cfg;
+
+ /* frame dimensions */
+ WORD32 max_wd_luma, max_ht_luma;
+ WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+ /* temp var */
+ WORD32 i;
+ WORD32 status = IV_SUCCESS;
+
+ /* frame dimensions */
+ max_ht_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+ max_wd_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ max_mb_rows = max_ht_luma / MB_SIZE;
+ max_mb_cols = max_wd_luma / MB_SIZE;
+ max_mb_cnt = max_mb_rows * max_mb_cols;
+
+ /* mem records */
+ ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* Init mem records */
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+ {
+ ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base;
+ ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
+ }
+
+ /* Note this memset can not be done in init() call, since init will be called
+ during reset as well. And calling this during reset will mean all pointers
+ need to be reinitialized */
+ memset(ps_codec, 0, sizeof(codec_t));
+
+ /* Set default Config Params */
+ ps_cfg = &ps_codec->s_cfg;
+ ih264e_set_default_params(ps_cfg);
+
+ /* Update config params as per input */
+ ps_cfg->u4_max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ ps_cfg->u4_max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+ ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+ ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+ ps_cfg->u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+ ps_cfg->u4_max_reorder_cnt = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ ps_cfg->u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+ ps_cfg->e_inp_color_fmt = ps_ip->s_ive_ip.e_inp_color_fmt;
+ ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt;
+ ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate;
+ ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate;
+ ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes;
+ ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type;
+ ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+ ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+ ps_cfg->e_slice_mode = ps_ip->s_ive_ip.e_slice_mode;
+ ps_cfg->u4_slice_param = ps_ip->s_ive_ip.u4_slice_param;
+ ps_cfg->e_arch = ps_ip->s_ive_ip.e_arch;
+ ps_cfg->e_soc = ps_ip->s_ive_ip.e_soc;
+ ps_cfg->u4_enable_recon = ps_ip->s_ive_ip.u4_enable_recon;
+ ps_cfg->e_rc_mode = ps_ip->s_ive_ip.e_rc_mode;
+
+ /* Validate params */
+ if ((ps_ip->s_ive_ip.u4_max_level < MIN_LEVEL)
+ || (ps_ip->s_ive_ip.u4_max_level > MAX_LEVEL))
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_reorder_cnt > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+ {
+ ps_codec->ps_mem_rec_backup = (iv_mem_rec_t *) ps_mem_rec->pv_base;
+
+ memcpy(ps_codec->ps_mem_rec_backup, ps_mem_rec_base,
+ MEM_REC_CNT * sizeof(iv_mem_rec_t));
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+ {
+ /* temp var */
+ WORD32 size = 0, offset;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ /* base ptr */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* reset size */
+ size = 0;
+
+ /* skip mb run */
+ ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+ (void *) (pu1_buf + size);
+ size += sizeof(WORD32);
+ size = ALIGN8(size);
+
+ /* entropy map */
+ ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+ (void *) (pu1_buf + size + max_mb_cols);
+ /* size in bytes to store entropy status of an entire frame */
+ size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ size += max_mb_cols;
+ size = ALIGN128(size);
+
+ /* bit stream ptr */
+ ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+ + size);
+ size += sizeof(bitstrm_t);
+ size = ALIGN128(size);
+
+ /* nnz luma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+
+ /* nnz chroma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+ offset = size;
+ }
+ else
+ {
+ /* base ptr */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* reset size */
+ size = offset;
+
+ /* skip mb run */
+ ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+ (void *) (pu1_buf + size);
+ size += sizeof(WORD32);
+ size = ALIGN8(size);
+
+ /* entropy map */
+ ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+ (void *) (pu1_buf + size + max_mb_cols);
+ /* size in bytes to store entropy status of an entire frame */
+ size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ size += max_mb_cols;
+ size = ALIGN128(size);
+
+ /* bit stream ptr */
+ ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+ + size);
+ size += sizeof(bitstrm_t);
+ size = ALIGN128(size);
+
+ /* nnz luma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+
+ /* nnz chroma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+ {
+ /* temp var */
+ WORD32 size = 0, size_of_row;
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of coeff data of 1 mb */
+ size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+ /* size of coeff data of 1 row of mb's */
+ size *= max_mb_cols;
+
+ /* align to avoid false sharing */
+ size = ALIGN64(size);
+ size_of_row = size;
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ ps_codec->u4_size_coeff_data = size_of_row;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
+ pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf
+ + size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+ {
+ /* temp var */
+ WORD32 size, size_of_row;
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of header data of 1 mb */
+ size = 40;
+
+ /* size for 1 row of mbs */
+ size = size * max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+ size_of_row = size;
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ ps_codec->u4_size_header_data = size_of_row;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+ pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+ pu1_buf + size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+ {
+ /* size of buf mgr struct */
+ WORD32 size = ih264_buf_mgr_size();
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* mv buffer mgr */
+ ps_codec->pv_mv_buf_mgr_base = pu1_buf;
+
+ /* mv bank */
+ ps_codec->pv_mv_bank_buf_base = pu1_buf + size;
+ ps_codec->i4_total_mv_bank_size = ps_mem_rec->u4_mem_size - size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+ {
+ /* max srch range x */
+ UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+ /* max srch range y */
+ UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ /* max srch range */
+ UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* due to subpel */
+ u4_max_srch_range <<= 2;
+
+// /* due to mv on either direction */
+// u4_max_srch_range = (u4_max_srch_range << 1);
+
+ /* due to pred mv + zero */
+ u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ /* me ctxt */
+ me_ctxt_t *ps_mem_ctxt = &(ps_codec->as_process[i].s_me_ctxt);
+
+ /* init at zero mv */
+ ps_mem_ctxt->pu1_mv_bits = pu1_buf + u4_max_srch_range;
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+ {
+ ps_codec->ps_sps_base = (sps_t *) ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+ {
+ ps_codec->ps_pps_base = (pps_t *) ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+ {
+ ps_codec->ps_slice_hdr_base = ps_mem_rec->pv_base;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
+ }
+ else
+ {
+ /* temp var */
+ WORD32 size = MAX_SLICE_HDR_CNT * sizeof(slice_header_t);
+ void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size;
+
+ ps_codec->as_process[i].ps_slice_hdr_base = pv_buf;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+ {
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf
+ + max_mb_cnt;
+ }
+ }
+
+ ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * 2);
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+ /* init pointer */
+ pu1_buf_ping = ps_mem_rec->pv_base;
+ pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+ {
+ WORD32 handle_size = ithread_get_handle_size();
+
+ for (i = 0; i < MAX_PROCESS_THREADS; i++)
+ {
+ ps_codec->apv_proc_thread_handle[i] = (UWORD8 *) ps_mem_rec->pv_base
+ + (i * handle_size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+ {
+ ps_codec->pv_ctl_mutex = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+ {
+ ps_codec->pv_entropy_mutex = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+ {
+ ps_codec->pv_proc_jobq_buf = ps_mem_rec->pv_base;
+ ps_codec->i4_proc_jobq_buf_size = ps_mem_rec->u4_mem_size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+ {
+ ps_codec->pv_entropy_jobq_buf = ps_mem_rec->pv_base;
+ ps_codec->i4_entropy_jobq_buf_size = ps_mem_rec->u4_mem_size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size
+ + max_mb_cols;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /*Align the memory offsets*/
+ total_size = ALIGN64(total_size);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
+
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size
+ + max_mb_cols;
+
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size
+ + max_mb_cols;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+ {
+ ps_codec->pv_dpb_mgr = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* size of pred buffer, fwd transform output, temp buffer for inv tra */
+ WORD32 size_pred_luma, size_pred_chroma, size_fwd, size_inv, size_hp;
+
+ /* temp var */
+ WORD32 size = 0;
+
+ /* size to hold intra/inter prediction buffer */
+ size_pred_luma = sizeof(UWORD8) * 16 * 16;
+ size_pred_chroma = sizeof(UWORD8) * 8 * 16;
+
+ /* size to hold fwd transform output */
+ size_fwd = sizeof(WORD16) * SIZE_TRANS_BUFF;
+
+ /* size to hold temporary data during inverse transform */
+ size_inv = sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+
+ /* size to hold half pel plane buffers */
+ size_hp = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ /* prediction buffer */
+ ps_codec->as_process[i].pu1_pred_mb = (void *) (pu1_buf + size);
+ ps_codec->as_process[i].i4_pred_strd = 16;
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer */
+ ps_codec->as_process[i].pu1_ref_mb_intra_4x4 = (void *) (pu1_buf
+ + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra 16x16 */
+ ps_codec->as_process[i].pu1_pred_mb_intra_16x16 = (void *) (pu1_buf
+ + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra 16x16 plane*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_16x16_plane =
+ (void *) (pu1_buf + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra chroma*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_chroma = (void *) (pu1_buf
+ + size);
+ size += size_pred_chroma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra chroma plane*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_chroma_plane =
+ (void *) (pu1_buf + size);
+ size += size_pred_chroma;
+ size = ALIGN64(size);
+
+ /* Fwd transform output */
+ ps_codec->as_process[i].pi2_res_buf = (void *) (pu1_buf + size);
+ ps_codec->as_process[i].i4_res_strd = 16;
+ size += size_fwd;
+ size = ALIGN64(size);
+
+ /* Fwd transform output */
+ ps_codec->as_process[i].pi2_res_buf_intra_4x4 = (void *) (pu1_buf
+ + size);
+ size += size_fwd;
+ size = ALIGN64(size);
+
+ /* scratch buffer used during inverse transform */
+ ps_codec->as_process[i].pv_scratch_buff = (void *) (pu1_buf + size);
+ size += size_inv;
+ size = ALIGN64(size);
+
+ /* Buffers for holding half_x , half_y and half_xy values */
+ ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+
+ ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+
+ ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* size of qp, threshold matrix, fwd scaling list for one plane */
+ WORD32 size_quant_param, size_thres_mat, size_fwd_weight_mat,
+ size_satqd_weight_mat;
+
+ /* temp var */
+ WORD32 total_size = 0;
+
+ /* size of quantization parameter list of 1 plane */
+ size_quant_param = ALIGN64(sizeof(quant_params_t));
+
+ /* size of threshold matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for 1 plane */
+ size_thres_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+ /* size of forward weight matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for 1 plane */
+ size_fwd_weight_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+ /* size of SATQD matrix*/
+ size_satqd_weight_mat = ALIGN64(sizeof(UWORD16) * 9);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ quant_params_t **ps_qp_params = ps_codec->as_process[i].ps_qp_params;
+
+ /* quantization param structure */
+ ps_qp_params[0] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+ ps_qp_params[1] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+ ps_qp_params[2] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+
+ /* threshold matrix for quantization */
+ ps_qp_params[0]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+ ps_qp_params[1]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+ ps_qp_params[2]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+
+ /* fwd weight matrix */
+ ps_qp_params[0]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+ ps_qp_params[1]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+ ps_qp_params[2]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+
+ /* threshold matrix for SATQD */
+ ps_qp_params[0]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+ ps_qp_params[1]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+ ps_qp_params[2]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+
+ total_size = ALIGN128(total_size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0, size_csbp, size_intra_modes, size_mv;
+
+ /* pointer to buffer */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size in bytes to store 1 row of mb_info_t */
+ /* one additional mb, to avoid checking end of row condition */
+ size_csbp = (max_mb_cols + 1) * sizeof(mb_info_t);
+
+ /* size in bytes to store 1 row of intra macroblock sub modes */
+ size_intra_modes = max_mb_cols * sizeof(UWORD8) * 16;
+
+ /* size in bytes to store 1 row + 1 of enc_pu_t */
+ /* one additional mb, to avoid checking end of row condition */
+ size_mv = (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+ /* total size per proc ctxt */
+ total_size = size_csbp + size_intra_modes + size_mv;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+ (mb_info_t *) pu1_buf;
+ ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+ + size_csbp;
+ ps_codec->as_process[i].ps_top_row_pu_base =
+ (enc_pu_t *) (pu1_buf + size_csbp
+ + size_intra_modes);
+ }
+ else
+ {
+ ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+ (mb_info_t *) (pu1_buf + total_size);
+ ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+ + total_size + size_csbp;
+ ps_codec->as_process[i].ps_top_row_pu_base =
+ (enc_pu_t *) (pu1_buf + total_size + size_csbp
+ + size_intra_modes);
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+ {
+ UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+ WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+ /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+ /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+ /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* qp of each mb requires 1 byte */
+ qp_size = ALIGN64(max_mb_cnt);
+
+ /* total size */
+ total_size = vert_bs_size + horz_bs_size + qp_size;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* vertical edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+ (UWORD32 *) pu1_buf_ping;
+ pu1_buf_ping += vert_bs_size;
+
+ /* horizontal edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+ (UWORD32 *) pu1_buf_ping;
+ pu1_buf_ping += horz_bs_size;
+
+ /* qp */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+ (UWORD8 *) pu1_buf_ping;
+ pu1_buf_ping += qp_size;
+ }
+ else
+ {
+ pu1_buf_pong = (UWORD8 *) ps_mem_rec->pv_base;
+ pu1_buf_pong += total_size;
+
+ /* vertical edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+ (UWORD32 *) pu1_buf_pong;
+ pu1_buf_pong += vert_bs_size;
+
+ /* horizontal edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+ (UWORD32 *) pu1_buf_pong;
+ pu1_buf_pong += horz_bs_size;
+
+ /* qp */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+ (UWORD8 *) pu1_buf_pong;
+ pu1_buf_pong += qp_size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+ {
+ ps_codec->pv_inp_buf_mgr_base = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+ {
+ ps_codec->pv_out_buf_mgr_base = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+ {
+ ps_codec->pu1_y_csc_buf_base = ps_mem_rec->pv_base;
+ ps_codec->pu1_uv_csc_buf_base = (UWORD8 *) ps_mem_rec->pv_base
+ + (max_ht_luma * max_wd_luma);
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+ {
+ /* size of buf mgr struct */
+ WORD32 size = ih264_buf_mgr_size();
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* pic buffer mgr */
+ ps_codec->pv_ref_buf_mgr_base = pu1_buf;
+
+ /* picture bank */
+ ps_codec->pv_pic_buf_base = pu1_buf + size;
+ ps_codec->i4_total_pic_buf_size = ps_mem_rec->u4_mem_size - size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+ {
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of nmb ctxt */
+ WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
+
+ UWORD32 nmb_cntr, subpel_buf_size;
+
+ /* init nmb info structure pointer in all proc ctxts */
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ ps_codec->as_process[i].ps_nmb_info = (mb_info_nmb_t *) (pu1_buf);
+
+ pu1_buf += size;
+ }
+
+ subpel_buf_size = MB_SIZE * MB_SIZE * sizeof(UWORD8);
+
+ /* adjusting pointers for nmb halfpel buffer */
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ mb_info_nmb_t* ps_mb_info_nmb =
+ &ps_codec->as_process[i].ps_nmb_info[0];
+
+ for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
+ {
+ ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;
+
+ pu1_buf = pu1_buf + subpel_buf_size;
+
+ ps_mb_info_nmb[nmb_cntr].u4_bst_spel_buf_strd = MB_SIZE;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+ {
+ ih264e_get_rate_control_mem_tab(&ps_codec->s_rate_control, ps_mem_rec,
+ USE_BASE);
+ }
+
+ /* init codec ctxt */
+ status = ih264e_init(ps_codec);
+
+ return status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Hands the memory records given at init time back to the application
+*
+* @par Description:
+*  Joins the encoder threads, copies the MEM_REC_CNT records kept in
+*  ps_mem_rec_backup into the array supplied by the application and then
+*  releases the job queues, the control/entropy mutexes and the four buffer
+*  managers referenced from the codec context.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_retrieve_mem_rec_ip_t); the
+*  array it points to through ps_mem_rec receives the records
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_retrieve_mem_rec_op_t)
+*
+* @returns IV_SUCCESS on completion. IV_FAIL when i4_init_done is not 1; in
+*  that case IVE_FATALERROR and IH264E_INIT_NOT_DONE are OR-ed into the
+*  error code and nothing else is touched.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_retrieve_memrec(iv_obj_t *ps_codec_obj,
+                                     void *pv_api_ip,
+                                     void *pv_api_op)
+{
+    /* API level argument structures */
+    ih264e_retrieve_mem_rec_ip_t *ps_retrieve_ip = pv_api_ip;
+    ih264e_retrieve_mem_rec_op_t *ps_retrieve_op = pv_api_op;
+
+    /* encoder context stored behind the API handle */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* bail out when the context is not marked as initialised */
+    if (1 != ps_enc->i4_init_done)
+    {
+        ps_retrieve_op->s_ive_op.u4_error_code |= (1 << IVE_FATALERROR)
+                        | IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* threads are joined before any of their resources are released */
+    ih264e_join_threads(ps_enc);
+
+    /* report every memory record the library was given */
+    memcpy(ps_retrieve_ip->s_ive_ip.ps_mem_rec, ps_enc->ps_mem_rec_backup,
+           MEM_REC_CNT * (sizeof(iv_mem_rec_t)));
+    ps_retrieve_op->s_ive_op.u4_num_mem_rec_filled = MEM_REC_CNT;
+
+    /* job queues */
+    ih264_list_free(ps_enc->pv_entropy_jobq);
+    ih264_list_free(ps_enc->pv_proc_jobq);
+
+    /* mutexes */
+    ithread_mutex_destroy(ps_enc->pv_ctl_mutex);
+    ithread_mutex_destroy(ps_enc->pv_entropy_mutex);
+
+    /* buffer managers: mv, reference, input, output */
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_mv_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_ref_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_inp_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *) ps_enc->pv_out_buf_mgr);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Puts the encoder in flush mode
+*
+* @par Description:
+*  Clears the error code of the output structure and raises the
+*  i4_flush_mode flag of the codec context
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not read)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_flush_op_t)
+*
+* @returns IV_SUCCESS always
+*
+* @remarks Only the i4_flush_mode flag is written here; no other codec state
+*  is modified by this call
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_flush_mode(iv_obj_t *ps_codec_obj,
+                                    void *pv_api_ip,
+                                    void *pv_api_op)
+{
+    /* output structure of the flush control call */
+    ih264e_ctl_flush_op_t *ps_flush_op = pv_api_op;
+
+    /* encoder context stored behind the API handle */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    UNUSED(pv_api_ip);
+
+    /* remember that a flush was requested */
+    ps_enc->i4_flush_mode = 1;
+
+    ps_flush_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets encoder buffer requirements
+*
+* @par Description:
+*  From the maximum width and height in the input structure (each passed
+*  through ALIGN16) and the input colour format, this routine fills in the
+*  number of input components, the minimum size of each input component,
+*  the number and minimum size of the output (bitstream) buffers and the
+*  minimum input/output buffer counts, and returns them to the caller.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level (not used)
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_getbufinfo_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_getbufinfo_op_t)
+*
+* @returns IV_SUCCESS always
+*
+* @remarks For a colour format that matches none of the handled ones the
+*  input component count and the first three minimum input sizes are
+*  reported as 0 instead of being left unwritten.
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
+                                  void *pv_api_ip,
+                                  void *pv_api_op)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+    /* ALIGN16'd maximum picture dimensions */
+    WORD32 wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    WORD32 ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    WORD32 i;
+
+    UNUSED(ps_codec_obj);
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    /* Number of components in input buffers required for codec &
+     * Minimum sizes of each component in input buffer required */
+    if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420P)
+    {
+        /* three planes: full size luma, two quarter size chroma */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = (wd >> 1) * (ht >> 1);
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = (wd >> 1) * (ht >> 1);
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_422ILE)
+    {
+        /* single buffer, 2 bytes per pixel */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_422ILE_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGB_565)
+    {
+        /* single buffer, 2 bytes per pixel */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGB565_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGBA_8888)
+    {
+        /* single buffer, 4 bytes per pixel */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGBA8888_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 4;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] =
+                        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else if ((ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_UV)
+                    || (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_VU))
+    {
+        /* luma plane plus one interleaved chroma plane of half the height */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420SP_COMP;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = wd * (ht >> 1);
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+    else
+    {
+        /* unhandled colour format: report empty requirements rather than
+         * leaving these output fields with whatever the caller's structure
+         * happened to contain */
+        ps_op->s_ive_op.u4_inp_comp_cnt = 0;
+
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = 0;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = 0;
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+    }
+
+    /* Number of components in output buffers required for codec &
+     * Minimum sizes of each component in output buffer required */
+    ps_op->s_ive_op.u4_out_comp_cnt = MIN_BITS_BUFS_COMP;
+
+    for (i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++)
+    {
+        /* 1.5 bytes per pixel of the aligned picture */
+        ps_op->s_ive_op.au4_min_out_buf_size[i] = (wd * ht * 3) >> 1;
+    }
+
+    ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS;
+    ps_op->s_ive_op.u4_min_out_bufs = MIN_OUT_BUFS;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the picture dimensions in a config structure
+*
+* @par Description:
+*  Records the ALIGN16'd width and height, the same values expressed in
+*  units of 16 pixels, the stride, the unaligned width and height as display
+*  dimensions and the timestamp of the request
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_dimensions_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_dimensions_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_dimensions(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_dimensions_ip_t *ps_dim_ip = pv_api_ip;
+    ih264e_ctl_set_dimensions_op_t *ps_dim_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_dim_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_dim_ip->s_ive_ip.u4_timestamp_low;
+
+    /* display size is the size exactly as requested */
+    ps_cfg->u4_disp_wd = ps_dim_ip->s_ive_ip.u4_wd;
+    ps_cfg->u4_disp_ht = ps_dim_ip->s_ive_ip.u4_ht;
+
+    ps_cfg->u4_strd = ps_dim_ip->s_ive_ip.u4_strd;
+
+    /* width: ALIGN16'd value and the same in 16 pixel units */
+    ps_cfg->u4_wd = ALIGN16(ps_dim_ip->s_ive_ip.u4_wd);
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_wd >> 4;
+
+    /* height: ALIGN16'd value and the same in 16 pixel units */
+    ps_cfg->u4_ht = ALIGN16(ps_dim_ip->s_ive_ip.u4_ht);
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_ht >> 4;
+
+    ps_dim_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores source and target frame rates in a config structure
+*
+* @par Description:
+*  Copies u4_src_frame_rate, u4_tgt_frame_rate and the request timestamp
+*  from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_frame_rate_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_frame_rate_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_rate(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_frame_rate_ip_t *ps_rate_ip = pv_api_ip;
+    ih264e_ctl_set_frame_rate_op_t *ps_rate_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_rate_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_rate_ip->s_ive_ip.u4_timestamp_low;
+
+    /* frame rates */
+    ps_cfg->u4_tgt_frame_rate = ps_rate_ip->s_ive_ip.u4_tgt_frame_rate;
+    ps_cfg->u4_src_frame_rate = ps_rate_ip->s_ive_ip.u4_src_frame_rate;
+
+    ps_rate_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the target bit rate in a config structure
+*
+* @par Description:
+*  Copies u4_target_bitrate and the request timestamp from the input
+*  structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_bitrate_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_bitrate_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_bit_rate(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_bitrate_ip_t *ps_bitrate_ip = pv_api_ip;
+    ih264e_ctl_set_bitrate_op_t *ps_bitrate_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_bitrate_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_bitrate_ip->s_ive_ip.u4_timestamp_low;
+
+    ps_cfg->u4_target_bitrate = ps_bitrate_ip->s_ive_ip.u4_target_bitrate;
+
+    ps_bitrate_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the requested frame type in a config structure
+*
+* @par Description:
+*  Copies e_frame_type and the request timestamp from the input structure
+*  into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_frame_type_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_frame_type_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks not a sticky tag
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_type(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_frame_type_ip_t *ps_ftype_ip = pv_api_ip;
+    ih264e_ctl_set_frame_type_op_t *ps_ftype_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_ftype_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ftype_ip->s_ive_ip.u4_timestamp_low;
+
+    ps_cfg->e_frame_type = ps_ftype_ip->s_ive_ip.e_frame_type;
+
+    ps_ftype_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores quantization parameters in a config structure
+*
+* @par Description:
+*  Copies the maximum, minimum and default qp of I, P and B frames, together
+*  with the request timestamp, from the input structure into the config
+*  structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_qp_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_qp_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks The values are copied as given; no range check is done here
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_qp(void *pv_api_ip,
+                                 void *pv_api_op,
+                                 cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_qp_op_t *ps_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+
+    /* I frames */
+    ps_cfg->u4_i_qp = ps_ip->s_ive_ip.u4_i_qp;
+    ps_cfg->u4_i_qp_min = ps_ip->s_ive_ip.u4_i_qp_min;
+    ps_cfg->u4_i_qp_max = ps_ip->s_ive_ip.u4_i_qp_max;
+
+    /* P frames */
+    ps_cfg->u4_p_qp = ps_ip->s_ive_ip.u4_p_qp;
+    ps_cfg->u4_p_qp_min = ps_ip->s_ive_ip.u4_p_qp_min;
+    ps_cfg->u4_p_qp_max = ps_ip->s_ive_ip.u4_p_qp_max;
+
+    /* B frames */
+    ps_cfg->u4_b_qp = ps_ip->s_ive_ip.u4_b_qp;
+    ps_cfg->u4_b_qp_min = ps_ip->s_ive_ip.u4_b_qp_min;
+    ps_cfg->u4_b_qp_max = ps_ip->s_ive_ip.u4_b_qp_max;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the encoding mode in a config structure
+*
+* @par Description:
+*  Copies e_enc_mode and the request timestamp from the input structure into
+*  the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_enc_mode_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_enc_mode_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_enc_mode(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_enc_mode_ip_t *ps_mode_ip = pv_api_ip;
+    ih264e_ctl_set_enc_mode_op_t *ps_mode_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_mode_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_mode_ip->s_ive_ip.u4_timestamp_low;
+
+    ps_cfg->e_enc_mode = ps_mode_ip->s_ive_ip.e_enc_mode;
+
+    ps_mode_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores vbv parameters in a config structure
+*
+* @par Description:
+*  Copies u4_vbv_buf_size, u4_vbv_buffer_delay and the request timestamp
+*  from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_vbv_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_vbv_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_vbv_params(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_vbv_params_ip_t *ps_vbv_ip = pv_api_ip;
+    ih264e_ctl_set_vbv_params_op_t *ps_vbv_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_vbv_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_vbv_ip->s_ive_ip.u4_timestamp_low;
+
+    /* vbv settings */
+    ps_cfg->u4_vbv_buffer_delay = ps_vbv_ip->s_ive_ip.u4_vbv_buffer_delay;
+    ps_cfg->u4_vbv_buf_size = ps_vbv_ip->s_ive_ip.u4_vbv_buf_size;
+
+    ps_vbv_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores AIR parameters in a config structure
+*
+* @par Description:
+*  Copies e_air_mode, u4_air_refresh_period and the request timestamp from
+*  the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_air_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_air_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_air_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_air_params_ip_t *ps_air_ip = pv_api_ip;
+    ih264e_ctl_set_air_params_op_t *ps_air_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_air_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_air_ip->s_ive_ip.u4_timestamp_low;
+
+    /* AIR settings */
+    ps_cfg->u4_air_refresh_period = ps_air_ip->s_ive_ip.u4_air_refresh_period;
+    ps_cfg->e_air_mode = ps_air_ip->s_ive_ip.e_air_mode;
+
+    ps_air_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores motion estimation parameters in a config structure
+*
+* @par Description:
+*  Copies the half-pel, quarter-pel, fast SAD and alt-ref enable flags, the
+*  horizontal and vertical search ranges, the ME speed preset and the
+*  request timestamp from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_me_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_me_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_me_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_me_params_ip_t *ps_me_ip = pv_api_ip;
+    ih264e_ctl_set_me_params_op_t *ps_me_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_me_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_me_ip->s_ive_ip.u4_timestamp_low;
+
+    /* search window and speed */
+    ps_cfg->u4_srch_rng_x = ps_me_ip->s_ive_ip.u4_srch_rng_x;
+    ps_cfg->u4_srch_rng_y = ps_me_ip->s_ive_ip.u4_srch_rng_y;
+    ps_cfg->u4_me_speed_preset = ps_me_ip->s_ive_ip.u4_me_speed_preset;
+
+    /* enable flags */
+    ps_cfg->u4_enable_hpel = ps_me_ip->s_ive_ip.u4_enable_hpel;
+    ps_cfg->u4_enable_qpel = ps_me_ip->s_ive_ip.u4_enable_qpel;
+    ps_cfg->u4_enable_fast_sad = ps_me_ip->s_ive_ip.u4_enable_fast_sad;
+    ps_cfg->u4_enable_alt_ref = ps_me_ip->s_ive_ip.u4_enable_alt_ref;
+
+    ps_me_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores intra/inter prediction estimation parameters in a config structure
+*
+* @par Description:
+*  Copies u4_enable_intra_4x4, u4_enc_speed_preset and the request timestamp
+*  from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_ipe_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_ipe_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_ipe_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_ipe_params_ip_t *ps_ipe_ip = pv_api_ip;
+    ih264e_ctl_set_ipe_params_op_t *ps_ipe_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_ipe_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ipe_ip->s_ive_ip.u4_timestamp_low;
+
+    /* prediction estimation settings */
+    ps_cfg->u4_enc_speed_preset = ps_ipe_ip->s_ive_ip.u4_enc_speed_preset;
+    ps_cfg->u4_enable_intra_4x4 = ps_ipe_ip->s_ive_ip.u4_enable_intra_4x4;
+
+    ps_ipe_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores GOP parameters in a config structure
+*
+* @par Description:
+*  Copies u4_i_frm_interval, u4_idr_frm_interval, u4_num_b_frames and the
+*  request timestamp from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_gop_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_gop_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_gop_params_ip_t *ps_gop_ip = pv_api_ip;
+    ih264e_ctl_set_gop_params_op_t *ps_gop_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_gop_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_gop_ip->s_ive_ip.u4_timestamp_low;
+
+    /* GOP structure */
+    ps_cfg->u4_num_b_frames = ps_gop_ip->s_ive_ip.u4_num_b_frames;
+    ps_cfg->u4_idr_frm_interval = ps_gop_ip->s_ive_ip.u4_idr_frm_interval;
+    ps_cfg->u4_i_frm_interval = ps_gop_ip->s_ive_ip.u4_i_frm_interval;
+
+    ps_gop_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores profile parameters in a config structure
+*
+* @par Description:
+*  Copies e_profile and the request timestamp from the input structure into
+*  the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_profile_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_profile_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip,
+                                            void *pv_api_op,
+                                            cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_profile_params_ip_t *ps_profile_ip = pv_api_ip;
+    ih264e_ctl_set_profile_params_op_t *ps_profile_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_profile_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_profile_ip->s_ive_ip.u4_timestamp_low;
+
+    ps_cfg->e_profile = ps_profile_ip->s_ive_ip.e_profile;
+
+    ps_profile_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the disable deblock level in a config structure
+*
+* @par Description:
+*  Copies u4_disable_deblock_level and the request timestamp from the input
+*  structure into the config structure. Level 0 means no disabling and level
+*  4 means disable completely. 1, 2, 3 are intermediate levels that control
+*  amount of deblocking done.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_deblock_params_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_deblock_params_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks The level is copied as given; no range check is done here
+*
+*******************************************************************************
+*/
+static WORD32 ih264_set_deblock_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_deblock_params_ip_t *ps_deblk_ip = pv_api_ip;
+    ih264e_ctl_set_deblock_params_op_t *ps_deblk_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_deblk_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_deblk_ip->s_ive_ip.u4_timestamp_low;
+
+    ps_cfg->u4_disable_deblock_level =
+                    ps_deblk_ip->s_ive_ip.u4_disable_deblock_level;
+
+    ps_deblk_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Stores the number of cores in a config structure
+*
+* @par Description:
+*  Stores the smaller of the requested u4_num_cores and MAX_PROCESS_THREADS,
+*  together with the request timestamp, in the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (ih264e_ctl_set_num_cores_ip_t)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure (ih264e_ctl_set_num_cores_op_t)
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS always; the error code of the output structure is
+*  cleared
+*
+* @remarks The number of encoder threads is limited to MAX_PROCESS_THREADS
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_num_cores(void *pv_api_ip,
+                                   void *pv_api_op,
+                                   cfg_params_t *ps_cfg)
+{
+    /* typed views of the control call arguments */
+    ih264e_ctl_set_num_cores_ip_t *ps_cores_ip = pv_api_ip;
+    ih264e_ctl_set_num_cores_op_t *ps_cores_op = pv_api_op;
+
+    /* time at which the request applies */
+    ps_cfg->u4_timestamp_high = ps_cores_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_cores_ip->s_ive_ip.u4_timestamp_low;
+
+    /* never more cores than MAX_PROCESS_THREADS */
+    ps_cfg->u4_num_cores = MIN(ps_cores_ip->s_ive_ip.u4_num_cores, MAX_PROCESS_THREADS);
+
+    ps_cores_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Resets encoder state
+*
+* @par Description:
+* Resets encoder state by calling ih264e_init()
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_reset(iv_obj_t *ps_codec_obj,
+                           void *pv_api_ip,
+                           void *pv_api_op)
+{
+    /* encoder context hanging off the API level object */
+    codec_t *ps_enc_ctxt = (codec_t *) (ps_codec_obj->pv_codec_handle);
+
+    /* output structure of the reset ctrl call */
+    ih264e_ctl_reset_op_t *ps_reset_op = pv_api_op;
+
+    /* the input structure carries nothing this call needs */
+    UNUSED(pv_api_ip);
+
+    ps_reset_op->s_ive_op.u4_error_code = 0;
+
+    if (NULL == ps_enc_ctxt)
+    {
+        /* no context to reset: flag it in the error code only */
+        ps_reset_op->s_ive_op.u4_error_code = IH264E_INIT_NOT_DONE;
+        return IV_SUCCESS;
+    }
+
+    /* re-run the init routine on the existing context */
+    ih264e_init(ps_enc_ctxt);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec control call
+*
+* @par Description:
+*  Claims a config parameter set from ps_codec->as_cfg, tags it with the
+*  sub-command and dispatches to the handler of that sub-command. Sub-commands
+*  that are served right away (reset, set default, flush, get buf info,
+*  get version and unrecognized ones) release the claimed set again.
+*  The whole sequence runs under ps_codec->pv_ctl_mutex.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns status of the sub-command handler; IV_FAIL when the codec has not
+*  been initialised. An unrecognized sub-command leaves the status at its
+*  initial value of 0.
+*
+* @remarks When every config parameter set is already in use, the set at the
+*  head of the array is overwritten.
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj,
+                         void *pv_api_ip,
+                         void *pv_api_op)
+{
+    /* codec ctxt */
+    codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* ctrl call I/O structures */
+    ih264e_ctl_setdefault_ip_t *ps_ctl_ip = pv_api_ip;
+    ih264e_ctl_setdefault_op_t *ps_ctl_op = pv_api_op;
+
+    /* ctrl call sub cmd */
+    IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd;
+
+    /* error status */
+    IV_STATUS_T ret = 0;
+
+    /* temp var */
+    WORD32 i;
+    cfg_params_t *ps_cfg = NULL;
+
+    /* control call is for configuring encoding params, this is not to be called
+     * before a successful init call */
+    if (ps_codec->i4_init_done != 1)
+    {
+        ps_ctl_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR;
+        ps_ctl_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* make it thread safe */
+    ithread_mutex_lock(ps_codec->pv_ctl_mutex);
+
+    /* find a free config param set to hold current parameters */
+    for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+    {
+        if (0 == ps_codec->as_cfg[i].u4_is_valid)
+        {
+            ps_cfg = &ps_codec->as_cfg[i];
+            break;
+        }
+    }
+
+    /* If no set is free, then start overwriting from the head config params */
+    if (NULL == ps_cfg)
+    {
+        ps_cfg = &ps_codec->as_cfg[0];
+    }
+
+    ps_cfg->u4_is_valid = 1;
+
+    ps_cfg->e_cmd = sub_cmd;
+
+    /*
+     * NOTE: the claimed set is always released through ps_cfg. Indexing
+     * as_cfg[] with the loop counter would write past the end of the array
+     * when the search above found no free set (i == MAX_ACTIVE_CONFIG_PARAMS).
+     */
+    switch (sub_cmd)
+    {
+        case IVE_CMD_CTL_SET_DIMENSIONS:
+            ret = ih264e_set_dimensions(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMERATE:
+            ret = ih264e_set_frame_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_BITRATE:
+            ret = ih264e_set_bit_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMETYPE:
+            ret = ih264e_set_frame_type(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_QP:
+            ret = ih264e_set_qp(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ENC_MODE:
+            ret = ih264e_set_enc_mode(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_VBV_PARAMS:
+            ret = ih264e_set_vbv_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_AIR_PARAMS:
+            ret = ih264_set_air_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ME_PARAMS:
+            ret = ih264_set_me_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_IPE_PARAMS:
+            ret = ih264_set_ipe_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_GOP_PARAMS:
+            ret = ih264_set_gop_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+            ret = ih264_set_profile_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+            ret = ih264_set_deblock_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_RESET:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_reset(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_SETDEFAULT:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ret = ih264e_set_default_params(ps_cfg);
+
+            ps_op->s_ive_op.u4_error_code = ret;
+
+            break;
+        }
+
+        case IVE_CMD_CTL_FLUSH:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_set_flush_mode(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETBUFINFO:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_get_buf_info(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETVERSION:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+            ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ps_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+            if (ps_ip->s_ive_ip.u4_version_bufsize <= 0)
+            {
+                ps_op->s_ive_op.u4_error_code =
+                                IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                ret = IV_FAIL;
+            }
+            else
+            {
+                ret = ih264e_get_version((CHAR *) ps_ip->s_ive_ip.pu1_version,
+                                         ps_ip->s_ive_ip.u4_version_bufsize);
+
+                if (ret != IV_SUCCESS)
+                {
+                    ps_op->s_ive_op.u4_error_code =
+                                    IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                    ret = IV_FAIL;
+                }
+            }
+            break;
+        }
+
+        case IVE_CMD_CTL_SET_NUM_CORES:
+            ret = ih264e_set_num_cores(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        default:
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            DEBUG("Warning !! unrecognized control api command \n");
+            break;
+    }
+
+    ithread_mutex_unlock(ps_codec->pv_ctl_mutex);
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Codec entry point function. All the function calls to the codec are done
+* using this function with different values specified in command
+*
+* @par Description:
+* Arguments are tested for validity and then based on the command
+* appropriate function is called
+*
+* @param[in] ps_handle
+* API level handle for codec
+*
+* @param[in] pv_api_ip
+* Input argument structure
+*
+* @param[out] pv_api_op
+* Output argument structure
+*
+* @returns error_status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    /* the input structure is read as an array of 32 bit words; the command
+     * id is taken from the second word */
+    WORD32 *pi4_ip_words = (WORD32 *) pv_api_ip;
+
+    /* api command and the status returned by its handler */
+    WORD32 i4_api_cmd = IV_CMD_NA;
+    WORD32 i4_status;
+
+    /* reject the call outright when the I/O structures are not sane */
+    if (IV_SUCCESS != api_check_struct_sanity(ps_handle, pv_api_ip, pv_api_op))
+    {
+        DEBUG("error code = %d\n", *((UWORD32 *)pv_api_op + 1));
+        return IV_FAIL;
+    }
+
+    i4_api_cmd = pi4_ip_words[1];
+
+    /* hand over to the routine that serves this command */
+    switch (i4_api_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            i4_status = ih264e_get_num_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+            i4_status = ih264e_fill_num_mem_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_INIT:
+            i4_status = ih264e_init_mem_rec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC:
+            i4_status = ih264e_retrieve_memrec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_CTL:
+            i4_status = ih264e_ctl(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_ENCODE:
+            i4_status = ih264e_encode(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        default:
+            /* unknown command */
+            i4_status = IV_FAIL;
+            break;
+    }
+
+    return (IV_STATUS_T) i4_status;
+}
diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c
new file mode 100755
index 0000000..e5bfbe4
--- /dev/null
+++ b/encoder/ih264e_bitstream.c
@@ -0,0 +1,472 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_bitstream.c
+*
+* @brief
+* This file contains function definitions related to bitstream generation
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_bitstrm_init()
+* - ih264e_put_bits()
+* - ih264e_put_bit()
+* - ih264e_put_rbsp_trailing_bits()
+* - ih264e_put_uev()
+* - ih264e_put_sev()
+* - ih264e_put_nal_start_code_prefix()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_debug.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief Initializes the encoder bitstream engine
+*
+*  @par   Description
+*  Binds the bitstream context to an output buffer and resets the write
+*  offset, the scratch word and the zero byte run. This routine needs to be
+*  called at start of slice/frame encode.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   pu1_bitstrm_buf
+*  bitstream buffer pointer where the encoded stream is generated in byte order
+*
+*  @param[in]   u4_max_bitstrm_size
+*  indicates maximum bitstream buffer size. (in bytes)
+*  If actual stream size exceeds the maximum size, encoder should
+*   1. Not corrupt data beyond u4_max_bitstrm_size bytes
+*   2. Report an error back to application indicating overflow
+*
+*  @return      IH264E_SUCCESS
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_bitstrm_init(bitstrm_t *ps_bitstrm,
+                                   UWORD8 *pu1_bitstrm_buf,
+                                   UWORD32 u4_max_bitstrm_size)
+{
+    /* scratch state: empty word, nothing written, no pending zero bytes */
+    ps_bitstrm->i4_zero_bytes_run = 0;
+    ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+    ps_bitstrm->u4_cur_word = 0;
+    ps_bitstrm->u4_strm_buf_offset = 0;
+
+    /* output buffer supplied by the caller */
+    ps_bitstrm->u4_max_strm_size = u4_max_bitstrm_size;
+    ps_bitstrm->pu1_strm_buffer = pu1_bitstrm_buf;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+*  @brief puts a code with specified number of bits into the bitstream
+*
+*  @par   Description
+*  inserts code_len number of bits from lsb of code_val into the
+*  bitstream. updates context members like u4_cur_word, u4_strm_buf_offset and
+*  i4_bits_left_in_cw. If flushing the current word would take the byte
+*  offset (u4_strm_buf_offset) up to or beyond the max available size
+*  (u4_max_strm_size), returns error without writing to the buffer and
+*  without updating the context. Emulation prevention bytes that the flush
+*  would insert are included in this check.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_val
+*  code value that needs to be inserted in the stream.
+*
+*  @param[in]    code_len
+*  indicates code length (in bits) of code_val that would be inserted in
+*  bitstream buffer. Range of length[1:WORD_SIZE]
+*
+*  @remarks     Assumptions: all bits from bit position code_len to msb of
+*   code_val shall be zero
+*
+*  @return      IH264E_SUCCESS, or IH264E_BITSTREAM_BUFFER_OVERFLOW
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm,
+                               UWORD32 u4_code_val,
+                               WORD32 code_len)
+{
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32  bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+
+
+    /* check assumptions made in the module */
+    ASSERT(code_len > 0 && code_len <= WORD_SIZE);
+
+    if(code_len < WORD_SIZE)
+    {
+        ASSERT((u4_code_val >> code_len) == 0);
+    }
+
+    /* sanity check on the bitstream engine state */
+    ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE);
+
+    ASSERT(ps_bitstrm->i4_zero_bytes_run <= EPB_ZERO_BYTES);
+
+    ASSERT(ps_bitstrm->pu1_strm_buffer != NULL);
+
+
+    if(bits_left_in_cw > code_len)
+    {
+        /*******************************************************************/
+        /* insert the code in local bitstream word and return              */
+        /* code is inserted in position of bits left (post decrement)      */
+        /*******************************************************************/
+        bits_left_in_cw -= code_len;
+        u4_cur_word     |= (u4_code_val << bits_left_in_cw);
+
+        ps_bitstrm->u4_cur_word        = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw = bits_left_in_cw;
+
+        return(IH264E_SUCCESS);
+    }
+    else
+    {
+        /********************************************************************/
+        /* 1. insert partial code corresponding to bits left in cur word    */
+        /* 2. flush all the bits of cur word to bitstream                   */
+        /* 3. insert emulation prevention bytes while flushing the bits     */
+        /* 4. insert remaining bits of code starting from msb of cur word   */
+        /* 5. update bitsleft in current word and stream buffer offset      */
+        /********************************************************************/
+        UWORD32 u4_strm_buf_offset  = ps_bitstrm->u4_strm_buf_offset;
+
+        UWORD32 u4_max_strm_size    = ps_bitstrm->u4_max_strm_size;
+
+        WORD32  zero_run            = ps_bitstrm->i4_zero_bytes_run;
+
+        UWORD8* pu1_strm_buf        = ps_bitstrm->pu1_strm_buffer;
+
+        WORD32  i, rem_bits = (code_len - bits_left_in_cw);
+
+        UWORD32 u4_space_left;
+
+
+        /*********************************************************************/
+        /* Bitstream overflow check (payload bytes of one word)              */
+        /*********************************************************************/
+        if((u4_strm_buf_offset + (WORD_SIZE>>3)) >= u4_max_strm_size)
+        {
+            /* return without corrupting the buffer beyond its size */
+            return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+        }
+
+        /* the check above guarantees that this does not wrap around */
+        u4_space_left = u4_max_strm_size - u4_strm_buf_offset;
+
+        /* insert partial code corresponding to bits left in cur word */
+        u4_cur_word |= u4_code_val >> rem_bits;
+
+        /*********************************************************************/
+        /* Flushing one word can add up to EPB_ZERO_BYTES emulation          */
+        /* prevention bytes. Only when the buffer is too full to absorb that */
+        /* worst case, count the exact number of bytes the flush will write  */
+        /* and bail out before touching the buffer if they do not fit.       */
+        /*********************************************************************/
+        if(u4_space_left < ((WORD_SIZE>>3) + EPB_ZERO_BYTES))
+        {
+            UWORD32 u4_bytes_needed = 0;
+            WORD32  trial_zero_run  = zero_run;
+
+            for(i = WORD_SIZE; i > 0; i -= 8)
+            {
+                UWORD8 u1_trial_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+                if(INSERT_EPB(trial_zero_run, u1_trial_byte))
+                {
+                    u4_bytes_needed++;
+                    trial_zero_run = 0;
+                }
+
+                u4_bytes_needed++;
+                trial_zero_run = u1_trial_byte ? 0 : (trial_zero_run + 1);
+            }
+
+            if(u4_bytes_needed > u4_space_left)
+            {
+                /* return without corrupting the buffer beyond its size */
+                return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+            }
+        }
+
+        for(i = WORD_SIZE; i > 0; i -= 8)
+        {
+            /* flush the bits in cur word byte by byte and copy to stream */
+            UWORD8   u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+        }
+
+        /* insert the remaining bits from code val into current word */
+        /* (a shift by WORD_SIZE is avoided when nothing remains)    */
+        u4_cur_word = rem_bits ? (u4_code_val << (WORD_SIZE - rem_bits)) : 0;
+
+        /* update the state variables and return success */
+        ps_bitstrm->u4_cur_word         = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw  = WORD_SIZE - rem_bits;
+        ps_bitstrm->i4_zero_bytes_run   = zero_run;
+        ps_bitstrm->u4_strm_buf_offset  = u4_strm_buf_offset;
+        return (IH264E_SUCCESS);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts a 1-bit code into the bitstream
+*
+* @par Description
+* inserts 1bit lsb of code_val into the bitstream
+* updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @remarks Assumptions: all bits from bit position 1 to msb of code_val
+* shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val)
+{
+    /* a single bit is simply a code of length one */
+    IH264E_ERROR_T e_status = ih264e_put_bits(ps_bitstrm, u4_code_val, 1);
+
+    return e_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+*  @par   Description
+*  sets a 1 bit at the next free bit position of u4_cur_word, flushes the
+*  bytes of the word down to (and including) the byte holding that bit into
+*  the bitstream buffer, and resets u4_cur_word, i4_bits_left_in_cw and
+*  i4_zero_bytes_run. If the flush would take the byte offset
+*  (u4_strm_buf_offset) up to or beyond the max available size
+*  (u4_max_strm_size), returns error without writing to the buffer and
+*  without updating the context. Emulation prevention bytes that the flush
+*  would insert are included in this check.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @return      IH264E_SUCCESS, or IH264E_BITSTREAM_BUFFER_OVERFLOW
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm)
+{
+    WORD32 i;
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32  bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+
+    /* whole bytes of the word that stay unused below the stop bit */
+    WORD32  bytes_left_in_cw = (bits_left_in_cw - 1) >> 3;
+
+    UWORD32 u4_strm_buf_offset  = ps_bitstrm->u4_strm_buf_offset;
+    UWORD32 u4_max_strm_size    = ps_bitstrm->u4_max_strm_size;
+    WORD32  zero_run            = ps_bitstrm->i4_zero_bytes_run;
+    UWORD8* pu1_strm_buf        = ps_bitstrm->pu1_strm_buffer;
+
+    /* payload bytes that will be flushed */
+    UWORD32 u4_bytes_to_flush   = (WORD_SIZE>>3) - bytes_left_in_cw;
+    UWORD32 u4_space_left;
+
+    /*********************************************************************/
+    /* Bitstream overflow check (payload bytes only)                     */
+    /*********************************************************************/
+    if((u4_strm_buf_offset + u4_bytes_to_flush) >= u4_max_strm_size)
+    {
+        /* return without corrupting the buffer beyond its size */
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* the check above guarantees that this does not wrap around */
+    u4_space_left = u4_max_strm_size - u4_strm_buf_offset;
+
+    /* insert a 1 at the end of current word and flush all the bits.      */
+    /* The shift is done on an unsigned value: bits_left_in_cw may be     */
+    /* WORD_SIZE, and shifting a signed 1 into the sign bit is undefined. */
+    u4_cur_word |= ((UWORD32)1 << (bits_left_in_cw - 1));
+
+    /*********************************************************************/
+    /* The flush can add up to EPB_ZERO_BYTES emulation prevention bytes.*/
+    /* Only when the buffer is too full to absorb that worst case, count */
+    /* the exact number of bytes the flush will write and bail out       */
+    /* before touching the buffer if they do not fit.                    */
+    /*********************************************************************/
+    if(u4_space_left < (u4_bytes_to_flush + EPB_ZERO_BYTES))
+    {
+        UWORD32 u4_bytes_needed = 0;
+        WORD32  trial_zero_run  = zero_run;
+
+        for(i = WORD_SIZE; i > (bytes_left_in_cw*8); i -= 8)
+        {
+            UWORD8 u1_trial_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+            if(INSERT_EPB(trial_zero_run, u1_trial_byte))
+            {
+                u4_bytes_needed++;
+                trial_zero_run = 0;
+            }
+
+            u4_bytes_needed++;
+            trial_zero_run = u1_trial_byte ? 0 : (trial_zero_run + 1);
+        }
+
+        if(u4_bytes_needed > u4_space_left)
+        {
+            /* return without corrupting the buffer beyond its size */
+            return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+        }
+    }
+
+    for(i = WORD_SIZE; i > (bytes_left_in_cw*8); i -= 8)
+    {
+        /* flush the bits in cur word byte by byte  and copy to stream */
+        UWORD8   u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+    }
+
+    /* update the stream offset */
+    ps_bitstrm->u4_strm_buf_offset  = u4_strm_buf_offset;
+
+    /* Default init values for scratch variables of bitstream context */
+    ps_bitstrm->u4_cur_word         = 0;
+    ps_bitstrm->i4_bits_left_in_cw  = WORD_SIZE;
+    ps_bitstrm->i4_zero_bytes_run   = 0;
+
+    return (IH264E_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a unsigned integer into bitstream
+*
+* @par Description
+* computes uev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_num
+* unsigned integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num)
+{
+    /* the exp-golomb code word is codeNum + 1, written with as many */
+    /* leading zeros as it has bits after its leading one            */
+    UWORD32 u4_codeword = u4_code_num + 1;
+    UWORD32 u4_msb_pos;
+
+    /* position of the leading one gives the number of significant bits */
+    GETRANGE(u4_msb_pos, u4_codeword);
+
+    /* prefix zeros and code word go out in a single put_bits() call */
+    return ih264e_put_bits(ps_bitstrm, u4_codeword, (2 * u4_msb_pos - 1));
+}
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a signed integer into bitstream
+*
+* @par Description
+* computes sev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] syntax_elem
+* signed integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem)
+{
+    UWORD32 u4_mapped_num, u4_codeword, u4_msb_pos;
+
+    /************************************************************************/
+    /* map the signed value to an unsigned codeNum (Table 9-3):             */
+    /*   positive x     -> 2x - 1                                           */
+    /*   non-positive x -> 2 * abs(x)                                       */
+    /************************************************************************/
+    u4_mapped_num = (syntax_elem > 0) ? ((syntax_elem << 1) - 1)
+                                      : ((-syntax_elem) << 1);
+
+    /* from here on this is the unsigned exp-golomb coding of codeNum */
+    u4_codeword = u4_mapped_num + 1;
+
+    /* position of the leading one gives the number of significant bits */
+    GETRANGE(u4_msb_pos, u4_codeword);
+
+    /* prefix zeros and code word go out in a single put_bits() call */
+    return ih264e_put_bits(ps_bitstrm, u4_codeword, (2 * u4_msb_pos - 1));
+}
+
+/**
+******************************************************************************
+*
+* @brief insert NAL start code prefix (0x000001) into bitstream with an option
+* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+* @par Description
+* Although start code prefix could have been put by calling ih264e_put_bits(),
+* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+* prevention insertion is not done for the NAL start code prefix which will
+* surely happen otherwise by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] insert_leading_zero_8bits
+* flag indicating if one more zero bytes needs to prefixed before start code
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm,
+                                                WORD32 insert_leading_zero_8bits)
+{
+    /* leading zero byte followed by the start code prefix 0x00 00 01 */
+    static const UWORD8 au1_prefix[4] = { 0x00, 0x00, 0x00, 0x01 };
+
+    UWORD8 *pu1_out = ps_bitstrm->pu1_strm_buffer;
+    UWORD32 u4_wr_offset = ps_bitstrm->u4_strm_buf_offset;
+    WORD32 idx;
+
+    /* Bitstream buffer overflow check assuming worst case of 4 bytes */
+    if((u4_wr_offset + 4) >= ps_bitstrm->u4_max_strm_size)
+    {
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* bytes are stored directly, i.e. without emulation prevention; */
+    /* the leading zero byte is skipped unless it was asked for       */
+    for(idx = insert_leading_zero_8bits ? 0 : 1; idx < 4; idx++)
+    {
+        pu1_out[u4_wr_offset] = au1_prefix[idx];
+        u4_wr_offset++;
+    }
+
+    /* update the stream offset */
+    ps_bitstrm->u4_strm_buf_offset = u4_wr_offset;
+
+    return (IH264E_SUCCESS);
+}
+
diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h
new file mode 100755
index 0000000..21360cc
--- /dev/null
+++ b/encoder/ih264e_bitstream.h
@@ -0,0 +1,401 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_bitstream.h
+*
+* @brief
+* This file contains encoder bitstream engine related structures and
+* interface prototypes
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_BITSTREAM_H_
+#define IH264E_BITSTREAM_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief defines the maximum number of bits in a bitstream word
+ * (the u4_cur_word scratch word of the bitstream context)
+******************************************************************************
+ */
+#define WORD_SIZE 32
+
+/**
+******************************************************************************
+ * @brief The number of consecutive zero bytes for emulation prevention check
+ * (INSERT_EPB compares the running zero byte count against this value)
+******************************************************************************
+ */
+#define EPB_ZERO_BYTES 2
+
+/**
+******************************************************************************
+ * @brief Emulation prevention insertion byte
+ * (the byte value PUTBYTE_EPB writes ahead of a byte that needs protection)
+******************************************************************************
+ */
+#define EPB_BYTE 0x03
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief  Macro to check if emulation prevention byte insertion is required
+ *
+ *  Evaluates to non-zero when zero_run equals EPB_ZERO_BYTES and next_byte
+ *  has none of its upper six bits set (i.e. next_byte <= 0x03). The whole
+ *  expansion is parenthesized so that it can be negated or combined with
+ *  other operators safely.
+******************************************************************************
+ */
+#define INSERT_EPB(zero_run, next_byte)                                       \
+    (((zero_run) == EPB_ZERO_BYTES) && (0 == ((next_byte) & 0xFC)))
+
+/**
+******************************************************************************
+ *  @brief  returns the bit position of a leading 1 (msb) in a code value
+ *
+ *  r receives the 1-based position of the most significant set bit of value,
+ *  or 1 when value is 0. Expands to a brace-enclosed block. Arguments are
+ *  parenthesized so that expressions may be passed; value may be evaluated
+ *  more than once, so it must not have side effects.
+******************************************************************************
+ */
+#if !MSVC
+#define GETRANGE(r,value)   \
+{                           \
+    if(0 == (value))        \
+        (r) = 1;            \
+    else                    \
+    {                       \
+        (r) = 32-CLZ(value);\
+    }\
+}
+#else
+#define GETRANGE(r,value)                 \
+{                                       \
+    unsigned long  msb_one_bit = 0;     \
+    (r) = _BitScanReverse(&msb_one_bit, (value)) ? (UWORD32)(msb_one_bit + 1) : 1 ; \
+}
+#endif
+
+/**
+******************************************************************************
+ *  @brief  returns bits required to code a value
+ *
+ *  bits receives 2 * (GETRANGE of x + 1) - 1. Expands to a brace-enclosed
+ *  block that declares a local named r_bit, so neither argument may be an
+ *  expression that refers to an identifier called r_bit.
+******************************************************************************
+ */
+#define UE_LENGTH(bits,x)        \
+{                                \
+    UWORD32 r_bit;               \
+    GETRANGE(r_bit,(x)+1)        \
+    (bits) =(((r_bit - 1) << 1)+1);\
+}
+
+/**
+******************************************************************************
+ *  @brief  Inserts 1 byte and Emulation Prevention Byte(if any) into bitstream
+ *          Increments the stream offset and zero run correspondingly
+ *
+ *  When INSERT_EPB(zero_run, byte) holds, EPB_BYTE is stored first and the
+ *  zero run is reset; then byte is stored. off is advanced once per stored
+ *  byte. zero_run is reset when byte is non-zero and incremented otherwise.
+ *  No bounds check is done here: the caller must make sure that up to two
+ *  bytes fit at ptr[off]. off and zero_run must be modifiable lvalues; all
+ *  arguments are evaluated more than once and must not have side effects.
+******************************************************************************
+ */
+#define PUTBYTE_EPB(ptr,off,byte,zero_run)                      \
+{                                                               \
+    if( INSERT_EPB(zero_run, byte) )                            \
+    {                                                           \
+        (ptr)[(off)] = EPB_BYTE;                                \
+        (off)++;                                                \
+        (zero_run) = 0;                                         \
+    }                                                           \
+                                                                \
+    (ptr)[(off)] = (byte);                                      \
+    (off)++;                                                    \
+    (zero_run) = (byte) ? 0 : (zero_run)+1;                     \
+}
+
+/**
+******************************************************************************
+ * @brief Ensures Byte alignment of the slice header
+ * Expands to a call of ih264e_put_rbsp_trailing_bits() on ps_bitstrm, so the
+ * expression carries that function's return code
+******************************************************************************
+ */
+#define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm)
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Bitstream context for encoder
+******************************************************************************
+ */
+typedef struct bitstrm
+{
+ /** points to start of stream buffer. */
+ UWORD8 *pu1_strm_buffer;
+
+ /**
+ * max bitstream size (in bytes).
+ * Encoded stream shall not exceed this size.
+ */
+ UWORD32 u4_max_strm_size;
+
+ /**
+ * byte offset (w.r.t pu1_strm_buffer) where next byte would be written
+ * Bitstream engine makes sure it would not corrupt data beyond
+ * u4_max_strm_size bytes
+ */
+ UWORD32 u4_strm_buf_offset;
+
+ /**
+ * current bitstream word; It is a scratch word containing max of
+ * WORD_SIZE bits. Will be copied to stream buffer when the word is
+ * full
+ */
+ UWORD32 u4_cur_word;
+
+ /**
+ * signifies number of bits available in u4_cur_word.
+ * Bits from msb down to position i4_bits_left_in_cw of u4_cur_word have
+ * already been inserted; the next bits would be inserted from position
+ * [i4_bits_left_in_cw-1] downwards.
+ * Range of this variable [1 : WORD_SIZE]
+ */
+ WORD32 i4_bits_left_in_cw;
+
+ /**
+ * signifies the number of consecutive zero bytes propagated from the
+ * previous word. It is used for emulation prevention byte insertion in
+ * the stream
+ */
+ WORD32 i4_zero_bytes_run;
+
+} bitstrm_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Initializes the encoder bitstream engine
+*
+* @par Description
+* This routine needs to be called at start of slice/frame encode
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] pu1_bitstrm_buf
+* bitstream buffer pointer where the encoded stream is generated in byte order
+*
+* @param[in] u4_max_bitstrm_size
+* indicates maximum bitstream buffer size. (in bytes)
+* If actual stream size exceeds the maximum size, encoder should
+* 1. Not corrupt data beyond u4_max_bitstrm_size bytes
+* 2. Report an error back to application indicating overflow
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_bitstrm_init
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD8 *pu1_bitstrm_buf,
+ UWORD32 u4_max_bitstrm_size
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts a code with specified number of bits into the bitstream
+*
+* @par Description
+* inserts code_len number of bits from lsb of code_val into the
+* bitstream. If the total bytes (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @param[in] code_len
+* indicates the length (in bits) of the code in code_val that would be
+* inserted in the bitstream buffer.
+*
+* @remarks Assumptions: all bits from bit position code_len to msb of
+* code_val shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bits
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_val,
+ WORD32 code_len
+ );
+
+/**
+******************************************************************************
+*
+* @brief inserts a 1-bit code into the bitstream
+*
+* @par Description
+* inserts 1bit lsb of code_val into the bitstream
+* updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @remarks Assumptions: all bits from bit position 1 to msb of code_val
+* shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bit
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_val
+ );
+
+/**
+******************************************************************************
+*
+* @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+* @par Description
+* inserts rbsp trailing bits, updates context members like u4_cur_word and
+* i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+* total bytes (u4_strm_buf_offset) exceeds max available size
+* (u4_max_strm_size), returns error without corrupting data beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_rbsp_trailing_bits
+ (
+ bitstrm_t *ps_bitstrm
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of an unsigned integer into bitstream
+*
+* @par Description
+* computes uev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_num
+* unsigned integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_uev
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_num
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a signed integer into bitstream
+*
+* @par Description
+* computes sev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] syntax_elem
+* signed integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_sev
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 syntax_elem
+ );
+
+/**
+******************************************************************************
+*
+* @brief insert NAL start code prefix (0x000001) into bitstream with an option
+* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+* @par Description
+* Although start code prefix could have been put by calling ih264e_put_bits(),
+* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+* prevention insertion is not done for the NAL start code prefix which will
+* surely happen otherwise by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] insert_leading_zero_8bits
+* flag indicating whether one more zero byte needs to be prefixed before the start code
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_nal_start_code_prefix
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 insert_leading_zero_8bits
+ );
+
+#endif /* IH264E_BITSTREAM_H_ */
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
new file mode 100755
index 0000000..1341dcd
--- /dev/null
+++ b/encoder/ih264e_cavlc.c
@@ -0,0 +1,1448 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_cavlc.c
+*
+* @brief
+* Contains all the routines to code syntax elements and residuals when entropy
+* coding chosen is CAVLC
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_compute_zeroruns_and_trailingones()
+* - ih264e_write_coeff4x4_cavlc()
+* - ih264e_write_coeff8x8_cavlc()
+* - ih264e_encode_residue()
+* - ih264e_write_islice_mb()
+* - ih264e_write_pslice_mb()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Walks the significant coefficient map from its lsb and derives, for a
+*  residual block, the run of zeros preceding every non-zero coefficient, the
+*  number of trailing ones, the signs of the trailing ones and the total
+*  number of zeros.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing the non-zero levels in scan order
+*  (indexed by non-zero coefficient number, not by scan position)
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in that sub block
+*
+* @param[out] pu1_zero_run
+*  Pointer to array receiving the run of zeros preceding each non-zero
+*  coefficient. The first 4 * sizeof(UWORD32) bytes are cleared on entry, so
+*  the buffer must be at least that large.
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map; bit n is set when scan position n holds a
+*  non-zero coefficient
+*
+* @returns u4_totzero_sign_trailone
+*  Bits 0-7   contain the number of trailing ones.
+*  Bits 8-15  contain the bitwise sign information of the trailing ones
+*             (bit k is set when the k-th trailing one met in scan order is -1).
+*  Bits 16 and above contain the total number of zeros.
+*
+* @remarks
+*  The map must contain at least u4_total_coeff set bits: the loops only
+*  terminate once that many non-zero coefficients have been found.
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_compute_zeroruns_and_trailingones(WORD16 *pi2_res_block,
+                                                        UWORD32 u4_total_coeff,
+                                                        UWORD8 *pu1_zero_run,
+                                                        UWORD32 u4_sig_coeff_map)
+{
+    UWORD32 i = 0;
+    UWORD32 u4_nnz_coeff = 0;
+    WORD32 i4_run = -1;
+    UWORD32 u4_sign = 0;
+    UWORD32 u4_tot_zero = 0;
+    UWORD32 u4_trailing1 = 0;
+    WORD32 i4_val;
+    UWORD32 u4_totzero_sign_trailone;
+    UWORD32 u4_idx;
+
+    /*
+     * Clear the run array byte by byte. The buffer is a UWORD8 array, so
+     * writing it through a UWORD32 pointer would assume an alignment the type
+     * does not guarantee and would access it through an incompatible type.
+     * The number of bytes cleared is unchanged.
+     */
+    for (u4_idx = 0; u4_idx < 4 * sizeof(UWORD32); u4_idx++)
+    {
+        pu1_zero_run[u4_idx] = 0;
+    }
+
+    /* Compute Runs of zeros for all nnz coefficients except the last 3 */
+    if (u4_total_coeff > 3)
+    {
+        for (i = 0; u4_nnz_coeff < (u4_total_coeff - 3); i++)
+        {
+            i4_run++;
+
+            i4_val = (u4_sig_coeff_map & 0x1);
+            u4_sig_coeff_map >>= 1;
+
+            if (i4_val != 0)
+            {
+                pu1_zero_run[u4_nnz_coeff++] = i4_run;
+                i4_run = -1;
+            }
+        }
+    }
+
+    /* Compute T1's, Signof(T1's) and Runs of zeros for the last 3 */
+    while (u4_nnz_coeff != u4_total_coeff)
+    {
+        i4_run++;
+
+        i4_val = (u4_sig_coeff_map & 0x1);
+        u4_sig_coeff_map >>= 1;
+
+        if (i4_val != 0)
+        {
+            WORD16 i2_level = pi2_res_block[u4_nnz_coeff];
+
+            /* the run is recorded irrespective of the level's magnitude */
+            pu1_zero_run[u4_nnz_coeff] = i4_run;
+
+            if (i2_level == 1)
+            {
+                u4_trailing1++;
+            }
+            else if (i2_level == -1)
+            {
+                u4_sign |= (1u << u4_trailing1);
+                u4_trailing1++;
+            }
+            else
+            {
+                /* a level with magnitude > 1 breaks the chain of trailing ones */
+                u4_trailing1 = 0;
+                u4_sign = 0;
+            }
+
+            i4_run = -1;
+            u4_nnz_coeff++;
+        }
+        i++;
+    }
+
+    /* i is the number of scan positions consumed from the map */
+    u4_tot_zero = i - u4_total_coeff;
+    u4_totzero_sign_trailone = (u4_tot_zero << 16) | (u4_sign << 8) | u4_trailing1;
+
+    return (u4_totzero_sign_trailone);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes a code with ih264e_put_bits() and merges its status into the status
+*  accumulated so far, so that the first failure is never overwritten by a
+*  later successful write.
+*
+* @param[in, out] ps_bit_stream
+*  bitstream context handed to ih264e_put_bits()
+*
+* @param[in] u4_codeword
+*  code value to be written
+*
+* @param[in] i4_codesize
+*  length of the code in bits
+*
+* @param[in] e_prev_status
+*  status accumulated by the preceding writes
+*
+* @returns e_prev_status when it already reports a failure, otherwise the
+*  status of this write
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_cavlc_put_bits_sticky(bitstrm_t *ps_bit_stream,
+                                                   UWORD32 u4_codeword,
+                                                   WORD32 i4_codesize,
+                                                   IH264E_ERROR_T e_prev_status)
+{
+    /* the write is always issued, exactly as before; only the status merge is new */
+    IH264E_ERROR_T e_cur_status = ih264e_put_bits(ps_bit_stream, u4_codeword, i4_codesize);
+
+    return (e_prev_status != IH264E_SUCCESS) ? e_prev_status : e_cur_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given residual block
+*
+* @par Description
+*  Writes, in this order: coeff_token, signs of the trailing ones, the level
+*  codes of the remaining coefficients (from the highest index downwards),
+*  total_zeros and the run_before codes.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in the sub block
+*
+* @param[in] u4_block_type
+*  block type
+*
+* @param[out] pu1_zero_run
+*  Pointer to scratch array that receives the run of zeros of each coefficient
+*
+* @param[in] u4_nc
+*  average of non zero coeff from top and left blocks (when available)
+*
+* @param[in, out] ps_bit_stream
+*  structure pointing to a buffer holding output bit stream
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map of the residual block
+*
+* @returns
+*  IH264E_SUCCESS when every write succeeded, otherwise the error code of the
+*  first write that failed
+*
+* @remarks
+*  If the block type is CAVLC_CHROMA_4x4_DC, then u4_nc is non-significant
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff4x4_cavlc(WORD16 *pi2_res_block,
+                                                  UWORD32 u4_total_coeff,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD8 *pu1_zero_run,
+                                                  UWORD32 u4_nc,
+                                                  bitstrm_t *ps_bit_stream,
+                                                  UWORD32 u4_sig_coeff_map)
+{
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    UWORD32 u4_totzero_sign_trailone = 0;
+    UWORD32 u4_trailing_ones = 0;
+    UWORD32 u4_tot_zeros = 0;
+    UWORD32 u4_remaining_coeff = 0;
+    UWORD32 u4_sign1 = 0;
+    UWORD32 u4_max_num_coeff = 0;
+    /* maximum number of coefficients, indexed by block type */
+    static const UWORD32 au4_max_num_nnz_coeff[] = {16, 15, 16, 4, 15};
+
+    /* validate inputs */
+    ASSERT(u4_block_type <= CAVLC_CHROMA_4x4_AC);
+
+    u4_max_num_coeff = au4_max_num_nnz_coeff[u4_block_type];
+
+    ASSERT(u4_total_coeff <= u4_max_num_coeff);
+
+    /* empty block : only a coeff_token is written */
+    if (!u4_total_coeff)
+    {
+        UWORD32 u4_codeword = 15;
+        UWORD32 u4_codesize = 1;
+        if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+        {
+            u4_codeword = 1;
+            u4_codesize = 2;
+            DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, 0);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnumber of trailing ones ",0);
+        }
+        else
+        {
+            UWORD32 u4_vlcnum = u4_nc >> 1;
+
+            /* write coeff_token */
+            if (u4_vlcnum > 3)
+            {
+                /* Num-FLC */
+                u4_codeword = 3;
+                u4_codesize = 6;
+            }
+            else
+            {
+                /* Num-VLC 0, 1, 2 */
+                if (u4_vlcnum > 1)
+                {
+                    u4_vlcnum = 2;
+                }
+                /* code size becomes 1, 2 or 4; the code is that many 1 bits */
+                u4_codesize <<= u4_vlcnum;
+                u4_codeword >>= (4 - u4_codesize);
+            }
+
+            DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, 0, u4_nc);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnC ",u4_nc);
+        }
+
+        DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+        ENTROPY_TRACE("\tcodeword ",u4_codeword);
+        ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+        error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+        return error_status;
+    }
+
+    /* Compute zero run, number of trailing ones and their sign. */
+    u4_totzero_sign_trailone =
+                    ih264e_compute_zeroruns_and_trailingones(pi2_res_block,
+                                                             u4_total_coeff,
+                                                             pu1_zero_run,
+                                                             u4_sig_coeff_map);
+    u4_trailing_ones = u4_totzero_sign_trailone & 0xFF;
+    u4_sign1 = (u4_totzero_sign_trailone >> 8) & 0xFF;
+    u4_tot_zeros = (u4_totzero_sign_trailone >> 16) & 0xFF;
+    u4_remaining_coeff = u4_total_coeff - u4_trailing_ones;
+
+    /* write coeff_token */
+    {
+        UWORD32 u4_codeword;
+        UWORD32 u4_codesize;
+        if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+        {
+            u4_codeword = gu1_code_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+            u4_codesize = gu1_size_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+
+            DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, u4_trailing_ones);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+        }
+        else
+        {
+            UWORD32 u4_vlcnum = u4_nc >> 1;
+
+            if (u4_vlcnum > 3)
+            {
+                /* Num-FLC */
+                u4_codeword = ((u4_total_coeff-1) << 2 ) + u4_trailing_ones;
+                u4_codesize = 6;
+            }
+            else
+            {
+                /* Num-VLC 0, 1, 2 */
+                if (u4_vlcnum > 1)
+                {
+                    u4_vlcnum = 2;
+                }
+                u4_codeword = gu1_code_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+                u4_codesize = gu1_size_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+            }
+
+            DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, u4_trailing_ones, u4_nc);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+            ENTROPY_TRACE("\tnC ",u4_nc);
+        }
+
+        DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+        ENTROPY_TRACE("\tcodeword ",u4_codeword);
+        ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+        error_status = ih264e_cavlc_put_bits_sticky(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+    }
+
+    /* write sign of trailing ones */
+    if (u4_trailing_ones)
+    {
+        DEBUG("\nT1's: %d u4_codeword, %d u4_codesize",u4_sign1, u4_trailing_ones);
+        error_status = ih264e_cavlc_put_bits_sticky(ps_bit_stream, u4_sign1, u4_trailing_ones, error_status);
+        ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+        ENTROPY_TRACE("\tsign of trailing ones ",u4_sign1);
+    }
+
+    /* write level codes */
+    if (u4_remaining_coeff)
+    {
+        WORD32 i4_level = pi2_res_block[u4_remaining_coeff-1];
+        UWORD32 u4_escape;
+        UWORD32 u4_suffix_length = 0; /* Level-VLC[N] */
+        UWORD32 u4_abs_level, u4_abs_level_actual = 0;
+        WORD32 i4_sign;
+        /* rounding term added before the shift by suffix length, indexed by suffix length */
+        static const UWORD32 u4_rndfactor[] = {0, 0, 1, 3, 7, 15, 31};
+
+        DEBUG("\n \t%d coeff,",i4_level);
+        ENTROPY_TRACE("\tcoeff ",i4_level);
+
+        if (u4_trailing_ones < 3)
+        {
+            /* If there are less than 3 T1s, then the first non-T1 level is incremented if negative (decremented if positive)*/
+            if (i4_level < 0)
+            {
+                i4_level += 1;
+            }
+            else
+            {
+                i4_level -= 1;
+            }
+
+            /* compensates the adjustment above when comparing against the threshold */
+            u4_abs_level_actual = 1;
+
+            /* Initialize VLC table (Suffix Length) to encode the level */
+            if (u4_total_coeff > 10)
+            {
+                u4_suffix_length = 1;
+            }
+        }
+
+        /* i4_sign is expected to be 0 for non-negative and -1 for negative levels; */
+        /* this relies on >> of a negative value being an arithmetic shift         */
+        i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+        u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+        u4_abs_level_actual += u4_abs_level;
+
+        u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+
+        while (1)
+        {
+            UWORD32 u4_codesize;
+            UWORD32 u4_codeword;
+            UWORD32 u4_codeval;
+
+            u4_remaining_coeff--;
+
+            GATHER_CAVLC_STATS1();
+
+            {
+                u4_codeval = u4_abs_level << 1;
+                u4_codeval = u4_codeval - 2 - i4_sign;
+
+                if ((!u4_suffix_length) && (u4_escape > 7) && (u4_abs_level < 16))
+                {
+                    u4_codeword = (1 << 4) + (u4_codeval - 14);
+                    u4_codesize = 19;
+                }
+                else if (u4_escape > 7)
+                {
+                    u4_codeword = (1 << 12) + (u4_codeval - (15 << u4_suffix_length));
+                    u4_codesize = 28;
+                    if (!u4_suffix_length)
+                    {
+                        u4_codeword -= 15;
+                    }
+                }
+                else
+                {
+                    u4_codeword = (1 << u4_suffix_length) + (u4_codeval & ((1 << u4_suffix_length)-1));
+                    u4_codesize = (u4_codeval >> u4_suffix_length) + 1 + u4_suffix_length;
+                }
+            }
+
+            /*put the level code in bitstream*/
+            DEBUG("\nLEVEL: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ",u4_codeword);
+            ENTROPY_TRACE("\tcodesize ",u4_codesize);
+            error_status = ih264e_cavlc_put_bits_sticky(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+
+            if (u4_remaining_coeff == 0) break;
+
+            /*update suffix length for next level*/
+            if (u4_suffix_length == 0)
+            {
+                u4_suffix_length++;
+            }
+            if (u4_suffix_length < 6)
+            {
+                if (u4_abs_level_actual > gu1_threshold_vlc_level[u4_suffix_length])
+                {
+                    u4_suffix_length++;
+                }
+            }
+
+            /* next level */
+            i4_level = pi2_res_block[u4_remaining_coeff-1];
+
+            DEBUG("\n \t%d coeff,",i4_level);
+            ENTROPY_TRACE("\tcoeff ",i4_level);
+
+            i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+            u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+            u4_abs_level_actual = u4_abs_level;
+
+            u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+        }
+    }
+
+    DEBUG("\n \t %d totalzeros",u4_tot_zeros);
+    ENTROPY_TRACE("\ttotal zeros ",u4_tot_zeros);
+
+    /* Write Total Zeros (skipped when the block holds its maximum number of coefficients) */
+    if (u4_total_coeff < u4_max_num_coeff)
+    {
+        WORD32 index;
+        UWORD32 u4_codeword;
+        UWORD32 u4_codesize;
+
+        if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+        {
+            /* start offset into the chroma total zeros tables, indexed by (total coeff - 1) */
+            static const UWORD8 gu1_index_zero_table_chroma[] = {0, 4, 7};
+            index = gu1_index_zero_table_chroma[u4_total_coeff-1] + u4_tot_zeros;
+            u4_codesize = gu1_size_zero_table_chroma[index];
+            u4_codeword = gu1_code_zero_table_chroma[index];
+        }
+        else
+        {
+            index = gu1_index_zero_table[u4_total_coeff-1] + u4_tot_zeros;
+            u4_codesize = gu1_size_zero_table[index];
+            u4_codeword = gu1_code_zero_table[index];
+        }
+
+        DEBUG("\nTOTAL ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+        ENTROPY_TRACE("\tcodeword ",u4_codeword);
+        ENTROPY_TRACE("\tcodesize ",u4_codesize);
+        error_status = ih264e_cavlc_put_bits_sticky(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+    }
+
+    /* Write Run Before */
+    if (u4_tot_zeros)
+    {
+        UWORD32 u4_max_num_coef = u4_total_coeff-1;
+        UWORD32 u4_codeword;
+        UWORD32 u4_codesize;
+        UWORD32 u4_zeros_left = u4_tot_zeros;
+
+        /* runs are coded from the last coefficient backwards; index 0 is never coded */
+        while (u4_max_num_coef)
+        {
+            UWORD32 u4_run_before = pu1_zero_run[u4_max_num_coef];
+            UWORD32 u4_index;
+
+            if (u4_zeros_left > MAX_ZERO_LEFT)
+            {
+                u4_index = gu1_index_run_table[MAX_ZERO_LEFT];
+            }
+            else
+            {
+                u4_index = gu1_index_run_table[u4_zeros_left - 1];
+            }
+
+            u4_codesize = gu1_size_run_table[u4_index + u4_run_before];
+            u4_codeword = gu1_code_run_table[u4_index + u4_run_before];
+
+            DEBUG("\nRUN BEFORE ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ",u4_codeword);
+            ENTROPY_TRACE("\tcodesize ",u4_codesize);
+            error_status = ih264e_cavlc_put_bits_sticky(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+
+            u4_zeros_left -= u4_run_before;
+            if (!u4_zeros_left)
+            {
+                break;
+            }
+            u4_max_num_coef--;
+        }
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given subblock
+*
+* @par Description
+*  Codes the four 4x4 partitions of the sub block in index order 0, 1, 2, 3.
+*  For each partition nC is derived from the left/top nnz of the available
+*  neighbours, the neighbour nnz entries are then overwritten with the nnz of
+*  the current partition, and the partition is coded with
+*  ih264e_write_coeff4x4_cavlc().
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context
+*
+* @param[in] pi2_res_block
+*  Pointers to residual blocks of all the partitions for the current subblk
+*  (containing levels in scan order)
+*
+* @param[in] pu1_nnz
+*  Total non-zero coefficients of all the partitions for the current subblk
+*
+* @param[in] pu2_sig_coeff_map
+*  Significant coefficient map of all the partitions for the current subblk
+*
+* @param[in] u4_block_type
+*  entropy coding block type
+*
+* @param[in] u4_ngbr_avlb
+*  top and left availability of all the partitions for the current subblk
+*  (packed). Byte n (in memory order) belongs to partition n; its low nibble
+*  is the left availability and its high nibble the top availability.
+*
+* @param[in, out] pu1_top_nnz
+*  pointer to the buffer containing nnz of all the subblks to the top;
+*  entries 0 and 1 are updated with the nnz of the partitions coded here
+*
+* @param[in, out] pu1_left_nnz
+*  pointer to the buffer containing nnz of all the subblks to the left;
+*  entries 0 and 1 are updated with the nnz of the partitions coded here
+*
+* @returns IH264E_SUCCESS when all four partitions were coded successfully,
+*  otherwise the error code of the first partition that failed
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff8x8_cavlc(entropy_ctxt_t *ps_ent_ctxt,
+                                                  WORD16 **pi2_res_block,
+                                                  UWORD8 *pu1_nnz,
+                                                  UWORD16 *pu2_sig_coeff_map,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD32 u4_ngbr_avlb,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz)
+{
+    /* left / top nnz entry used by each 4x4 partition (raster order in the 8x8) */
+    static const UWORD8 au1_left_idx[4] = {0, 0, 1, 1};
+    static const UWORD8 au1_top_idx[4] = {0, 1, 0, 1};
+
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+    UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run;
+    /* byte view of the packed availability word, one byte per partition */
+    const UWORD8 *pu1_ngbr_avbl = (const UWORD8 *)(&u4_ngbr_avlb);
+    UWORD32 u4_blk;
+
+    for (u4_blk = 0; u4_blk < 4; u4_blk++)
+    {
+        IH264E_ERROR_T e_blk_status;
+        UWORD8 *pu1_left = pu1_left_nnz + au1_left_idx[u4_blk];
+        UWORD8 *pu1_top = pu1_top_nnz + au1_top_idx[u4_blk];
+        UWORD8 u1_mb_a = pu1_ngbr_avbl[u4_blk] & 0x0F;
+        UWORD8 u1_mb_b = pu1_ngbr_avbl[u4_blk] & 0xF0;
+        UWORD32 u4_nC = 0;
+
+        /* nC : nnz of the available neighbour, or the rounded mean of both */
+        if (u1_mb_a)
+        {
+            u4_nC += *pu1_left;
+        }
+        if (u1_mb_b)
+        {
+            u4_nC += *pu1_top;
+        }
+        if (u1_mb_a && u1_mb_b)
+        {
+            u4_nC = (u4_nC + 1) >> 1;
+        }
+
+        /* the current partition becomes the neighbour of the following ones */
+        *pu1_left = *pu1_top = pu1_nnz[u4_blk];
+
+        /* encode ac block index 4x4 = u4_blk */
+        e_blk_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[u4_blk],
+                                                   pu1_nnz[u4_blk],
+                                                   u4_block_type,
+                                                   pu1_zero_run,
+                                                   u4_nC,
+                                                   ps_bitstream,
+                                                   pu2_sig_coeff_map[u4_blk]);
+
+        /* keep the first failure; all partitions are still coded as before */
+        if (error_status == IH264E_SUCCESS)
+        {
+            error_status = e_blk_status;
+        }
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function encodes luma and chroma residues of a macro block when
+* the entropy coding mode chosen is cavlc.
+*
+* @param[in] ps_ent_ctxt
+* Pointer to entropy context
+*
+* @param[in] u4_mb_type
+* current mb type
+*
+* @param[in] u4_cbp
+* coded block pattern for the current mb
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
+ UWORD32 u4_mb_type,
+ UWORD32 u4_cbp)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* packed residue */
+ void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+
+ /* bit stream buffer */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+ /* zero run */
+ UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run;
+
+ /* temp var */
+ UWORD32 u4_nC, u4_ngbr_avlb;
+ UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz;
+ UWORD16 au2_sig_coeff_map[4];
+ WORD16 *pi2_res_block[4];
+ UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+ tu_sblk_coeff_data_t *ps_mb_coeff_data;
+ ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4;
+
+ /* ngbr availability */
+ UWORD8 u1_mb_a, u1_mb_b;
+
+ /* cbp */
+ UWORD32 u4_cbp_luma = u4_cbp & 0xF, u4_cbp_chroma = u4_cbp >> 4;
+
+ /* mb indices */
+ WORD32 i4_mb_x, i4_mb_y;
+
+ /* derive neighbor availability */
+ i4_mb_x = ps_ent_ctxt->i4_mb_x;
+ i4_mb_y = ps_ent_ctxt->i4_mb_y;
+ pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);
+ /* left macroblock availability */
+ u1_mb_a = (i4_mb_x == 0 ||
+ (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ /* top macroblock availability */
+ u1_mb_b = (i4_mb_y == 0 ||
+ (pu1_slice_idx[i4_mb_x-ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+
+ pu1_ngbr_avlb = (void *)(&u4_ngbr_avlb);
+ pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+ pu1_left_nnz = (UWORD8 *)&ps_ent_ctxt->u4_left_nnz_luma;
+
+ /* encode luma residue */
+
+ /* mb type intra 16x16 */
+ if (u4_mb_type == I16x16)
+ {
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ /* estimate nnz for the current mb */
+ u4_nC = 0;
+ if (u1_mb_a)
+ u4_nC += pu1_left_nnz[0];
+ if (u1_mb_b)
+ u4_nC += pu1_top_nnz[0];
+ if (u1_mb_a && u1_mb_b)
+ u4_nC = (u4_nC + 1) >> 1;
+
+ /* encode dc block */
+ ENTROPY_TRACE("Luma DC blk idx %d",0);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_LUMA_4x4_DC, pu1_zero_run, u4_nC, ps_bitstream, au2_sig_coeff_map[0]);
+
+ e_entropy_blk_type = CAVLC_LUMA_4x4_AC;
+ }
+
+ if (u4_cbp_luma & 1)
+ {
+ /* encode ac block index 8x8 = 0*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+ /* derive sub block neighbor availability */
+
+ pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[3] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",0);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ if (u4_cbp_luma & 2)
+ {
+ /* encode ac block index 8x8 = 1*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[1] = pu1_ngbr_avlb[0] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[3] = pu1_ngbr_avlb[2] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",1);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz);
+ }
+ else
+ {
+ (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ if (u4_cbp_luma & 0x4)
+ {
+ /* encode ac block index 8x8 = 2*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[2] = pu1_ngbr_avlb[0] = (1 << 4) | u1_mb_a;
+ pu1_ngbr_avlb[1] = pu1_ngbr_avlb[3] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",2);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, (pu1_left_nnz+2));
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+ }
+
+ if (u4_cbp_luma & 0x8)
+ {
+ /* encode ac block index 8x8 = 3*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ u4_ngbr_avlb = 0x11111111;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",3);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz+2);
+ }
+ else
+ {
+ (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+ (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+ }
+
+ /* encode chroma residue */
+ if (u4_cbp_chroma & 3)
+ {
+ /* parse packed coeff data structure for residual data */
+ /* cb, cr */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+
+ /* encode dc block */
+ /* cb, cr */
+ ENTROPY_TRACE("Chroma DC blk idx %d",0);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[0]);
+ ENTROPY_TRACE("Chroma DC blk idx %d",1);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[1], au1_nnz[1], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[1]);
+ }
+
+ pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+ pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_cbcr;
+
+ /* encode sub blk */
+ if (u4_cbp_chroma & 0x2)
+ {
+ /* encode ac block index 8x8 = 0*/
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[3] = 0x11;
+
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ ENTROPY_TRACE("Chroma AC blk idx %d",0);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ pu1_top_nnz += 2;
+ pu1_left_nnz += 2;
+
+ /* encode sub blk */
+ if (u4_cbp_chroma & 0x2)
+ {
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ ENTROPY_TRACE("Chroma AC blk idx %d",1);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ /* store the index of the next mb coeff data */
+ ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+
+ return error_status;
+}
+
+/* Bit position reached in the stream: u4_strm_buf_offset scaled by 8 plus the
+ * bits already used in the current code word (32 - i4_bits_left_in_cw).
+ * The argument is parenthesised so that any pointer expression can be passed. */
+#define GET_NUM_BITS(ps_bitstream) ((((ps_bitstream)->u4_strm_buf_offset) << 3) + 32 - ((ps_bitstream)->i4_bits_left_in_cw))
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes the CAVLC syntax of one macroblock of an Intra Slice.
+*
+* @description
+*  The packed mb header is read from ps_ent_ctxt->pv_mb_header_data:
+*   byte 0 : mb type plus mode (bits 0-3 mb type, bits 4-5 intra 16x16 luma
+*            mode, bits 6 and above chroma intra mode)
+*   byte 1 : coded block pattern (bits 0-3 luma, bits 4 and above chroma)
+*   byte 2 : mb qp delta
+*   then, for I4x4/I8x8, packed sub block modes, two per byte (low nibble
+*   first; bit 0 of a nibble is prev_intra4x4_pred_mode_flag, bits 1-3 are
+*   rem_intra4x4_pred_mode)
+*  From it mb type, luma sub modes (if present), chroma mb mode, coded block
+*  pattern and mb qp delta are written, followed by the luma/chroma residue
+*  (ih264e_encode_residue). These syntax elements are written as directed by
+*  table 7.3.5 of h264 specification.
+*
+* @param[in,out] ps_ent_ctxt
+*  pointer to entropy context. pv_mb_header_data is advanced past the header
+*  bytes consumed; u4_header_bits[0] and u4_residue_bits[0] are incremented by
+*  the number of bits written for header and residue respectively
+*
+* @returns error code. An error raised while writing the header is returned
+*  in preference to the status of the residue coding
+*
+* @remarks The I8x8 path is guarded by ASSERT(0)
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* error status of the header writes (updated by the PUT_BITS macros) */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* error status of the residue coding */
+    IH264E_ERROR_T residue_status;
+
+    /* bit stream ptr */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* packed header data */
+    UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+    /* mb header info */
+    /*
+     * mb_tpm : mb type plus mode
+     * mb_type : luma mb type and chroma mb type are packed
+     * cbp : coded block pattern
+     * mb_qp_delta : mb qp delta
+     * chroma_intra_mode : chroma intra mode
+     * luma_intra_mode : luma intra mode
+     */
+    WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+    WORD8 mb_qp_delta;
+
+    /* temp var */
+    WORD32 i, mb_type_stream;
+
+    WORD32 bitstream_start_offset, bitstream_end_offset;
+
+    /* Starting bitstream offset for header in bits */
+    bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    /* mb header info */
+    mb_tpm = *pu1_byte++;
+    cbp = *pu1_byte++;
+    mb_qp_delta = *pu1_byte++;
+
+    /* mb type */
+    mb_type = mb_tpm & 0xF;
+    /* is intra ? */
+    if (mb_type == I16x16)
+    {
+        UWORD32 u4_cbp_l, u4_cbp_c;
+
+        u4_cbp_c = (cbp >> 4);
+        u4_cbp_l = (cbp & 0xF);
+        luma_intra_mode = (mb_tpm >> 4) & 3;
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        /* for I16x16 the mb type code word also carries the luma mode, the
+         * chroma cbp and whether all luma 8x8 blocks are coded (cbp == 15) */
+        mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I4x4)
+    {
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+        /* 16 sub blocks, two packed per header byte */
+        for (i = 0; i < 16; i += 2)
+        {
+            /* sub blk idx 1 */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* sub blk idx 2 */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I8x8)
+    {
+        /* transform 8x8 flag */
+        UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        /* this path is not expected to be taken */
+        ASSERT(0);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+        /* u4_transform_size_8x8_flag */
+        PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+        /* write sub block modes */
+        for (i = 0; i < 4; i++)
+        {
+            /* sub blk idx 1 */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* sub blk idx 2 */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else
+    {
+        /* any other mb type: no mb type / prediction mode syntax is written */
+    }
+
+    /* coded_block_pattern (for I16x16 it is part of the mb type code word) */
+    if (mb_type != I16x16)
+    {
+        PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][0], error_status, "coded_block_pattern");
+    }
+
+    if (cbp || mb_type == I16x16)
+    {
+        /* mb_qp_delta */
+        PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+    }
+
+    /* Ending bitstream offset for header in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* Starting bitstream offset for residue */
+    bitstream_start_offset = bitstream_end_offset;
+
+    /* residual */
+    residue_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+    /* do not let the residue status mask an error raised by the header writes */
+    if (error_status == IH264E_SUCCESS)
+    {
+        error_status = residue_status;
+    }
+
+    /* Ending bitstream offset for residue in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+    ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* store the index of the next mb syntax layer */
+    ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes the CAVLC syntax of one macroblock of an Inter slice
+*
+* @description
+*  The packed mb header is read from ps_ent_ctxt->pv_mb_header_data. Its first
+*  byte is mb type plus mode (bits 0-3 mb type).
+*   - PSKIP : only the skip run counter (*pi4_mb_skip_run) is incremented, the
+*     top/left nnz contexts of the mb are cleared and no header bits are
+*     written for this mb
+*   - otherwise cbp and mb qp delta follow in the header; the pending skip run
+*     is written and reset, then mb type, intra prediction modes (intra mbs) or
+*     one (mv x, mv y) pair per partition (inter mbs), coded block pattern and
+*     mb qp delta are written, followed by the luma/chroma residue
+*  These syntax elements are written as directed by table 7.3.5 of h264
+*  specification
+*
+* @param[in,out] ps_ent_ctxt
+*  pointer to entropy context. pv_mb_header_data is advanced past the header
+*  bytes consumed; u4_header_bits[] and u4_residue_bits[] are incremented at
+*  index 1 for inter/skip mbs and at index 0 for intra mbs
+*
+* @returns error code. An error raised while writing the header is returned
+*  in preference to the status of the residue coding
+*
+* @remarks The I8x8 path is guarded by ASSERT(0)
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* error status of the header writes (updated by the PUT_BITS macros) */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* error status of the residue coding */
+    IH264E_ERROR_T residue_status;
+
+    /* bit stream ptr */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* packed header data */
+    UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+    /* mb header info */
+    /*
+     * mb_tpm : mb type plus mode
+     * mb_type : luma mb type and chroma mb type are packed
+     * cbp : coded block pattern
+     * mb_qp_delta : mb qp delta
+     * chroma_intra_mode : chroma intra mode
+     * luma_intra_mode : luma intra mode
+     * ps_pu : Pointer to the array of structures having motion vectors, size
+     * and position of sub partitions
+     */
+    WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+    WORD8 mb_qp_delta;
+
+    /* temp var; cbptable selects the column of gu1_cbp_map_tables
+     * (1 unless the mb is I4x4/I8x8, where it is set to 0) */
+    WORD32 i, mb_type_stream, cbptable = 1;
+
+    /* index into the header/residue bit counters */
+    WORD32 is_inter = 0;
+
+    WORD32 bitstream_start_offset, bitstream_end_offset;
+
+    /* Starting bitstream offset for header in bits */
+    bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    /* mb header info */
+    mb_tpm = *pu1_byte++;
+
+    /* mb type */
+    mb_type = mb_tpm & 0xF;
+
+    /* check for skip */
+    if (mb_type == PSKIP)
+    {
+        UWORD32 *nnz;
+
+        is_inter = 1;
+
+        /* increment skip counter */
+        (*ps_ent_ctxt->pi4_mb_skip_run)++;
+
+        /* store the index of the next mb syntax layer */
+        ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+        /* set nnz to zero */
+        /* NOTE(review): the top nnz entries are cleared through a UWORD32
+         * pointer; this presumes each entry is 4 bytes wide and suitably
+         * aligned - confirm against the entropy context definition */
+        ps_ent_ctxt->u4_left_nnz_luma = 0;
+        nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+        *nnz = 0;
+        ps_ent_ctxt->u4_left_nnz_cbcr = 0;
+        nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+        *nnz = 0;
+
+        /* residual */
+        error_status = ih264e_encode_residue(ps_ent_ctxt, P16x16, 0);
+
+        bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+        ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+        return error_status;
+    }
+
+    /* remaining mb header info */
+    cbp = *pu1_byte++;
+    mb_qp_delta = *pu1_byte++;
+
+    /* mb skip run */
+    PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
+
+    /* reset skip counter */
+    *ps_ent_ctxt->pi4_mb_skip_run = 0;
+
+    /* is intra ? */
+    if (mb_type == I16x16)
+    {
+        UWORD32 u4_cbp_l, u4_cbp_c;
+
+        is_inter = 0;
+
+        u4_cbp_c = (cbp >> 4);
+        u4_cbp_l = (cbp & 0xF);
+        luma_intra_mode = (mb_tpm >> 4) & 3;
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        /* for I16x16 the mb type code word also carries the luma mode, the
+         * chroma cbp and whether all luma 8x8 blocks are coded (cbp == 15) */
+        mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+        /* intra mb types are offset by 5 in this slice type */
+        mb_type_stream += 5;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I4x4)
+    {
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        is_inter = 0;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+        cbptable = 0;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+        /* 16 sub blocks, two packed per header byte */
+        for (i = 0; i < 16; i += 2)
+        {
+            /* sub blk idx 1 */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* sub blk idx 2 */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I8x8)
+    {
+        /* transform 8x8 flag */
+        UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        is_inter = 0;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+        cbptable = 0;
+
+        /* this path is not expected to be taken */
+        ASSERT(0);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+        /* u4_transform_size_8x8_flag */
+        PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+        /* write sub block modes */
+        for (i = 0; i < 4; i++)
+        {
+            /* sub blk idx 1 */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* sub blk idx 2 */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else
+    {
+        /* inter macro block partition cnt */
+        const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
+
+        /* mv ptr */
+        /* NOTE(review): the mvs are read as WORD16 straight after the three
+         * header bytes; this presumes the platform tolerates that alignment -
+         * confirm against the code that packs the header */
+        WORD16 *pi2_mv_ptr = (WORD16 *)pu1_byte;
+
+        /* number of partitions for the current mb */
+        /* NOTE(review): mb_type - 3 is used unchecked as table index; presumably
+         * only the four inter mb types reach this branch - verify at the caller */
+        UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3];
+
+        is_inter = 1;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, mb_type - 3, error_status, "mb type");
+
+        /* one (mv x, mv y) pair per partition */
+        for (i = 0; i < (WORD32)u4_part_cnt; i++)
+        {
+            PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x");
+
+            PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y");
+        }
+
+        /* header data continues after the mvs */
+        pu1_byte = (UWORD8 *)pi2_mv_ptr;
+    }
+
+    /* coded_block_pattern (for I16x16 it is part of the mb type code word) */
+    if (mb_type != I16x16)
+    {
+        PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern");
+    }
+
+    if (cbp || mb_type == I16x16)
+    {
+        /* mb_qp_delta */
+        PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+    }
+
+
+    /* Ending bitstream offset for header in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+    /* start bitstream offset for residue in bits */
+    bitstream_start_offset = bitstream_end_offset;
+
+    /* residual */
+    residue_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+    /* do not let the residue status mask an error raised by the header writes */
+    if (error_status == IH264E_SUCCESS)
+    {
+        error_status = residue_status;
+    }
+
+    /* Ending bitstream offset for residue in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+    /* store the index of the next mb syntax layer */
+    ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+    return error_status;
+}
diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h
new file mode 100755
index 0000000..86f4cd4
--- /dev/null
+++ b/encoder/ih264e_cavlc.h
@@ -0,0 +1,112 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_cavlc.h
+*
+* @brief
+* This file contains enumerations, macros and extern declarations of H264
+* cavlc tables
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CAVLC_H_
+#define IH264E_CAVLC_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+
+/*
+ * Reads one packed 4x4 coefficient record and steps to the next one.
+ *  pv_mb_coeff_data : in/out, pointer to the current record; on exit it points
+ *                     just past the record (ai2_residue + nnz, or ai2_residue
+ *                     when nnz is 0)
+ *  ps_mb_coeff_data : out, pv_mb_coeff_data viewed as a record pointer
+ *  u4_nnz           : out, low 8 bits of i4_sig_map_nnz
+ *  u4_sig_coeff_map : out, i4_sig_map_nnz >> 16; written only when nnz != 0
+ *  pi2_res_block    : out, address of ai2_residue; written only when nnz != 0
+ * Every argument is parenthesised so that expressions such as *pps or a[i]
+ * can be passed. The arguments are evaluated more than once, so they must be
+ * free of side effects.
+ */
+#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \
+    {\
+        (ps_mb_coeff_data) = (pv_mb_coeff_data); \
+        (u4_nnz) = (ps_mb_coeff_data)->i4_sig_map_nnz & 0xff; \
+        if (u4_nnz)\
+        {\
+            (u4_sig_coeff_map) = (ps_mb_coeff_data)->i4_sig_map_nnz >> 16; \
+            (pi2_res_block) = (ps_mb_coeff_data)->ai2_residue; \
+            (pv_mb_coeff_data) = (ps_mb_coeff_data)->ai2_residue + (u4_nnz); \
+        }\
+        else\
+        {\
+            (pv_mb_coeff_data) = (ps_mb_coeff_data)->ai2_residue;\
+        }\
+    }
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates the CAVLC coded bit stream of one macroblock of an
+* Intra Slice.
+*
+* @description
+* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification.
+*
+* @param[in,out] ps_ent_ctxt
+* pointer to entropy context; the packed mb header is read from
+* pv_mb_header_data, which is advanced to the next mb on return
+*
+* @returns error code
+*
+* @remarks the header/residue bit counters of the context are updated as well
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates the CAVLC coded bit stream of one macroblock of an
+* Inter slice
+*
+* @description
+* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification
+*
+* @param[in,out] ps_ent_ctxt
+* pointer to entropy context; the packed mb header is read from
+* pv_mb_header_data, which is advanced to the next mb on return
+*
+* @returns error code
+*
+* @remarks a skipped mb (PSKIP) only increments the skip run counter of the
+* context; the pending run is written ahead of the next non skipped mb
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+#endif /* IH264E_CAVLC_H_ */
diff --git a/encoder/ih264e_config.h b/encoder/ih264e_config.h
new file mode 100755
index 0000000..2446cdb
--- /dev/null
+++ b/encoder/ih264e_config.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_config.h
+*
+* @brief
+* contains any necessary declarations/definitions that are used during codec
+* build
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CONFIG_H_
+#define IH264E_CONFIG_H_
+
+/*****************************************************************************/
+/* Constant Macros
+ * Build-time switches; each one is defined as 0 in this configuration.
+ * NOTE(review): the names suggest statistics / debug print / trace features,
+ * presumably all disabled here - confirm at the points of use. */
+/*****************************************************************************/
+
+#define CAVLC_LEVEL_STATS 0
+#define GATING_STATS 0
+#define DEBUG_PRINT 0
+#define ENABLE_TRACE 0
+#define DEBUG_RC 0
+#define TRACE_SUPPORT 0
+
+#endif /* IH264E_CONFIG_H_ */
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c
new file mode 100755
index 0000000..5ba18de
--- /dev/null
+++ b/encoder/ih264e_core_coding.c
@@ -0,0 +1,2365 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_core_coding.c
+ *
+ * @brief
+ * This file contains routines that perform luma and chroma core coding for
+ * intra macroblocks
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_pack_l_mb_i16()
+ * - ih264e_pack_c_mb_i8()
+ * - ih264e_code_luma_intra_macroblock_16x16()
+ * - ih264e_code_luma_intra_macroblock_4x4()
+ * - ih264e_code_chroma_intra_macroblock_8x8()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264_trans_data.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Residue computation, forward transform and quantization of a 16x16 luma
+*  macroblock, optionally followed by the hadamard transform and quantization
+*  of its DC coefficients (intra 16x16 mode)
+*
+* @par Description:
+*  ps_codec->pf_resi_trans_quant_4x4 is run on each of the
+*  NUM_LUMA4x4_BLOCKS_IN_MB 4x4 blocks; the position of a block inside the
+*  macroblock is obtained with IND2SUB_LUMA_MB. When u4_dc_flag is set,
+*  ps_codec->pf_hadamard_quant_4x4 is then run on the collected DC values
+*  with u4_qbits + 1 and twice the round factor.
+*
+* @param[in] ps_codec
+*  Codec context supplying the transform function pointers
+*
+* @param[in] pu1_src
+*  Pointer to source macroblock
+*
+* @param[in] pu1_pred
+*  Pointer to prediction macroblock
+*
+* @param[out] pi2_out
+*  Pointer to the quantized output, in linear format.
+*  The first row holds one DC value per 4x4 block.
+*  Block k is written at pi2_out + (k + 1) * dst_strd
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] dst_strd
+*  Destination stride (distance between consecutive output blocks)
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz
+*  Memory to store the non-zero counts after transform.
+*  pu1_nnz[0] is the nnz of the DC block (written only when u4_dc_flag is set),
+*  pu1_nnz[k + 1] is the nnz of block k
+*
+* @param[in] u4_dc_flag
+*  Signals if the DC transform is to be done or not
+*  1 -> DC transform will be done
+*  0 -> DC transform will not be done
+*
+* @remarks
+*  In case of i16x16 the contribution of the dc coeffs to the nnz of each
+*  block is not removed here; that is left to the packing function
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
+                                                UWORD8 *pu1_src,
+                                                UWORD8 *pu1_pred,
+                                                WORD16 *pi2_out,
+                                                WORD32 src_strd,
+                                                WORD32 pred_strd,
+                                                WORD32 dst_strd,
+                                                const UWORD16 *pu2_scale_matrix,
+                                                const UWORD16 *pu2_threshold_matrix,
+                                                UWORD32 u4_qbits,
+                                                UWORD32 u4_round_factor,
+                                                UWORD8 *pu1_nnz,
+                                                UWORD32 u4_dc_flag)
+
+{
+    /* the DC values are collected in the first row of the output */
+    WORD16 * const pi2_dc_row = pi2_out;
+
+    /* the AC blocks start one stride further, their nnz one byte further */
+    WORD16 * const pi2_ac_out = pi2_out + dst_strd;
+    UWORD8 * const pu1_ac_nnz = pu1_nnz + 1;
+
+    WORD32 blk_idx = 0;
+
+    while (blk_idx < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        /* position of the 4x4 block inside the macroblock */
+        WORD32 i4_x, i4_y;
+
+        IND2SUB_LUMA_MB(blk_idx, i4_x, i4_y);
+
+        ps_codec->pf_resi_trans_quant_4x4(pu1_src + i4_x + i4_y * src_strd,
+                                          pu1_pred + i4_x + i4_y * pred_strd,
+                                          pi2_ac_out + blk_idx * dst_strd,
+                                          src_strd, pred_strd, pu2_scale_matrix,
+                                          pu2_threshold_matrix, u4_qbits,
+                                          u4_round_factor, pu1_ac_nnz + blk_idx,
+                                          pi2_dc_row + blk_idx);
+
+        blk_idx++;
+    }
+
+    if (u4_dc_flag)
+    {
+        /* hadamard + quant of the DC row, in place, one extra qbit and a
+         * doubled round factor; its nnz goes to the first nnz byte */
+        ps_codec->pf_hadamard_quant_4x4(pi2_dc_row, pi2_out, pu2_scale_matrix,
+                                        pu2_threshold_matrix, u4_qbits + 1,
+                                        u4_round_factor << 1, pu1_nnz);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the intra 16x16 inverse transform process for H264
+* it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* First 16 memory locations will have the DC coeffs in raster scan order in linear fashion
+* after a stride the 1st AC block will be present, again in raster scan order
+* Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+* The predicted data, 16x16 size
+* Block by block form
+*
+* @param[in] pu1_out
+* Output 16x16
+* In block by block form
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* In total the last 17 bits are used
+* the 16th bit will correspond to DC block
+* and 32-17 will correspond to the ac blocks in raster scan order
+* bit equaling zero indicates that the entire 4x4 block is zero for DC
+* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+* none
+*
+* @remarks
+* The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+ WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ UWORD32 u4_cntrl,
+ UWORD32 u4_dc_trans_flag,
+ WORD32 *pi4_tmp)
+{
+ /* Start index for inverse quant in a 4x4 block */
+ WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
+
+ /* Cntrl bits for 4x4 transforms
+ * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
+ * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path
+ * : dc block must contain only single dc coefficient
+ * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+ * : ie not (ac or dc)
+ */
+ UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+
+ /* tmp registers for block ids */
+ UWORD32 u4_blk_id;
+
+ /* Subscripts */
+ WORD32 i4_offset_x, i4_offset_y;
+
+ UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
+
+ /* Src and stride for dc coeffs */
+ UWORD32 u4_dc_inc;
+ WORD16 *pi2_dc_src;
+
+ /*
+ * For intra blocks we need to do inverse dc transform
+ * In case of intra blocks, it is here that we populate the dc bits in cntrl
+ * as they cannot be populated any earlier
+ */
+ if (u4_dc_trans_flag)
+ {
+ UWORD32 cntr, u4_dc_cntrl;
+ /* Do inv hadamard and place the results at the start of each AC block */
+ ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
+ pu2_weigh_mat, qp_div, pi4_tmp);
+
+ /* Update the cntrl flag */
+ u4_dc_cntrl = 0;
+ for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
+ {
+ u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
+ }
+ /* Mark dc bits as 1 if corresponding ac bit is 0 */
+ u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+ /* Combine both ac and dc bits */
+ u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
+ | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
+ }
+
+ /* Source for dc coeffs
+ * If the block is intra, we have to read dc values from first row of src
+ * then stride for each block is 1, otherwise it is src stride
+ */
+ pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
+ u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
+
+ /* The AC blocks starts from 2nd row */
+ pi2_src += src_strd;
+
+ /* Get the block bits */
+ u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
+ u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
+ u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
+
+ /* Get first block to process */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+ while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+ {
+ /* Compute address of src blocks */
+ WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
+
+ IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ /* Compute address of out and pred blocks */
+ pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ /* Do inv dc transform */
+ ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
+ pu1_cur_prd_blk,
+ pu1_cur_out_blk, pred_strd,
+ out_strd, pu2_iscale_mat,
+ pu2_weigh_mat, qp_div, NULL,
+ iq_start_idx,
+ pi2_dc_src + i4_src_offset);
+ /* Get next DC block to process */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+ }
+
+ /* now process ac/mixed blocks */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+ while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+ {
+
+ WORD32 i4_src_offset = src_strd * u4_blk_id;
+
+ IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
+ pu1_cur_prd_blk, pu1_cur_out_blk,
+ pred_strd, out_strd,
+ pu2_iscale_mat, pu2_weigh_mat,
+ qp_div, (WORD16*) pi4_tmp,
+ iq_start_idx,
+ pi2_dc_src + u4_blk_id);
+
+ DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+ }
+
+ /* Now process empty blocks */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+ while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+ {
+ IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
+ pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
+ SIZE_4X4_BLK_VERT, 0, 0);
+
+ DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Forward transform and quantization of the chroma part of a macroblock:
+*  4x4 transform/quant of every chroma block followed by the 2x2 hadamard
+*  transform/quant of the collected dc coefficients
+*
+* @par Description:
+*  pf_resi_trans_quant_chroma_4x4 is invoked once for each of the
+*  NUM_CHROMA4x4_BLOCKS_IN_MB 4x4 blocks on the matching source/prediction
+*  pair; besides the quantized block it hands back the dc term of that block.
+*  The dc terms are then passed to pf_hadamard_quant_2x2_uv, which is called
+*  with u4_qbits + 1 and twice the round factor.
+*
+* @param[in] ps_codec
+*  Codec context, supplies the transform kernels (function pointers)
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*  The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*  Prediction is in interleaved format
+*
+* @param[out] pi2_out
+*  Pointer to residual output
+*  The row at pi2_out receives the output of the dc (hadamard) stage.
+*  4x4 block k (k = 0 .. NUM_CHROMA4x4_BLOCKS_IN_MB - 1) is written at
+*  pi2_out + (k + 1) * out_strd
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] out_strd
+*  Distance, in coefficients, between consecutive blocks of pi2_out
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz_c
+*  Memory to store the number of non-zeros after transform
+*  pu1_nnz_c[0]    : nnz of the first dc block  (au1 dc nnz entry 0)
+*  pu1_nnz_c[1..4] : nnz of 4x4 blocks 0..3
+*  pu1_nnz_c[5]    : nnz of the second dc block (au1 dc nnz entry 1)
+*  pu1_nnz_c[6..9] : nnz of 4x4 blocks 4..7
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
+                                                UWORD8 *pu1_src,
+                                                UWORD8 *pu1_pred,
+                                                WORD16 *pi2_out,
+                                                WORD32 src_strd,
+                                                WORD32 pred_strd,
+                                                WORD32 out_strd,
+                                                const UWORD16 *pu2_scale_matrix,
+                                                const UWORD16 *pu2_threshold_matrix,
+                                                UWORD32 u4_qbits,
+                                                UWORD32 u4_round_factor,
+                                                UWORD8 *pu1_nnz_c)
+{
+    /* dc term of every 4x4 block, in block order */
+    WORD16 ai2_dc_coeffs[8];
+
+    /* nnz of the two dc blocks as reported by the hadamard kernel */
+    UWORD8 au1_dc_nnz[2];
+
+    /* ac residue and ac nnz both start one entry after their dc slot */
+    WORD16 *pi2_ac_out = pi2_out + out_strd;
+    UWORD8 *pu1_ac_nnz = pu1_nnz_c + 1;
+
+    WORD32 blk;
+
+    for (blk = 0; blk < NUM_CHROMA4x4_BLOCKS_IN_MB; blk++)
+    {
+        WORD32 i4_x, i4_y;
+
+        /* blocks 4 and above skip one extra nnz slot, the one reserved for
+         * the second dc block */
+        WORD32 nnz_idx = blk + (blk > 3);
+
+        IND2SUB_CHROMA_MB(blk, i4_x, i4_y);
+
+        ps_codec->pf_resi_trans_quant_chroma_4x4(
+                        pu1_src + i4_x + i4_y * src_strd,
+                        pu1_pred + i4_x + i4_y * pred_strd,
+                        pi2_ac_out + blk * out_strd, src_strd, pred_strd,
+                        pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
+                        u4_round_factor, &pu1_ac_nnz[nnz_idx],
+                        &ai2_dc_coeffs[blk]);
+    }
+
+    /* dc stage: one more bit of precision and a doubled round factor */
+    ps_codec->pf_hadamard_quant_2x2_uv(ai2_dc_coeffs, pi2_out,
+                                       pu2_scale_matrix, pu2_threshold_matrix,
+                                       u4_qbits + 1, u4_round_factor << 1,
+                                       au1_dc_nnz);
+
+    /* Copy the dc nnzs to their slots */
+    pu1_nnz_c[0] = au1_dc_nnz[0];
+    pu1_nnz_c[5] = au1_dc_nnz[1];
+}
+
+/**
+*******************************************************************************
+* @brief
+* This function performs the inverse transform and reconstruction for the
+* chroma blocks of an H264 MB
+*
+* @par Description:
+* Does inverse DC transform, inverse quantization and inverse transform.
+* Every 4x4 block is handled by one of three kernels depending on u4_cntrl:
+* dc only, ac/mixed, or a copy of the prediction when the block is empty
+*
+* @param[in] ps_codec
+* Codec context, supplies the transform and copy kernels (function pointers)
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* The input is in the form of, first 4 locations will contain DC coeffs of
+* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
+* in raster scan order will follow, each block as linear array in raster scan order.
+* After a stride next AC block will follow. After all AC blocks of U plane
+* V plane AC blocks will follow in exact same order.
+* The DC coefficients are overwritten in place by the inverse hadamard.
+*
+* @param[in] pu1_pred
+* The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+* Output 8x16, U and V interleaved
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* Bit 15 corresponds to the DC block of U plane, bit 14 to the DC block of V plane
+* Bits 31-28 indicate AC blocks of U plane in raster scan order
+* Bits 27-24 indicate AC blocks of V plane in raster scan order
+* A bit value of 1 implies that there is at least one non zero coeff in a block
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc coeffs for chroma * number of planes
+* in size
+*
+* @returns
+* none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+ WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ UWORD32 u4_cntrl,
+ WORD32 *pi4_tmp)
+{
+ /* Cntrl bits for 4x4 transforms
+ * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
+ * u4_dc_cntrl : controls if a 4x4 block is to be processed in dc path
+ * : dc block must contain only single dc coefficient
+ * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+ * : ie not (ac or dc)
+ */
+
+ UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+
+ /* tmp registers for block ids (declared signed despite the u4_ prefix) */
+ WORD32 u4_blk_id;
+
+ /* Offsets for pointers */
+ WORD32 i4_offset_x, i4_offset_y;
+
+ /* Pointer to 4x4 blocks */
+ UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
+
+ /* Tmp register for pointer to dc coeffs */
+ WORD16 *pi2_dc_src;
+
+ /* Stand-in dc value used when the MB carries no dc coefficients */
+ WORD16 i2_zero = 0;
+
+ /* Increment for dc block */
+ WORD32 i4_dc_inc;
+
+ /*
+ * Lets do the inverse transform for dc coeffs in chroma
+ */
+ if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
+ {
+ /* NOTE: this u4_dc_cntrl shadows the function-scope variable of the same
+ * name; it is only a scratch value used to rebuild u4_cntrl */
+ UWORD32 cntr, u4_dc_cntrl;
+ /* Do inv hadamard for u and v block, in place on pi2_src */
+
+ ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
+ pu2_weigh_mat, qp_div, NULL);
+ /*
+ * Update the cntrl flag
+ * Bit (15 - cntr) is set when dc coeff cntr is non zero, i.e.
+ * bits 15-12 -> first four dc coeffs, bits 11-8 -> next four dc coeffs
+ */
+ u4_dc_cntrl = 0;
+ for (cntr = 0; cntr < 8; cntr++)
+ {
+ u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
+ }
+
+ /* Keep a dc bit only if the corresponding ac bit (upper half) is 0 */
+ u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+ /* Combine both ac and dc bits */
+ u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
+ | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
+
+ /* Since we populated the dc coeffs, we have to read them from there */
+ pi2_dc_src = pi2_src;
+ i4_dc_inc = 1;
+ }
+ else
+ {
+ /* No dc coefficients: drop any dc bits and make every block read a dc
+ * value of zero (increment 0 keeps all blocks on the same location) */
+ u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
+ pi2_dc_src = &i2_zero;
+ i4_dc_inc = 0;
+ }
+
+ /* Get the block bits; dc bits are moved to the upper half so that all three
+ * masks share the same bit positions (bits 31-24, one per 4x4 block) */
+ u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
+ u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
+ u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
+
+ /* The AC blocks start from 2nd row */
+ pi2_src += src_strd;
+
+ /* DEQUEUE_BLKID_FROM_CONTROL is defined elsewhere; presumably it removes the
+ * next set bit from the mask and yields its block id, or an id that is not
+ * below 8 once the mask is exhausted - confirm */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+ while (u4_blk_id < 8)
+ {
+ WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
+
+ IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ /* The dc kernel is given no scaling matrices, qp or temporary buffer;
+ * presumably the scaling was already applied by the inverse hadamard
+ * above - confirm */
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
+ pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
+ pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
+ NULL, pi2_dc_src + dc_src_offset);
+ /* Get next DC block to process */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+ }
+
+ /* now process ac/mixed blocks */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+ while (u4_blk_id < 8)
+ {
+ /* Each ac block occupies one src_strd of coefficients */
+ WORD32 i4_src_offset = src_strd * u4_blk_id;
+ WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
+
+ IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
+ pu1_cur_4x4_prd_blk,
+ pu1_cur_4x4_out_blk,
+ pred_strd, out_strd,
+ pu2_iscale_mat,
+ pu2_weigh_mat, qp_div,
+ (WORD16 *) pi4_tmp,
+ pi2_dc_src + dc_src_offset);
+
+ DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+ }
+
+ /* Now process empty blocks: the prediction is copied to the output as is */
+ DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+ while (u4_blk_id < 8)
+ {
+ IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+ pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+ pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+ ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
+ pred_strd, out_strd, SIZE_4X4_BLK_VERT,
+ SIZE_4X4_BLK_HRZ);
+
+ DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par Description
+*  An i16 macro block contains two classes of units, dc 4x4 block and
+*  4x4 ac blocks. While packing the mb, the dc block is sent first, and
+*  the 16 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  Every block starts with the header word i4_sig_map_nnz: the nnz of the
+*  block in its low bits and, when nnz is non zero, the significance map in
+*  bits 16 and above. The non zero coefficients follow the header.
+*  This is repeated for 1 dc + 16 ac blocks.
+*
+*  For the ac blocks the scan starts at position 1 (the dc position is
+*  skipped), so bit 0 of the significance map refers to scan position 1.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb; the dc block is at pi2_res_mb, ac block k at
+*  pi2_res_mb + (k + 1) * i4_res_strd
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data
+*  written. If *u1_cbp_l is zero on exit, it is left just after the dc block
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_l
+*  coded block pattern luma; set to 15 as soon as one ac block has a non zero
+*  coefficient, otherwise left untouched (caller initializes it)
+*
+* @param[in] pu1_nnz
+*  number of non zero coefficients: entry 0 for the dc block, entries 1..16
+*  for the 4x4 units (these counts include the dc coefficient of the unit)
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform of 16x16 blocks:
+*  bit 15 is set when the dc block has coefficients, bit (31 - k) is set when
+*  4x4 unit k has ac coefficients
+*
+* @return none
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
+                          void **pv_mb_coeff_data,
+                          WORD32 i4_res_strd,
+                          UWORD8 *u1_cbp_l,
+                          UWORD8 *pu1_nnz,
+                          UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
+
+    /* no of non zero coefficients written so far for the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* order in which the 4x4 units are visited */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4, u4_cntrl = 0;
+
+    /* DC and AC coeff pointers */
+    WORD16 *pi2_res_mb_ac, *pi2_res_mb_dc;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+
+    pi2_res_mb_dc = pi2_res_mb;
+    pu1_scan_order = gu1_luma_scan_order_dc;
+
+    u4_nnz = *pu1_nnz;
+
+    /* write number of non zero coefficients */
+    ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+    if (u4_nnz)
+    {
+        /* scan until all u4_nnz non zero coefficients have been collected */
+        for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+        {
+            if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
+            {
+                /* write residue */
+                ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
+                u4_s_map |= mask;
+            }
+            mask <<= 1;
+        }
+        /* write significant coeff map */
+        ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+        /* Set DC bit in ctrl code */
+        u4_cntrl = 0x00008000;
+    }
+    else
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_nnz++;
+    pu1_scan_order = gu1_luma_scan_order;
+    pi2_res_mb += i4_res_strd; /* Move to AC block */
+
+    /* remembered so that the ac part can be dropped when cbp stays zero */
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /*
+         * Since this is a i16x16 block, we should not count dc coeff on
+         * individual 4x4 blocks to nnz. But due to the implementation of 16x16
+         * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
+         * here
+         */
+        u4_nnz -= (pi2_res_mb_ac[0] != 0);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            /* ac only: the scan starts at position 1 */
+            for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_l = 15;
+
+            /* The shift is done on an unsigned value: block 0 needs bit 31,
+             * which a signed int cannot represent */
+            u4_cntrl |= ((UWORD32)1 << (31 - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+    }
+
+    /* no ac coefficient at all: discard the 16 empty ac headers */
+    if (!(*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    /* Store the cntrl signal */
+    (*pu4_cntrl) = u4_cntrl;
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of a luma mb without dc transform
+*  (e.g. p16x16) for entropy coding
+*
+* @par Description
+*  The macro block is packed as 16 4x4 blocks, visited 8x8 by 8x8. Each and
+*  every block is represented by 3 parameters (nnz, significant coefficient
+*  map and the residue coefficients itself). If a 4x4 unit does not have any
+*  coefficients then only nnz is sent. Inside a 4x4 block the individual
+*  coefficients are sent in scan order.
+*
+*  Every block starts with the header word i4_sig_map_nnz: the nnz of the
+*  block in its low bits and, when nnz is non zero, the significance map in
+*  bits 16 and above. The non zero coefficients follow the header.
+*
+*  When u4_thres_resi is non zero a coefficient cost is accumulated per 8x8
+*  unit. An 8x8 unit whose cost does not exceed LUMA_SUB_BLOCK_SKIP_THRESHOLD
+*  is dropped (cbp bit, cntrl bits and nnz cleared), and the whole mb is
+*  dropped when the summed cost does not exceed LUMA_BLOCK_SKIP_THRESHOLD.
+*  Data of an 8x8 unit whose cbp bit is clear is not kept in the buffer.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb; the first row (one i4_res_strd) is ignored, block k
+*  is read at pi2_res_mb + (k + 1) * i4_res_strd
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data kept
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_l
+*  coded block pattern luma; bit b8 is OR'ed in for every 8x8 unit that has
+*  coefficients and cleared again when the unit is dropped (caller
+*  initializes it)
+*
+* @param[in, out] pu1_nnz
+*  number of non zero coefficients in each 4x4 unit; entry 0 is ignored,
+*  entries 1..16 belong to the 4x4 units and are zeroed for dropped units
+*
+* @param[in] u4_thres_resi
+*  non zero enables the cost based dropping of 8x8 units / the mb
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform; bit (31 - k) is set when 4x4 unit k
+*  has coefficients
+*
+* @return none
+*
+* @remarks Killing coeffs not yet coded
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_l,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
+
+    /* no of non zero coefficients written so far for the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block */
+    WORD16 *pi2_res_sb;
+
+    /* order in which the 4x4 units are visited (four per 8x8 unit) */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* coeff cost */
+    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* temp var */
+    UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
+
+    /* temp var */
+    WORD32 i4_res_val, i4_run = -1, dcac_block;
+
+    /* When Hadamard transform is disabled, first row values are dont care, ignore them */
+    pi2_res_mb += i4_res_strd;
+
+    /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
+    pu1_nnz++;
+
+    /* rewind points for dropping the current 8x8 unit / the whole mb */
+    ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /********************************************************/
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        b8 = b4 >> 2;
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                /* number of runs of zero before, this is used to compute coeff cost */
+                i4_run++;
+
+                i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                if (i4_res_val)
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
+                    u4_s_map |= mask;
+
+                    if (u4_thres_resi)
+                    {
+                        /* compute coeff cost: +-1 costs by run length (runs
+                         * of 6 or more are free), anything larger costs 9 */
+                        if (i4_res_val == 1 || i4_res_val == -1)
+                        {
+                            if (i4_run < 6)
+                                u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
+                        }
+                        else
+                            u4_b8_coeff_cost += 9;
+
+                        i4_run = -1;
+                    }
+                }
+
+                mask <<= 1;
+            }
+
+            /* write significant coeff map */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+            /* cbp */
+            *u1_cbp_l |= (1 << b8);
+
+            /* Cntrl map for inverse transform computation
+             *
+             * The intent of the selection below is to flag a dc-only block in
+             * the lower half of the control word instead of the upper half.
+             * NOTE(review): the scan loop above always runs at least once when
+             * u4_nnz is non zero, so coeff_cnt is never 0 here and position
+             * 31 - u1_scan_order[b4] is always used. Kept as is so that the
+             * generated control word does not change.
+             */
+            dcac_block = (coeff_cnt == 0) ? 16 : 31;
+
+            /* The shift is done on an unsigned value: block 0 needs bit 31,
+             * which a signed int cannot represent */
+            u4_cntrl |= ((UWORD32)1 << (dcac_block - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        /* Decide if the 8x8 unit has to be sent for entropy coding? */
+        if ((b4 + 1) % 4 == 0)
+        {
+            if (u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
+                            (*u1_cbp_l & (1 << b8)))
+            {
+                /*
+                 * When we want to reset the full 8x8 block, we have to reset
+                 * both the dc and ac coeff bits hence we have the symmetric
+                 * arrangement of bits
+                 */
+                const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
+
+                /* restore cbp */
+                *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
+
+                /* correct cntrl flag */
+                u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
+
+                /* correct nnz of the four 4x4 units of this 8x8 unit */
+                pu1_nnz[u1_scan_order[b4 - 3]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 2]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 1]] = 0;
+                pu1_nnz[u1_scan_order[b4]] = 0;
+
+                /* reset blk cost */
+                u4_b8_coeff_cost = 0;
+            }
+
+            /* nothing coded in this 8x8 unit: take back what was written */
+            if (!(*u1_cbp_l & (1 << b8)))
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
+            }
+
+            u4_mb_coeff_cost += u4_b8_coeff_cost;
+
+            /* start afresh for the next 8x8 unit */
+            u4_b8_coeff_cost = 0;
+            i4_run = -1;
+            ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+        }
+    }
+
+    /* whole mb is cheap enough to be dropped */
+    if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
+                    && (*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
+        *u1_cbp_l = 0;
+        u4_cntrl = 0;
+        memset(pu1_nnz, 0, 16);
+    }
+
+    (*pu4_cntrl) = u4_cntrl;
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par Description
+*  An i8 chroma macro block contains two classes of units, dc 2x2 block and
+*  4x4 ac blocks. While packing the mb, the dc blocks of both planes are sent
+*  first, and then the 4 ac blocks of each plane are sent in scan order. Each
+*  and every block is represented by 3 parameters (nnz, significant
+*  coefficient map and the residue coefficients itself). If a 4x4 unit does
+*  not have any coefficients then only nnz is sent. Inside a 4x4 block the
+*  individual coefficients are sent in scan order.
+*
+*  Every block starts with the header word i4_sig_map_nnz: the nnz of the
+*  block in its low bits and, when nnz is non zero, the significance map in
+*  bits 16 and above. The non zero coefficients follow the header.
+*
+*  When u4_thres_resi is non zero, the ac blocks of a plane whose accumulated
+*  coefficient cost stays below CHROMA_BLOCK_SKIP_THRESHOLD are replaced by
+*  four empty headers and their cntrl bits and nnz are cleared.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb: dc coeffs of U at offset 0, dc coeffs of V at
+*  offset 4, ac blocks of U from row 1, ac blocks of V from row 5 (one row is
+*  i4_res_strd coefficients)
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data
+*  kept. Rewound to the start when *u1_cbp_c ends up 0 and to the end of the
+*  dc data when it ends up 1
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_c
+*  coded block pattern chroma; set to 1 when a dc block has coefficients and
+*  to 2 when a plane keeps ac coefficients (caller initializes it)
+*
+* @param[in, out] pu1_nnz
+*  number of non zero coefficients: [0] dc of U, [1..4] 4x4 units of U,
+*  [5] dc of V, [6..9] 4x4 units of V. Entries of a dropped plane are zeroed
+*
+* @param[in] u4_thres_resi
+*  non zero enables the cost based dropping of the ac blocks of a plane
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform: bit 15 / 14 for the dc block of
+*  U / V, bits 31-28 for the ac blocks of U, bits 27-24 for those of V
+*
+* @param[in] u4_swap_uv
+*  Swaps the order of U and V planes in entropy bitstream.
+*  NOTE(review): compared against the plane index below, so it is presumably
+*  expected to be exactly 0 or 1 - confirm against the callers
+*
+* @return none
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_c,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl,
+                      UWORD32 u4_swap_uv)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
+    tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
+
+    /* nnz pointer */
+    UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
+
+    /* nnz counter */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block, res val */
+    WORD16 *pi2_res_sb, i2_res_val;
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4, plane;
+
+    /* temp var */
+    UWORD32 u4_coeff_cost;
+    WORD32 i4_run;
+
+    /* coeff cost */
+    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* pointer to packed buffer space */
+    UWORD32 *pu4_mb_coeff_data = NULL;
+
+    /* ac coded block pattern */
+    UWORD8 u1_cbp_ac;
+
+    /* Variable to store the current bit pos in cntrl variable */
+    UWORD32 cntrl_pos = 0;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+    pu1_scan_order = gu1_chroma_scan_order_dc;
+    pi2_res_sb = pi2_res_mb;
+    pu1_nnz_dc = pu1_nnz;
+    (*pu4_cntrl) = 0;
+    cntrl_pos = 15;
+    ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
+
+    /* Color space conversion between SP_UV and SP_VU
+     * We always assume SP_UV for all the processing
+     * Hence to get proper stream output we need to swap U and V channels here
+     *
+     * For that there are two paths we need to look for
+     * One is the path to bitstream, these variables should have the proper
+     * input configured UV or VU
+     * For the other path the inverse transform variables should have whatever
+     * ordering the input had
+     */
+
+    if (u4_swap_uv)
+    {
+        pu1_nnz_dc += 5; /* Move to NNZ of V plane */
+        pi2_res_sb += 4; /* Move to DC coeff of V plane */
+
+        cntrl_pos = 14; /* Control bit for V plane */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = *pu1_nnz_dc;
+        /* write number of non zero coefficients U/V */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+                if (i2_res_val)
+                {
+                    /* write residue U/V */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_c = 1;
+
+            (*pu4_cntrl) |= ((UWORD32)1 << cntrl_pos);
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        /* Step to the other plane only while one is still pending, so that
+         * no pointer is moved in front of the start of its buffer after the
+         * last plane (all three values are re-initialized or unused below) */
+        if (plane == 0)
+        {
+            if (u4_swap_uv)
+            {
+                cntrl_pos++;     /* Control bit for U plane */
+                pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
+                pi2_res_sb -= 4; /* Move to DC coeff of U plane */
+            }
+            else
+            {
+                cntrl_pos--;     /* Control bit for V plane */
+                pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
+                pi2_res_sb += 4; /* Move to DC coeff of V plane */
+            }
+        }
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_scan_order = gu1_chroma_scan_order;
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    if (u4_swap_uv)
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane, ie 1 dc row + 4 ac rows */
+        cntrl_pos = 27; /* Control bits of the V blocks start at bit 31 - 4 */
+        pu1_nnz_ac = pu1_nnz + 6; /* NNZ of V blocks: 1 dc + 4 ac + 1 dc */
+    }
+    else
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, ie 1 dc row */
+        cntrl_pos = 31; /* Control bits of the U blocks start at bit 31 */
+        pu1_nnz_ac = pu1_nnz + 1; /* NNZ of U blocks: 1 dc */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        /* start of this plane's ac data, needed if the plane gets dropped */
+        pu4_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_coeff_cost = 0;
+        i4_run = -1;
+
+        /* get the current cbp, so that it automatically
+         * gets reverted in case of zero ac values */
+        u1_cbp_ac = *u1_cbp_c;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+            u4_nnz = *pu1_nnz_ac;
+
+            /*
+             * We are scanning only ac coeffs, but the nnz is for the
+             * complete 4x4 block. Hence we have to discount the nnz contributed
+             * by the dc coefficient
+             */
+            u4_nnz -= (pi2_res_sb[0] != 0);
+
+            /* write number of non zero coefficients U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+            if (u4_nnz)
+            {
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+                {
+                    i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                    i4_run++;
+
+                    if (i2_res_val)
+                    {
+                        /* write residue U/V */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                        u4_s_map |= mask;
+
+                        /* cost is only tracked until the threshold is reached */
+                        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
+                        {
+                            /* compute coeff cost: +-1 costs by run length
+                             * (runs of 6 or more are free), anything larger
+                             * costs 9 */
+                            if (i2_res_val == 1 || i2_res_val == -1)
+                            {
+                                if (i4_run < 6)
+                                    u4_coeff_cost += pu1_coeff_cost[i4_run];
+                            }
+                            else
+                                u4_coeff_cost += 9;
+
+                            i4_run = -1;
+                        }
+                    }
+                    mask <<= 1;
+                }
+
+                /* write significant coeff map U/V */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+                u1_cbp_ac = 2;
+
+                /* The shift is done on an unsigned value: cntrl_pos can be
+                 * 31, which a signed int cannot represent */
+                (*pu4_cntrl) |= ((UWORD32)1 << cntrl_pos);
+            }
+            else
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+
+            pu1_nnz_ac++;
+            pi2_res_sb += i4_res_strd;
+            cntrl_pos--;
+        }
+
+        /* reset block */
+        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
+        {
+            /* replace the plane's ac data by four empty block headers
+             * (presumably one 32 bit word each - confirm against
+             * tu_sblk_coeff_data_t) */
+            pu4_mb_coeff_data[0] = 0;
+            pu4_mb_coeff_data[1] = 0;
+            pu4_mb_coeff_data[2] = 0;
+            pu4_mb_coeff_data[3] = 0;
+            (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
+
+            /* Generate the control signal */
+            /* Zero out the current plane's AC bits: 31-28 for U, 27-24 for V */
+            (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
+
+            /* Similarly do for the NNZ also */
+            *(pu1_nnz_ac - 4) = 0;
+            *(pu1_nnz_ac - 3) = 0;
+            *(pu1_nnz_ac - 2) = 0;
+            *(pu1_nnz_ac - 1) = 0;
+        }
+        else
+        {
+            *u1_cbp_c = u1_cbp_ac;
+        }
+
+        if (u4_swap_uv)
+        {
+            /* V was packed first, go back to the U plane */
+            pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, ie 1 dc row */
+            cntrl_pos = 31; /* Control bits of the U blocks start at bit 31 */
+            pu1_nnz_ac = pu1_nnz + 1; /* NNZ of U blocks: 1 dc */
+        }
+        else
+        {
+            /* pi2_res_sb and cntrl_pos already point at the V plane */
+            pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
+        }
+    }
+
+    /* restore the ptr basing on cbp */
+    if (*u1_cbp_c == 0)
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
+    }
+    else if (*u1_cbp_c == 1)
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i16x16, the mb is first
+* predicted using one of i16x16 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is transformed (hierarchical transform, i.e., dct followed by
+* hadamard) and quantized. The quantized coefficients are packed in scan order
+* for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to ref macro block */
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = NULL;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+ /* intra mode */
+ UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_l = 0;
+
+ /* number of non zero coeffs*/
+ UWORD32 au4_nnz[5];
+ UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz;
+
+ /*Cntrol signal for itrans*/
+ UWORD32 u4_cntrl;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* init nnz */
+ au4_nnz[0] = 0;
+ au4_nnz[1] = 0;
+ au4_nnz[2] = 0;
+ au4_nnz[3] = 0;
+ au4_nnz[4] = 0;
+
+ if (u1_intra_mode == PLANE_I16x16)
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
+ }
+ else
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
+ }
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+ pu1_pred_mb, pi2_res_mb,
+ i4_src_strd, i4_pred_strd,
+ i4_res_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ pu1_nnz, ENABLE_DC_TRANSFORM);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+ pu1_nnz, &u4_cntrl);
+
+ /********************************************************/
+ /* ierror estimation, */
+ /* itransform */
+ /* iquantization */
+ /********************************************************/
+ /*
+ *if refernce frame is not to be computed
+ *we only need the right and bottom border 4x4 blocks to predict next intra
+ *blocks, hence only compute them
+ */
+ if (!ps_proc->u4_compute_recon)
+ {
+ u4_cntrl &= 0x111F8000;
+ }
+
+ if (u4_cntrl)
+ {
+ ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+ ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+ i4_res_strd, i4_pred_strd, i4_rec_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+ u4_cntrl, ENABLE_DC_TRANSFORM,
+ ps_proc->pv_scratch_buff);
+ }
+ else
+ {
+ ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
+ i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
+ 0);
+ }
+
+ return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i4x4, each 4x4 sub block
+*  is first predicted using one of the i4x4 prediction filters, based on the
+*  intra mode chosen for that sub block. Then, error is computed between the
+*  input blk and the estimated blk. This error is dct transformed and quantized.
+*  The quantized coefficients are packed in scan order for entropy coding and
+*  the sub block is reconstructed into the recon buffer.
+*
+* @param[in] ps_proc
+*  pointer to the process context of the current macro block
+*
+* @returns u1_cbp_l
+*  coded block pattern luma; bit b8 is set when any 4x4 sub block of the 8x8
+*  block b8 has a non zero coefficient
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+*  mentioned in h.264 specification.
+*  Every sub block is reconstructed irrespective of ps_proc->u4_compute_recon:
+*  the prediction pels of a sub block are read from the recon buffer, including
+*  the recon of sub blocks coded earlier in this macro block, so none of them
+*  can be skipped.
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to recon of the current sub block (set per sub block) */
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* pointer to source of the current sub block (set per sub block) */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+    /* pointer to prediction block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* pointer to neighbors: left, top, top-right, top-left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+
+    /* intra mode of the current sub block */
+    UWORD8 u1_intra_mode;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+
+    /*
+     * neighbor pels for intra prediction, laid out as
+     * [0..3] left pels (bottom to top), [4] top left, [5..8] top,
+     * [9..12] top right
+     */
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_l = 0;
+
+    /* number of non zero coeffs */
+    UWORD8 u1_nnz;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* pointer to packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* packed coeff data of the current sub block and start of the 8x8 block */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* Dummy variable for 4x4 trans function */
+    WORD16 i2_dc_dummy;
+
+    /* temp var */
+    UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
+
+    /* Process 16 4x4 luma sub-blocks of the MB in scan order */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
+        u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
+
+        /* if in case cbp for the 8x8 block is zero, send no residue */
+        ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            /* index of pel in MB */
+            u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
+            u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
+
+            /* Initialize source and reference pointers */
+            pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
+            pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
+
+            /* pointer to left of ref sub block */
+            pu1_mb_a = pu1_ref_mb - 1;
+            /* pointer to top of ref sub block */
+            pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+            /* pointer to topright of ref sub block */
+            pu1_mb_c = pu1_mb_b + 4;
+            /* pointer to topleft of ref sub block */
+            pu1_mb_d = pu1_mb_b - 1;
+
+            /* neighbor availability of the sub block */
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+
+            /* sub block intra mode */
+            u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
+
+            /********************************************************/
+            /* gather prediction pels from neighbors for prediction */
+            /********************************************************/
+            /* left pels, stored bottom to top */
+            if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
+            {
+                for (i = 0; i < 4; i++)
+                    pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4, 0, 4);
+            }
+
+            /* top pels */
+            if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+            }
+            /* top left pels */
+            if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
+            {
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            }
+            else
+            {
+                pu1_ngbr_pels_i4[4] = 0;
+            }
+            /* top right pels */
+            if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+            }
+            else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+            {
+                /* top right unavailable: replicate the last top pel */
+                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+            }
+
+            /********************************************************/
+            /*  prediction                                          */
+            /********************************************************/
+            (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
+                                                          pu1_pred_mb, 0,
+                                                          i4_pred_strd,
+                                                          i4_ngbr_avbl);
+
+            /********************************************************/
+            /*  error estimation,                                   */
+            /*  transform                                           */
+            /*  quantization                                        */
+            /********************************************************/
+            ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
+                                              pi2_res_mb, i4_src_strd,
+                                              i4_pred_strd,
+                                              ps_qp_params->pu2_scale_mat,
+                                              ps_qp_params->pu2_thres_mat,
+                                              ps_qp_params->u1_qbits,
+                                              ps_qp_params->u4_dead_zone,
+                                              &u1_nnz, &i2_dc_dummy);
+
+            /********************************************************/
+            /*  pack coeff data for entropy coding                  */
+            /********************************************************/
+            ps_mb_coeff_data = *pv_mb_coeff_data;
+
+            /* write number of non zero coefficients */
+            ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
+
+            if (u1_nnz)
+            {
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
+                {
+                    if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
+                    {
+                        /* write residue */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+                        u4_s_map |= mask;
+                    }
+                    mask <<= 1;
+                }
+                /* write significant coeff map in the upper 16 bits */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+                /* update ptr to coeff data */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+                /* cbp */
+                u1_cbp_l |= (1 << b8);
+            }
+            else
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+
+            /********************************************************/
+            /*  ierror estimation,                                  */
+            /*  itransform                                          */
+            /*  iquantization                                       */
+            /********************************************************/
+            /*
+             * The recon of every sub block is computed unconditionally.
+             * The prediction pels gathered above come from the recon buffer,
+             * and for most sub blocks they belong to sub blocks of this very
+             * macro block. Skipping the recon of any sub block would feed
+             * stale pels to the prediction of the sub blocks that follow.
+             */
+            if (u1_nnz)
+            {
+                ps_codec->pf_iquant_itrans_recon_4x4(
+                                pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+                                /*No input stride,*/i4_pred_strd,
+                                i4_rec_strd, ps_qp_params->pu2_iscale_mat,
+                                ps_qp_params->pu2_weigh_mat,
+                                ps_qp_params->u1_qp_div,
+                                ps_proc->pv_scratch_buff, 0, 0);
+            }
+            else
+            {
+                /* no residue: the recon is the prediction itself */
+                ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
+                                                  i4_pred_strd, i4_rec_strd,
+                                                  BLK_SIZE, BLK_SIZE, NULL,
+                                                  0);
+            }
+        }
+
+        /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
+        if (!(u1_cbp_l & (1 << b8)))
+        {
+            *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+        }
+    }
+
+    return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief packs luma coefficients of an i4x4 macro block when rdopt is on
+*
+* @par Description:
+*  No prediction, transform or quantization is done here. The quantized
+*  residue (ps_proc->pi2_res_buf_intra_4x4), the non zero coefficient counts
+*  (ps_proc->au4_nnz_intra_4x4) and the reconstructed macro block
+*  (ps_proc->pu1_ref_mb_intra_4x4) are read as is; they are presumably filled
+*  during the i4x4 mode evaluation - confirm against the caller. The residue is
+*  packed in scan order for entropy coding and the reconstructed macro block is
+*  copied to the recon buffer.
+*
+* @param[in] ps_proc
+*  process context of the current macro block
+*
+* @returns u1_cbp_l
+*  coded block pattern luma; bit b8 is set when any 4x4 sub block of the 8x8
+*  block b8 has a non zero coefficient
+*
+* @remarks
+*  The 4x4 sub blocks are visited in the order in which their residue and nnz
+*  are stored: MB_SIZE coefficients and one nnz byte per sub block, four sub
+*  blocks per 8x8 block.
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* quantized residue of the current sub block */
+    WORD16 *pi2_res_blk = ps_proc->pi2_res_buf_intra_4x4;
+
+    /* non zero coeff count, one byte per sub block */
+    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+    /* write pointer of the packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* write pointer at the start of the current 8x8 block */
+    void *pv_b8_start = NULL;
+
+    /* scanning matrix */
+    const UWORD8 *pu1_scan = gu1_luma_scan_order;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_l = 0;
+
+    /* sub block index, 0 .. 15 */
+    UWORD32 u4_blk;
+
+    for (u4_blk = 0; u4_blk < 16; u4_blk++, pi2_res_blk += MB_SIZE)
+    {
+        /* 8x8 block holding this sub block */
+        UWORD32 u4_b8 = u4_blk >> 2;
+
+        /* number of non zero coefficients of this sub block */
+        UWORD32 u4_nnz = pu1_nnz[u4_blk];
+
+        /* packed data of this sub block */
+        tu_sblk_coeff_data_t *ps_coeff;
+
+        /* first sub block of an 8x8 block: remember where its data starts */
+        if (0 == (u4_blk & 3))
+        {
+            pv_b8_start = *pv_mb_coeff_data;
+        }
+
+        ps_coeff = *pv_mb_coeff_data;
+
+        /* lower half word carries the non zero coefficient count */
+        ps_coeff->i4_sig_map_nnz = u4_nnz;
+
+        if (0 == u4_nnz)
+        {
+            (*pv_mb_coeff_data) = ps_coeff->ai2_residue;
+        }
+        else
+        {
+            UWORD32 u4_found = 0;
+            UWORD32 u4_pos = 0;
+            UWORD32 u4_bit = 1;
+            UWORD32 u4_map = 0;
+
+            /* walk the scan order until all non zero coeffs are collected */
+            while (u4_found < u4_nnz)
+            {
+                WORD16 i2_coeff = pi2_res_blk[pu1_scan[u4_pos]];
+
+                if (i2_coeff)
+                {
+                    ps_coeff->ai2_residue[u4_found++] = i2_coeff;
+                    u4_map |= u4_bit;
+                }
+                u4_bit <<= 1;
+                u4_pos++;
+            }
+
+            /* upper half word carries the significant coeff map */
+            ps_coeff->i4_sig_map_nnz |= (u4_map << 16);
+
+            /* next sub block is packed right after the residue */
+            (*pv_mb_coeff_data) = ps_coeff->ai2_residue + u4_found;
+
+            u1_cbp_l |= (1 << u4_b8);
+        }
+
+        /* last sub block of an 8x8 block without residue: send nothing */
+        if ((3 == (u4_blk & 3)) && !(u1_cbp_l & (1 << u4_b8)))
+        {
+            *pv_mb_coeff_data = pv_b8_start;
+        }
+    }
+
+    /* copy the reconstructed macro block to the recon buffer */
+    ps_codec->pf_inter_pred_luma_copy(ps_proc->pu1_ref_mb_intra_4x4,
+                                      ps_proc->pu1_rec_buf_luma, MB_SIZE,
+                                      ps_proc->i4_rec_strd, MB_SIZE, MB_SIZE,
+                                      NULL, 0);
+
+    return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief codes the chroma component of an intra macro block
+*
+* @par Description:
+*  The prediction buffer matching the chosen chroma intra mode is selected
+*  (plane mode has its own buffer). The residue between the source and the
+*  prediction is transformed and quantized, the quantized coefficients are
+*  packed for entropy coding and the macro block is reconstructed into the
+*  chroma recon buffer.
+*
+* @param[in] ps_proc
+*  process context of the current macro block
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks
+*  When the input color format is IV_YUV_420SP_VU the packing routine is told
+*  to swap the U and V planes for entropy coding.
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* codec context and chroma quantization parameters */
+    codec_t *ps_codec = ps_proc->ps_codec;
+    quant_params_t *ps_qp = ps_proc->ps_qp_params[1];
+
+    /* chosen chroma intra prediction mode */
+    UWORD8 u1_mode = ps_proc->u1_c_i8_mode;
+
+    /* source, prediction, residue and recon buffers of the macro block */
+    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
+    UWORD8 *pu1_pred = (PLANE_CH_I8x8 == u1_mode) ?
+                    ps_proc->pu1_pred_mb_intra_chroma_plane :
+                    ps_proc->pu1_pred_mb_intra_chroma;
+    WORD16 *pi2_res = ps_proc->pi2_res_buf;
+    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_chroma;
+
+    /* strides of the above buffers */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* non zero coeff counts */
+    UWORD8 au1_nnz[18] = {0};
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* control signal for the inverse transform, produced while packing */
+    UWORD32 u4_cntrl;
+
+    /* whether the U and V planes are to be swapped for entropy coding */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    /* residue computation, forward transform, quant */
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_src, pu1_pred,
+                                               pi2_res, i4_src_strd,
+                                               i4_pred_strd, i4_res_strd,
+                                               ps_qp->pu2_scale_mat,
+                                               ps_qp->pu2_thres_mat,
+                                               ps_qp->u1_qbits,
+                                               ps_qp->u4_dead_zone, au1_nnz);
+
+    /* pack the coefficients for entropy coding */
+    ih264e_pack_c_mb(pi2_res, &(ps_proc->pv_mb_coeff_data), i4_res_strd,
+                     &u1_cbp_c, au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl,
+                     u4_swap_uv);
+
+    /* inverse quant, inverse transform and reconstruction */
+    ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res, pu1_pred,
+                                                   pu1_rec, i4_res_strd,
+                                                   i4_pred_strd, i4_rec_strd,
+                                                   ps_qp->pu2_iscale_mat,
+                                                   ps_qp->pu2_weigh_mat,
+                                                   ps_qp->u1_qp_div, u4_cntrl,
+                                                   ps_proc->pv_scratch_buff);
+
+    return (u1_cbp_c);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief codes the luma component of an inter macro block
+*
+* @par Description:
+*  The macro block is predicted by luma motion compensation. Unless the
+*  minimum sad was reached with a sad of zero, the residue between the source
+*  and the prediction is transformed (DC transform disabled), quantized and
+*  packed for entropy coding. The macro block is then reconstructed into the
+*  recon buffer.
+*
+* @param[in] ps_proc
+*  process context of the current macro block
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks
+*  ps_proc->au4_nnz[0..4] is cleared on entry. When ps_proc->u4_compute_recon
+*  is not 1, the inverse transform control word is masked with 0x111F0000 so
+*  that only the right and bottom edge 4x4 blocks are reconstructed. If no
+*  control bit remains set, the prediction is copied to the recon buffer.
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+    /* codec context and luma quantization parameters */
+    codec_t *ps_codec = ps_proc->ps_codec;
+    quant_params_t *ps_qp = ps_proc->ps_qp_params[0];
+
+    /* source, residue and recon buffers of the macro block */
+    UWORD8 *pu1_src = ps_proc->pu1_src_buf_luma;
+    WORD16 *pi2_res = ps_proc->pi2_res_buf;
+    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_luma;
+
+    /* strides of the above buffers */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /*
+     * prediction buffer and its stride; both are handed by address to the
+     * motion compensation, which may update them
+     */
+    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* non zero coeff counts, accessed byte wise */
+    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+    /* coded block pattern; stays zero when the transform is skipped */
+    UWORD8 u1_cbp_l = 0;
+
+    /* control signal of the inverse transform; zero when nothing is coded */
+    UWORD32 u4_cntrl = 0;
+
+    WORD32 i;
+
+    /* init nnz */
+    for (i = 0; i < 5; i++)
+    {
+        ps_proc->au4_nnz[i] = 0;
+    }
+
+    /* prediction */
+    ih264e_motion_comp_luma(ps_proc, &pu1_pred, &i4_pred_strd);
+
+    /*
+     * the residue is coded unless the minimum sad was reached and that sad
+     * is zero
+     */
+    if (!(ps_proc->u4_min_sad_reached != 0 && ps_proc->u4_min_sad == 0))
+    {
+        /* residue computation, forward transform (no DC transform), quant */
+        ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_src, pu1_pred,
+                                                   pi2_res, i4_src_strd,
+                                                   i4_pred_strd, i4_res_strd,
+                                                   ps_qp->pu2_scale_mat,
+                                                   ps_qp->pu2_thres_mat,
+                                                   ps_qp->u1_qbits,
+                                                   ps_qp->u4_dead_zone,
+                                                   pu1_nnz,
+                                                   DISABLE_DC_TRANSFORM);
+
+        /* pack the coefficients for entropy coding */
+        ih264e_pack_l_mb(pi2_res, &(ps_proc->pv_mb_coeff_data), i4_res_strd,
+                         &u1_cbp_l, pu1_nnz, ps_codec->u4_thres_resi,
+                         &u4_cntrl);
+    }
+
+    /*
+     * If the frame is not to be used for P frame reference or dumping recon,
+     * the recon is used only for predicting intra MBs. This needs only the
+     * right and bottom edge 4x4 blocks, which are selectively enabled through
+     * the control signal.
+     */
+    if (1 != ps_proc->u4_compute_recon)
+    {
+        u4_cntrl &= 0x111F0000;
+    }
+
+    /* nothing to inverse transform: the recon is the prediction itself */
+    if (0 == u4_cntrl)
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred, pu1_rec, i4_pred_strd,
+                                          i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
+                                          0);
+        return (u1_cbp_l);
+    }
+
+    /* inverse quant, inverse transform and reconstruction */
+    ih264e_luma_16x16_idctrans_iquant_itrans_recon(ps_codec, pi2_res, pu1_pred,
+                                                   pu1_rec, i4_res_strd,
+                                                   i4_pred_strd, i4_rec_strd,
+                                                   ps_qp->pu2_iscale_mat,
+                                                   ps_qp->pu2_weigh_mat,
+                                                   ps_qp->u1_qp_div, u4_cntrl,
+                                                   DISABLE_DC_TRANSFORM,
+                                                   ps_proc->pv_scratch_buff);
+
+    return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief codes the chroma component of an inter macro block
+*
+* @par Description:
+*  The macro block is predicted by chroma motion compensation. The residue
+*  between the source and the prediction is transformed, quantized and packed
+*  for entropy coding. The macro block is then reconstructed into the chroma
+*  recon buffer.
+*
+* @param[in] ps_proc
+*  process context of the current macro block
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks
+*  When ps_proc->u4_compute_recon is zero, the inverse transform control word
+*  is masked with 0x7700C000 so that only the right and bottom edge 4x4 blocks
+*  are reconstructed. If no control bit remains set, the prediction is copied
+*  to the recon buffer. When the input color format is IV_YUV_420SP_VU the
+*  packing routine is told to swap the U and V planes for entropy coding.
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* codec context and chroma quantization parameters */
+    codec_t *ps_codec = ps_proc->ps_codec;
+    quant_params_t *ps_qp = ps_proc->ps_qp_params[1];
+
+    /* source, prediction, residue and recon buffers of the macro block */
+    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
+    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;
+    WORD16 *pi2_res = ps_proc->pi2_res_buf;
+    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_chroma;
+
+    /* strides of the above buffers */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* non zero coeff counts */
+    UWORD8 au1_nnz[10] = {0};
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* control signal for the inverse transform, produced while packing */
+    UWORD32 u4_cntrl;
+
+    /* whether the U and V planes are to be swapped for entropy coding */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    /* prediction */
+    ih264e_motion_comp_chroma(ps_proc);
+
+    /* residue computation, forward transform, quant */
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_src, pu1_pred,
+                                               pi2_res, i4_src_strd,
+                                               i4_pred_strd, i4_res_strd,
+                                               ps_qp->pu2_scale_mat,
+                                               ps_qp->pu2_thres_mat,
+                                               ps_qp->u1_qbits,
+                                               ps_qp->u4_dead_zone, au1_nnz);
+
+    /* pack the coefficients for entropy coding */
+    ih264e_pack_c_mb(pi2_res, &(ps_proc->pv_mb_coeff_data), i4_res_strd,
+                     &u1_cbp_c, au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl,
+                     u4_swap_uv);
+
+    /*
+     * If the frame is not to be used for P frame reference or dumping recon,
+     * the recon is used only for predicting intra MBs. This needs only the
+     * right and bottom edge 4x4 blocks, which are selectively enabled through
+     * the control signal.
+     */
+    if (0 == ps_proc->u4_compute_recon)
+    {
+        u4_cntrl &= 0x7700C000;
+    }
+
+    /* nothing to inverse transform: the recon is the prediction itself */
+    if (0 == u4_cntrl)
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred, pu1_rec, i4_pred_strd,
+                                          i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
+                                          NULL, 0);
+        return (u1_cbp_c);
+    }
+
+    /* inverse quant, inverse transform and reconstruction */
+    ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res, pu1_pred,
+                                                   pu1_rec, i4_res_strd,
+                                                   i4_pred_strd, i4_rec_strd,
+                                                   ps_qp->pu2_iscale_mat,
+                                                   ps_qp->pu2_weigh_mat,
+                                                   ps_qp->u1_qp_div, u4_cntrl,
+                                                   ps_proc->pv_scratch_buff);
+
+    return (u1_cbp_c);
+}
diff --git a/encoder/ih264e_core_coding.h b/encoder/ih264e_core_coding.h
new file mode 100755
index 0000000..1237d25
--- /dev/null
+++ b/encoder/ih264e_core_coding.h
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_core_coding.h
+*
+* @brief
+* This file contains extern declarations of core coding routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CORE_CODING_H_
+#define IH264E_CORE_CODING_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Enable/Disable Hadamard transform of DC Coeff's
+******************************************************************************
+ */
+#define DISABLE_DC_TRANSFORM 0
+#define ENABLE_DC_TRANSFORM 1
+
+/**
+*******************************************************************************
+ * @brief bit masks for DC and AC control flags
+*******************************************************************************
+ */
+
+#define DC_COEFF_CNT_LUMA_MB 16
+#define NUM_4X4_BLKS_LUMA_MB_ROW 4
+#define NUM_LUMA4x4_BLOCKS_IN_MB 16
+#define NUM_CHROMA4x4_BLOCKS_IN_MB 8
+
+#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4
+#define SIZE_4X4_BLK_VERT TRANS_SIZE_4
+
+#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF
+#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
+
+#define CNTRL_FLAG_AC_MASK_CHROMA ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V )
+#define CNTRL_FLAG_DC_MASK_CHROMA ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V )
+
+#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
+
+/**
+*******************************************************************************
+ * @brief macros for transforms
+*******************************************************************************
+ */
+/*
+ * Pops the most significant set bit of u4_cntrl: blk_lin_id receives
+ * CLZ(u4_cntrl) and u4_cntrl is then masked so that this bit (and everything
+ * above it) is cleared.
+ * Both arguments must be modifiable lvalues and each one is evaluated more
+ * than once, so they must be free of side effects.
+ * NOTE(review): u4_cntrl is expected to be non zero on entry - if CLZ returns
+ * 32 for a zero input the shift below is undefined; confirm at the call sites.
+ */
+#define DEQUEUE_BLKID_FROM_CONTROL(u4_cntrl, blk_lin_id)                      \
+{                                                                             \
+    (blk_lin_id) = CLZ(u4_cntrl);                                             \
+    (u4_cntrl) &= (0x7FFFFFFF >> (blk_lin_id));                               \
+}
+
+/*
+ * Converts a linear 4x4 block index (4 blocks per row) into x and y offsets,
+ * each a multiple of 4: x = (id % 4) * 4, y = (id / 4) * 4.
+ * Arguments are parenthesized so that expression arguments such as 'id + 4'
+ * are handled correctly; u4_blk_id is evaluated twice.
+ */
+#define IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)                  \
+{                                                                             \
+    (i4_offset_x) = ((u4_blk_id) % 4) << 2;                                   \
+    (i4_offset_y) = ((u4_blk_id) / 4) << 2;                                   \
+}
+
+/*
+ * Converts a linear chroma 4x4 block index into x and y offsets:
+ *   x = 8 if bit 0 of the index is set, else 0, plus 1 for indices above 3
+ *       (presumably selecting the second plane of the interleaved UV data -
+ *       TODO confirm),
+ *   y = 4 if bit 1 of the index is set, else 0.
+ * Arguments are parenthesized so that expression arguments such as 'id | 4'
+ * are handled correctly; u4_blk_id is evaluated three times.
+ */
+#define IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y)                \
+{                                                                             \
+    (i4_offset_x) = (((u4_blk_id) & 0x1) << 3) + ((u4_blk_id) > 3);           \
+    (i4_offset_y) = ((u4_blk_id) & 0x2) << 1;                                 \
+}
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the DCT transform, then the Hadamard transform
+* and quantization for a macroblock when the mb mode is intra 16x16 mode
+*
+* @par Description:
+* First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
+* Then hadamard transform is done on the DC coefficients
+* Quantization is then performed on the 16x16 block, 4x4 wise
+*
+* @param[in] pu1_src
+* Pointer to source sub-block
+*
+* @param[in] pu1_pred
+* Pointer to prediction sub-block
+*
+* @param[in] pi2_out
+* Pointer to residual sub-block
+* The output will be in linear format
+* The first 16 continuous locations will contain the values of Dc block
+* After DC block and a stride 1st AC block will follow
+* After one more stride next AC block will follow
+* The blocks will be in raster scan order
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* Prediction stride
+*
+* @param[in] dst_strd
+* Destination stride
+*
+* @param[in] pu2_scale_matrix
+* The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+* Threshold matrix
+*
+* @param[in] u4_qbits
+* 15+QP/6
+*
+* @param[in] u4_round_factor
+* Round factor for quant
+*
+* @param[out] pu1_nnz
+* Memory to store the non-zeros after transform
+* The first byte will be the nnz of DC block
+* From the next byte the AC nnzs will be stored in raster scan order
+*
+* @param u4_dc_flag
+* Signals if Dc transform is to be done or not
+* 1 -> Dc transform will be done
+* 0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(
+ codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 dst_strd, const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the intra 16x16 inverse transform process for H264
+* it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* First 16 mem locations will have the DC coeffs in raster scan order in linear fashion
+* after a stride the 1st AC block will be present, again in raster scan order
+* Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+* The predicted data, 16x16 size
+* Block by block form
+*
+* @param[in] pu1_out
+* Output 16x16
+* In block by block form
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* In total, the last 17 bits are used
+* the 16th bit will correspond to DC block
+* and 32-17 will correspond to the ac blocks in raster scan order
+* bit equaling zero indicates that the entire 4x4 block is zero for DC
+* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+* none
+*
+* @remarks
+* The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+ codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+ UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+ UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the DCT transform, then the Hadamard transform
+* and quantization for a chroma macroblock
+*
+* @par Description:
+* First cf4 is done on all 8 4x4 blocks (4 per plane) of the 8x8 input block
+* Then hadamard transform is done on the DC coefficients
+* Quantization is then performed on the 8x8 block, 4x4 wise
+*
+* @param[in] pu1_src
+* Pointer to source sub-block
+* The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+* Pointer to prediction sub-block
+* Prediction is in inter leaved format
+*
+* @param[in] pi2_out
+* Pointer to residual sub-block
+* The output will be in linear format
+* The first 4 continuous locations will contain the values of DC block for U
+* and then next 4 will contain for V.
+* After DC block and a stride 1st AC block of U plane will follow
+* After one more stride next AC block of V plane will follow
+* The blocks will be in raster scan order
+*
+* After all the AC blocks of U plane AC blocks of V plane will follow in exact
+* same way
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* Prediction stride
+*
+* @param[in] out_strd
+* Destination stride
+*
+* @param[in] pu2_scale_matrix
+* The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+* Threshold matrix
+*
+* @param[in] u4_qbits
+* 15+QP/6
+*
+* @param[in] u4_round_factor
+* Round factor for quant
+*
+* @param[out] pu1_nnz_c
+* Memory to store the non-zeros after transform
+* The first byte will be the nnz of DC block for U plane
+* From the next byte the AC nnzs will be stored in raster scan order
+* The fifth byte will be nnz of DC block of V plane
+* Then AC blocks will follow
+*
+* @param u4_dc_flag
+* Signals if Dc transform is to be done or not
+* 1 -> Dc transform will be done
+* 0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(
+ codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c);
+
+/**
+*******************************************************************************
+* @brief
+* This function performs the inverse transform process for chroma MB of H264
+*
+* @par Description:
+* Does inverse DC transform ,inverse quantization inverse transform
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* The input is in the form of, first 4 locations will contain DC coeffs of
+* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
+* in raster scan order will follow, each block as linear array in raster scan order.
+* After a stride next AC block will follow. After all AC blocks of U plane
+* V plane AC blocks will follow in exact same order.
+*
+* @param[in] pu1_pred
+* The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+* Output 8x16, U and V interleaved
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of DC coeffs for chroma * number of planes
+* in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* bit 15 will correspond to DC block of U plane, bit 14 will indicate the V plane DC block
+* bits 31-28 will indicate AC blocks of U plane in raster scan order
+* bits 27-24 will indicate AC blocks of V plane in raster scan order
+* A bit value of 1 implies that there is at least one non zero coeff in a block
+*
+* @returns
+* none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+ codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+ UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+ WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par Description
+* An i16 macro block contains two classes of units, dc 4x4 block and
+* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
+* the 16 ac blocks are sent next in scan order. Each and every block is
+* represented by 3 parameters (nnz, significant coefficient map and the
+* residue coefficients itself). If a 4x4 unit does not have any coefficients
+* then only nnz is sent. Inside a 4x4 block the individual coefficients are
+* sent in scan order.
+*
+* The first byte of each block will be nnz of the block, if it is non zero,
+* a 2 byte significance map is sent. This is followed by nonzero coefficients.
+* This is repeated for 1 dc + 16 ac blocks.
+*
+* @param[in] pi2_res_mb
+* pointer to residue mb
+*
+* @param[in, out] pv_mb_coeff_data
+* buffer pointing to packed residue coefficients
+*
+* @param[in] i4_res_strd
+* residual block stride
+*
+* @param[out] u1_cbp_l
+* coded block pattern luma
+*
+* @param[in] pu1_nnz
+* number of non zero coefficients in each 4x4 unit
+*
+* @param[out] pu4_cntrl
+* Control signal for inverse transform of 16x16 blocks
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+ WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz,
+ UWORD32 *pu4_cntrl);
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par Description
+* An i8 chroma macro block contains two classes of units, dc 2x2 block and
+* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
+* the 4 ac blocks are sent next in scan order. Each and every block is
+* represented by 3 parameters (nnz, significant coefficient map and the
+* residue coefficients itself). If a 4x4 unit does not have any coefficients
+* then only nnz is sent. Inside a 4x4 block the individual coefficients are
+* sent in scan order.
+*
+* The first byte of each block will be nnz of the block, if it is non zero,
+* a 2 byte significance map is sent. This is followed by nonzero coefficients.
+* This is repeated for 1 dc + 4 ac blocks.
+*
+* @param[in] pi2_res_mb
+* pointer to residue mb
+*
+* @param[in, out] pv_mb_coeff_data
+* buffer pointing to packed residue coefficients
+*
+* @param[in] i4_res_strd
+* residual block stride
+*
+* @param[out] u1_cbp_c
+* coded block pattern chroma
+*
+* @param[in] pu1_nnz
+* number of non zero coefficients in each 4x4 unit
+*
+* @param[out] pu4_cntrl
+* Control signal for inverse transform
+*
+* @param[in] u4_swap_uv
+* Swaps the order of U and V planes in entropy bitstream
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+ WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz,
+ UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl,
+ UWORD32 u4_swap_uv);
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i16x16, the mb is first
+* predicted using one of i16x16 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is transformed (hierarchical transform i.e., dct followed by hada-
+* -mard), quantized. The quantized coefficients are packed in scan order for
+* entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_16x16
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i4x4, the mb is first
+* predicted using one of i4x4 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is dct transformed and quantized. The quantized coefficients are
+* packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i4x4, the mb is first
+* predicted using one of i4x4 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is dct transformed and quantized. The quantized coefficients are
+* packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+* first predicted using intra 8x8 prediction filters. The predicted data is
+* compared with the input for error and the error is transformed. The DC
+* coefficients of each transformed sub blocks are further transformed using
+* Hadamard transform. The resulting coefficients are quantized, packed and sent
+* for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+* @brief performs luma core coding when mode is inter
+*
+* @par Description:
+* If the current mb is to be coded as inter predicted mb,based on the sub mb
+* partitions and corresponding motion vectors generated by ME, prediction is done.
+* Then, error is computed between the input blk and the estimated blk.
+* This error is transformed ( dct and with out hadamard), quantized. The
+* quantized coefficients are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_inter_macroblock_16x16
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+* @brief performs chroma core coding for inter macro blocks
+*
+* @par Description:
+* If the current mb is to be coded as inter predicted mb, based on the sub mb
+* partitions and corresponding motion vectors generated by ME, prediction is done.
+* Then, error is computed between the input blk and the estimated blk.
+* This error is transformed, quantized. The quantized coefficients
+* are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8
+ (
+ process_ctxt_t *ps_proc
+ );
+
+#endif /* IH264E_CORE_CODING_H_ */
diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c
new file mode 100755
index 0000000..8a11bdb
--- /dev/null
+++ b/encoder/ih264e_deblk.c
@@ -0,0 +1,854 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_deblk.c
+ *
+ * @brief
+ * This file contains functions that are associated with deblocking
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_fill_bs_1mv_1ref_non_mbaff
+ * - ih264e_calculate_csbp
+ * - ih264e_compute_bs
+ * - ih264e_filter_top_edge
+ * - ih264e_filter_left_edge
+ * - ih264e_deblock_mb
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264_trans_data.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_deblk_tables.h"
+#include "ih264e_deblk.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief BS Table Lookup
+* input : [row][4 bit coded pattern of one edge]
+* output : four boundary strength values packed one per byte in a word
+* @remarks
+* Bit 0 of the pattern maps to the most significant byte of the word and
+* bit 3 to the least significant byte. A byte is 2 when its pattern bit is
+* set. When the bit is clear the byte is 0 in row 0 and 1 in row 1.
+* In this file row 0 is used for the internal edges and for mb boundary
+* edges whose motion vector difference flag is 0; row 1 is used for mb
+* boundary edges whose motion vector difference flag is 1.
+******************************************************************************
+*/
+static const UWORD32 gu4_bs_table[][16] =
+{
+ {
+ 0x00000000, 0x02000000, 0x00020000, 0x02020000,
+ 0x00000200, 0x02000200, 0x00020200, 0x02020200,
+ 0x00000002, 0x02000002, 0x00020002, 0x02020002,
+ 0x00000202, 0x02000202, 0x00020202, 0x02020202
+ },
+ {
+ 0x01010101, 0x02010101, 0x01020101, 0x02020101,
+ 0x01010201, 0x02010201, 0x01020201, 0x02020201,
+ 0x01010102, 0x02010102, 0x01020102, 0x02020102,
+ 0x01010202, 0x02010202, 0x01020202, 0x02020202
+ }
+};
+
+/**
+******************************************************************************
+* @brief Transpose Matrix used in BS
+* input : 4 bit value b3|b2|b1|b0
+* output : 16 bit value in which bit k of the input is moved to bit 4*k
+* (one input bit per nibble, in the least significant bit of the nibble)
+* @remarks
+* The BS code looks up each nibble of a 16 bit pattern here and ORs the
+* results together shifted left by the nibble index, which transposes the
+* pattern viewed as a 4x4 bit matrix.
+******************************************************************************
+*/
+static const UWORD16 ih264e_gu2_4x4_v2h_reorder[16] =
+{
+ 0x0000, 0x0001, 0x0010, 0x0011,
+ 0x0100, 0x0101, 0x0110, 0x0111,
+ 0x1000, 0x1001, 0x1010, 0x1011,
+ 0x1100, 0x1101, 0x1110, 0x1111
+};
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Fill BS value for all the edges of an mb
+*
+* @par Description:
+*  Writes four boundary strength words for the horizontal edges and four for
+*  the vertical edges of an mb. Words 1..3 (internal edges) depend only on the
+*  coded sub block patterns. Word 0 (edge shared with the top / left mb) is
+*  0x04040404 when that neighbour is intra; otherwise it depends on the coded
+*  sub block patterns and on whether the neighbour's motion vector differs
+*  from the current one by 4 or more in either component.
+*
+* @param[out] pu4_horz_bs
+*  Base pointer of horizontal BS table (4 words are written)
+*
+* @param[out] pu4_vert_bs
+*  Base pointer of vertical BS table (4 words are written)
+*
+* @param[in] u4_left_mb_csbp
+*  coded sub block pattern of left mb
+*
+* @param[in] u4_top_mb_csbp
+*  coded sub block pattern of top mb
+*
+* @param[in] u4_cur_mb_csbp
+*  coded sub block pattern of current mb
+*
+* @param[in] ps_leftMvPred
+*  MV of left mb
+*
+* @param[in] ps_topMvPred
+*  MV of top mb
+*
+* @param[in] ps_curMvPred
+*  MV of curr mb
+*
+* @param[in] u1_left_intra
+*  is left intra
+*
+* @param[in] u1_top_intra
+*  is top intra
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs,
+                                              UWORD32 *pu4_vert_bs,
+                                              UWORD32 u4_left_mb_csbp,
+                                              UWORD32 u4_top_mb_csbp,
+                                              UWORD32 u4_cur_mb_csbp,
+                                              mv_t *ps_leftMvPred,
+                                              mv_t *ps_topMvPred,
+                                              mv_t *ps_curMvPred,
+                                              UWORD8 u1_left_intra,
+                                              UWORD8 u1_top_intra)
+{
+    /* lookup used for the internal edges */
+    const UWORD32 *pu4_inner_map = gu4_bs_table[0];
+
+    /* motion vector of the current mb (blk q) */
+    WORD16 i2_cur_mvx, i2_cur_mvy;
+
+    /* edge index */
+    WORD32 i4_edge;
+
+    /* Coded pattern for the horizontal edges : every bit is the OR of a     */
+    /* current mb blk and the blk right above it                             */
+    /*-----------------------------------------------------------------------*/
+    /* nbr pattern = 11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T   */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_horz_enc = u4_cur_mb_csbp
+                          | (u4_cur_mb_csbp << 4)
+                          | (u4_top_mb_csbp >> 12);
+
+    /* Coded pattern for the vertical edges : every bit is the OR of a       */
+    /* current mb blk and the blk to its left                                */
+    /*-----------------------------------------------------------------------*/
+    /* nbr pattern = 14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L   */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_vert_enc = u4_cur_mb_csbp
+                          | ((u4_cur_mb_csbp << 1) & (~CSBP_LEFT_BLOCK_MASK))
+                          | ((u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK) >> 3);
+
+    /* vertical pattern after the 4x4 transpose */
+    UWORD32 u4_vert_enc_t = 0;
+
+    /* BS for the internal horizontal edges 1 2 3 */
+    for (i4_edge = 1; i4_edge < 4; i4_edge++)
+    {
+        pu4_horz_bs[i4_edge] = pu4_inner_map[(u4_horz_enc >> (4 * i4_edge)) & 0xF];
+    }
+
+    /* 4x4 transpose of the vertical pattern, one nibble at a time, so that */
+    /* each nibble holds the four blks lying along one vertical edge        */
+    for (i4_edge = 0; i4_edge < 4; i4_edge++)
+    {
+        UWORD32 u4_spread = ih264e_gu2_4x4_v2h_reorder[(u4_vert_enc >> (4 * i4_edge)) & 0xF];
+
+        u4_vert_enc_t |= (u4_spread << i4_edge);
+    }
+
+    /* BS for the internal vertical edges 1 2 3 */
+    for (i4_edge = 1; i4_edge < 4; i4_edge++)
+    {
+        pu4_vert_bs[i4_edge] = pu4_inner_map[(u4_vert_enc_t >> (4 * i4_edge)) & 0xF];
+    }
+
+    /* BS for the mb boundary edges */
+    i2_cur_mvx = ps_curMvPred->i2_mvx;
+    i2_cur_mvy = ps_curMvPred->i2_mvy;
+
+    /* edge shared with the top mb */
+    if (!u1_top_intra)
+    {
+        WORD16 i2_nbr_mvx = ps_topMvPred->i2_mvx;
+        WORD16 i2_nbr_mvy = ps_topMvPred->i2_mvy;
+
+        /* set when either mv component differs by 4 or more */
+        UWORD32 u4_mv_differs = (ABS((i2_nbr_mvx - i2_cur_mvx)) >= 4) |
+                                (ABS((i2_nbr_mvy - i2_cur_mvy)) >= 4);
+
+        pu4_horz_bs[0] = gu4_bs_table[!!u4_mv_differs][u4_horz_enc & 0xF];
+    }
+    else
+    {
+        pu4_horz_bs[0] = 0x04040404;
+    }
+
+    /* edge shared with the left mb */
+    if (!u1_left_intra)
+    {
+        WORD16 i2_nbr_mvx = ps_leftMvPred->i2_mvx;
+        WORD16 i2_nbr_mvy = ps_leftMvPred->i2_mvy;
+
+        /* set when either mv component differs by 4 or more */
+        UWORD32 u4_mv_differs = (ABS((i2_nbr_mvx - i2_cur_mvx)) >= 4) |
+                                (ABS((i2_nbr_mvy - i2_cur_mvy)) >= 4);
+
+        pu4_vert_bs[0] = gu4_bs_table[!!u4_mv_differs][u4_vert_enc_t & 0xF];
+    }
+    else
+    {
+        pu4_vert_bs[0] = 0x04040404;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief calculate coded subblock pattern from nnz
+*
+* @par Description:
+*  Builds a 16 bit pattern from the nnz bytes of the current mb. Bit i of the
+*  result is set when byte (i + 1) of ps_proc->au4_nnz is non zero; byte 0 is
+*  skipped (presumably it holds the nnz of the DC block - TODO confirm).
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns csbp
+*  15C|14C|13C|12C|11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc)
+{
+    /* byte wise view of the nnz array, starting after its first byte */
+    UWORD8 *pu1_blk_nnz = ((UWORD8 *)ps_proc->au4_nnz) + 1;
+
+    /* pattern under construction */
+    UWORD32 u4_pattern = 0;
+
+    /* blk index */
+    WORD32 i4_blk = 0;
+
+    while (i4_blk < 16)
+    {
+        if (pu1_blk_nnz[i4_blk])
+        {
+            u4_pattern |= (1 << i4_blk);
+        }
+        i4_blk++;
+    }
+
+    return u4_pattern;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+*  Fills the four vertical and four horizontal BS words of the mb addressed by
+*  the bs context. For an intra mb the boundary edge gets 0x04040404 and the
+*  internal edges 0x03030303. For any other mb the coded sub block pattern of
+*  the mb is computed and stored in ps_proc->u4_csbp, and the BS words are
+*  derived from the patterns and motion vectors of the current, left and top
+*  mbs. For mbs in the first column / first row the stored left / top
+*  neighbour info is first overwritten with: csbp 0, not intra, and the motion
+*  vector of the current mb.
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns none
+*
+* @remarks In this module it is assumed that there is only a single reference
+*  frame and that it is always the most recently used anchor frame
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc)
+{
+    /* deblk bs context */
+    bs_ctxt_t *ps_bs_ctxt = &(ps_proc->s_deblk_ctxt.s_bs_ctxt);
+
+    /* picture width in mbs */
+    WORD32 i4_pic_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /* mb position */
+    WORD32 i4_col = ps_bs_ctxt->i4_mb_x;
+    WORD32 i4_row = ps_bs_ctxt->i4_mb_y;
+
+    /* BS words of this mb : 4 words per mb in each buffer */
+    UWORD32 *pu4_vert = ps_bs_ctxt->pu4_pic_vert_bs + ((i4_row * i4_pic_wd_mbs) + i4_col) * 4;
+    UWORD32 *pu4_horz = ps_bs_ctxt->pu4_pic_horz_bs + ((i4_row * i4_pic_wd_mbs) + i4_col) * 4;
+
+    /* neighbour info, used only for non intra mbs */
+    mb_info_t *ps_left_info;
+    mb_info_t *ps_top_info;
+    enc_pu_t *ps_top_pu;
+
+    /* is intra? */
+    WORD32 i4_is_intra = ps_proc->u4_is_intra;
+
+    if (i4_is_intra)
+    {
+        /* edge index */
+        WORD32 i4_edge;
+
+        /* mb boundary edges */
+        pu4_vert[0] = 0x04040404;
+        pu4_horz[0] = 0x04040404;
+
+        /* internal edges */
+        for (i4_edge = 1; i4_edge < 4; i4_edge++)
+        {
+            pu4_vert[i4_edge] = 0x03030303;
+            pu4_horz[i4_edge] = 0x03030303;
+        }
+
+        return;
+    }
+
+    /* left mb syntax info */
+    ps_left_info = &ps_proc->s_left_mb_syntax_ele;
+
+    /* top mb syntax info */
+    ps_top_info = ps_proc->ps_top_row_mb_syntax_ele + i4_col;
+
+    /* top row motion vector info */
+    ps_top_pu = ps_proc->ps_top_row_pu + i4_col;
+
+    /* csbp for curr mb */
+    ps_proc->u4_csbp = ih264e_calculate_csbp(ps_proc);
+
+    /* first column : substitute neutral info for the left ngbr */
+    if (0 == i4_col)
+    {
+        ps_left_info->u4_csbp = 0;
+        ps_left_info->u2_is_intra = 0;
+        ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+    }
+
+    /* first row : substitute neutral info for the top ngbr */
+    if (0 == i4_row)
+    {
+        ps_top_info->u4_csbp = 0;
+        ps_top_info->u2_is_intra = 0;
+        ps_top_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+    }
+
+    ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_horz,
+                                      pu4_vert,
+                                      ps_left_info->u4_csbp,
+                                      ps_top_info->u4_csbp,
+                                      ps_proc->u4_csbp,
+                                      &ps_proc->s_left_mb_pu.s_l0_mv,
+                                      &ps_top_pu->s_l0_mv,
+                                      &ps_proc->ps_pu->s_l0_mv,
+                                      ps_left_info->u2_is_intra,
+                                      ps_top_info->u2_is_intra);
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of top horizontal edge
+*
+* @par Description:
+* This function performs deblocking of top horizontal edge
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @param[in] ps_proc
+* pointer to proc context
+*
+* @param[in] pu1_mb_qp
+* pointer to mb quantization param
+*
+* @param[in] pu1_cur_pic_luma
+* pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+* pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_horz_bs
+* pointer to horizontal blocking strength
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_top_edge(codec_t *ps_codec,
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_mb_qp,
+ UWORD8 *pu1_cur_pic_luma,
+ UWORD8 *pu1_cur_pic_chroma,
+ UWORD32 *pu4_pic_horz_bs)
+{
+ /* recon buffer stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* collect qp of top (one mb row back in the qp map) & curr mb */
+ u4_qp_p = pu1_mb_qp[-ps_proc->i4_wd_mbs];
+ u4_qp_q = pu1_mb_qp[0];
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1;
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1;
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* deblk edge */
+ /* bs word of the top edge == 0x04040404 -> strong filter, else normal filter */
+ if (pu4_pic_horz_bs[0] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter; the chroma call gets the same alpha/beta/clip values twice */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of left vertical edge
+*
+* @par Description:
+* This function performs deblocking of left vertical edge
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @param[in] ps_proc
+* pointer to proc context
+*
+* @param[in] pu1_mb_qp
+* pointer to mb quantization param
+*
+* @param[in] pu1_cur_pic_luma
+* pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+* pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_vert_bs
+* pointer to vertical blocking strength
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_left_edge(codec_t *ps_codec,
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_mb_qp,
+ UWORD8 *pu1_cur_pic_luma,
+ UWORD8 *pu1_cur_pic_chroma,
+ UWORD32 *pu4_pic_vert_bs)
+{
+ /* recon buffer stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* collect qp of left (previous entry in the qp map) & curr mb */
+ u4_qp_p = pu1_mb_qp[-1];
+ u4_qp_q = pu1_mb_qp[0];
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1;
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1;
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* deblk edge: bs word == 0x04040404 -> strong filter, else normal filter */
+ if (pu4_pic_vert_bs[0] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter; the chroma call gets the same alpha/beta/clip values twice */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma, i4_rec_strd,
+ u4_alpha_luma, u4_beta_luma,
+ pu4_pic_vert_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+* This function performs deblocking on an mb
+*
+* @param[in] ps_proc
+* process context corresponding to the job
+*
+* @param[in] ps_deblk
+* pointer to deblock context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk)
+{
+ /* codec ctxt */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* ngbr availability: u1_mb_a = left mb, u1_mb_b = top mb */
+ UWORD8 u1_mb_a, u1_mb_b;
+
+ /* mb indices */
+ WORD32 i4_mb_x = ps_deblk->i4_mb_x, i4_mb_y = ps_deblk->i4_mb_y;
+
+ /* pic qp ptr (one byte per mb, advanced to curr mb below) */
+ UWORD8 *pu1_pic_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp;
+
+ /* vertical blocking strength (4 words per mb, advanced to curr mb below) */
+ UWORD32 *pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_pic_vert_bs;
+
+ /* horizontal blocking strength (4 words per mb, advanced to curr mb below) */
+ UWORD32 *pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_pic_horz_bs;
+
+ /* luma buffer passed to the edge filters */
+ UWORD8 *pu1_cur_pic_luma = ps_deblk->pu1_cur_pic_luma;
+
+ /* chroma buffer passed to the edge filters */
+ UWORD8 *pu1_cur_pic_chroma = ps_deblk->pu1_cur_pic_chroma;
+
+ /* recon buffer stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* raster-scan index of curr mb (mb_y * wd_mbs + mb_x) */
+ UWORD32 push_ptr = (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x;
+
+ /* derive neighbor availability */
+ /* In slice mode the edges of mbs that lie on the slice boundary are not deblocked */
+ /* deblocking filter idc '2' */
+ if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+ {
+ /* per-mb slice index map, advanced to the start of the curr mb row */
+ UWORD8 *pu1_slice_idx = ps_deblk->pu1_slice_idx;
+
+ pu1_slice_idx += (i4_mb_y * ps_proc->i4_wd_mbs);
+ /* left macroblock availability */
+ u1_mb_a = (i4_mb_x == 0 ||
+ (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ /* top macroblock availability */
+ u1_mb_b = (i4_mb_y == 0 ||
+ (pu1_slice_idx[i4_mb_x-ps_proc->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ }
+ else
+ {
+ /* left macroblock availability */
+ u1_mb_a = (i4_mb_x == 0)? 0 : 1;
+ /* top macroblock availability */
+ u1_mb_b = (i4_mb_y == 0)? 0 : 1;
+ }
+
+ pu1_pic_qp += push_ptr;
+ pu4_pic_vert_bs += push_ptr * 4;
+ pu4_pic_horz_bs += push_ptr * 4;
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = pu1_pic_qp[0];
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = gu1_qpc_fqpi[u4_qp_luma];
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* Deblock vertical edges */
+ /* left vertical edge 0 - filtered only when the left mb is available */
+ if (u1_mb_a)
+ {
+ ih264e_filter_left_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_vert_bs);
+ }
+
+ /* vertical edge 1 (luma only) */
+ if (pu4_pic_vert_bs[1] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 4, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 4, i4_rec_strd,
+ u4_alpha_luma, u4_beta_luma,
+ pu4_pic_vert_bs[1],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* vertical edge 2 (luma + chroma) */
+ if (pu4_pic_vert_bs[2] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_vert_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+
+ /* vertical edge 3 (luma only) */
+ if (pu4_pic_vert_bs[3] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_vert_bs[3],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* Deblock Horizontal edges */
+ /* top horizontal edge 0 - filtered only when the top mb is available */
+ if (u1_mb_b)
+ {
+ ih264e_filter_top_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_horz_bs);
+ }
+
+ /* horizontal edge 1 (luma only) */
+ if (pu4_pic_horz_bs[1] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[1],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* horizontal edge 2 (luma at row 8, chroma at row 4) */
+ if (pu4_pic_horz_bs[2] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+
+ /* horizontal edge 3 (luma only) */
+ if (pu4_pic_horz_bs[3] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[3],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ return ;
+}
diff --git a/encoder/ih264e_deblk.h b/encoder/ih264e_deblk.h
new file mode 100755
index 0000000..9b3b67b
--- /dev/null
+++ b/encoder/ih264e_deblk.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_deblk.h
+*
+* @brief
+* This file contains extern declarations of deblocking routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBLK_H_
+#define IH264E_DEBLK_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief masks to extract csbp bits 0/4/8/12 (0x1111) and 3/7/11/15 (0x8888)
+******************************************************************************
+ */
+#define CSBP_LEFT_BLOCK_MASK 0x1111
+#define CSBP_RIGHT_BLOCK_MASK 0x8888
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+* This function computes blocking strength for an mb
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns none
+*
+* @remarks In this module it is assumed that there is only a single reference
+* frame and that it is always the most recently used anchor frame
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+* Filters the left/top mb edges (if that ngbr is available) and the 3 inner edges
+*
+* @param[in] ps_proc
+* process context corresponding to the job
+*
+* @param[in] ps_deblk
+* pointer to deblock context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk);
+
+#endif /* IH264E_DEBLK_H_ */
diff --git a/encoder/ih264e_debug.h b/encoder/ih264e_debug.h
new file mode 100755
index 0000000..5cb0434
--- /dev/null
+++ b/encoder/ih264e_debug.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_debug.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBUG_H_
+#define IH264E_DEBUG_H_
+
+#if DEBUG_RC
+/* DEBUG_RC builds: forward each macro to its ih264e_debug_* dump routine */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) \
+ ih264e_debug_dump_qp(pic_cnt, qp, num_cores);
+
+#define DEBUG_DUMP_RC(ps_rc) ih264e_debug_print_rc(ps_rc);
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ih264e_debug_dump_cost_sad_pu(ps_proc);
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) \
+ ih264e_debug_dump_inp_to_post_enc(ps_frame_info, pic_cnt, num_cores);
+
+#else
+/* otherwise expand to a valid no-op statement; a bare "(void);" does not compile */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) (void)0;
+
+#define DEBUG_DUMP_RC(ps_rc) (void)0;
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) (void)0;
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) (void)0;
+
+#endif
+
+#endif /* IH264E_DEBUG_H_ */
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
new file mode 100755
index 0000000..76929ef
--- /dev/null
+++ b/encoder/ih264e_defs.h
@@ -0,0 +1,538 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_defs.h
+*
+* @brief
+* Definitions used in the encoder
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_DEFS_H_
+#define IH264E_DEFS_H_
+
+
+/*****************************************************************************/
+/* Width and height restrictions */
+/*****************************************************************************/
+/**
+ * Minimum width supported by codec
+ */
+#define MIN_WD 16
+
+/**
+ * Maximum width supported by codec
+ */
+
+#define MAX_WD 1920
+
+/**
+ * Minimum height supported by codec
+ */
+#define MIN_HT 16
+
+/**
+ * Maximum height supported by codec
+ */
+
+#define MAX_HT 1920
+
+/*****************************************************************************/
+/* Padding sizes */
+/*****************************************************************************/
+/**
+ * Padding used for top of the frame
+ */
+#define PAD_TOP 32
+
+/**
+ * Padding used for bottom of the frame
+ */
+#define PAD_BOT 32
+
+/**
+ * Padding used at left of the frame
+ */
+#define PAD_LEFT 32
+
+/**
+ * Padding used at right of the frame
+ */
+#define PAD_RIGHT 32
+/**
+ * Padding for width
+ */
+#define PAD_WD (PAD_LEFT + PAD_RIGHT)
+/**
+ * Padding for height
+ */
+#define PAD_HT (PAD_TOP + PAD_BOT)
+
+/*
+ * buffer width and height for half pel buffers
+ */
+#define HP_BUFF_WD 24
+#define HP_BUFF_HT 18
+
+/*****************************************************************************/
+/* Number of frame restrictions */
+/*****************************************************************************/
+/**
+ * Maximum number of reference buffers in DPB manager
+ */
+#define MAX_REF_CNT 32
+
+/*****************************************************************************/
+/* Num cores related defs */
+/*****************************************************************************/
+/**
+ * Maximum number of cores
+ */
+#define MAX_NUM_CORES 8
+
+/**
+ * Maximum number of threads for pixel processing
+ */
+#define MAX_PROCESS_THREADS MAX_NUM_CORES
+
+/**
+ * Maximum process context sets
+ * Used to stagger encoding of MAX_CTXT_SETS in parallel
+ */
+#define MAX_CTXT_SETS 2
+/**
+ * Maximum number of contexts
+ * Kept as MAX_CTXT_SETS times the number of threads, to make it easier to initialize
+ * the contexts from master thread; parenthesized so it is safe inside expressions
+ */
+#define MAX_PROCESS_CTXT (MAX_NUM_CORES * MAX_CTXT_SETS)
+
+/*****************************************************************************/
+/* Profile and level restrictions */
+/*****************************************************************************/
+/**
+ * Max level supported by the codec
+ */
+#define MAX_LEVEL IH264_LEVEL_51
+
+/**
+ * Min level supported by the codec
+ */
+#define MIN_LEVEL IH264_LEVEL_10
+
+/**
+ * Maximum number of slice headers that are held in memory simultaneously
+ * For single core implementation only 1 slice header is enough.
+ * But for multi-core parsing thread needs to ensure that slice headers are
+ * stored till the last CB in a slice is decoded.
+ * Parsing thread has to wait till last CB of a slice is consumed before
+ * overwriting the slice header
+ * MAX_SLICE_HDR_CNT is assumed to be a power of 2
+ */
+
+#define LOG2_MAX_SLICE_HDR_CNT 8
+#define MAX_SLICE_HDR_CNT (1 << LOG2_MAX_SLICE_HDR_CNT)
+
+/* Generic declarations */
+#define DEFAULT_MAX_LEVEL 40
+#define DEFAULT_RECON_ENABLE 0
+#define DEFAULT_RC IVE_RC_STORAGE
+#define DEFAULT_MAX_FRAMERATE 120000
+#define DEFAULT_MAX_BITRATE 20000000
+#define DEFAULT_MAX_SRCH_RANGE_X 256
+#define DEFAULT_MAX_SRCH_RANGE_Y 256
+#define DEFAULT_SLICE_PARAM 256
+#define DEFAULT_SRC_FRAME_RATE 30000
+#define DEFAULT_TGT_FRAME_RATE 30000
+#define DEFAULT_BITRATE 6000000
+#define DEFAULT_QP_MIN 10
+#define DEFAULT_QP_MAX 51
+#define DEFAULT_I_QP 25
+#define DEFAULT_P_QP 28
+#define DEFAULT_B_QP 28
+#define DEFAULT_AIR_MODE IVE_AIR_MODE_NONE
+#define DEFAULT_AIR_REFRESH_PERIOD 30
+#define DEFAULT_VBV_DELAY 1000
+#define DEFAULT_VBV_SIZE 16800000 /* level 3.1 */
+#define DEFAULT_NUM_CORES 1
+#define DEFAULT_ME_SPEED_PRESET 100
+#define DEFAULT_HPEL 1
+#define DEFAULT_QPEL 1
+#define DEFAULT_I4 1
+#define DEFAULT_I8 0
+#define DEFAULT_I16 1
+#define DEFAULT_ENABLE_FAST_SAD 0
+#define DEFAULT_ENABLE_SATQD 1
+#define DEFAULT_MIN_SAD_ENABLE 0
+#define DEFAULT_MIN_SAD_DISABLE -1
+#define DEFAULT_SRCH_RNG_X 64
+#define DEFAULT_SRCH_RNG_Y 48
+#define DEFAULT_I_INTERVAL 30
+#define DEFAULT_IDR_INTERVAL 1000
+#define DEFAULT_B_FRAMES 0
+#define DEFAULT_DISABLE_DEBLK_LEVEL 0
+#define DEFAULT_PROFILE IV_PROFILE_BASE
+#define DEFAULT_MIN_INTRA_FRAME_RATE 1
+#define DEFAULT_MAX_INTRA_FRAME_RATE 2147483647
+#define DEFAULT_MIN_BUFFER_DELAY 30
+#define DEFAULT_MAX_BUFFER_DELAY 20000
+#define DEFAULT_STRIDE 0
+#define DEFAULT_ENC_SPEED_PRESET IVE_USER_DEFINED
+#define DEFAULT_PRE_ENC_ME 0
+#define DEFAULT_PRE_ENC_IPE 0
+
+/** Maximum number of entries in input buffer list */
+#define MAX_INP_BUF_LIST_ENTRIES 32
+
+/** Maximum number of entries in output buffer list */
+#define MAX_OUT_BUF_LIST_ENTRIES 32
+
+/** Maximum number of entries in recon buffer list used within the encoder */
+#define MAX_REC_LIST_ENTRIES 16
+
+/** Number of buffers created to hold half-pel planes for every reference buffer */
+ #define HPEL_PLANES_CNT 1
+
+/**
+ *****************************************************************************
+ * Macro to compute total size required to hold one set of scaling matrices
+ *****************************************************************************
+ */
+#define SCALING_MAT_SIZE(m_scaling_mat_size) \
+{ \
+ m_scaling_mat_size = 6 * TRANS_SIZE_4 * TRANS_SIZE_4; \
+ m_scaling_mat_size += 6 * TRANS_SIZE_8 * TRANS_SIZE_8; \
+ m_scaling_mat_size += 6 * TRANS_SIZE_16 * TRANS_SIZE_16; \
+ m_scaling_mat_size += 2 * TRANS_SIZE_32 * TRANS_SIZE_32; \
+}
+
+/**
+ ******************************************************************************
+ * @brief Raster position of a block[8x8] / sub block[4x4]: X = idx & 1, Y = idx >> 1
+ ******************************************************************************
+ */
+#define GET_BLK_RASTER_POS_X(x) (((x) & 0x01))
+#define GET_BLK_RASTER_POS_Y(y) (((y) >> 1))
+#define GET_SUB_BLK_RASTER_POS_X(x) (((x) & 0x01))
+#define GET_SUB_BLK_RASTER_POS_Y(y) (((y) >> 1))
+
+#define NUM_RC_MEMTABS 17
+
+/**
+ ***************************************************************************
+ * Enum to hold various mem records being requested
+ ****************************************************************************
+ */
+enum
+{
+ /**
+ * Codec Object at API level
+ */
+ MEM_REC_IV_OBJ,
+
+ /**
+ * Codec context
+ */
+ MEM_REC_CODEC,
+
+ /**
+ * entropy context
+ */
+ MEM_REC_ENTROPY,
+
+ /**
+ * Buffer to hold coeff data
+ */
+ MEM_REC_MB_COEFF_DATA,
+
+ /**
+ * Buffer to hold mb header data
+ */
+ MEM_REC_MB_HEADER_DATA,
+
+ /**
+ * Motion vector bank
+ */
+ MEM_REC_MVBANK,
+
+ /**
+ * Motion vector bits
+ */
+ MEM_REC_MVBITS,
+
+ /**
+ * Holds mem records passed to the codec.
+ */
+ MEM_REC_BACKUP,
+
+ /**
+ * Holds SPS
+ */
+ MEM_REC_SPS,
+
+ /**
+ * Holds PPS
+ */
+ MEM_REC_PPS,
+
+ /**
+ * Holds Slice Headers
+ */
+ MEM_REC_SLICE_HDR,
+
+ /**
+ * Contains map indicating slice index per MB basis
+ */
+ MEM_REC_SLICE_MAP,
+
+ /**
+ * Holds thread handles
+ */
+ MEM_REC_THREAD_HANDLE,
+
+ /**
+ * Holds control call mutex
+ */
+ MEM_REC_CTL_MUTEX,
+
+ /**
+ * Holds entropy call mutex
+ */
+ MEM_REC_ENTROPY_MUTEX,
+
+ /**
+ * Holds memory for Process JOB Queue
+ */
+ MEM_REC_PROC_JOBQ,
+
+ /**
+ * Holds memory for Entropy JOB Queue
+ */
+ MEM_REC_ENTROPY_JOBQ,
+
+ /**
+ * Contains status map indicating processing status per MB basis
+ */
+ MEM_REC_PROC_MAP,
+
+ /**
+ * Contains status map indicating deblocking status per MB basis
+ */
+ MEM_REC_DBLK_MAP,
+
+ /**
+ * Contains AIR map and mask
+ */
+ MEM_REC_AIR_MAP,
+
+ /**
+ * Contains status map indicating ME status per MB basis
+ */
+ MEM_REC_ME_MAP,
+
+ /**
+ * Holds dpb manager context
+ */
+ MEM_REC_DPB_MGR,
+
+ /**
+ * Holds intermediate buffers needed during processing stage
+ * Memory for process contexts is allocated in this memtab
+ */
+ MEM_REC_PROC_SCRATCH,
+
+ /**
+ * Holds quantization params - TODO confirm exact contents against the allocator
+ */
+ MEM_REC_QUANT_PARAM,
+
+ /**
+ * Holds top row syntax information
+ */
+ MEM_REC_TOP_ROW_SYN_INFO,
+
+ /**
+ * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+ */
+ MEM_REC_BS_QP,
+
+ /**
+ * Holds input buffer manager context
+ */
+ MEM_REC_INP_PIC,
+
+ /**
+ * Holds output buffer manager context
+ */
+ MEM_REC_OUT,
+
+ /**
+ * Holds picture buffer manager context and array of pic_buf_ts
+ * Also holds reference picture buffers in non-shared mode
+ */
+ MEM_REC_REF_PIC,
+
+ /**
+ * Mem record for color space conversion
+ */
+ MEM_REC_CSC,
+
+ /**
+ * NMB info struct
+ */
+ MEM_REC_MB_INFO_NMB,
+
+ /**
+ * First of the NUM_RC_MEMTABS memory records reserved for rate control.
+ */
+ MEM_REC_RC,
+
+ /**
+ * Place holder to compute number of memory records.
+ */
+ MEM_REC_CNT = MEM_REC_RC + NUM_RC_MEMTABS,
+
+ /*
+ * Do not add anything below
+ */
+};
+
+#define DISABLE_DEBLOCK_INTERVAL 8
+
+/**
+ ****************************************************************************
+ * Disable deblock levels
+ * Level 0 enables deblocking completely and level 4 disables completely
+ * Other levels are intermediate values to control deblocking level
+ ****************************************************************************
+ */
+enum
+{
+ /**
+ * Enable deblocking completely
+ */
+ DISABLE_DEBLK_LEVEL_0,
+
+ /**
+ * Disable only within MB edges - Not supported currently
+ */
+ DISABLE_DEBLK_LEVEL_1,
+
+ /**
+ * Enable deblocking once in DISABLE_DEBLOCK_INTERVAL number of pictures
+ * and for I slices
+ */
+ DISABLE_DEBLK_LEVEL_2,
+
+ /**
+ * Enable deblocking only for I slices
+ */
+ DISABLE_DEBLK_LEVEL_3,
+
+ /**
+ * Disable deblocking completely
+ */
+ DISABLE_DEBLK_LEVEL_4
+};
+
+/**
+ ****************************************************************************
+ * Number of buffers for I/O based on format
+ ****************************************************************************
+ */
+
+/** Minimum number of input buffers */
+#define MIN_INP_BUFS 2
+
+/** Minimum number of output buffers */
+#define MIN_OUT_BUFS 1
+
+/** Minimum number of components in bitstream buffer */
+#define MIN_BITS_BUFS_COMP 1
+
+/** Minimum number of components in raw buffer */
+#define MIN_RAW_BUFS_420_COMP 3
+#define MIN_RAW_BUFS_422ILE_COMP 1
+#define MIN_RAW_BUFS_RGB565_COMP 1
+#define MIN_RAW_BUFS_RGBA8888_COMP 1
+#define MIN_RAW_BUFS_420SP_COMP 2
+
+#define MAX_NMB 120
+
+/** Maximum number of active config parameter sets */
+#define MAX_ACTIVE_CONFIG_PARAMS 32
+
+/**
+******************************************************************************
+ * @brief Thresholds for luma & chroma to determine if the 8x8 subblock needs
+ * to be encoded or skipped
+******************************************************************************
+*/
+#define LUMA_SUB_BLOCK_SKIP_THRESHOLD 4
+#define LUMA_BLOCK_SKIP_THRESHOLD 5
+#define CHROMA_BLOCK_SKIP_THRESHOLD 4
+
+/**
+******************************************************************************
+ * @brief defines the first byte of a NAL unit
+ * forbidden zero bit - nal_ref_idc - nal_unit_type
+******************************************************************************
+*/
+/* [0 - 11 - 00111] */
+#define NAL_SPS_FIRST_BYTE 0x67
+
+/* [0 - 11 - 01000] */
+#define NAL_PPS_FIRST_BYTE 0x68
+
+/* [0 - 11 - 00001] */
+#define NAL_SLICE_FIRST_BYTE 0x61
+
+/* [0 - 00 - 00001] */
+#define NAL_NON_REF_SLICE_FIRST_BYTE 0x01
+
+/* [0 - 11 - 00101] */
+#define NAL_IDR_SLICE_FIRST_BYTE 0x65
+
+/* [0 - 00 - 01100] */
+#define NAL_FILLER_FIRST_BYTE 0x0C
+
+/* [0 - 00 - 00110] */
+#define NAL_SEI_FIRST_BYTE 0x06
+
+#define H264_ALLOC_INTER_FRM_INTV 1
+
+#define H264_MPEG_QP_MAP 191
+
+#define MPEG2_QP_ELEM (H264_MPEG_QP_MAP + 1)
+#define H264_QP_ELEM (MAX_H264_QP + 1)
+
+#define H264_INIT_QUANT_I 26
+#define H264_INIT_QUANT_P 34
+
+#endif /*IH264E_DEFS_H_*/
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
new file mode 100755
index 0000000..ffc6fb7
--- /dev/null
+++ b/encoder/ih264e_encode.c
@@ -0,0 +1,580 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_encode.c
+*
+* @brief
+* This file contains functions for encoding the input yuv frame in synchronous
+* api mode
+*
+* @author
+* ittiam
+*
+* List of Functions
+* - ih264e_join_threads()
+* - ih264e_wait_for_thread()
+* - ih264e_encode()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264_padding.h"
+#include "ih264e_bitstream.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264e_process.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_debug.h"
+#ifdef LOGO_EN
+#include "ih264e_ittiam_logo.h"
+#endif
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  Joins every processing thread that was spawned for the current frame
+*
+* @par Description
+*  Walks the first i4_proc_thread_cnt thread slots of the codec context. A slot
+*  whose 'created' flag is set is joined through ithread_join() and its flag is
+*  cleared. A slot whose flag is not set has nothing to join and is skipped.
+*  The thread count is reset to zero once all slots have been visited.
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @returns none
+*
+* @remarks
+*  A failed join is reported on stdout and trips assert(0)
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec)
+{
+    /* slot index and join status */
+    WORD32 i;
+    WORD32 ret;
+
+    /* join spawned threads */
+    for (i = 0; i < ps_codec->i4_proc_thread_cnt; i++)
+    {
+        /* advance past a slot that never got a thread; retrying the same
+         * index (as a loop that only increments after a join would) can
+         * never terminate because nothing in this loop sets the flag */
+        if (!ps_codec->ai4_process_thread_created[i])
+        {
+            continue;
+        }
+
+        ret = ithread_join(ps_codec->apv_proc_thread_handle[i], NULL);
+        if (ret != 0)
+        {
+            printf("pthread Join Failed");
+            assert(0);
+        }
+
+        /* slot is free again */
+        ps_codec->ai4_process_thread_created[i] = 0;
+    }
+
+    ps_codec->i4_proc_thread_cnt = 0;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function yields the processor and then puts the current thread
+* to sleep for a duration of sleep_us
+*
+* @par Description
+* ithread_yield() method causes the calling thread to yield execution to another
+* thread that is ready to run on the current processor. The operating system
+* selects the thread to yield to. ithread_usleep blocks the current thread for
+* the requested duration. In other words, yield just says,
+* end my timeslice prematurely, look around for other threads to run. If there
+* is nothing better than me, continue. Sleep says I don't want to run for the
+* given duration. Even if no other thread wants to run, don't make me run.
+*
+* @param[in] sleep_us
+* thread sleep duration, passed unchanged to ithread_usleep()
+* (NOTE(review): presumably microseconds, going by the parameter and function
+* names - confirm against ithread.h)
+*
+* @returns error_status; this implementation always returns IH264E_SUCCESS
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us)
+{
+ /* yield thread */
+ ithread_yield();
+
+ /* put thread to sleep */
+ ithread_usleep(sleep_us);
+
+ return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Encodes in synchronous api mode
+*
+* @par Description
+* This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns Status
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+ /*
+ * Stages, in order:
+ * 1. reset the output structure and copy the i/o descriptors into locals
+ * 2. bump the call/picture counters and pick the context set (0 or 1)
+ * 3. apply every pending configuration whose timestamp matches this input
+ * 4. on the first call, add the mv and pic buffers and request a header
+ * 5. header mode: emit SPS/PPS only, undo the counters and return
+ * 6. otherwise initialise the picture, spawn the worker threads, run the
+ * process routine on this thread as well and join the workers
+ * 7. hand the buffers back to the application, optionally copy out the
+ * recon, release buffers of a post-encode skipped picture and collect
+ * the per-thread error codes
+ */
+
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* codec ctxt */
+ codec_t *ps_codec = (codec_t *)ps_codec_obj->pv_codec_handle;
+
+ /* input frame to encode */
+ ih264e_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
+
+ /* output buffer to write stream */
+ ih264e_video_encode_op_t *ps_video_encode_op = pv_api_op;
+
+ /* i/o structures */
+ inp_buf_t s_inp_buf;
+ out_buf_t s_out_buf;
+
+ /* temp var */
+ WORD32 ctxt_sel = 0, i;
+
+ /********************************************************************/
+ /* BEGIN INIT */
+ /********************************************************************/
+ /* reset output structure */
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+ ps_video_encode_op->s_ive_op.output_present = 0;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+
+ /* copy input info. to internal structure */
+ s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
+ s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+ s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+ s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+ s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info;
+ s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type;
+ s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info;
+ s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type;
+
+ /* copy output info. to internal structure */
+ s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf;
+ s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+ s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+ s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+
+ /* api call cnt */
+ /* NOTE(review): the "first call" checks further down compare this counter
+ * with 0 after the increment, so it is presumably initialised to -1
+ * elsewhere - confirm */
+ ps_codec->i4_encode_api_call_cnt += 1;
+
+ /* curr pic cnt */
+ ps_codec->i4_pic_cnt += 1;
+
+ /* codec context selector */
+ /* low bit of the call count, i.e. alternates between 0 and 1 */
+ ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ /* reset status flags */
+ /* -1 marks "no picture in this context set"; it is tested again before the
+ * output is handed back */
+ ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
+ ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
+ ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
+
+ /* pass output buffer to codec */
+ ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
+
+ /* initialize codec ctxt with default params for the first encode api call */
+ if (ps_codec->i4_encode_api_call_cnt == 0)
+ {
+ ih264e_codec_init(ps_codec);
+ }
+
+ /* parse configuration params */
+ for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+ {
+ cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
+
+ if (1 == ps_cfg->u4_is_valid)
+ {
+ /* apply when both timestamp halves match this input, or when either
+ * half of the config timestamp reads as -1 */
+ if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) &&
+ (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) ||
+ ((WORD32)ps_cfg->u4_timestamp_high == -1) ||
+ ((WORD32)ps_cfg->u4_timestamp_low == -1) )
+ {
+ error_status |= ih264e_codec_update_config(ps_codec, ps_cfg);
+ /* NOTE(review): presumably records the error code and returns
+ * IV_FAIL when error_status is not success - confirm against
+ * the macro definition */
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_UNSUPPORTEDPARAM,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* entry consumed; do not apply it again */
+ ps_cfg->u4_is_valid = 0;
+ }
+ }
+ }
+
+ /******************************************************************
+ * INSERT LOGO
+ *****************************************************************/
+#ifdef LOGO_EN
+ if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL &&
+ ps_codec->i4_header_mode != 1)
+ {
+ ih264e_insert_logo(s_inp_buf.s_raw_buf.apv_bufs[0],
+ s_inp_buf.s_raw_buf.apv_bufs[1],
+ s_inp_buf.s_raw_buf.apv_bufs[2],
+ s_inp_buf.s_raw_buf.au4_strd[0],
+ 0,
+ 0,
+ ps_codec->s_cfg.e_inp_color_fmt,
+ ps_codec->s_cfg.u4_disp_wd,
+ ps_codec->s_cfg.u4_disp_ht);
+ }
+#endif /*LOGO_EN*/
+
+ if (ps_codec->i4_encode_api_call_cnt == 0)
+ {
+ /********************************************************************/
+ /* number of mv/ref bank buffers used by the codec, */
+ /* 1 to handle curr frame */
+ /* 1 to store information of ref frame */
+ /* 1 more additional because of the codec employs 2 ctxt sets */
+ /* to assist asynchronous API */
+ /********************************************************************/
+
+ /* initialize mv bank buffer manager */
+ error_status |= ih264e_mv_buf_mgr_add_bufs(ps_codec);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* initialize ref bank buffer manager */
+ error_status |= ih264e_pic_buf_mgr_add_bufs(ps_codec);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* for the first frame, generate header when not requested explicitly */
+ if (ps_codec->i4_header_mode == 0 &&
+ ps_codec->u4_header_generated == 0)
+ {
+ ps_codec->i4_gen_header = 1;
+ }
+ }
+
+ /* generate header and return when encoder is operated in header mode */
+ if (ps_codec->i4_header_mode == 1)
+ {
+ /* whenever the header is generated, this implies a start of sequence
+ * and a sequence needs to be started with IDR
+ */
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+ /* generate header */
+ error_status |= ih264e_generate_sps_pps(ps_codec);
+
+ /* a header-only call undoes the two increments made at the top of this
+ * function */
+ /* api call cnt */
+ ps_codec->i4_encode_api_call_cnt --;
+
+ /* curr pic cnt */
+ ps_codec->i4_pic_cnt --;
+
+ /* header mode tag is not sticky */
+ ps_codec->i4_header_mode = 0;
+
+ /* send the input to app */
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+ /* send the output to app */
+ ps_video_encode_op->s_ive_op.output_present = 1;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].s_bits_buf;
+
+ /* error status */
+ /* checked only after the output fields above have been filled in */
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* indicates that header has been generated previously */
+ ps_codec->u4_header_generated = 1;
+
+ return IV_SUCCESS;
+ }
+
+
+ /* a NULL first plane means no input picture was supplied with this call */
+ if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL)
+ {
+ /* array giving pic cnt that is being processed in curr context set */
+ ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
+
+ /* initialize all relevant process ctxts */
+ error_status |= ih264e_pic_init(ps_codec, &s_inp_buf);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+ {
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+ WORD32 ret = 0;
+
+ /* number of addl. threads to be created */
+ /* the calling thread runs the process routine itself, hence one less */
+ WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
+
+ for (i = 0; i < num_thread_cnt; i++)
+ {
+ /* NOTE(review): workers receive as_process[i + 1] without the
+ * proc_ctxt_select offset that ps_proc uses - confirm that this
+ * is intended for ctxt_sel == 1 */
+ ret = ithread_create(ps_codec->apv_proc_thread_handle[i],
+ NULL,
+ (void*)ih264e_process_thread,
+ &ps_codec->as_process[i + 1]);
+ if (ret != 0)
+ {
+ /* NOTE(review): if assert() is compiled out, execution falls
+ * through and the slot is still marked as created below */
+ printf("pthread Create Failed");
+ assert(0);
+ }
+
+ ps_codec->ai4_process_thread_created[i] = 1;
+
+ ps_codec->i4_proc_thread_cnt++;
+ }
+
+
+ /* launch job */
+ ih264e_process_thread(ps_proc);
+
+ /* Join threads at the end of encoding a frame */
+ ih264e_join_threads(ps_codec);
+
+ ih264_list_reset(ps_codec->pv_proc_jobq);
+
+ ih264_list_reset(ps_codec->pv_entropy_jobq);
+ }
+ }
+
+ /* true only when a picture was assigned to this context set above */
+ if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel])
+ {
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+ /* receive output back from codec */
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ /* send the output to app */
+ ps_video_encode_op->s_ive_op.output_present = 1;
+ ps_video_encode_op->s_ive_op.dump_recon = 1;
+ ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf;
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+ /* receive input back from codec */
+ /* overwrites the local copy made at the top, including u4_is_last which
+ * is tested at the end of this function */
+ s_inp_buf = ps_proc->s_inp_buf;
+
+ /* send the input to app */
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+ if (ps_codec->s_cfg.u4_enable_recon &&
+ ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+ {
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* recon buffer */
+ rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel];
+
+ ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
+
+ /* copy/convert the recon buffer and return */
+ /* any value returned by the conversion is not examined here */
+ ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf,
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1],
+ 0,
+ ps_codec->s_cfg.u4_disp_ht);
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO);
+ if (IH264_SUCCESS != ret)
+ {
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ }
+ }
+
+ /* release buffers from ref list */
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1)
+ {
+ /* pic info */
+ pic_buf_t *ps_cur_pic;
+
+ /* mv info */
+ mv_buf_t *ps_cur_mv_buf;
+
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* Decrement coded pic count */
+ ps_codec->i4_coded_pic_cnt--;
+
+ /* look for the reference set entry that belongs to the current picture */
+ /* since the skipped frame may not be on reference list, we may not have an MV bank
+ * hence free only if we have allocated */
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ /* -1 marks the entry as unused */
+ ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+ ps_codec->as_ref_set[i].i4_poc = -1;
+
+ ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+
+ ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf;
+
+ /* release this frame from reference list */
+ ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ break;
+ }
+ }
+ }
+
+ /* a skipped picture has no recon to dump and keeps the IV_NA_FRAME type
+ * set at the top of this function */
+ if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) ||
+ (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1))
+ {
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ }
+ else
+ {
+ /* set output pic type */
+ if (ps_codec->i4_slice_type == PSLICE)
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
+ }
+ else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1)
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
+ }
+ else
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
+ }
+ }
+
+ /* loop through to get the error status */
+ /* NOTE(review): indexes as_process with ctxt_sel + i, whereas the context
+ * base used above is ctxt_sel * MAX_PROCESS_THREADS - confirm which of the
+ * two is intended */
+ for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+ {
+ error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code;
+ }
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ }
+
+ /* the buffer flagged as last reports neither output nor recon */
+ if (1 == s_inp_buf.u4_is_last)
+ {
+ ps_video_encode_op->s_ive_op.output_present = 0;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ }
+
+ return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
new file mode 100755
index 0000000..67e5409
--- /dev/null
+++ b/encoder/ih264e_encode_header.c
@@ -0,0 +1,1187 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_encode_header.c
+*
+* @brief
+* This file contains function definitions related to header encoding.
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_generate_nal_unit_header()
+* - ih264e_generate_sps()
+* - ih264e_generate_pps()
+* - ih264e_generate_slice_header()
+* - ih264e_get_level()
+* - ih264e_populate_sps()
+* - ih264e_populate_pps()
+* - ih264e_populate_slice_header()
+* - ih264e_add_filler_nal_unit()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264_typedefs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_common_tables.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Writes the one byte nal unit header (section 7.4.1) to the stream
+*
+* @par Description
+*  The byte is laid out as forbidden_zero_bit (1 bit, always 0), nal_ref_idc
+*  (2 bits) and nal_unit_type (5 bits), and is emitted as a single 8 bit write
+*
+* @param[inout] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] nal_unit_type
+*  nal type to be inserted; asserted to lie in 1..31
+*
+* @param[in] nal_ref_idc
+*  nal ref idc to be inserted; placed in bits 5 and 6 of the header byte
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+static WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm,
+                                              WORD32 nal_unit_type,
+                                              WORD32 nal_ref_idc)
+{
+    /* status of the bitstream write */
+    WORD32 status = IH264E_SUCCESS;
+
+    /* assembled header byte */
+    WORD32 header_byte;
+
+    /* nal_unit_type has to fit in its 5 bit field and must not be 0 */
+    ASSERT((nal_unit_type > 0) && (nal_unit_type < 32));
+
+    /* the forbidden zero bit occupies the top bit and stays 0 */
+    header_byte = (nal_ref_idc << 5) + nal_unit_type;
+
+    /* 1 forbidden zero bit + 2 nal_ref_idc + 5 nal_unit_type = 8 bits */
+    PUT_BITS(ps_bitstrm, header_byte, 8, status, "nal_unit_header");
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Writes a Sequence Parameter Set (SPS) nal unit to the stream
+*
+* @par Description
+*  Emits, in order: the start code prefix, the nal unit header (type 7,
+*  ref idc 3), the SPS syntax elements taken from ps_sps and the rbsp trailing
+*  bits. The status of every write is OR-ed into the returned value.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+*  pointer to structure containing SPS data
+*
+* @return success or failure error code
+*
+* @remarks
+*  The scaling list and VUI payloads are not written: when the corresponding
+*  'present' flags are set only the flags themselves reach the stream
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps)
+{
+    /* accumulated status of all bitstream writes */
+    WORD32 status = IH264E_SUCCESS;
+
+    /* index into the ref frame offset table */
+    WORD32 idx;
+
+    /* start code prefix, then nal header: nal_unit_type 7, nal_ref_idc 3 */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+    status |= ih264e_generate_nal_unit_header(ps_bitstrm, 7, 3);
+
+    /* profile_idc, constraint flags, reserved bits, level_idc */
+    PUT_BITS(ps_bitstrm, ps_sps->u1_profile_idc, 8, status, "profile_idc");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set0_flag, 1, status, "constrained_set0_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set1_flag, 1, status, "constrained_set1_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set2_flag, 1, status, "constrained_set2_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set3_flag, 1, status, "constrained_set3_flag");
+    PUT_BITS(ps_bitstrm, 0, 4, status, "reserved_zero_four_bits");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_level_idc, 8, status, "level_idc");
+
+    /* seq_parameter_set_id */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_sps_id, status, "seq_parameter_set_id");
+
+    /* elements that are present only for high profile and above */
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_chroma_format_idc, status, "chroma_format_idc");
+
+        /* residual colour transform flag is coded for 4:4:4 only */
+        if (ps_sps->u1_chroma_format_idc == CHROMA_FMT_IDC_YUV444)
+        {
+            PUT_BITS(ps_bitstrm, ps_sps->i1_residual_colour_transform_flag, 1, status, "i1_residual_colour_transform_flag");
+        }
+
+        /* bit depths are coded as an offset from 8 */
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_luma - 8), status, "bit_depth_luma_minus8");
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_chroma - 8), status, "bit_depth_chroma_minus8");
+
+        PUT_BITS(ps_bitstrm, ps_sps->i1_qpprime_y_zero_transform_bypass_flag, 1, status, "qpprime_y_zero_transform_bypass_flag");
+        PUT_BITS(ps_bitstrm, ps_sps->i1_seq_scaling_matrix_present_flag, 1, status, "seq_scaling_matrix_present_flag");
+
+        if (ps_sps->i1_seq_scaling_matrix_present_flag)
+        {
+            /* TODO_LATER: seq_scaling_list is written once scaling list
+             * support is added */
+        }
+    }
+
+    /* log2_max_frame_num is coded as an offset from 4 */
+    PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_frame_num - 4), status, "log2_max_frame_num_minus4");
+
+    /* pic_order_cnt_type and the elements that depend on it */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_pic_order_cnt_type, status, "pic_order_cnt_type");
+
+    switch (ps_sps->i1_pic_order_cnt_type)
+    {
+        case 0:
+        {
+            PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_pic_order_cnt_lsb - 4), status, "log2_max_pic_order_cnt_lsb_minus4");
+            break;
+        }
+
+        case 1:
+        {
+            PUT_BITS(ps_bitstrm, ps_sps->i1_delta_pic_order_always_zero_flag, 1, status, "delta_pic_order_always_zero_flag");
+            PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_non_ref_pic, status, "offset_for_non_ref_pic");
+            PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_top_to_bottom_field, status, "offset_for_top_to_bottom_field");
+            PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle, status, "num_ref_frames_in_pic_order_cnt_cycle");
+
+            /* one offset per ref frame of the poc cycle */
+            for (idx = 0; idx < ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle; idx++)
+            {
+                PUT_BITS_SEV(ps_bitstrm, ps_sps->ai4_offset_for_ref_frame[idx], status, "offset_for_ref_frame");
+            }
+            break;
+        }
+
+        default:
+        {
+            /* no further poc related elements are written for other types */
+            break;
+        }
+    }
+
+    /* reference frame count and frame num gap flag */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_max_num_ref_frames, status, "num_ref_frames");
+    PUT_BITS(ps_bitstrm, ps_sps->i1_gaps_in_frame_num_value_allowed_flag, 1, status, "gaps_in_frame_num_value_allowed_flag");
+
+    /* picture dimensions */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_mbs_minus1, status, "pic_width_in_mbs_minus1");
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_map_units_minus1, status, "pic_height_in_map_units_minus1");
+
+    /* frame_mbs_only_flag; the mbaff flag follows only when it is 0 */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_mbs_only_flag, 1, status, "frame_mbs_only_flag");
+
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        PUT_BITS(ps_bitstrm, ps_sps->i1_mb_adaptive_frame_field_flag, 1, status, "mb_adaptive_frame_field_flag");
+    }
+
+    PUT_BITS(ps_bitstrm, ps_sps->i1_direct_8x8_inference_flag, 1, status, "direct_8x8_inference_flag");
+
+    /* frame_cropping_flag; the four offsets follow only when it is set */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_cropping_flag, 1, status, "frame_cropping_flag");
+
+    if (ps_sps->i1_frame_cropping_flag)
+    {
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_left_offset, status, "frame_crop_left_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_right_offset, status, "frame_crop_right_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_top_offset, status, "frame_crop_top_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_bottom_offset, status, "frame_crop_bottom_offset");
+    }
+
+    /* vui_parameters_present_flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1, status, "vui_parameters_present_flag");
+
+    if (ps_sps->i1_vui_parameters_present_flag)
+    {
+        /* vui parameters are not added to the bitstream yet */
+    }
+
+    /* rbsp trailing bits */
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Writes a Picture Parameter Set (PPS) nal unit to the stream
+*
+* @par Description
+*  Generates the Picture Parameter Set as per Section 7.3.2.2: start code
+*  prefix, the fixed PPS nal header byte, the PPS syntax elements taken from
+*  ps_pps and the rbsp trailing bits. The status of every write is OR-ed into
+*  the returned value.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_pps
+*  pointer to structure containing PPS data
+*
+* @param[in] ps_sps
+*  pointer to the SPS; only its profile_idc is read, to decide whether the
+*  high profile extension fields are written
+*
+* @return success or failure error code
+*
+* @remarks
+*  Slice group map data and the picture scaling lists are not written
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps)
+{
+    /* accumulated status of all bitstream writes */
+    WORD32 status = IH264E_SUCCESS;
+
+    /* start code prefix followed by the fixed PPS header byte */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+    PUT_BITS(ps_bitstrm, NAL_PPS_FIRST_BYTE, 8, status, "pps_header");
+
+    /* ids of this pps and of the sps it refers to */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_pps_id, status, "pic_parameter_set_id");
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_sps_id, status, "seq_parameter_set_id");
+
+    /* entropy coding mode (0 - VLC, 1 - CABAC) and pic order present flag */
+    PUT_BITS(ps_bitstrm, ps_pps->u1_entropy_coding_mode_flag, 1, status, "Entropy coding : 0-VLC; 1 - CABAC");
+    PUT_BITS(ps_bitstrm, ps_pps->u1_pic_order_present_flag, 1, status, "Pic order present flag");
+
+    /* slice group count is coded minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_num_slice_groups - 1, status, "Number of slice groups");
+
+    if (ps_pps->u1_num_slice_groups > 1)
+    {
+        /* TODO_LATER: the slice group map type has to be added to the bit
+         * stream once more than one slice group is in use */
+    }
+
+    /* default active reference counts for both lists, coded minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1, status, "num_ref_idx_l0_default_active_minus1");
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1, status, "num_ref_idx_l1_default_active_minus1");
+
+    /* weighted_pred_flag (1 bit) and weighted_bipred_idc (2 bits) */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1, status, "weighted_pred_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_idc, 2, status, "weighted_bipred_idc");
+
+    /* initial qp and qs are coded as an offset from 26 */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26, status, "pic_init_qp_minus26");
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qs - 26, status, "pic_init_qs_minus26");
+
+    /* chroma_qp_index_offset */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_chroma_qp_index_offset, status, "chroma_qp_index_offset");
+
+    /* deblocking control, constrained intra pred and redundant pic cnt flags */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1, status, "deblocking_filter_control_present_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1, status, "constrained_intra_pred_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_redundant_pic_cnt_present_flag, 1, status, "redundant_pic_cnt_present_flag");
+
+    /* extension fields, present only for high profile and above */
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        PUT_BITS(ps_bitstrm, ps_pps->i1_transform_8x8_mode_flag, 1, status, "transform_8x8_mode_flag");
+        PUT_BITS(ps_bitstrm, ps_pps->i1_pic_scaling_matrix_present_flag, 1, status, "pic_scaling_matrix_present_flag");
+
+        if (ps_pps->i1_pic_scaling_matrix_present_flag)
+        {
+            /* TODO_LATER: scaling lists are written once scaling list
+             * support is added */
+        }
+
+        /* second_chroma_qp_index_offset */
+        PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_second_chroma_qp_index_offset, status, "Second chroma QP offset");
+    }
+
+    /* rbsp trailing bits */
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par Description
+*  Writes the NAL start code prefix, the NAL unit header and then the slice
+*  header syntax elements into the bitstream, in the order of the
+*  slice_header() syntax of the H.264 spec (Section 7.3.3). Syntax the encoder
+*  does not support yet (reference list reordering commands, prediction weight
+*  table, adaptive reference marking commands, slice_group_change_cycle) is
+*  not emitted; the corresponding branches are placeholders.
+*
+* @param[inout] ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in] ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in] ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by slice
+*
+* @return IH264E_SUCCESS OR-ed with the status of every bitstream write
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+
+    WORD32 return_status = IH264E_SUCCESS;
+
+    /* Insert start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* Insert Nal Unit Header */
+    return_status |= ih264e_generate_nal_unit_header(ps_bitstrm, ps_slice_hdr->i1_nal_unit_type, ps_slice_hdr->i1_nal_unit_idc);
+
+    /* first_mb_in_slice */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, "first_mb_in_slice");
+
+    /* slice_type */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "slice_type");
+
+    /* pic_parameter_set_id */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "pic_parameter_set_id");
+
+    /* frame_num, coded with log2_max_frame_num bits */
+    PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, return_status, "frame_num");
+
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        /* field_pic_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, "field_pic_flag");
+
+        if (ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* bottom_field_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, "bottom_field_flag");
+        }
+    }
+
+    /* nal_unit_type 5 : coded slice of an IDR picture */
+    if (ps_slice_hdr->i1_nal_unit_type == 5)
+    {
+        /* u2_idr_pic_id */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "u2_idr_pic_id");
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 0)
+    {
+        /* pic_order_cnt_lsb */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "pic_order_cnt_lsb");
+
+        if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt_bottom */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, "delta_pic_order_cnt_bottom");
+        }
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag)
+    {
+        /* delta_pic_order_cnt[0] */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, "delta_pic_order_cnt[0]");
+
+        if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt[1] */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, "delta_pic_order_cnt[1]");
+        }
+    }
+
+    if (ps_pps->i1_redundant_pic_cnt_present_flag)
+    {
+        /* redundant_pic_cnt */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, "redundant_pic_cnt");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* direct_spatial_mv_pred_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, "direct_spatial_mv_pred_flag");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* num_ref_idx_active_override_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, return_status, "num_ref_idx_active_override_flag");
+
+        /*
+         * Both active-reference counts are present only when the override
+         * flag is set; the list 1 count is additionally restricted to B
+         * slices. Emitting the list 1 count without the override flag would
+         * desynchronise the decoder's parsing of the rest of the header.
+         */
+        if (ps_slice_hdr->u1_num_ref_idx_active_override_flag)
+        {
+            /* num_ref_idx_l0_active_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1");
+
+            if (ps_slice_hdr->u1_slice_type == BSLICE)
+            {
+                /* num_ref_idx_l1_active_minus1 */
+                PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1");
+            }
+        }
+    }
+
+    /* ref_idx_reordering */
+    /* TODO: ref_idx_reordering commands are not emitted yet */
+    /* NOTE(review): for B slices the syntax also carries
+     * ref_pic_list_reordering_flag_l1, which is not written here - confirm
+     * before enabling B slices */
+    if ((ps_slice_hdr->u1_slice_type != ISLICE) && (ps_slice_hdr->u1_slice_type != SISLICE))
+    {
+        /* ref_pic_list_reordering_flag_l0 */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l0, 1, return_status, "ref_pic_list_reordering_flag_l0");
+
+        if (ps_slice_hdr->u1_ref_idx_reordering_flag_l0)
+        {
+
+        }
+    }
+
+    if ((ps_pps->i1_weighted_pred_flag &&
+                    (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) ||
+                    (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE))
+    {
+        /* TODO_LATER: Currently there is no support for weighted prediction.
+         This needs to be updated when the support is added */
+    }
+
+    /* decoded reference picture marking : only for reference pictures */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (ps_slice_hdr->i1_nal_unit_type == 5)
+        {
+            /* no_output_of_prior_pics_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag , 1, return_status, "no_output_of_prior_pics_flag ");
+
+            /* long_term_reference_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag , 1, return_status, "long_term_reference_flag ");
+        }
+        else
+        {
+            /* adaptive_ref_pic_marking_mode_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag , 1, return_status, "adaptive_ref_pic_marking_mode_flag ");
+
+            if (ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag)
+            {
+                /* TODO: if the reference picture marking mode is adaptive
+                 add these fields in the bit-stream */
+            }
+        }
+    }
+
+    if (ps_slice_hdr->u1_entropy_coding_mode_flag && ps_slice_hdr->u1_slice_type != ISLICE &&
+                    ps_slice_hdr->u1_slice_type != SISLICE)
+    {
+        /* cabac_init_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, "cabac_init_idc");
+    }
+
+    /* slice_qp_delta : slice qp relative to the qp signalled in the pps */
+    PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, "slice_qp_delta");
+
+    if (ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == SISLICE)
+    {
+        if (ps_slice_hdr->u1_slice_type == SPSLICE)
+        {
+            /* sp_for_switch_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_sp_for_switch_flag , 1, return_status, "sp_for_switch_flag");
+        }
+        /* slice_qs_delta */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->u1_slice_qs - ps_pps->i1_pic_init_qs, return_status, "slice_qs_delta");
+    }
+
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        /* disable_deblocking_filter_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, return_status, "disable_deblocking_filter_idc");
+
+        /* filter offsets are present unless deblocking is fully disabled */
+        if (ps_slice_hdr->u1_disable_deblocking_filter_idc != 1)
+        {
+            /* slice_alpha_c0_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, "slice_alpha_c0_offset_div2");
+
+            /* slice_beta_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, "slice_beta_offset_div2");
+        }
+    }
+
+    if (ps_slice_hdr->u1_num_slice_groups_minus1 > 0 &&
+                    ps_pps->u1_slice_group_map_type >= 3 &&
+                    ps_pps->u1_slice_group_map_type <= 5)
+    {
+        /* slice_group_change_cycle */
+        /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+         * If this is not the case, we have to add Slice group map type to the bit stream */
+    }
+
+    return return_status;
+}
+
+
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par Description
+*  Populates sps structure for its use in header generation. Profile, level,
+*  picture size in macroblocks and the cropping offsets are derived from the
+*  active configuration (ps_codec->s_cfg); all remaining fields are set to the
+*  fixed values this encoder supports.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @return IH264E_SUCCESS (no step of the population can currently fail)
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
+{
+    /* active config parameters */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* profile */
+    /*
+     * Baseline profile supports, 8 bits per sample, 4:2:0 format, CAVLC.
+     * B frames are not allowed. Further, Flexible mb ordering, Redundant slices, Arbitrary slice ordering are supported.
+     * The constrained baseline profile is baseline profile minus ASO, FMO and redundant slices.
+     * To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames,
+     * support for weighted prediction and introduce CABAC entropy coding then we have Main Profile.
+     */
+    if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) ||
+        (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction))
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_MAIN;
+    }
+    else
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_BASELINE;
+    }
+
+    /* level : the configured maximum level is signalled as is */
+    ps_sps->u1_level_idc = ps_cfg->u4_max_level;
+
+    /* constrained flags */
+    /*
+     * baseline profile automatically implies set 0 flag
+     */
+    ps_sps->u1_constraint_set0_flag = (ps_sps->u1_profile_idc == IH264_PROFILE_BASELINE);
+    /*
+     * main profile automatically implies set 1 flag
+     * Although the encoder says it supports Baseline profile it actually supports constrained
+     * baseline profile as ASO, FMO and redundant slices are not supported
+     */
+    ps_sps->u1_constraint_set1_flag = (ps_sps->u1_profile_idc <= IH264_PROFILE_MAIN);
+    /*
+     * extended profile is not supported
+     */
+    ps_sps->u1_constraint_set2_flag = 0x00;
+    /*
+     * level 1b is signalled with level_idc 11.
+     * NOTE(review): constraint_set3_flag is left at 0 for level 1b as well,
+     * so the stream is indistinguishable from level 1.1; the spec marks
+     * level 1b with constraint_set3_flag = 1 - confirm whether 0 is
+     * intentional before changing it.
+     */
+    ps_sps->u1_constraint_set3_flag = 0;
+    if (ps_sps->u1_level_idc == IH264_LEVEL_1B)
+    {
+        ps_sps->u1_level_idc = IH264_LEVEL_11;
+    }
+
+    /* active sps id */
+    ps_sps->u1_sps_id = ps_codec->i4_sps_id;
+
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        /* chroma format idc */
+        ps_sps->u1_chroma_format_idc = CHROMA_FMT_IDC_YUV420;
+
+        /* residual_colour_transform_flag */
+        ps_sps->i1_residual_colour_transform_flag = 0;
+
+        /* luma bit depth 8 */
+        ps_sps->i1_bit_depth_luma = 8;
+
+        /* chroma bit depth 8 */
+        ps_sps->i1_bit_depth_chroma = 8;
+
+        /* qpprime_y_zero_transform_bypass_flag */
+        ps_sps->i1_qpprime_y_zero_transform_bypass_flag = 0;
+
+        /* seq_scaling_matrix_present_flag */
+        /* TODO_LATER: Will be enabled once scaling list support is added */
+        ps_sps->i1_seq_scaling_matrix_present_flag = 0;
+    }
+
+    /* log2_max_frame_num (the actual value, not the minus4 syntax element) */
+    ps_sps->i1_log2_max_frame_num = 16;
+
+    /* pic_order_cnt_type : 2 by default, 0 when alternate reference is enabled */
+    ps_sps->i1_pic_order_cnt_type = 2;
+
+    if (ps_cfg->u4_enable_alt_ref)
+    {
+        ps_sps->i1_pic_order_cnt_type = 0;
+    }
+
+    /* log2_max_pic_order_cnt_lsb (the actual value, not the minus4 syntax element) */
+    ps_sps->i1_log2_max_pic_order_cnt_lsb = 8;
+
+    /* TODO : add support for the remaining fields of poc types 0 and 1 */
+
+    /* num_ref_frames */
+    /* FIXME : Fix this hard coding */
+    ps_sps->u1_max_num_ref_frames = 1;
+
+    /* gaps_in_frame_num_value_allowed_flag */
+    ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0;
+
+    /* pic width in mb - 1 */
+    ps_sps->i2_pic_width_in_mbs_minus1 = ps_cfg->i4_wd_mbs - 1;
+
+    /* pic height in mb - 1 */
+    ps_sps->i2_pic_height_in_map_units_minus1 = ps_cfg->i4_ht_mbs - 1;
+
+    /* frame_mbs_only_flag, no support for interlace encoding */
+    ps_sps->i1_frame_mbs_only_flag = 1;
+
+    /* mb_adaptive_frame_field_flag */
+    if (ps_sps->i1_frame_mbs_only_flag == 0)
+    {
+        ps_sps->i1_mb_adaptive_frame_field_flag = 0;
+    }
+
+    /* direct_8x8_inference_flag */
+    ps_sps->i1_direct_8x8_inference_flag = 0;
+
+    /* cropping params */
+    /*NOTE : Cropping values depend on the chroma format
+     * For our case ,decoder interprets the cropping values as 2*num pixels
+     * Hence the difference in the disp width and width must be halved before sending
+     * to get the expected results
+     */
+    ps_sps->i2_frame_crop_left_offset = 0;
+    ps_sps->i2_frame_crop_right_offset = (ps_cfg->u4_wd - ps_cfg->u4_disp_wd) >> 1;
+    ps_sps->i2_frame_crop_top_offset = 0;
+    ps_sps->i2_frame_crop_bottom_offset = (ps_cfg->u4_ht - ps_cfg->u4_disp_ht) >> 1;
+
+    /* cropping is signalled only when at least one offset is non zero */
+    ps_sps->i1_frame_cropping_flag = (ps_sps->i2_frame_crop_left_offset ||
+                                      ps_sps->i2_frame_crop_right_offset ||
+                                      ps_sps->i2_frame_crop_top_offset ||
+                                      ps_sps->i2_frame_crop_bottom_offset) ? 1 : 0;
+
+    /* vui params are not generated */
+    ps_sps->i1_vui_parameters_present_flag = 0;
+
+    /*
+     * Every field above is assigned unconditionally, so report success. The
+     * status used to start as IH264E_FAIL and was never updated, which made
+     * the function report failure on every call.
+     */
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par Description
+*  Fills the picture parameter set used for header generation. The parameter
+*  set ids come from the codec context, the entropy coding mode and the
+*  constrained intra prediction flag come from the active configuration, and
+*  every other field is set to a fixed value.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_pps
+*  pointer to pps params that needs to be populated
+*
+* @return IH264E_SUCCESS
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps)
+{
+    cfg_params_t *ps_active_cfg = &(ps_codec->s_cfg);
+
+    /* ---- identifiers : pic_parameter_set_id and seq_parameter_set_id ---- */
+    ps_pps->u1_pps_id = ps_codec->i4_pps_id;
+    ps_pps->u1_sps_id = ps_codec->i4_sps_id;
+
+    /* ---- fields taken from the active configuration ---- */
+    ps_pps->u1_entropy_coding_mode_flag = ps_active_cfg->u4_entropy_coding_mode;
+    ps_pps->i1_constrained_intra_pred_flag = ps_active_cfg->u4_constrained_intra_pred;
+
+    /* ---- picture order count : flag is unset for POC type 2 ---- */
+    ps_pps->u1_pic_order_present_flag = 0;
+
+    /* ---- slice groups : only a single slice group is supported ---- */
+    /* TODO_LATER: with more than one slice group, the slice group map type
+     * has to be added to the bit stream */
+    ps_pps->u1_num_slice_groups = 1;
+    ps_pps->u1_slice_group_map_type = 0;
+
+    /* ---- default active reference counts for list 0 and list 1 ---- */
+    /* FIXME : fix these hard coded values */
+    ps_pps->i1_num_ref_idx_l0_default_active = 1;
+    ps_pps->i1_num_ref_idx_l1_default_active = 1;
+
+    /* ---- weighted prediction is disabled for now ---- */
+    ps_pps->i1_weighted_pred_flag = 0;
+    ps_pps->i1_weighted_bipred_idc = 0;
+
+    /* ---- quantisation : nothing is signalled through the pps, the actual
+     * qp is sent in the slice headers instead ---- */
+    ps_pps->i1_pic_init_qp = 0;
+    ps_pps->i1_pic_init_qs = 0;
+    ps_pps->i1_chroma_qp_index_offset = 0;
+
+    /* ---- deblocking filter controls are present in the slice header ---- */
+    ps_pps->i1_deblocking_filter_control_present_flag = 1;
+
+    /* ---- sending redundant slices is not supported for now ---- */
+    ps_pps->i1_redundant_pic_cnt_present_flag = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par Description
+*  Fills the slice header fields that the encoder currently signals, using the
+*  process context, the entropy context and the active sps/pps. When the slice
+*  starts at macroblock 0 the coded picture counter of the codec context is
+*  incremented (after it has been used for pic_order_cnt_lsb).
+*
+*  Fields belonging to unsupported features (redundant slices, B slice direct
+*  mode, reference count override values, list reordering commands, weighted
+*  prediction, adaptive reference marking, SP/SI slices) are not assigned.
+*  NOTE(review): i1_cabac_init_idc is not assigned here either, although the
+*  slice header generator emits it for non-intra slices when the entropy
+*  coding mode flag is set - confirm where it is initialised.
+*
+* @param[in] ps_proc
+*  pointer to proc context
+*
+* @param[out] ps_slice_hdr
+*  pointer to slice header structure that needs to be populated
+*
+* @param[in] ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by the pps
+*
+* @return IH264E_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+    codec_t *ps_codec = ps_proc->ps_codec;
+    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+    /* nal_ref_idc : 3 for pictures used as reference, 0 otherwise */
+    ps_slice_hdr->i1_nal_unit_idc = (ps_codec->u4_is_curr_frm_ref) ? 3 : 0;
+
+    /* first_mb_in_slice, slice_type, pic_parameter_set_id, frame_num */
+    /* (separate colour plane flag is 0, so colour_plane_id is not needed) */
+    ps_slice_hdr->u2_first_mb_in_slice = ps_entropy->i4_mb_start_add;
+    ps_slice_hdr->u1_slice_type = ps_proc->i4_slice_type;
+    ps_slice_hdr->u1_pps_id = ps_pps->u1_pps_id;
+    ps_slice_hdr->i4_frame_num = ps_proc->i4_frame_num;
+
+    /* interlace encoding is not supported : never a field picture */
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        ps_slice_hdr->i1_field_pic_flag = 0;
+    }
+
+    /* nal unit type : 5 for IDR slices (with their idr_pic_id), 1 otherwise */
+    if (ps_proc->u4_is_idr)
+    {
+        ps_slice_hdr->i1_nal_unit_type = 5;
+        ps_slice_hdr->u2_idr_pic_id = ps_proc->u4_idr_pic_id;
+    }
+    else
+    {
+        ps_slice_hdr->i1_nal_unit_type = 1;
+    }
+
+    /* pic_order_cnt_lsb : coded picture count wrapped to the lsb range */
+    if (0 == ps_sps->i1_pic_order_cnt_type)
+    {
+        WORD32 i4_poc = ps_codec->i4_coded_pic_cnt;
+
+        ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc % (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb);
+    }
+
+    /* the first slice of a picture advances the coded picture count */
+    if (0 == ps_slice_hdr->u2_first_mb_in_slice)
+    {
+        ps_codec->i4_coded_pic_cnt++;
+    }
+
+    /* inter slices : default active reference counts are never overridden */
+    if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE)
+    {
+        ps_slice_hdr->u1_num_ref_idx_active_override_flag = 0;
+    }
+
+    /* TODO: ref_idx_reordering is not used, list 0 is never reordered */
+    if ((ps_proc->i4_slice_type != ISLICE) && (ps_proc->i4_slice_type != SISLICE))
+    {
+        ps_slice_hdr->u1_ref_idx_reordering_flag_l0 = 0;
+    }
+
+    /* TODO_LATER: weighted prediction is not supported, nothing to set */
+
+    /* reference picture marking, relevant for reference pictures only */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (5 == ps_slice_hdr->i1_nal_unit_type)
+        {
+            /* no_output_of_prior_pics_flag and long_term_reference_flag */
+            ps_slice_hdr->u1_no_output_of_prior_pics_flag = 0;
+            ps_slice_hdr->u1_long_term_reference_flag = 0;
+        }
+        else
+        {
+            /* TODO: adaptive marking is never selected; its commands would
+             * have to be added to the bit-stream otherwise */
+            ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag = 0;
+        }
+    }
+
+    /* entropy coding mode flag, copied from the entropy context */
+    ps_slice_hdr->u1_entropy_coding_mode_flag = ps_entropy->u1_entropy_coding_mode_flag;
+
+    /* slice qp */
+    ps_slice_hdr->i1_slice_qp = ps_proc->u4_frame_qp;
+
+    /* deblocking : idc comes from the proc context, offsets are zero */
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        ps_slice_hdr->u1_disable_deblocking_filter_idc = ps_proc->u4_disable_deblock_level;
+
+        if (1 != ps_slice_hdr->u1_disable_deblocking_filter_idc)
+        {
+            ps_slice_hdr->i1_slice_alpha_c0_offset_div2 = 0;
+            ps_slice_hdr->i1_slice_beta_offset_div2 = 0;
+        }
+    }
+
+    /* TODO_LATER: a single slice group is used, so slice_group_change_cycle
+     * is never needed */
+    ps_slice_hdr->u1_num_slice_groups_minus1 = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts FILLER Nal Unit.
+*
+* @par Description
+*  In constant bit rate rc mode, when the bits generated by the codec is
+*  underflowing the target bit rate, the encoder library inserts filler nal unit.
+*  The payload is written in 32 bit words: the requested byte count is floored
+*  to a multiple of 4 and the first word (nal header + 3 fill bytes) is always
+*  written, so requests of 4 bytes or less still produce that one word.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] insert_fill_bytes
+*  Number of fill bytes to be inserted
+*
+* @return IH264E_BITSTREAM_BUFFER_OVERFLOW when the fill bytes do not fit in
+*  the stream buffer, otherwise the OR-ed status of the bitstream writes
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm,
+                                          WORD32 insert_fill_bytes)
+{
+    IH264E_ERROR_T return_status = IH264E_SUCCESS;
+
+    /* words still to be written after the first (header) word */
+    WORD32 i4_words_left;
+
+    /* NAL start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* NOTE(review): the capacity check runs after the start code has been
+     * written, so on overflow the start code stays in the stream */
+    if (ps_bitstrm->u4_strm_buf_offset + insert_fill_bytes >= ps_bitstrm->u4_max_strm_size)
+    {
+        return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* first word : filler nal unit header followed by three fill bytes */
+    PUT_BITS(ps_bitstrm, NAL_FILLER_FIRST_BYTE, 8, return_status, "filler_header");
+
+    PUT_BITS(ps_bitstrm, 0xFFFFFF, 24, return_status, "fill bytes");
+
+    /*
+     * The byte count is floored to whole words and reduced by the word that
+     * has just been written; every remaining word is four 0xFF fill bytes.
+     */
+    for (i4_words_left = (insert_fill_bytes >> 2) - 1; i4_words_left > 0; i4_words_left--)
+    {
+        PUT_BITS(ps_bitstrm, 0xFFFFFFFF, 32, return_status, "fill bytes");
+    }
+
+    return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
diff --git a/encoder/ih264e_encode_header.h b/encoder/ih264e_encode_header.h
new file mode 100755
index 0000000..acae5b6
--- /dev/null
+++ b/encoder/ih264e_encode_header.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_encode_header.h
+*
+* @brief
+* This file contains structures and interface prototypes for h264 bitstream
+* header encoding
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ENCODE_HEADER_H_
+#define IH264E_ENCODE_HEADER_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream
+******************************************************************************
+ */
+#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_bits((ps_bitstrm), (code_val), (code_len))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream using 0th order exponential Golomb encoding for
+ * signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_uev((ps_bitstrm), (code_val))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream using 0th order exponential Golomb encoding for
+ * signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_sev((ps_bitstrm), (code_val))
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates SPS (Sequence Parameter Set)
+*
+* @par Description
+*  This function generates Sequence Parameter Set header as per the spec
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+*  pointer to structure containing SPS data
+*
+* @return success or failure error code, as WORD32.
+*  NOTE(review): the other generators declared in this header OR the status
+*  of each bitstream write into their return value; confirm this function
+*  follows the same convention.
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_sps
+ (
+ bitstrm_t *ps_bitstrm,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par Description
+*  Generate Picture Parameter Set as per Section 7.3.2.2
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_pps
+*  pointer to structure containing PPS data
+*
+* @param[in] ps_sps
+*  pointer to the SPS referred by the PPS; its profile_idc decides whether
+*  the high profile fields (transform_8x8_mode_flag,
+*  pic_scaling_matrix_present_flag, second chroma qp offset) are written
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_pps
+ (
+ bitstrm_t *ps_bitstrm,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par Description
+*  Writes the NAL start code prefix, the NAL unit header and the slice header
+*  syntax elements into the bitstream
+*
+* @param[inout] ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in] ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in] ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by slice
+*
+* @return success or failure error code (IH264E_SUCCESS OR-ed with the status
+*  of every bitstream write)
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_slice_header
+ (
+ bitstrm_t *ps_bitstrm,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par Description
+*  Populates sps structure for its use in header generation. Profile, level,
+*  picture size in macroblocks and the cropping offsets are derived from the
+*  active configuration (ps_codec->s_cfg); the sps id comes from the codec
+*  context.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_sps
+ (
+ codec_t *ps_codec,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par Description
+*  Populates pps structure for its use in header generation. The parameter
+*  set ids come from the codec context; the entropy coding mode and the
+*  constrained intra prediction flag come from the active configuration
+*  (ps_codec->s_cfg); the remaining fields are set to fixed values.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_pps
+*  pointer to pps params that needs to be populated
+*
+* @return IH264E_SUCCESS (the function has no failure path)
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps
+ (
+ codec_t *ps_codec,
+ pps_t *ps_pps
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par Description
+*  Populates slice header structure for its use in header generation.
+*  Besides filling ps_slice_hdr, the function increments the coded picture
+*  counter of the codec context (ps_proc->ps_codec->i4_coded_pic_cnt) when
+*  the slice starts at macroblock 0.
+*
+* @param[in] ps_proc
+*  pointer to proc context
+*
+* @param[out] ps_slice_hdr
+*  pointer to slice header structure that needs to be populated
+*
+* @param[in] ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by the pps
+*
+* @return IH264E_SUCCESS (the function has no failure path)
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header
+ (
+ process_ctxt_t *ps_proc,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief inserts FILLER Nal Unit.
+*
+* @par Description
+*  In constant bit rate rc mode, when the bits generated by the codec is
+*  underflowing the target bit rate, the encoder library inserts filler nal unit.
+*  The fill bytes are written in 32 bit words: the requested count is floored
+*  to a multiple of 4, and one word (nal header plus three fill bytes) is
+*  written even when 4 bytes or less are requested.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] insert_fill_bytes
+*  Number of fill bytes to be inserted
+*
+* @return IH264E_BITSTREAM_BUFFER_OVERFLOW when the fill bytes do not fit in
+*  the stream buffer, otherwise success or failure error code of the
+*  bitstream writes
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 insert_fill_bytes
+ );
+
+
+#endif //IH264E_ENCODE_HEADER_H_
diff --git a/encoder/ih264e_error.h b/encoder/ih264e_error.h
new file mode 100755
index 0000000..8fe9dac
--- /dev/null
+++ b/encoder/ih264e_error.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_error.h
+*
+* @brief
+* Definitions related to error handling
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ERROR_H_
+#define IH264E_ERROR_H_
+
+/**
+******************************************************************************
+* @brief Records an error status and returns from the calling function
+*
+* @par Description
+*  If error is not IH264E_SUCCESS, ((1 << severity) | error) is stored in
+*  out_status and `return (ret_code);` is executed in the function that
+*  invoked the macro. If error is IH264E_SUCCESS nothing happens.
+*
+* @remarks
+*  - Every argument is parenthesised in the expansion, so compound
+*    expressions (e.g. `a | b`, `c ? x : y`) can be passed safely.
+*  - error is evaluated twice on the failure path; pass an expression
+*    without side effects.
+*  - The expansion is a bare if-block rather than do { } while (0) so that
+*    call sites written with or without a trailing semicolon keep compiling.
+*    Do not use it as the un-braced body of an if that has an else.
+******************************************************************************
+*/
+#define SET_ERROR_ON_RETURN(error, severity, out_status, ret_code) \
+    if ((error) != IH264E_SUCCESS) \
+    {\
+        (out_status) = ((1 << (severity)) | (error));\
+        return (ret_code);\
+    }
+
+
+/**
+******************************************************************************
+ * @brief Extended error code for each error in H264 encoder
+ *
+ * Bitstream errors are numbered from IH264E_BITSTREAM_ERROR_START (0x80) and
+ * codec errors from IH264E_CODEC_ERROR_START (0x90).
+******************************************************************************
+ */
+typedef enum
+{
+ /* NOTE: the ive error codes end at 0x80 */
+ IVE_ERR_CODEC_EXTENSIONS = 0x80,
+
+ /* bit stream error start */
+ IH264E_BITSTREAM_ERROR_START = IVE_ERR_CODEC_EXTENSIONS,
+
+ /* codec error start */
+ IH264E_CODEC_ERROR_START = IH264E_BITSTREAM_ERROR_START + 0x10,
+
+ /** no error */
+ IH264E_SUCCESS = 0,
+
+ /** bitstream init failure, buffer ptr not aligned to WORD (32bits) */
+ IH264E_BITSTREAM_BUFPTR_ALIGN_FAIL = IH264E_BITSTREAM_ERROR_START + 0x01,
+
+ /** bitstream init failure, buf size not multiple of WORD size (32bits) */
+ IH264E_BITSTREAM_BUFSIZE_ALIGN_FAIL = IH264E_BITSTREAM_ERROR_START + 0x02,
+
+ /** bitstream runtime failure, buf size limit exceeded during encode */
+ IH264E_BITSTREAM_BUFFER_OVERFLOW = IH264E_BITSTREAM_ERROR_START + 0x03,
+
+ /** width not set within supported limit */
+ IH264E_WIDTH_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x01,
+
+ /** height not set within supported limit */
+ IH264E_HEIGHT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x02,
+
+ /** Unsupported number of reference pictures passed as an argument */
+ IH264E_NUM_REF_UNSUPPORTED = IH264E_CODEC_ERROR_START + 0x03,
+
+ /** Unsupported number of reorder pictures passed as an argument */
+ IH264E_NUM_REORDER_UNSUPPORTED = IH264E_CODEC_ERROR_START + 0x04,
+
+ /** codec level not supported */
+ IH264E_CODEC_LEVEL_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x05,
+
+ /** input chroma format not supported */
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x06,
+
+ /** recon chroma format not supported */
+ IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x07,
+
+ /** rate control option configured is not supported */
+ IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x08,
+
+ /** frame rate configured is not supported */
+ IH264E_FRAME_RATE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x09,
+
+ /** bit rate configured is not supported */
+ IH264E_BITRATE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0A,
+
+ /** B frames configuration not supported */
+ IH264E_BFRAMES_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0B,
+
+ /** content type not supported */
+ IH264E_CONTENT_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0C,
+
+ /** unsupported horizontal search range */
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0D,
+
+ /** unsupported vertical search range */
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0E,
+
+ /** Unsupported slice type input */
+ IH264E_SLICE_TYPE_INPUT_INVALID = IH264E_CODEC_ERROR_START + 0x0F,
+
+ /** unsupported architecture type */
+ IH264E_ARCH_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x10,
+
+ /** unsupported soc type */
+ IH264E_SOC_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x11,
+
+ /** target frame rate exceeds source frame rate */
+ IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE = IH264E_CODEC_ERROR_START + 0x12,
+
+ /** invalid force frame input */
+ IH264E_INVALID_FORCE_FRAME_INPUT = IH264E_CODEC_ERROR_START + 0x13,
+
+ /** invalid me speed preset */
+ IH264E_INVALID_ME_SPEED_PRESET = IH264E_CODEC_ERROR_START + 0x14,
+
+ /** invalid encoder speed preset */
+ IH264E_INVALID_ENC_SPEED_PRESET = IH264E_CODEC_ERROR_START + 0x15,
+
+ /** invalid deblocking param */
+ IH264E_INVALID_DEBLOCKING_TYPE_INPUT = IH264E_CODEC_ERROR_START + 0x16,
+
+ /** invalid max qp */
+ IH264E_INVALID_MAX_FRAME_QP = IH264E_CODEC_ERROR_START + 0x17,
+
+ /** invalid min qp */
+ IH264E_INVALID_MIN_FRAME_QP = IH264E_CODEC_ERROR_START + 0x18,
+
+ /** invalid init qp */
+ IH264E_INVALID_INIT_QP = IH264E_CODEC_ERROR_START + 0x19,
+
+ /** version buffer size is insufficient */
+ IH264E_CXA_VERS_BUF_INSUFFICIENT = IH264E_CODEC_ERROR_START + 0x1A,
+
+ /** init not done */
+ IH264E_INIT_NOT_DONE = IH264E_CODEC_ERROR_START + 0x1B,
+
+ /** invalid air mode input */
+ IH264E_INVALID_AIR_MODE = IH264E_CODEC_ERROR_START + 0x1C,
+
+ /** invalid air refresh period */
+ IH264E_INVALID_AIR_REFRESH_PERIOD = IH264E_CODEC_ERROR_START + 0x1D,
+
+ /** Insufficient memory allocated for MV Bank */
+ IH264E_INSUFFICIENT_MEM_MVBANK = IH264E_CODEC_ERROR_START + 0x1E,
+
+ /** Insufficient memory allocated for picture buffers */
+ IH264E_INSUFFICIENT_MEM_PICBUF = IH264E_CODEC_ERROR_START + 0x1F,
+
+ /** Buffer manager error */
+ IH264E_BUF_MGR_ERROR = IH264E_CODEC_ERROR_START + 0x20,
+
+ /** No free MV Bank buffer available to store current pic */
+ IH264E_NO_FREE_MVBANK = IH264E_CODEC_ERROR_START + 0x21,
+
+ /** No free picture buffer available to store current pic */
+ IH264E_NO_FREE_PICBUF = IH264E_CODEC_ERROR_START + 0x22,
+
+ /** Invalid encoder operation mode */
+ IH264E_INVALID_ENC_OPERATION_MODE = IH264E_CODEC_ERROR_START + 0x23,
+
+ /** Invalid half pel option */
+ IH264E_INVALID_HALFPEL_OPTION = IH264E_CODEC_ERROR_START + 0x24,
+
+ /** Invalid quarter pel option */
+ IH264E_INVALID_QPEL_OPTION = IH264E_CODEC_ERROR_START + 0x25,
+
+ /** Invalid fast sad option */
+ IH264E_INVALID_FAST_SAD_OPTION = IH264E_CODEC_ERROR_START + 0x26,
+
+ /** Invalid intra 4x4 option */
+ IH264E_INVALID_INTRA4x4_OPTION = IH264E_CODEC_ERROR_START + 0x27,
+
+ /** Invalid intra frame interval */
+ IH264E_INVALID_INTRA_FRAME_INTERVAL = IH264E_CODEC_ERROR_START + 0x28,
+
+ /** Invalid idr frame interval */
+ IH264E_INVALID_IDR_FRAME_INTERVAL = IH264E_CODEC_ERROR_START + 0x29,
+
+ /** Invalid buffer delay */
+ IH264E_INVALID_BUFFER_DELAY = IH264E_CODEC_ERROR_START + 0x2A,
+
+ /** Invalid num cores */
+ IH264E_INVALID_NUM_CORES = IH264E_CODEC_ERROR_START + 0x2B,
+
+ /** profile not supported */
+ IH264E_PROFILE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x2C,
+
+ /** Invalid slice param input */
+ IH264E_SLICE_PARAM_INPUT_INVALID = IH264E_CODEC_ERROR_START + 0x2D,
+
+ /** Invalid alt ref option */
+ IH264E_INVALID_ALT_REF_OPTION = IH264E_CODEC_ERROR_START + 0x2E,
+
+ /** No free picture buffer available to store recon pic */
+ IH264E_NO_FREE_RECONBUF = IH264E_CODEC_ERROR_START + 0x2F,
+
+ /** max failure error code to ensure enum is 32 bits wide */
+ IH264E_FAIL = -1,
+
+}IH264E_ERROR_T;
+
+
+#endif /* IH264E_ERROR_H_ */
diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c
new file mode 100755
index 0000000..393d6ca
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.c
@@ -0,0 +1,864 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_fmt_conv.c
+*
+* @brief
+* Contains functions for format conversion or frame copy of output buffer
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_fmt_conv_420sp_to_rgb565()
+* - ih264e_fmt_conv_420sp_to_rgba8888()
+* - ih264e_fmt_conv_420sp_to_420sp()
+* - ih264e_fmt_conv_420sp_to_420sp_swap_uv()
+* - ih264e_fmt_conv_420sp_to_420p()
+* - ih264e_fmt_conv_420p_to_420sp()
+* - ih264e_fmt_conv_422i_to_420sp()
+* - ih264e_fmt_conv()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_fmt_conv.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Packs one pixel as RGB565
+*
+* @par Description
+*  Adds the per-channel chroma offsets to the luma sample, clips each channel
+*  to 8 bits and packs the result as R[15:11] G[10:5] B[4:0].
+*
+*******************************************************************************
+*/
+static UWORD16 ih264e_pack_rgb565(WORD32 i4_luma,
+                                  WORD32 i4_r_ofst,
+                                  WORD32 i4_g_ofst,
+                                  WORD32 i4_b_ofst)
+{
+    UWORD32 u4_r = CLIP_U8(i4_luma + i4_r_ofst);
+    UWORD32 u4_g = CLIP_U8(i4_luma + i4_g_ofst);
+    UWORD32 u4_b = CLIP_U8(i4_luma + i4_b_ofst);
+
+    /* keep the 5/6/5 most significant bits of R/G/B */
+    return (UWORD16)(((u4_r >> 3) << 11) | ((u4_g >> 2) << 5) | (u4_b >> 3));
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to RGB565
+*
+* @par Description
+*  Each 2x2 group of luma samples shares one interleaved chroma pair. The
+*  chroma contribution to R, G and B is computed once per group with the
+*  COEFF1..COEFF4 weights (right shifted by 13), then added to each of the
+*  four luma samples before clipping and packing.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu2_rgb_dst
+*  Output RGB565 pointer
+*
+* @param[in] wd
+*  Width in pixels
+*
+* @param[in] ht
+*  Height in pixels
+*
+* @param[in] src_y_strd
+*  Input Y stride in bytes
+*
+* @param[in] src_uv_strd
+*  Input UV stride in bytes
+*
+* @param[in] dst_strd
+*  Output stride in 16-bit pixels
+*
+* @param[in] is_u_first
+*  Non-zero if U precedes V in every chroma pair, zero if V comes first
+*
+* @returns None
+*
+* @remarks
+*  Only (wd >> 1) * 2 columns and (ht >> 1) * 2 rows are converted; a
+*  trailing odd column or row is left untouched. Row start addresses are
+*  derived from the base pointers on every iteration, so an odd wd cannot
+*  skew the start of later rows.
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
+                                     UWORD8 *pu1_uv_src,
+                                     UWORD16 *pu2_rgb_dst,
+                                     WORD32 wd,
+                                     WORD32 ht,
+                                     WORD32 src_y_strd,
+                                     WORD32 src_uv_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 is_u_first)
+{
+    /* position of U and V inside every interleaved chroma pair */
+    const WORD32 u_pos = is_u_first ? 0 : 1;
+    const WORD32 v_pos = 1 - u_pos;
+    const WORD32 num_row_pairs = ht >> 1;
+    const WORD32 num_col_pairs = wd >> 1;
+    WORD32 row_pair, col_pair;
+
+    for (row_pair = 0; row_pair < num_row_pairs; row_pair++)
+    {
+        UWORD8 *pu1_y_top = pu1_y_src + (row_pair * 2) * src_y_strd;
+        UWORD8 *pu1_y_bot = pu1_y_top + src_y_strd;
+        UWORD8 *pu1_uv = pu1_uv_src + row_pair * src_uv_strd;
+        UWORD16 *pu2_dst_top = pu2_rgb_dst + (row_pair * 2) * dst_strd;
+        UWORD16 *pu2_dst_bot = pu2_dst_top + dst_strd;
+
+        for (col_pair = 0; col_pair < num_col_pairs; col_pair++)
+        {
+            const WORD32 x = col_pair * 2;
+            const WORD32 i4_u = pu1_uv[x + u_pos] - 128;
+            const WORD32 i4_v = pu1_uv[x + v_pos] - 128;
+
+            /* chroma offsets shared by the four pixels of this 2x2 group */
+            const WORD16 i2_b = (i4_u * COEFF4) >> 13;
+            const WORD16 i2_g = (i4_u * COEFF2 + i4_v * COEFF3) >> 13;
+            const WORD16 i2_r = (i4_v * COEFF1) >> 13;
+
+            pu2_dst_top[x] =
+                ih264e_pack_rgb565(pu1_y_top[x], i2_r, i2_g, i2_b);
+            pu2_dst_top[x + 1] =
+                ih264e_pack_rgb565(pu1_y_top[x + 1], i2_r, i2_g, i2_b);
+            pu2_dst_bot[x] =
+                ih264e_pack_rgb565(pu1_y_bot[x], i2_r, i2_g, i2_b);
+            pu2_dst_bot[x + 1] =
+                ih264e_pack_rgb565(pu1_y_bot[x + 1], i2_r, i2_g, i2_b);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Packs one pixel as a 32-bit RGB word
+*
+* @par Description
+*  Adds the per-channel chroma offsets to the luma sample, clips each channel
+*  to 8 bits and packs the result as R[23:16] G[15:8] B[7:0]. Bits [31:24]
+*  are written as zero.
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_pack_rgba8888(WORD32 i4_luma,
+                                    WORD32 i4_r_ofst,
+                                    WORD32 i4_g_ofst,
+                                    WORD32 i4_b_ofst)
+{
+    UWORD32 u4_r = CLIP_U8(i4_luma + i4_r_ofst);
+    UWORD32 u4_g = CLIP_U8(i4_luma + i4_g_ofst);
+    UWORD32 u4_b = CLIP_U8(i4_luma + i4_b_ofst);
+
+    return ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to 32-bit RGB
+*
+* @par Description
+*  Each 2x2 group of luma samples shares one interleaved chroma pair. The
+*  chroma contribution to R, G and B is computed once per group with the
+*  COEFF1..COEFF4 weights (right shifted by 13), then added to each of the
+*  four luma samples before clipping and packing.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu4_rgba_dst
+*  Output pointer, one 32-bit word per pixel
+*
+* @param[in] wd
+*  Width in pixels
+*
+* @param[in] ht
+*  Height in pixels
+*
+* @param[in] src_y_strd
+*  Input Y stride in bytes
+*
+* @param[in] src_uv_strd
+*  Input UV stride in bytes
+*
+* @param[in] dst_strd
+*  Output stride in 32-bit pixels
+*
+* @param[in] is_u_first
+*  Non-zero if U precedes V in every chroma pair, zero if V comes first
+*
+* @returns None
+*
+* @remarks
+*  Only (wd >> 1) * 2 columns and (ht >> 1) * 2 rows are converted; a
+*  trailing odd column or row is left untouched. Row start addresses are
+*  derived from the base pointers on every iteration, so an odd wd cannot
+*  skew the start of later rows.
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
+                                       UWORD8 *pu1_uv_src,
+                                       UWORD32 *pu4_rgba_dst,
+                                       WORD32 wd,
+                                       WORD32 ht,
+                                       WORD32 src_y_strd,
+                                       WORD32 src_uv_strd,
+                                       WORD32 dst_strd,
+                                       WORD32 is_u_first)
+{
+    /* position of U and V inside every interleaved chroma pair */
+    const WORD32 u_pos = is_u_first ? 0 : 1;
+    const WORD32 v_pos = 1 - u_pos;
+    const WORD32 num_row_pairs = ht >> 1;
+    const WORD32 num_col_pairs = wd >> 1;
+    WORD32 row_pair, col_pair;
+
+    for (row_pair = 0; row_pair < num_row_pairs; row_pair++)
+    {
+        UWORD8 *pu1_y_top = pu1_y_src + (row_pair * 2) * src_y_strd;
+        UWORD8 *pu1_y_bot = pu1_y_top + src_y_strd;
+        UWORD8 *pu1_uv = pu1_uv_src + row_pair * src_uv_strd;
+        UWORD32 *pu4_dst_top = pu4_rgba_dst + (row_pair * 2) * dst_strd;
+        UWORD32 *pu4_dst_bot = pu4_dst_top + dst_strd;
+
+        for (col_pair = 0; col_pair < num_col_pairs; col_pair++)
+        {
+            const WORD32 x = col_pair * 2;
+            const WORD32 i4_u = pu1_uv[x + u_pos] - 128;
+            const WORD32 i4_v = pu1_uv[x + v_pos] - 128;
+
+            /* chroma offsets shared by the four pixels of this 2x2 group */
+            const WORD16 i2_b = (i4_u * COEFF4) >> 13;
+            const WORD16 i2_g = (i4_u * COEFF2 + i4_v * COEFF3) >> 13;
+            const WORD16 i2_r = (i4_v * COEFF1) >> 13;
+
+            pu4_dst_top[x] =
+                ih264e_pack_rgba8888(pu1_y_top[x], i2_r, i2_g, i2_b);
+            pu4_dst_top[x + 1] =
+                ih264e_pack_rgba8888(pu1_y_top[x + 1], i2_r, i2_g, i2_b);
+            pu4_dst_bot[x] =
+                ih264e_pack_rgba8888(pu1_y_bot[x], i2_r, i2_g, i2_b);
+            pu4_dst_bot[x + 1] =
+                ih264e_pack_rgba8888(pu1_y_bot[x + 1], i2_r, i2_g, i2_b);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer
+*
+* @par Description
+*  Copies ht rows of wd bytes from the luma plane, then (ht >> 1) rows of
+*  wd bytes from the interleaved chroma plane. Chroma bytes are copied
+*  verbatim, so the UV/VU ordering of the source is retained.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer (UV is interleaved in the same format as that of input)
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @returns None
+*
+* @remarks A partial frame copy can be done by passing appropriate source
+*  and destination pointers together with appropriate values for wd and ht
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
+                                    UWORD8 *pu1_uv_src,
+                                    UWORD8 *pu1_y_dst,
+                                    UWORD8 *pu1_uv_dst,
+                                    WORD32 wd,
+                                    WORD32 ht,
+                                    WORD32 src_y_strd,
+                                    WORD32 src_uv_strd,
+                                    WORD32 dst_y_strd,
+                                    WORD32 dst_uv_strd)
+{
+    WORD32 rows_left;
+
+    /* luma plane: one memcpy per row */
+    for (rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_y_dst, pu1_y_src, wd);
+        pu1_y_dst += dst_y_strd;
+        pu1_y_src += src_y_strd;
+    }
+
+    /* interleaved chroma plane: half the rows, same number of bytes per row */
+    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_uv_dst, pu1_uv_src, wd);
+        pu1_uv_dst += dst_uv_strd;
+        pu1_uv_src += src_uv_strd;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer with U and V swapped
+*
+* @par Description
+*  Copies ht rows of wd bytes from the luma plane, then for (ht >> 1) chroma
+*  rows writes every pair of source bytes to the destination in reverse order
+*  (UV becomes VU and vice versa).
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input interleaved chroma pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output interleaved chroma pointer (opposite ordering to the input)
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @returns None
+*
+* @remarks Chroma bytes are processed two at a time, so for an odd wd the
+*  last pair touches byte index wd of the chroma rows.
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
+                                            UWORD8 *pu1_uv_src,
+                                            UWORD8 *pu1_y_dst,
+                                            UWORD8 *pu1_uv_dst,
+                                            WORD32 wd,
+                                            WORD32 ht,
+                                            WORD32 src_y_strd,
+                                            WORD32 src_uv_strd,
+                                            WORD32 dst_y_strd,
+                                            WORD32 dst_uv_strd)
+{
+    WORD32 rows_left;
+
+    /* luma plane: straight copy, one memcpy per row */
+    for (rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_y_dst, pu1_y_src, wd);
+        pu1_y_dst += dst_y_strd;
+        pu1_y_src += src_y_strd;
+    }
+
+    /* chroma plane: exchange the two bytes of every interleaved pair */
+    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        WORD32 pair_start;
+
+        for (pair_start = 0; pair_start < wd; pair_start += 2)
+        {
+            pu1_uv_dst[pair_start] = pu1_uv_src[pair_start + 1];
+            pu1_uv_dst[pair_start + 1] = pu1_uv_src[pair_start];
+        }
+        pu1_uv_dst += dst_uv_strd;
+        pu1_uv_src += src_uv_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to 420P
+*
+* @par Description
+*  Optionally copies the luma plane, then de-interleaves the chroma plane
+*  into separate U and V planes of (wd >> 1) x (ht >> 1) samples each.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_u_dst
+*  Output U pointer
+*
+* @param[out] pu1_v_dst
+*  Output V pointer
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output stride, used for both the U and the V plane
+*
+* @param[in] is_u_first
+*  Non-zero if U precedes V in every chroma pair, zero if V comes first
+*
+* @param[in] disable_luma_copy
+*  Luma is copied only when this is zero
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_uv_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_u_dst,
+                                   UWORD8 *pu1_v_dst,
+                                   WORD32 wd,
+                                   WORD32 ht,
+                                   WORD32 src_y_strd,
+                                   WORD32 src_uv_strd,
+                                   WORD32 dst_y_strd,
+                                   WORD32 dst_uv_strd,
+                                   WORD32 is_u_first,
+                                   WORD32 disable_luma_copy)
+{
+    /* position of U and V inside every interleaved chroma pair */
+    const WORD32 u_pos = is_u_first ? 0 : 1;
+    const WORD32 v_pos = is_u_first ? 1 : 0;
+    const WORD32 chroma_wd = wd >> 1;
+    WORD32 rows_left;
+
+    if (0 == disable_luma_copy)
+    {
+        /* luma plane: straight copy, one memcpy per row */
+        for (rows_left = ht; rows_left > 0; rows_left--)
+        {
+            memcpy(pu1_y_dst, pu1_y_src, wd);
+            pu1_y_dst += dst_y_strd;
+            pu1_y_src += src_y_strd;
+        }
+    }
+
+    /* split every interleaved chroma row into one U row and one V row */
+    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        WORD32 x;
+
+        for (x = 0; x < chroma_wd; x++)
+        {
+            pu1_u_dst[x] = pu1_uv_src[(x * 2) + u_pos];
+            pu1_v_dst[x] = pu1_uv_src[(x * 2) + v_pos];
+        }
+
+        pu1_u_dst += dst_uv_strd;
+        pu1_v_dst += dst_uv_strd;
+        pu1_uv_src += src_uv_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to perform color space conversion from 420P to 420SP
+*
+* @par Description
+*  Optionally copies the luma plane, then interleaves the separate U and V
+*  planes into a single chroma plane with U first in every pair.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_u_src
+*  Input U pointer
+*
+* @param[in] pu1_v_src
+*  Input V pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer
+*
+* @param[in] u2_height
+*  Height (note: height precedes width in the argument list)
+*
+* @param[in] u2_width
+*  Width
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_u_strd
+*  Input U stride
+*
+* @param[in] src_v_strd
+*  Input V stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @param[in] convert_uv_only
+*  Luma is copied only when this is zero
+*
+* @returns none
+*
+* @remarks
+*  - A partial frame copy can be done by passing appropriate source and
+*    destination pointers together with appropriate width and height values.
+*  - Chroma dimensions are rounded up: (u2_height + 1) >> 1 rows of
+*    (u2_width + 1) >> 1 UV pairs are written, so for an odd u2_width every
+*    chroma row occupies u2_width + 1 bytes of the destination.
+*  - Every source and destination row starts exactly one stride after the
+*    previous one, for odd widths as well.
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_u_src,
+                                   UWORD8 *pu1_v_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_uv_dst,
+                                   UWORD16 u2_height,
+                                   UWORD16 u2_width,
+                                   UWORD16 src_y_strd,
+                                   UWORD16 src_u_strd,
+                                   UWORD16 src_v_strd,
+                                   UWORD16 dst_y_strd,
+                                   UWORD16 dst_uv_strd,
+                                   UWORD32 convert_uv_only)
+{
+    UWORD32 row, col;
+    UWORD32 u4_height_uv, u4_width_uv;
+
+    if (0 == convert_uv_only)
+    {
+        /* Copy Y buffer, one row at a time */
+        for (row = 0; row < u2_height; row++)
+        {
+            memcpy((void *) pu1_y_dst, (void *) pu1_y_src, u2_width);
+            pu1_y_dst += dst_y_strd;
+            pu1_y_src += src_y_strd;
+        }
+    }
+
+    /* Interleave Cb and Cr buffers; chroma dimensions are rounded up */
+    u4_height_uv = ((UWORD32) u2_height + 1) >> 1;
+    u4_width_uv = ((UWORD32) u2_width + 1) >> 1;
+
+    for (row = 0; row < u4_height_uv; row++)
+    {
+        for (col = 0; col < u4_width_uv; col++)
+        {
+            pu1_uv_dst[(col * 2) + 0] = pu1_u_src[col];
+            pu1_uv_dst[(col * 2) + 1] = pu1_v_src[col];
+        }
+
+        /* advance by whole strides so that the 2 * u4_width_uv bytes just
+         * written (u2_width + 1 for odd widths) cannot skew later rows */
+        pu1_uv_dst += dst_uv_strd;
+        pu1_u_src += src_u_strd;
+        pu1_v_src += src_v_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert 422 interleaved to 420sp
+*
+* @par Description
+*  Processes the input two rows at a time. Within every group of four input
+*  bytes, bytes 1 and 3 are taken as luma and, on the even row only, bytes 0
+*  and 2 are taken as the two chroma samples. Chroma of odd rows is dropped.
+*
+* @param[out] pu1_y_buf
+*  Output Y pointer
+*
+* @param[out] pu1_u_buf
+*  Output u pointer
+*
+* @param[out] pu1_v_buf
+*  Output V pointer
+*
+* @param[in] pu1_422i_buf
+*  Input 422i pointer
+*
+* @param[in] u4_y_width
+*  Width of Y component
+*
+* @param[in] u4_y_height
+*  Height of Y component
+*
+* @param[in] u4_y_stride
+*  Stride of pu1_y_buf
+*
+* @param[in] u4_u_stride
+*  Stride of pu1_u_buf
+*
+* @param[in] u4_v_stride
+*  Stride of pu1_v_buf
+*
+* @param[in] u4_422i_stride
+*  Stride of pu1_422i_buf; consecutive input rows are taken to be
+*  (u4_422i_stride << 1) bytes apart
+*
+* @returns None
+*
+* @remarks For conversion
+*  pu1_v_buf = pu1_u_buf+1
+*  u4_u_stride = u4_v_stride
+*
+*  Chroma samples are stored at even byte offsets of pu1_u_buf and
+*  pu1_v_buf, which yields an interleaved plane under the conditions above.
+*
+*  The extra parameters are for maintaining API with assembly function
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf,
+                                   UWORD8 *pu1_u_buf,
+                                   UWORD8 *pu1_v_buf,
+                                   UWORD8 *pu1_422i_buf,
+                                   WORD32 u4_y_width,
+                                   WORD32 u4_y_height,
+                                   WORD32 u4_y_stride,
+                                   WORD32 u4_u_stride,
+                                   WORD32 u4_v_stride,
+                                   WORD32 u4_422i_stride)
+{
+    UWORD8 *pu1_src_even = pu1_422i_buf;
+    UWORD8 *pu1_src_odd = pu1_422i_buf + (u4_422i_stride << 1);
+    UWORD8 *pu1_luma_even = pu1_y_buf;
+    UWORD8 *pu1_luma_odd = pu1_y_buf + u4_y_stride;
+    WORD32 row;
+
+    for (row = 0; row < u4_y_height; row += 2)
+    {
+        WORD32 x;
+
+        /* x indexes luma samples; two pixels (four input bytes) per step */
+        for (x = 0; x < u4_y_width; x += 2)
+        {
+            const WORD32 in = x << 1;
+            UWORD8 u1_cb = pu1_src_even[in];
+            UWORD8 u1_cr = pu1_src_even[in + 2];
+
+            pu1_u_buf[x] = u1_cb;
+            pu1_v_buf[x] = u1_cr;
+
+            pu1_luma_even[x] = pu1_src_even[in + 1];
+            pu1_luma_even[x + 1] = pu1_src_even[in + 3];
+
+            pu1_luma_odd[x] = pu1_src_odd[in + 1];
+            pu1_luma_odd[x + 1] = pu1_src_odd[in + 3];
+        }
+
+        /* step two rows in the input and in luma, one row in chroma */
+        pu1_src_even += (u4_422i_stride << 2);
+        pu1_src_odd += (u4_422i_stride << 2);
+
+        pu1_luma_even += (u4_y_stride << 1);
+        pu1_luma_odd += (u4_y_stride << 1);
+
+        pu1_u_buf += u4_u_stride;
+        pu1_v_buf += u4_v_stride;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for format conversion or frame copy
+*
+* @par Description
+*  Copies or converts num_rows rows, starting at cur_row, from the picture
+*  buffer ps_pic into the caller supplied destination planes. The source
+*  stride is ps_codec->i4_rec_strd and the copied width is
+*  ps_codec->s_cfg.u4_disp_wd. The conversion is selected by
+*  ps_codec->s_cfg.e_recon_color_fmt.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_pic
+*  Picture buffer whose pu1_luma / pu1_chroma planes are the source
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_u_dst
+*  Output U/UV pointer ( UV is interleaved in the same format as that of input)
+*
+* @param[out] pu1_v_dst
+*  Output V pointer ( used in 420P output case)
+*
+* @param[in] u4_dst_y_strd
+*  Stride of destination Y buffer
+*
+* @param[in] u4_dst_uv_strd
+*  Stride of destination U/V buffer
+*
+* @param[in] cur_row
+*  First luma row to process; chroma starts at row cur_row / 2
+*
+* @param[in] num_rows
+*  Number of luma rows to process; nothing is done when this is 0
+*
+* @returns error status (always IH264E_SUCCESS in this implementation)
+*
+* @remarks
+*  Assumes that the stride of U and V buffers are same.
+*  This is correct in most cases
+*  If a case comes where this is not true we need to modify the fmt conversion
+*  functions called inside also
+*
+*  Recon color formats other than 420SP_UV, 420SP_VU and 420P are not
+*  handled: nothing is written and IH264E_SUCCESS is returned.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
+                               pic_buf_t *ps_pic,
+                               UWORD8 *pu1_y_dst,
+                               UWORD8 *pu1_u_dst,
+                               UWORD8 *pu1_v_dst,
+                               UWORD32 u4_dst_y_strd,
+                               UWORD32 u4_dst_uv_strd,
+                               WORD32 cur_row,
+                               WORD32 num_rows)
+{
+    IH264E_ERROR_T ret = IH264E_SUCCESS;
+    UWORD8 *pu1_y_src, *pu1_uv_src;
+    UWORD8 *pu1_y_dst_tmp, *pu1_u_dst_tmp;
+    WORD32 is_u_first;
+    WORD32 wd;
+
+    if (0 == num_rows)
+        return ret;
+
+    wd = ps_codec->s_cfg.u4_disp_wd;
+    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
+
+    /* start of the requested rows in the source picture */
+    pu1_y_src = ps_pic->pu1_luma + cur_row * ps_codec->i4_rec_strd;
+    pu1_uv_src = ps_pic->pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd;
+
+    /* start of the requested rows in the destination planes */
+    pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
+    pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+    if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
+        (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
+    {
+        /* NOTE(review): both semi-planar orderings take the plain copy path,
+         * i.e. chroma keeps the ordering of the source buffer. An earlier
+         * comment here said VU output should swap U and V - confirm whether
+         * ih264e_fmt_conv_420sp_to_420sp_swap_uv() is needed when the recon
+         * ordering differs from the source ordering. */
+        ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                       pu1_u_dst_tmp, wd, num_rows,
+                                       ps_codec->i4_rec_strd,
+                                       ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                       u4_dst_uv_strd);
+    }
+    else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
+    {
+        /* the V plane is only meaningful for planar output, so its row
+         * pointer is derived here rather than unconditionally */
+        UWORD8 *pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+        ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                      pu1_u_dst_tmp, pu1_v_dst_tmp, wd,
+                                      num_rows, ps_codec->i4_rec_strd,
+                                      ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                      u4_dst_uv_strd, is_u_first, 0);
+    }
+
+    return(ret);
+}
+
diff --git a/encoder/ih264e_fmt_conv.h b/encoder/ih264e_fmt_conv.h
new file mode 100755
index 0000000..6b33bf0
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.h
@@ -0,0 +1,142 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_fmt_conv.h
+*
+* @brief
+* The file contains extern declarations of color space conversion routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_FMT_CONV_H_
+#define IH264E_FMT_CONV_H_
+
+/*
+ * YUV to RGB conversion weights used by the 420SP to RGB routines.
+ * Products with these weights are right shifted by 13, i.e. each value is
+ * the real coefficient multiplied by 8192:
+ *   COEFF1 : V contribution to R
+ *   COEFF2 : U contribution to G
+ *   COEFF3 : V contribution to G
+ *   COEFF4 : U contribution to B
+ * Negative values are parenthesised so the macros expand safely inside any
+ * expression.
+ */
+#define COEFF1 13073
+#define COEFF2 (-3207)
+#define COEFF3 (-6664)
+#define COEFF4 16530
+
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
+ pic_buf_t *ps_pic,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_u_dst,
+ UWORD8 *pu1_v_dst,
+ UWORD32 u4_dst_y_strd,
+ UWORD32 u4_dst_uv_strd,
+ WORD32 cur_row,
+ WORD32 num_rows);
+
+typedef void ih264e_fmt_conv_420sp_to_rgba8888_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD32 *pu4_rgba_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_strd,
+ WORD32 is_u_first);
+
+typedef void ih264e_fmt_conv_420sp_to_rgb565_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD16 *pu2_rgb_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_strd,
+ WORD32 is_u_first);
+
+typedef void ih264e_fmt_conv_420sp_to_420sp_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_uv_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd);
+
+typedef void ih264e_fmt_conv_420sp_to_420p_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_u_dst,
+ UWORD8 *pu1_v_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd,
+ WORD32 is_u_first,
+ WORD32 disable_luma_copy);
+
+typedef void ih264e_fmt_conv_420p_to_420sp_ft(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+ UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+ UWORD16 u2_height, UWORD16 u2_width, UWORD16 src_y_strd,
+ UWORD16 src_u_strd, UWORD16 src_v_strd,
+ UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+ UWORD32 convert_uv_only);
+
+typedef void ih264e_fmt_conv_422i_to_420sp_ft(UWORD8 *pu1_y_buf,UWORD8 *pu1_u_buf,UWORD8 *pu1_v_buf,
+ UWORD8 *pu1_422i_buf,
+ WORD32 u4_y_width,WORD32 u4_y_height,
+ WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride,
+ WORD32 u4_422i_stride);
+
+
+/* C function declarations */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888;
+ih264e_fmt_conv_420sp_to_rgb565_ft ih264e_fmt_conv_420sp_to_rgb565;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p;
+ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp;
+ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp;
+
+/* A9Q function declarations */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9q;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9q;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9q;
+ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp_a9q;
+ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp_a9q;
+
+
+/* A9A function declarations */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9a;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9a;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9a;
+
+/* SSSE3 function declarations */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_ssse31;
+
+/* SSE4 function declarations */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_sse42;
+
+#endif /* IH264E_FMT_CONV_H_ */
diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c
new file mode 100755
index 0000000..65f943a
--- /dev/null
+++ b/encoder/ih264e_function_selector_generic.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_generic.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_generic
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the function pointers of the codec context with the
+* generic (architecture independent) implementations
+*
+* @par Description: the routine fills the intra prediction, transform and
+* quantization, core coding, deblocking, mb syntax writing, padding, inter
+* prediction, sad, memory handling, intra mode evaluation, colour conversion
+* and half pel function pointers of the codec context. The motion estimation
+* context of every process context (MAX_PROCESS_CTXT of them) is initialized
+* as well.
+*
+* @param[in] ps_codec
+* Codec context pointer. It is dereferenced unconditionally, so it must be
+* a valid pointer
+*
+* @returns none
+*
+* @remarks luma_energy_compaction[2] is not assigned by this routine
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec)
+{
+    WORD32 i;
+
+    /* Intra pred leaf level functions, luma 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;
+
+    /* Intra pred leaf level functions, luma 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;
+
+    /* Intra pred leaf level functions, luma 8x8. Slot 1 (horizontal) was
+     * previously left unassigned; it is filled here so that the table has no
+     * hole and mirrors the 4x4 table above */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;
+
+    /* Intra pred leaf level functions, chroma 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;
+
+    /* Forward transform and quantization */
+    ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv;
+
+    /* Inverse quantization, inverse transform and reconstruction */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc;
+
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
+    ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv;
+    ps_codec->pf_interleave_copy = ih264_interleave_copy;
+
+    /* Luma core coding.
+     * NOTE(review): slot 2 is intentionally left as it was (unassigned) since
+     * no matching routine is visible here - confirm that it is never used */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* Chroma core coding */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* Luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
+
+    /* Chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
+
+    /* MB syntax layer writers, one per slice type */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* Padding functions */
+    ps_codec->pf_pad_top = ih264_pad_top;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
+
+    /* Inter pred leaf level functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma;
+
+    /* SAD functions held at codec level */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
+
+    /* Memory handling operations */
+    ps_codec->pf_mem_cpy = ih264_memcpy;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
+    ps_codec->pf_mem_set = ih264_memset;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8;
+
+    /* SAD functions held in the ME context of every process context */
+    for (i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        me_ctxt_t *ps_me_ctxt = &ps_codec->as_process[i].s_me_ctxt;
+
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
+    }
+
+    /* Intra mode evaluation - encoder level functions */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* Colour space / format conversion */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation functions - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
+}
diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c
new file mode 100755
index 0000000..e2b46a4
--- /dev/null
+++ b/encoder/ih264e_globals.c
@@ -0,0 +1,261 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_globals.c
+*
+* @brief
+* Contains definitions of global variables used across the encoder
+*
+* @author
+* ittiam
+*
+* @par List of functions
+*
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Lagrangian multiplier (lambda) for varying quantizer scales, used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.) If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+* NOTE(review): the stored entries look like the rounded sqrt(lambda) form
+* (e.g. 83 at qp 51, where lambda itself would be about 6963) - confirm.
+******************************************************************************
+*/
+const UWORD16 gu2_qp_lambda[52]=
+{
+ 0, 0, 0, 0, 0, 0, 0, 1, /* qp 0 .. 7 */
+ 1, 1, 1, 1, 1, 1, 1, 1, /* qp 8 .. 15 */
+ 1, 2, 2, 2, 2, 3, 3, 3, /* qp 16 .. 23 */
+ 4, 4, 5, 5, 6, 7, 7, 8, /* qp 24 .. 31 */
+ 9, 10, 12, 13, 15, 17, 19, 21, /* qp 32 .. 39 */
+ 23, 26, 30, 33, 37, 42, 47, 53, /* qp 40 .. 47 */
+ 59, 66, 74, 83, /* qp 48 .. 51 */
+};
+
+/**
+******************************************************************************
+* @brief Lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6)
+* NOTE(review): entries for qp 12 and above match the rounded formula, while
+* every entry below qp 12 is 0 although the formula would round to 1 for some
+* of them (e.g. qp 11) - confirm that this is intended.
+******************************************************************************
+*/
+const UWORD8 gu1_qp0[52]=
+{
+ 0, 0, 0, 0, 0, 0, 0, 0, /* qp 0 .. 7 */
+ 0, 0, 0, 0, 1, 1, 1, 1, /* qp 8 .. 15 */
+ 2, 2, 2, 2, 3, 3, 3, 4, /* qp 16 .. 23 */
+ 4, 4, 5, 6, 6, 7, 8, 9, /* qp 24 .. 31 */
+ 10, 11, 13, 14, 16, 18, 20, 23, /* qp 32 .. 39 */
+ 25, 29, 32, 36, 40, 45, 51, 57, /* qp 40 .. 47 */
+ 64, 72, 81, 91, /* qp 48 .. 51 */
+};
+
+/**
+******************************************************************************
+* @brief unsigned Exp-Golomb code lengths, used to assign cost to a
+* coefficient of mb types.
+* input : Integer
+* output : codelength
+* @remarks Refer sec. 9-1 in h264 specification. Every entry equals
+* 2 * floor(log2(value + 1)) + 1 for value in 0 .. 31.
+******************************************************************************
+*/
+const UWORD8 u1_uev_codelength[32] =
+{
+ 1, 3, 3, 5, 5, 5, 5, 7, /* value 0 .. 7 */
+ 7, 7, 7, 7, 7, 7, 7, 9, /* value 8 .. 15 */
+ 9, 9, 9, 9, 9, 9, 9, 9, /* value 16 .. 23 */
+ 9, 9, 9, 9, 9, 9, 9, 11, /* value 24 .. 31 */
+};
+
+
+/**
+******************************************************************************
+* @brief Look up table to assign cost to a coefficient of a residual block
+* based on its surrounding coefficients
+* input : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+const UWORD8 gu1_coeff_cost[6] =
+{
+ 3, 2, 2, 1, 1, 1 /* cost for input 0 .. 5 */
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 block
+* input : scan index
+* output : scan location
+* @remarks The stored locations are consistent with a row stride of 4
+* (location = 4 * row + column), e.g. the third entry, 4, is row 1 column 0
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma AC block
+* input : scan index
+* output : scan location
+* @remarks Same sequence as gu1_luma_scan_order without its leading entry
+* (location 0), hence 15 entries
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order[15] =
+{
+ 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 dc block
+* input : scan index
+* output : scan location
+* @remarks The contents are identical to gu1_luma_scan_order
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order_dc[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma 2x2 dc block
+* input : scan index
+* output : scan location
+* @remarks Identity mapping: scan index and scan location are the same
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order_dc[4] =
+{
+ 0, 1, 2, 3
+};
+
+/**
+******************************************************************************
+* @brief choice of motion vectors to be used during mv prediction
+* input : formatted reference idx comparison metric
+* output : mv prediction has to be median or a simple straightforward
+* selection from neighbors.
+* @remarks If only one of the candidate blocks has a reference frame equal to
+ the current block then use the same block as the final predictor. A simple
+ look up table to assist this mv prediction condition.
+ Only inputs 1, 2 and 4 (a single bit set) map to a non negative output
+ (0, 1 and 2 respectively); every other input maps to -1.
+ NOTE(review): presumably -1 selects the median predictor and 0/1/2 select
+ one neighbor - confirm against the caller.
+******************************************************************************
+*/
+const WORD8 gi1_mv_pred_condition[8] =
+{
+ -1, 0, 1, -1, 2, -1, -1, -1
+};
+
+/**
+******************************************************************************
+* @brief maps the h264 quantizer to the mpeg2 quantizer scale
+* input : h264 qp
+* output : equivalent mpeg 2 qp
+* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+* NOTE(review): the entries do not follow this formula literally (qp 12 maps
+* to 3 where the formula gives 2); they look closer to a mapping in which
+* qp 12 corresponds to a qscale of about 2.5 - confirm.
+******************************************************************************
+*/
+const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, /* qp 0 .. 7 */
+ 2, 2, 2, 2, 3, 3, 3, 4, /* qp 8 .. 15 */
+ 4, 4, 5, 6, 6, 7, 8, 9, /* qp 16 .. 23 */
+ 10, 11, 13, 14, 16, 18, 20, 23, /* qp 24 .. 31 */
+ 25, 29, 32, 36, 40, 45, 51, 57, /* qp 32 .. 39 */
+ 64, 72, 81, 91, 102, 114, 128, 144, /* qp 40 .. 47 */
+ 161, 181, 203, 228, /* qp 48 .. 51 */
+};
+
+/**
+******************************************************************************
+* @brief maps the mpeg2 quantizer to the h264 quantizer scale
+* input : mpeg2 qp
+* output : equivalent h264qp
+* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+* k = 0 (for intra) k = sign(QFij)
+* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+* Excluding the portion of R(QP%6,i,j) that is due to
+* the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+* correspond to dequant values of
+* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+* (the actual mapping seems to be to MPEG2 qscale of 2.5),
+* and the fact that the effective h264 quantizer changes by
+* a factor of 2 for every 6 steps, the following mapping is
+* obtained:
+* h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+*
+* Note that the quant matrix entry assumed for the above
+* equality is 16. Hence when the mpeg2 quant matrix entries
+* are all 16, this lookup can be used as is (which is the
+* default inter quant matrix in mpeg-2).
+*
+* NOTE(review): the stored entries match the rounded value of
+* 6*(log2(mpeg2qscale/2.5)) + 12 (e.g. 10 at qscale 2 and 16 at
+* qscale 4) rather than the formula with a divisor of 2 - confirm.
+******************************************************************************
+*/
+const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] =
+{
+ 0, 4, 10, 14, 16, 18, 20, 21, 22, 23, 24, 25, 26, 26, 27, 27, /* qscale 0 .. 15 */
+ 28, 29, 29, 29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34, /* qscale 16 .. 31 */
+ 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37, /* qscale 32 .. 47 */
+ 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, /* qscale 48 .. 63 */
+ 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, /* qscale 64 .. 79 */
+ 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, /* qscale 80 .. 95 */
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, /* qscale 96 .. 111 */
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, /* qscale 112 .. 127 */
+ 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, /* qscale 128 .. 143 */
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, /* qscale 144 .. 159 */
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, /* qscale 160 .. 175 */
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, /* qscale 176 .. 191 */
+};
+
diff --git a/encoder/ih264e_globals.h b/encoder/ih264e_globals.h
new file mode 100755
index 0000000..4c3de23
--- /dev/null
+++ b/encoder/ih264e_globals.h
@@ -0,0 +1,192 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_globals.h
+*
+* @brief
+* Contains declarations of global variables for H264 encoder
+*
+* @author
+* Ittiam
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_GLOBALS_H_
+#define IH264E_GLOBALS_H_
+
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Table of lambda values for varying quantizer scales, used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.) If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+******************************************************************************
+*/
+extern const UWORD16 gu2_qp_lambda[52];
+
+/**
+******************************************************************************
+* @brief Table of lambda values for varying quantizer scales, used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6). When Lagrangian multiplier is disabled
+* the same constant is used across mode decision and mv decisions.
+******************************************************************************
+*/
+extern const UWORD8 gu1_qp0[52];
+
+/**
+******************************************************************************
+* @brief unsigned Exp-Golomb code lengths, used to assign cost to a
+* coefficient of mb types.
+* input : Integer
+* output : codelength
+* @remarks Refer sec. 9-1 in h264 specification
+******************************************************************************
+*/
+extern const UWORD8 u1_uev_codelength[32];
+
+/**
+******************************************************************************
+* @brief Look up table to assign cost to a coefficient of a residual block
+* based on its surrounding coefficients
+* input : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+extern const UWORD8 gu1_coeff_cost[6];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+* NOTE(review): the values defined in ih264e_globals.c (0, 1, 4, 8, ...) are
+* consistent with a row stride of 4, not 16 - confirm which is intended.
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order[16];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma AC block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 32
+* NOTE(review): the values defined in ih264e_globals.c (1, 4, 8, 5, ...) are
+* consistent with a row stride of 4, not 32 - confirm which is intended.
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order[15];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 dc block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+* NOTE(review): the values defined in ih264e_globals.c (0, 1, 4, 8, ...) are
+* consistent with a row stride of 4, not 16 - confirm which is intended.
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order_dc[16];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma 2x2 dc block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+* NOTE(review): the values defined in ih264e_globals.c are simply 0, 1, 2, 3;
+* confirm whether the stride remark applies to this table.
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order_dc[4];
+
+
+/**
+******************************************************************************
+* @brief choice of motion vectors to be used during mv prediction
+* input : formatted reference idx comparison metric
+* output : mv prediction has to be median or a simple straightforward
+* selection from neighbors.
+* @remarks If only one of the candidate blocks has a reference frame equal to
+ the current block then use the same block as the final predictor. A simple
+ look up table to assist this mv prediction condition
+******************************************************************************
+*/
+extern const WORD8 gi1_mv_pred_condition[8];
+
+
+/**
+******************************************************************************
+* @brief maps the h264 quantizer to the mpeg2 quantizer scale
+* input : h264 qp
+* output : equivalent mpeg 2 qp
+* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+******************************************************************************
+*/
+extern const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM];
+
+/**
+******************************************************************************
+* @brief maps the mpeg2 quantizer to the h264 quantizer scale
+* input : mpeg2 qp
+* output : equivalent h264 qp
+* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+* k = 0 (for intra) k = sign(QFij)
+* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+* Excluding the portion of R(QP%6,i,j) that is due to
+* the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+* correspond to dequant values of
+* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+* (the actual mapping seems to be to MPEG2 qscale of 2.5),
+* and the fact that the effective h264 quantizer changes by
+* a factor of 2 for every 6 steps, the following mapping is
+* obtained:
+* h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+*
+* Note that the quant matrix entry assumed for the above
+* equality is 16. Hence when the mpeg2 quant matrix entries
+* are all 16, this lookup can be used as is (which is the
+* default inter quant matrix in mpeg-2).
+******************************************************************************
+*/
+extern const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM];
+
+
+#endif /* IH264E_GLOBALS_H_ */
diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c
new file mode 100755
index 0000000..cb475a1
--- /dev/null
+++ b/encoder/ih264e_half_pel.c
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_half_pel.c
+*
+* @brief
+* This file contains functions that are used for computing subpixel planes
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_sixtapfilter_horz
+* - ih264e_sixtap_filter_2dvh_vert
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Interprediction luma filter for horizontal input (Filter run for width = 17
+*  and height = 16, i.e. HP_PL_WD columns by MB_SIZE rows)
+*
+* @par Description:
+*  Applies a 6 tap horizontal filter. The output sample at column x is computed
+*  from source columns x - 2 .. x + 3 of the same row, rounded with
+*  (sum + 16) >> 5 and clipped to 8 bits.
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source. Columns -2 .. HP_PL_WD + 2 of every row are
+*  read, so the caller must provide that much margin around the plane
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; HP_PL_WD samples are written per row
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @returns
+*  none
+*
+* @remarks
+*  Plane dimensions and loop counters are signed, so the row advance never
+*  mixes the signed strides with unsigned arithmetic
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd)
+{
+    /* width and height of interpolation */
+    const WORD32 wd = HP_PL_WD;
+    const WORD32 ht = MB_SIZE;
+    WORD32 row, col;
+
+    /* the filter window of output column x starts two samples to its left */
+    pu1_src -= 2;
+
+    for (row = 0; row < ht; row++)
+    {
+        for (col = 0; col < wd; col++)
+        {
+            /* six consecutive source samples feeding this output sample */
+            const UWORD8 *pu1_win = pu1_src + col;
+            WORD32 i4_sum;
+
+            i4_sum = ih264_g_six_tap[0] * (pu1_win[0] + pu1_win[5])
+                   + ih264_g_six_tap[1] * (pu1_win[1] + pu1_win[4])
+                   + ih264_g_six_tap[2] * (pu1_win[2] + pu1_win[3]);
+
+            /* round and scale down before clipping to 8 bits */
+            i4_sum = (i4_sum + 16) >> 5;
+
+            pu1_dst[col] = CLIP_U8(i4_sum);
+        }
+
+        /* move to the next row; no width correction is needed because the
+         * row pointers are not advanced inside the column loop */
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function implements a two stage cascaded six tap filter. It applies
+*  the six tap filter in the vertical direction on the predictor values,
+*  followed by applying the same filter in the horizontal direction on the
+*  output of the first stage. The six tap filtering operation is described in
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+*  width = 17 and height = 17, i.e. HP_PL_WD columns by HP_PL_HT rows)
+*
+* @par Description:
+*  The function interpolates the predictors first in the vertical direction and
+*  then in the horizontal direction to output the (1/2,1/2). The unscaled
+*  output of the first stage is stored in the buffer pointed to by pi4_pred
+*  in 32 bit precision.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source. Rows -2 .. +3 relative to every processed row
+*  and columns -2 .. HP_PL_WD + 2 are read
+*
+* @param[out] pu1_dst1
+*  UWORD8 pointer to the destination (vertical filtered output, i.e. the first
+*  stage result rounded with (x + 16) >> 5 and clipped)
+*
+* @param[out] pu1_dst2
+*  UWORD8 pointer to the destination (output after applying the horizontal
+*  filter to the intermediate vertical output, rounded with (x + 512) >> 10
+*  and clipped)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride, used for both pu1_dst1 and pu1_dst2
+*
+* @param[in] pi4_pred
+*  Pointer to the 32 bit intermediate buffer. Columns -2 .. HP_PL_WD + 2 of
+*  every row are written, so the pointer must have at least two valid
+*  elements in front of it
+*
+* @param[in] i4_pred_strd
+*  integer stride of pi4_pred
+*
+* @returns
+*  none
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+                                    UWORD8 *pu1_dst1,
+                                    UWORD8 *pu1_dst2,
+                                    WORD32 src_strd,
+                                    WORD32 dst_strd,
+                                    WORD32 *pi4_pred,
+                                    WORD32 i4_pred_strd)
+{
+    const WORD32 ht = HP_PL_HT;
+    const WORD32 wd = HP_PL_WD;
+    WORD32 *pi4_row;
+    WORD32 row, col;
+    WORD32 tmp;
+
+    /* Stage 1: vertical filter, kept unscaled. Five extra columns (two on the
+     * left, three on the right) are produced because the horizontal stage
+     * needs them as filter support */
+    pi4_row = pi4_pred;
+    for (row = 0; row < ht; row++)
+    {
+        for (col = -2; col < wd + 3; col++)
+        {
+            pi4_row[col] =
+                ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) +
+                ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) +
+                ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]);
+        }
+
+        pu1_src += src_strd;
+        pi4_row += i4_pred_strd;
+    }
+
+    /* Stage 2: horizontal filter over the intermediate plane */
+    pi4_row = pi4_pred;
+    for (row = 0; row < ht; row++)
+    {
+        for (col = 0; col < wd; col++)
+        {
+            /* the outer tap coefficient is applied explicitly, exactly as in
+             * stage 1, instead of assuming that it equals 1 */
+            tmp = ih264_g_six_tap[0] * (pi4_row[col - 2] + pi4_row[col + 3]) +
+                  ih264_g_six_tap[1] * (pi4_row[col - 1] + pi4_row[col + 2]) +
+                  ih264_g_six_tap[2] * (pi4_row[col] + pi4_row[col + 1]);
+
+            /* two cascaded filters: round and scale by 2^10 */
+            tmp = (tmp + 512) >> 10;
+            pu1_dst2[col] = CLIP_U8(tmp);
+
+            /* vertical only result: round and scale by 2^5 */
+            tmp = (pi4_row[col] + 16) >> 5;
+            pu1_dst1[col] = CLIP_U8(tmp);
+        }
+
+        pi4_row += i4_pred_strd;
+        pu1_dst2 += dst_strd;
+        pu1_dst1 += dst_strd;
+    }
+}
+
diff --git a/encoder/ih264e_half_pel.h b/encoder/ih264e_half_pel.h
new file mode 100755
index 0000000..92bd37f
--- /dev/null
+++ b/encoder/ih264e_half_pel.h
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_half_pel.h
+ *
+ * @brief
+ * Contains extern declarations of subpel functions used by the encoder
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_HALF_PEL_H_
+#define IH264E_HALF_PEL_H_
+
+/*****************************************************************************/
+/* Global constants */
+/*****************************************************************************/
+/*
+ * Dimensions of subpel plane buffers (parenthesized: safe inside expressions)
+ */
+#define HP_PL_WD (MB_SIZE + 1)
+#define HP_PL_HT (MB_SIZE + 1)
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Inter prediction luma filter, horizontal direction (filter run for
+* width = 17 and height = 16)
+*
+* @par Description:
+* Applies a 6 tap horizontal filter. The output is clipped to 8 bits.
+* See sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @returns
+* none
+*
+* @remarks
+* The typedef below is the common signature shared by the C reference
+* function and its platform specific variants declared after it.
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtapfilter_horz_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd);
+
+/* C reference implementation */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz;
+
+/* arm assembly */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_a9q;
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_av8;
+
+/* x86 intrinsics */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_ssse3;
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It applies
+* the six tap filter in the vertical direction on the predictor values,
+* followed by applying the same filter in the horizontal direction on the
+* output of the first stage. The six tap filtering operation is described in
+* sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+* width = 17 and height = 17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction and
+* then in the horizontal direction to output the (1/2,1/2) samples. The output
+* of the first stage of the filter is stored, unscaled, in the WORD32 buffer
+* pointed to by pi4_pred (only in C).
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+* UWORD8 pointer to the destination (vertical filtered output)
+*
+* @param[out] pu1_dst2
+* UWORD8 pointer to the destination (output after applying horizontal filter
+* to the intermediate vertical output)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride, used for both pu1_dst1 and pu1_dst2
+*
+* @param[in] pi4_pred
+* Pointer to WORD32 intermediate buffer (used only in c)
+*
+* @param[in] i4_pred_strd
+* integer stride of the intermediate buffer pi4_pred
+*
+* @returns
+* none
+*
+* @remarks
+* The C implementation writes pi4_pred at column offsets -2 to width + 2 of
+* every row, so the pointer passed in must leave room on both sides.
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtap_filter_2dvh_vert_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst1,
+ UWORD8 *pu1_dst2,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 *pi4_pred,
+ WORD32 i4_pred_strd);
+
+/* C reference implementation */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert;
+
+/* arm assembly */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_a9q;
+
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_av8;
+
+/* x86 intrinsics */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_ssse3;
+
+#endif /* IH264E_HALF_PEL_H_ */
diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c
new file mode 100755
index 0000000..b41d717
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.c
@@ -0,0 +1,2296 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval.c
+*
+* @brief
+* This file contains definitions of routines that perform rate distortion
+* analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_derive_nghbr_avbl_of_mbs()
+* - ih264e_derive_ngbr_avbl_of_mb_partitions()
+* - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
+* - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra16x16_modes()
+* - ih264e_evaluate_intra4x4_modes()
+* - ih264e_evaluate_intra_chroma_modes()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  derives the availability of the macroblocks neighboring the current one
+*
+* @par Description
+*  Marks the left (A), top (B), top right (C) and top left (D) macroblocks as
+*  available (1) or not available (0) in ps_proc->ps_ngbr_avbl. A neighbor is
+*  reported available only when it is not cut off by the first row, first
+*  column or last column of the frame and its slice index equals the slice
+*  index of the current macroblock.
+*
+* @param[in] ps_proc
+*  pointer to proc context (handle)
+*
+* @remarks Based on section 6.4.5 in H264 spec
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
+{
+    /* position of the current mb and frame width, all in mb units */
+    WORD32 i4_col = ps_proc->i4_mb_x;
+    WORD32 i4_row = ps_proc->i4_mb_y;
+    WORD32 i4_mbs_per_row = ps_proc->i4_wd_mbs;
+
+    /* slice index entry of the current mb; neighbors are indexed relative to it */
+    UWORD8 *pu1_cur = ps_proc->pu1_slice_idx + ((i4_row * i4_mbs_per_row) + i4_col);
+
+    /* destination of the availability flags */
+    block_neighbors_t *ps_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* which neighbors exist at all for this position in the frame */
+    WORD32 i4_has_left = (i4_col != 0);
+    WORD32 i4_has_top = (i4_row != 0);
+    WORD32 i4_has_right = (i4_col != i4_mbs_per_row - 1);
+
+    /**********************************************************************/
+    /* A neighbor that exists is available only if it belongs to the same */
+    /* slice as the current macroblock. The slice index of a neighbor is  */
+    /* read only after its existence has been established.                */
+    /**********************************************************************/
+
+    /* left macroblock (A) */
+    ps_avbl->u1_mb_a = (i4_has_left && (pu1_cur[-1] == pu1_cur[0])) ? 1 : 0;
+
+    /* top macroblock (B) */
+    ps_avbl->u1_mb_b = (i4_has_top && (pu1_cur[-i4_mbs_per_row] == pu1_cur[0])) ? 1 : 0;
+
+    /* top right macroblock (C) */
+    ps_avbl->u1_mb_c = (i4_has_right && i4_has_top &&
+                        (pu1_cur[1 - i4_mbs_per_row] == pu1_cur[0])) ? 1 : 0;
+
+    /* top left macroblock (D) */
+    ps_avbl->u1_mb_d = (i4_has_left && i4_has_top &&
+                        (pu1_cur[-i4_mbs_per_row - 1] == pu1_cur[0])) ? 1 : 0;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for subblock/partition availability
+*
+* @par Description
+*  Maps a pel position, given relative to the top left pel of the current
+*  macroblock, to the availability of the macroblock that contains it.
+*
+* @param[in] ps_ngbr_avbl
+*  availability of the left (a), top (b), top right (c) and top left (d)
+*  neighbors of the current macroblock
+*
+* @param[in] i1_pel_pos_x
+*  column position of the pel wrt the current macroblock. 0-15 lie inside the
+*  macroblock, negative values to its left, 16 and above to its right
+*
+* @param[in] i1_pel_pos_y
+*  row position of the pel wrt the current macroblock. 0-15 lie inside the
+*  macroblock, negative values above it, 16 and above below it
+*
+* @remarks Assumptions: before calling this function it is assumed that
+*  the neighbor availability of the current macroblock is already derived.
+*  Positions inside the current macroblock are always reported as available;
+*  any dependence on the position of the partition inside the macroblock has
+*  to be handled by the caller.
+*  Based on table 6-3 of H264 specification
+*
+* @return availability status (1 - available, 0 - not available)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
+                                                WORD8 i1_pel_pos_x,
+                                                WORD8 i1_pel_pos_y)
+{
+    /* rows(+16): bottom left, bottom and bottom right are never available */
+    if (i1_pel_pos_y >= 16)
+    {
+        return 0;
+    }
+
+    /* row(-1): the pel belongs to one of the macroblocks of the row above */
+    if (i1_pel_pos_y < 0)
+    {
+        if (i1_pel_pos_x < 0)
+        {
+            return ps_ngbr_avbl->u1_mb_d; /* top left */
+        }
+        if (i1_pel_pos_x < 16)
+        {
+            return ps_ngbr_avbl->u1_mb_b; /* top */
+        }
+        return ps_ngbr_avbl->u1_mb_c; /* top right */
+    }
+
+    /* rows 0-15: the pel is on the row of the current macroblock */
+    if (i1_pel_pos_x < 0)
+    {
+        return ps_ngbr_avbl->u1_mb_a; /* left */
+    }
+    if (i1_pel_pos_x < 16)
+    {
+        return 1; /* inside the current macroblock */
+    }
+
+    return 0; /* right */
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 16x16 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream. The neighbor pels are gathered from the recon buffer
+* into ps_proc->au1_ngbr_pels, the best mode is stored in
+* ps_proc->u1_l_i16_mode and, when the resulting cost is lower than
+* ps_proc->i4_mb_cost, the mb cost, distortion and type (I16x16) of the proc
+* context are updated.
+*
+* @param[in] ps_proc
+* pointer to process context (handle)
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+* the SAD and cost are one and the same.
+*
+* @return none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD(distortion metric) of the 16x16 macroblock */
+ WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate */
+ WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
+
+ /* intra mode */
+ UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
+
+ /* neighbor pels for intra prediction */
+ /* layout filled in below: [0..15] left column, stored bottom to top, */
+ /* [16] top left pel, [17..32] top row */
+ UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
+
+ /* neighbor availability */
+ WORD32 i4_ngbr_avbl;
+
+ /* pointer to src macro block and to the co-located recon macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
+ UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* pointer to neighbors left, top, topleft (all in the recon buffer) */
+ UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
+ UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+ UWORD8 *pu1_mb_d = pu1_mb_b - 1;
+
+ /* valid intra modes map */
+ UWORD32 u4_valid_intra_modes;
+
+ /* lut for valid intra modes, indexed by i4_ngbr_avbl */
+ const UWORD8 u1_valid_intra_modes[8] = {4, 6, 12, 14, 5, 7, 13, 15};
+
+ /* temp var */
+ UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
+
+ /* init temp var */
+ /* in P slices the mode code length lookup is shifted by 5 entries and the */
+ /* fast sad setting of the motion estimation context is used */
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ offset = 5;
+ u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
+ }
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+ * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+ /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+ /* i.e. bit 0 - left, bit 1 - top left, bit 2 - top */
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+ ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
+
+ /* gather prediction pels from the neighbors, if particular set is not available
+ * it is set to zero*/
+ /* left pels: the top most left pel lands at index 15, the bottom most at index 0 */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+ {
+ for(i = 0; i < 16; i++)
+ pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
+ }
+ /* top pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+ {
+ ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
+ /* i.e. pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i] for i = 0..15 */
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
+ }
+ /* topleft pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+ pu1_ngbr_pels_i16[16] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i16[16] = 0;
+
+ /* set valid intra modes for evaluation */
+ /* The lut is the table form of the following rule: start with all four */
+ /* modes enabled (15); drop HORZ_I16x16 when the left mb is unavailable, */
+ /* drop VERT_I16x16 when the top mb is unavailable and drop PLANE_I16x16 */
+ /* unless left, top and top left are all available (i4_ngbr_avbl == 7). */
+ /* Bit n of the map stands for intra 16x16 mode n. */
+ u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
+
+ /* the plane mode is not evaluated in the fast preset */
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+ u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+ /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
+ /* the selected mode and its distortion are returned through u4_intra_mode */
+ /* and i4_mb_distortion_least */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
+ i4_src_strd, i4_pred_strd,
+ i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
+ u4_valid_intra_modes);
+
+ /* cost = distortion + lambda*rate */
+ /* (the rate term is added only once, after the best mode is known) */
+ i4_mb_cost_least = i4_mb_distortion_least;
+
+ /* plane mode is evaluated separately, into its own prediction buffer, when */
+ /* bit 3 of the valid modes map is still set and either the preset is not */
+ /* IVE_FASTEST or the slice is an I slice */
+ if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST ||
+ ps_proc->i4_slice_type == ISLICE))
+ {
+ /* intra prediction for PLANE mode*/
+ (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+ /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+ /* NOTE(review): i4_mb_cost_least is presumably an early exit threshold for the sad kernel - confirm */
+ ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
+
+ /* cost = distortion + lambda*rate */
+ i4_mb_cost = i4_mb_distortion;
+
+ /* update the least cost information if necessary */
+ /* (i4_mb_distortion_least equals i4_mb_cost_least at this point) */
+ if(i4_mb_cost < i4_mb_distortion_least)
+ {
+ u4_intra_mode = PLANE_I16x16;
+
+ i4_mb_cost_least = i4_mb_cost;
+ i4_mb_distortion_least = i4_mb_distortion;
+ }
+ }
+
+ u4_best_intra_16x16_mode = u4_intra_mode;
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
+
+ ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
+
+ /* cost = distortion + lambda*rate */
+ /* rate is the code length looked up for the chosen mode, offset by 5 in P slices */
+ i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
+
+
+ /* update the type of the mb if necessary */
+ if (i4_mb_cost_least < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_mb_cost_least;
+ ps_proc->i4_mb_distortion = i4_mb_distortion_least;
+ ps_proc->u4_mb_type = I16x16;
+ }
+
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 8x8 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream. The four 8x8 partitions are processed one after the
+* other; the best mode of each partition is stored in
+* ps_proc->au1_intra_luma_mb_8x8_modes and, when the accumulated cost is lower
+* than ps_proc->i4_mb_cost, the mb cost, distortion and type (I8x8) of the
+* proc context are updated.
+*
+* @param[in] ps_proc
+* pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: TODO: This function needs to be tested
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD(distortion metric) of an 8x8 partition and its sum over the macroblock */
+ WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate */
+ /* the macroblock total starts at one lambda; partition costs are added to it */
+ WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
+
+ /* cost of signalling the partition mode: one bit when the mode equals the */
+ /* estimated mode, four bits otherwise */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* intra mode */
+ UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
+
+ /* neighbor pels for intra prediction */
+ UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition */
+ UWORD8 *pu1_mb_curr;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* neighbors left, top, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_d;
+
+ /* neighbor availability */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars: partition index and pel offset of the partition inside the mb */
+ UWORD32 b8, u4_pix_x, u4_pix_y;
+
+ /* ngbr mb syntax information */
+ /* the top row mode array is addressed with 16 entries per macroblock */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map */
+ UWORD32 u4_valid_intra_modes;
+
+ /* partitions are visited in raster order: 0 - top left, 1 - top right, */
+ /* 2 - bottom left, 3 - bottom right */
+ for(b8 = 0; b8 < 4; b8++)
+ {
+ u4_pix_x = (b8 & 0x01) << 3;
+ u4_pix_y = (b8 >> 1) << 3;
+
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+ /* as opposed to using the recon pels. (open loop intra prediction) */
+ pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
+ pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
+ pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+ /* the unsigned offsets are converted to the WORD8 parameters of the callee, */
+ /* so an offset of 0 minus 1 is expected to arrive as -1 */
+ s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
+ s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
+ s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
+ s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
+
+ /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+ /* i.e. bit 0 - left, bit 1 - top left, bit 2 - top, bit 3 - top right */
+ /* NOTE(review): bit 4 repeats the left availability; its meaning for the */
+ /* reference filtering routine is not visible here - confirm */
+ i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
+ (s_ngbr_avbl.u1_mb_a << 4);
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* padd top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+ ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
+
+
+ /* build the neighbor pel array used by the 8x8 predictors from the source pels */
+ ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
+ i4_src_strd, i4_ngbr_avbl);
+
+ i4_partition_cost_least = INT_MAX;
+ /* set valid intra modes for evaluation */
+ /* start with all nine modes and drop those whose neighbors are unavailable */
+ /* NOTE(review): the I4x4 mode enumerators are used to build the 8x8 map; */
+ /* presumably they are numerically identical to the I8x8 ones - confirm */
+ u4_valid_intra_modes = 0x1ff;
+
+ if (!s_ngbr_avbl.u1_mb_b)
+ {
+ u4_valid_intra_modes &= ~(1 << VERT_I4x4);
+ u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
+ u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
+ }
+ if (!s_ngbr_avbl.u1_mb_a)
+ {
+ u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
+ u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
+ }
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
+ {
+ u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
+ u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
+ u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
+ }
+
+ /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
+ /* DC when left or top is unavailable, else the smaller of the left and top modes */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_8x8_mode = DC_I8x8;
+ }
+ else
+ {
+ /* both default to DC; they keep that value when the neighboring */
+ /* macroblock is neither I8x8 nor I4x4 */
+ UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
+ UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
+
+ if (u4_pix_x == 0)
+ {
+ /* left neighbor lies in the left macroblock */
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
+ }
+ }
+ else
+ {
+ /* left neighbor is the previous partition of the current macroblock */
+ u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
+ }
+
+ if (u4_pix_y == 0)
+ {
+ /* top neighbor lies in the top macroblock */
+ if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
+ }
+ }
+ else
+ {
+ /* top neighbor is the partition above, inside the current macroblock */
+ u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
+ }
+
+ u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
+ }
+
+ /* perform intra mode 8x8 evaluation */
+ /* the map is consumed from its lowest bit, which stands for VERT_I8x8; */
+ /* the loop stops as soon as no valid mode is left in the map */
+ for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
+ {
+ if ( (u4_valid_intra_modes & 1) == 0)
+ continue;
+
+ /* intra prediction */
+ (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
+
+ /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+ /* NOTE(review): i4_partition_cost_least is presumably an early exit threshold for the sad routine - confirm */
+ ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
+
+ i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+ /* update the least cost information if necessary */
+ if (i4_partition_cost < i4_partition_cost_least)
+ {
+ i4_partition_cost_least = i4_partition_cost;
+ i4_partition_distortion_least = i4_partition_distortion;
+ u4_best_intra_8x8_mode = u4_intra_mode;
+ }
+ }
+ /* macroblock distortion */
+ i4_total_cost += i4_partition_cost_least;
+ i4_total_distortion += i4_partition_distortion_least;
+ /* mb partition mode */
+ ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
+
+ }
+
+ /* update the type of the mb if necessary */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I8x8;
+ }
+
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Overview: walks the 16 luma 4x4 sub-blocks of the current macroblock as
+  * four 8x8 quadrants (b8) of four 4x4 blocks (b4). For each sub-block it
+  * gathers the neighbouring pels from the source picture, derives the
+  * predicted intra mode from the left/top neighbours, calls the codec's
+  * 4x4 mode evaluator and accumulates cost and distortion. The macroblock
+  * is marked I4x4 only if the accumulated cost beats ps_proc->i4_mb_cost.
+  *
+  * Codec context; supplies pf_ih264e_evaluate_intra_4x4_modes */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD (distortion metric) of a 4x4 block, and its running sum over the MB */
+ WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda: weight applied to the rate term of the cost */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate. The total starts at (24 + 1) * lambda
+  * before any sub-block is added; the 24 * lambda part is the intra 4x4
+  * bias described in the function header (JVT-O079) */
+ WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+ /* cost of signalling the 4x4 mode: lambda when the chosen mode equals the
+  * predicted one, 4 * lambda otherwise (see the subtraction after the
+  * evaluator call) */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* best mode reported by the evaluator, and the mode predicted from neighbours */
+ UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+ /* neighbor pels for intra prediction. Layout written below:
+  * [0..3] left column stored bottom-to-top, [4] top-left,
+  * [5..8] top row, [9..12] top-right row */
+ UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition (top-left pel of the 4x4 block in the source) */
+ UWORD8 *pu1_mb_curr;
+
+ /* pointer to prediction macro block; the same address is handed to the
+  * evaluator for every sub-block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* neighbors left, top, top right, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_c;
+ UWORD8 *pu1_mb_d;
+
+ /* neighbor availability: packed bits (bit0 left, bit1 top-left, bit2 top,
+  * bit3 top-right) and the same information unpacked into a struct */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars: loop counters and pel offsets of the quadrant / sub-block
+  * inside the macroblock */
+ UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+ /* maps a raster 4x4-block index (row * 4 + column) to the (b8 << 2) + b4
+  * index that the mode arrays in this function are written with */
+ const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+ /* ngbr sub mb modes: modes and syntax element of the MB above; the mode
+  * array holds 16 entries per MB, hence the i4_mb_x << 4 offset */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map: bitmask of modes to evaluate, indexed by the low
+  * three availability bits (left, top-left, top). NOTE(review): bit k is
+  * presumably intra 4x4 mode k, matching the tests in
+  * ih264e_evaluate_intra_4x4_modes - confirm for other implementations
+  * installed in the function pointer */
+ UWORD32 u4_valid_intra_modes;
+ UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3); /* pack MB-level availability into a 4-bit index */
+ memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); /* one availability byte per 4x4 sub-block; table is defined elsewhere */
+
+ for (b8 = 0; b8 < 4; b8++)
+ {
+ u4_blk_x = (b8 & 0x01) << 3; /* quadrant origin: 0 or 8 pels */
+ u4_blk_y = (b8 >> 1) << 3;
+ for (b4 = 0; b4 < 4; b4++)
+ {
+ u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); /* sub-block origin: 0, 4, 8 or 12 pels */
+ u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+ /* as opposed to using the recon pels. (open loop intra prediction) */
+ pu1_mb_a = pu1_mb_curr - 1; /* pel immediately left of the sub-block's first row */
+ pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pel row immediately above the sub-block */
+ pu1_mb_c = pu1_mb_b + 4; /* pel row above and to the right (top right) */
+ pu1_mb_d = pu1_mb_b - 1; /* pel above and to the left (top left) */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+ i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+ s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+ s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+ s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+ s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+ /* set valid intra modes for evaluation (top-right bit is ignored here) */
+ u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* pad top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+ /* gather prediction pels from the neighbors; unavailable left, top and
+  * top-left pels are written as 0 */
+ if (s_ngbr_avbl.u1_mb_a)
+ {
+ for(i = 0; i < 4; i++)
+ pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd]; /* left column, reversed: row 0 lands at index 3 */
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4, 0, 4);
+ }
+
+ if (s_ngbr_avbl.u1_mb_b)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+ }
+
+ if (s_ngbr_avbl.u1_mb_d)
+ pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i4[4] = 0;
+
+ if (s_ngbr_avbl.u1_mb_c)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+ }
+ else if (s_ngbr_avbl.u1_mb_b)
+ {
+ memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); /* replicate the last top pel into the top-right slots */
+ s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; /* local flag only; i4_ngbr_avbl passed to the evaluator is not changed */
+ }
+
+ i4_partition_cost_least = INT_MAX;
+
+ /* predict the intra 4x4 mode for the current partition (for evaluating cost): */
+ /* DC when left or top is unavailable, otherwise the smaller of the left and top modes */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_4x4_mode = DC_I4x4;
+ }
+ else
+ {
+ UWORD32 u4_left_intra_4x4_mode = DC_I4x4; /* stays DC when the left MB is neither I4x4 nor I8x8 */
+ UWORD32 u4_top_intra_4x4_mode = DC_I4x4; /* stays DC when the top MB is neither I4x4 nor I8x8 */
+
+ if (u4_pix_x == 0)
+ {
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; /* raster index 3 + u4_pix_y: column 3 of the same row in the left MB */
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+ }
+ }
+ else
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; /* raster index minus 1: block to the left inside this MB */
+ }
+
+ if (u4_pix_y == 0)
+ {
+ if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; /* raster index 12 + column: row 3 of the MB above */
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+ }
+ }
+ else
+ {
+ u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; /* raster index minus 4: block above inside this MB */
+ }
+
+ u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+ }
+
+ ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+ /* mode evaluation and prediction: the best mode and its cost come back */
+ /* through the two pointer arguments */
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+ pu1_ngbr_pels_i4,
+ pu1_pred_mb, i4_src_strd,
+ i4_pred_strd, i4_ngbr_avbl,
+ &u4_best_intra_4x4_mode,
+ &i4_partition_cost_least,
+ u4_valid_intra_modes,
+ u4_lambda,
+ u4_estimated_intra_4x4_mode);
+
+
+ i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits); /* strip the mode-signalling term to recover the distortion */
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+ /* macroblock distortion and cost accumulated over the sub-blocks */
+ i4_total_distortion += i4_partition_distortion_least;
+ i4_total_cost += i4_partition_cost_least;
+ /* mb partition mode; read back above when predicting modes of later sub-blocks */
+ ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+ }
+ }
+
+ /* update the type of the mb if necessary: only when I4x4 is cheaper than */
+ /* the cost currently stored in the process context */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I4x4;
+ }
+
+ return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc
+* pointer to the process context (process_ctxt_t)
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
+{
+ /* Overview: walks the 16 luma 4x4 sub-blocks of the current macroblock as
+  * four 8x8 quadrants (b8) of four 4x4 blocks (b4). For each sub-block it
+  * gathers neighbouring pels from reconstructed data, derives the predicted
+  * intra mode, calls the codec's 4x4 mode evaluator, then transforms,
+  * quantizes and reconstructs the sub-block so that later sub-blocks can
+  * use it as a neighbour. The macroblock is marked I4x4 only if the
+  * accumulated cost beats ps_proc->i4_mb_cost.
+  *
+  * Codec context; supplies the evaluator, transform and recon function pointers */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD (distortion metric) of a 4x4 block, and its running sum over the MB */
+ WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda: weight applied to the rate term of the cost */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate. The total starts at (24 + 1) * lambda
+  * before any sub-block is added; the 24 * lambda part is the intra 4x4
+  * bias described in the function header (JVT-O079) */
+ WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+ /* cost of signalling the 4x4 mode: lambda when the chosen mode equals the
+  * predicted one, 4 * lambda otherwise */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* best mode reported by the evaluator, and the mode predicted from neighbours */
+ UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+ /* neighbor pels for intra prediction. Layout written below:
+  * [0..3] left column stored bottom-to-top, [4] top-left,
+  * [5..8] top row, [9..12] top-right row */
+ UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition in the source, the buffers the left/top
+  * neighbours are read from, and this sub-block's slot in the intra 4x4
+  * reconstruction buffer */
+ UWORD8 *pu1_mb_curr;
+ UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
+ UWORD8 *pu1_ref_mb_intra_4x4;
+
+ /* pointer to residual macro block; advanced by MB_SIZE per sub-block in the b4 loop */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+
+ /* pointer to prediction macro block; the same address is used for every sub-block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides; the left/top strides depend on which buffer the neighbour is read from */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_ref_strd_left, i4_ref_strd_top;
+
+ /* neighbors left, top, top right, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_c;
+ UWORD8 *pu1_mb_d;
+
+ /* number of non zero coeffs; viewed as bytes and advanced by one per sub-block */
+ UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+ /* quantization parameters (entry 0 of the per-process table) */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* neighbor availability: packed bits (bit0 left, bit1 top-left, bit2 top,
+  * bit3 top-right) and the same information unpacked into a struct */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars: loop counters and pel offsets of the quadrant / sub-block
+  * inside the macroblock */
+ UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+ /* maps a raster 4x4-block index (row * 4 + column) to the (b8 << 2) + b4
+  * index that the mode arrays in this function are written with */
+ const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+ /* ngbr sub mb modes: modes and syntax element of the MB above; the mode
+  * array holds 16 entries per MB, hence the i4_mb_x << 4 offset */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map: bitmask of modes to evaluate, indexed by the low
+  * three availability bits (left, top-left, top). NOTE(review): bit k is
+  * presumably intra 4x4 mode k, matching the tests in
+  * ih264e_evaluate_intra_4x4_modes - confirm for other implementations
+  * installed in the function pointer */
+ UWORD32 u4_valid_intra_modes;
+ UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+ /* Dummy variable for 4x4 trans function; its value is never read here */
+ WORD16 i2_dc_dummy;
+
+ /* compute ngbr availability for sub blks: pack the MB-level flags into a
+  * 4-bit index and copy the matching 16-entry row of the lookup table */
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+ memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
+
+ for(b8 = 0; b8 < 4; b8++)
+ {
+ u4_blk_x = (b8 & 0x01) << 3; /* quadrant origin: 0 or 8 pels */
+ u4_blk_y = (b8 >> 1) << 3;
+ for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
+ {
+ u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); /* sub-block origin: 0, 4, 8 or 12 pels */
+ u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+ pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ if (u4_pix_x == 0)
+ {
+ i4_ref_strd_left = ps_proc->i4_rec_strd; /* first column: left pels come from the recon picture buffer */
+ pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
+ }
+ else
+ {
+ i4_ref_strd_left = i4_pred_strd; /* otherwise from the sub-blocks reconstructed earlier in this call */
+ pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
+ }
+ if (u4_pix_y == 0)
+ {
+ i4_ref_strd_top = ps_proc->i4_rec_strd; /* first row: top pels come from the recon picture buffer */
+ pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
+ }
+ else
+ {
+ i4_ref_strd_top = i4_pred_strd; /* otherwise from the sub-blocks reconstructed earlier in this call */
+ pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
+ }
+
+ pu1_mb_a = pu1_mb_ref_left - 1; /* pel immediately left of the sub-block's first row */
+ pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pel row immediately above the sub-block */
+ pu1_mb_c = pu1_mb_b + 4; /* pel row above and to the right (top right) */
+ if (u4_pix_y == 0)
+ pu1_mb_d = pu1_mb_b - 1;
+ else
+ pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* top-left pel, reached through whichever buffer holds the left column */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+ i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+ s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+ s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+ s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+ s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+ /* set valid intra modes for evaluation (top-right bit is ignored here) */
+ u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* pad top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+ /* gather prediction pels from the neighbors; unavailable left, top and
+  * top-left pels are written as 0 */
+ if (s_ngbr_avbl.u1_mb_a)
+ {
+ for(i = 0; i < 4; i++)
+ pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left]; /* left column, reversed: row 0 lands at index 3 */
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4,0,4);
+ }
+ if(s_ngbr_avbl.u1_mb_b)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
+ }
+ if (s_ngbr_avbl.u1_mb_d)
+ pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i4[4] = 0;
+ if (s_ngbr_avbl.u1_mb_c)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+ }
+ else if (s_ngbr_avbl.u1_mb_b)
+ {
+ memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); /* replicate the last top pel into the top-right slots */
+ s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; /* local flag only; i4_ngbr_avbl passed to the evaluator is not changed */
+ }
+
+ i4_partition_cost_least = INT_MAX;
+
+ /* predict the intra 4x4 mode for the current partition (for evaluating cost): */
+ /* DC when left or top is unavailable, otherwise the smaller of the left and top modes */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_4x4_mode = DC_I4x4;
+ }
+ else
+ {
+ UWORD32 u4_left_intra_4x4_mode = DC_I4x4; /* stays DC when the left MB is neither I4x4 nor I8x8 */
+ UWORD32 u4_top_intra_4x4_mode = DC_I4x4; /* stays DC when the top MB is neither I4x4 nor I8x8 */
+
+ if (u4_pix_x == 0)
+ {
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; /* raster index 3 + u4_pix_y: column 3 of the same row in the left MB */
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+ }
+ }
+ else
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; /* raster index minus 1: block to the left inside this MB */
+ }
+
+ if (u4_pix_y == 0)
+ {
+ if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; /* raster index 12 + column: row 3 of the MB above */
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+ }
+ }
+ else
+ {
+ u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; /* raster index minus 4: block above inside this MB */
+ }
+
+ u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+ }
+
+ ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+ /* mode evaluation and prediction: the best mode and its cost come back */
+ /* through the two pointer arguments */
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+ pu1_ngbr_pels_i4,
+ pu1_pred_mb, i4_src_strd,
+ i4_pred_strd, i4_ngbr_avbl,
+ &u4_best_intra_4x4_mode,
+ &i4_partition_cost_least,
+ u4_valid_intra_modes,
+ u4_lambda,
+ u4_estimated_intra_4x4_mode);
+
+
+ i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits); /* strip the mode-signalling term to recover the distortion */
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+
+ /* macroblock distortion and cost accumulated over the sub-blocks */
+ i4_total_distortion += i4_partition_distortion_least;
+ i4_total_cost += i4_partition_cost_least;
+
+ /* mb partition mode; read back above when predicting modes of later sub-blocks */
+ ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
+ pi2_res_mb, i4_src_strd,
+ i4_pred_strd,
+ /* No op stride, this implies a buff of length 1x16 */
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ pu1_nnz, &i2_dc_dummy);
+
+ /********************************************************/
+ /* ierror estimation, */
+ /* itransform */
+ /* iquantization */
+ /* output goes to this sub-block's slot in the intra 4x4 */
+ /* reconstruction buffer (pu1_ref_mb_intra_4x4) */
+ /********************************************************/
+ ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
+ pu1_ref_mb_intra_4x4,
+ i4_pred_strd, i4_pred_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat,
+ ps_qp_params->u1_qp_div,
+ ps_proc->pv_scratch_buff, 0,
+ NULL);
+ }
+ }
+
+ /* update the type of the mb if necessary: only when I4x4 is cheaper than */
+ /* the cost currently stored in the process context */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I4x4;
+ }
+
+ return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible chroma intra 8x8 modes and finds
+* the mode that best represents the macroblock (least distortion) and occupies
+* fewer bits in the bitstream.
+*
+* @param[in] ps_proc
+* pointer to the process context (process_ctxt_t)
+*
+* @remarks
+* For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* allowed-mode bitmask for each availability index */
+    /* (index = left + (top-left << 1) + (top << 2)) */
+    const UWORD8 au1_mode_mask_lut[8] = {1, 3, 9, 11, 5, 7, 13, 15};
+
+    /* codec context: source of every function pointer used below */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* staging buffer for neighbouring pels: [0..15] left column pairs stored */
+    /* bottom-to-top, [16..17] top-left pair, [18..33] top row */
+    UWORD8 *pu1_nbr = ps_proc->au1_ngbr_pels;
+
+    /* source and reconstructed chroma of the current macroblock */
+    UWORD8 *pu1_src = ps_proc->pu1_src_buf_chroma;
+    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_chroma;
+
+    /* prediction outputs: one for the evaluator, one for the plane mode */
+    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_chroma;
+    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
+
+    /* strides */
+    WORD32 i4_src_stride = ps_proc->i4_src_strd;
+    WORD32 i4_pred_stride = ps_proc->i4_pred_strd;
+    WORD32 i4_rec_stride = ps_proc->i4_rec_strd;
+
+    /* neighbours in the reconstructed picture: left, top, top-left */
+    UWORD8 *pu1_left = pu1_rec - 2;
+    UWORD8 *pu1_top = pu1_rec - i4_rec_stride;
+    UWORD8 *pu1_top_left = pu1_top - 2;
+
+    /* running best mode and its SAD, plus the SAD of the plane candidate */
+    UWORD32 u4_best_mode = DC_CH_I8x8;
+    WORD32 i4_best_sad = INT_MAX;
+    WORD32 i4_plane_sad;
+
+    UWORD32 u4_mode_mask;
+    WORD32 i4_avail;
+    WORD32 i4_row;
+
+    /* TODO : update the neighbor availability information basing on constrained intra pred information */
+    /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+     * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+    i4_avail = (ps_proc->ps_ngbr_avbl->u1_mb_a)
+                    + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1)
+                    + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2);
+    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_avail;
+
+    /* left column: eight rows of two bytes each, written in reverse row order */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+    {
+        for (i4_row = 0; i4_row < 8; i4_row++)
+        {
+            pu1_nbr[14 - 2 * i4_row] = pu1_left[i4_row * i4_rec_stride];
+            pu1_nbr[15 - 2 * i4_row] = pu1_left[i4_row * i4_rec_stride + 1];
+        }
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_nbr, 0, MB_SIZE);
+    }
+
+    /* top row: sixteen bytes */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+    {
+        ps_codec->pf_mem_cpy_mul8(pu1_nbr + 18, pu1_top, 16);
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(&pu1_nbr[18], 0, MB_SIZE);
+    }
+
+    /* top-left pair; left untouched when that neighbour is unavailable */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+    {
+        pu1_nbr[16] = pu1_top_left[0];
+        pu1_nbr[17] = pu1_top_left[1];
+    }
+
+    /* modes worth trying; the fast preset never tries the plane mode */
+    u4_mode_mask = au1_mode_mask_lut[i4_avail];
+    if (IVE_FAST == ps_codec->s_cfg.u4_enc_speed_preset)
+    {
+        u4_mode_mask &= ~(1 << PLANE_CH_I8x8);
+    }
+
+    /* let the evaluator pick among the modes it handles */
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_src, pu1_nbr, pu1_pred,
+                                                    i4_src_stride,
+                                                    i4_pred_stride, i4_avail,
+                                                    &u4_best_mode,
+                                                    &i4_best_sad,
+                                                    u4_mode_mask);
+
+    /* plane mode is predicted and scored separately when it is enabled */
+    if (u4_mode_mask & 8)
+    {
+        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_nbr, pu1_pred_plane, 0,
+                                                    i4_pred_stride, i4_avail);
+
+        ps_codec->pf_compute_sad_16x8(pu1_src, pu1_pred_plane, i4_src_stride,
+                                      i4_pred_stride, i4_best_sad,
+                                      &i4_plane_sad);
+
+        /* plane wins only with a strictly smaller SAD */
+        if (i4_plane_sad < i4_best_sad)
+        {
+            i4_best_sad = i4_plane_sad;
+            u4_best_mode = PLANE_CH_I8x8;
+        }
+    }
+
+    DEBUG("%d partition cost, %d intra mode\n", i4_best_sad, u4_best_mode);
+
+    ps_proc->u1_c_i8_mode = u4_best_mode;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+* prediction.
+*
+* @par Description
+* This function evaluates the first three 16x16 modes (VERT, HORZ and DC),
+* computes the SAD of each, and writes the prediction of the best mode to
+* the destination buffer.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+* Pointer to the least sad found so far; it is overwritten, together with
+* the best mode and the prediction, only when a smaller sad is found
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+                                      UWORD8 *pu1_ngbr_pels_i16,
+                                      UWORD8 *pu1_dst,
+                                      UWORD32 src_strd,
+                                      UWORD32 dst_strd,
+                                      WORD32 u4_n_avblty,
+                                      UWORD32 *u4_intra_mode,
+                                      WORD32 *pu4_sadmin,
+                                      UWORD32 u4_valid_intra_modes)
+{
+    /* availability flags taken from the packed neighbour word */
+    const UWORD8 u1_has_left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+    const UWORD8 u1_has_top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    /* top neighbour row starts at offset 17; the left column sits at */
+    /* offsets 15..0 (row 0 at offset 15) */
+    UWORD8 *pu1_top_row = pu1_ngbr_pels_i16 + 17;
+
+    UWORD8 *pu1_row;
+    WORD32 i4_dc = 0;
+    WORD32 i4_sad_h = INT_MAX, i4_sad_v = INT_MAX, i4_sad_dc = 0;
+    WORD32 i4_best;
+    WORD32 row, col;
+
+    /* HORZ: every row is predicted from its left neighbour pel */
+    if (u1_has_left)
+    {
+        i4_sad_h = 0;
+        pu1_row = pu1_src;
+
+        for (row = 0; row < 16; row++, pu1_row += src_strd)
+        {
+            UWORD8 u1_left_pel = pu1_ngbr_pels_i16[15 - row];
+
+            i4_dc += u1_left_pel;
+            for (col = 0; col < 16; col++)
+            {
+                i4_sad_h += ABS(u1_left_pel - pu1_row[col]);
+            }
+        }
+        i4_dc += 8; /* rounding term for the left half of the DC sum */
+    }
+
+    /* VERT: every row is predicted from the top neighbour row */
+    if (u1_has_top)
+    {
+        i4_sad_v = 0;
+        pu1_row = pu1_src;
+
+        for (row = 0; row < 16; row++, pu1_row += src_strd)
+        {
+            i4_dc += pu1_top_row[row];
+            for (col = 0; col < 16; col++)
+            {
+                i4_sad_v += ABS(pu1_top_row[col] - pu1_row[col]);
+            }
+        }
+        i4_dc += 8; /* rounding term for the top half of the DC sum */
+    }
+
+    /* DC value: mean of the available neighbours, or 128 when none exist */
+    i4_dc >>= (3 + u1_has_left + u1_has_top);
+    if (!u1_has_left && !u1_has_top)
+    {
+        i4_dc += 128;
+    }
+
+    /* DC: the whole block is predicted from one value */
+    pu1_row = pu1_src;
+    for (row = 0; row < 16; row++, pu1_row += src_strd)
+    {
+        for (col = 0; col < 16; col++)
+        {
+            i4_sad_dc += ABS(i4_dc - pu1_row[col]);
+        }
+    }
+
+    /* take modes the caller did not enable out of the race */
+    /* (bit 0 VERT, bit 1 HORZ, bit 2 DC) */
+    if (!(u4_valid_intra_modes & 4))
+    {
+        i4_sad_dc = INT_MAX;
+    }
+    if (!(u4_valid_intra_modes & 1))
+    {
+        i4_sad_v = INT_MAX;
+    }
+    if (!(u4_valid_intra_modes & 2))
+    {
+        i4_sad_h = INT_MAX;
+    }
+
+    i4_best = MIN3(i4_sad_h, i4_sad_dc, i4_sad_v);
+
+    /* nothing is written unless the caller's current minimum is beaten */
+    if (i4_best >= *pu4_sadmin)
+    {
+        return;
+    }
+    *pu4_sadmin = i4_best;
+
+    /* ties resolve in the order VERT, HORZ, DC */
+    if (i4_best == i4_sad_v)
+    {
+        *u4_intra_mode = VERT_I16x16;
+        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
+        {
+            memcpy(pu1_dst, pu1_top_row, MB_SIZE);
+        }
+    }
+    else if (i4_best == i4_sad_h)
+    {
+        *u4_intra_mode = HORZ_I16x16;
+        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
+        {
+            memset(pu1_dst, pu1_ngbr_pels_i16[15 - row], MB_SIZE);
+        }
+    }
+    else
+    {
+        *u4_intra_mode = DC_I16x16;
+        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
+        {
+            memset(pu1_dst, i4_dc, MB_SIZE);
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+* This function evaluates 4x4 modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[in] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in] pu4_sadmin
+* Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @param[in] u4_lambda
+* Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+* Predicted mode for cost computation
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode)
+{
+ UWORD8 *pu1_src_temp = pu1_src;
+ UWORD8 *pu1_pred = pu1_ngbr_pels;
+ UWORD8 left = 0, top = 0;
+ UWORD8 u1_pred_val = 0;
+ UWORD8 u1_pred_vals[4] = {0};
+ UWORD8 *pu1_pred_val = NULL;
+ /* To store FILT121 operated values*/
+ UWORD8 u1_pred_vals_diag_121[15] = {0};
+ /* To store FILT11 operated values*/
+ UWORD8 u1_pred_vals_diag_11[15] = {0};
+ UWORD8 u1_pred_vals_vert_r[8] = {0};
+ UWORD8 u1_pred_vals_horz_d[10] = {0};
+ UWORD8 u1_pred_vals_horz_u[10] = {0};
+ WORD32 u4_dcval = 0;
+ WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+
+ WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+ WORD32 i, i4_min_cost = INT_MAX;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ /* Computing SAD */
+
+ /* VERT mode valid */
+ if (u4_valid_intra_modes & 1)
+ {
+ pu1_pred = pu1_ngbr_pels + 5;
+ i4_sad[VERT_I4x4] = 0;
+ i4_cost[VERT_I4x4] = 0;
+
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+
+ i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* HORZ mode valid */
+ if (u4_valid_intra_modes & 2)
+ {
+ i4_sad[HORZ_I4x4] = 0;
+ i4_cost[HORZ_I4x4] =0;
+ pu1_src_temp = pu1_src;
+
+ u1_pred_val = pu1_ngbr_pels[3];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[2];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[1];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[0];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+
+ i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* DC mode valid */
+ if (u4_valid_intra_modes & 4)
+ {
+ i4_sad[DC_I4x4] = 0;
+ i4_cost[DC_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+
+ if (left)
+ u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
+ + pu1_ngbr_pels[3] + 2;
+ if (top)
+ u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
+ + pu1_ngbr_pels[8] + 2;
+
+ u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
+
+ /* none available */
+ memset(u1_pred_vals, u4_dcval, 4);
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+
+ i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* if modes other than VERT, HORZ and DC are valid */
+ if (u4_valid_intra_modes > 7)
+ {
+ pu1_pred = pu1_ngbr_pels;
+ pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
+
+ /* Performing FILT121 and FILT11 operation for all neighbour values*/
+ for (i = 0; i < 13; i++)
+ {
+ u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
+ u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
+
+ pu1_pred++;
+ }
+
+ if (u4_valid_intra_modes & 8)/* DIAG_DL */
+ {
+ i4_sad[DIAG_DL_I4x4] = 0;
+ i4_cost[DIAG_DL_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 16)/* DIAG_DR */
+ {
+ i4_sad[DIAG_DR_I4x4] = 0;
+ i4_cost[DIAG_DR_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+
+ }
+
+ if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
+ {
+ i4_sad[VERT_R_I4x4] = 0;
+
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
+ memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
+ u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
+ memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
+
+ pu1_pred_val = u1_pred_vals_diag_11 + 4;
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
+ i4_sad[VERT_R_I4x4]);
+
+ i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
+ {
+ i4_sad[HORZ_D_I4x4] = 0;
+
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
+ memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
+ u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
+ u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
+ u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
+ u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
+ u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
+ u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
+
+ pu1_pred_val = u1_pred_vals_horz_d;
+ USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
+
+ i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
+ {
+ i4_sad[VERT_L_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_11 + 5;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_11 + 6;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 6;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+
+ i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
+ {
+ i4_sad[HORZ_U_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
+ u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
+ u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
+ u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
+ u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
+ u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
+
+ memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
+
+ pu1_pred_val = u1_pred_vals_horz_u;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
+
+ i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
+ MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
+ MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
+
+ }
+ else
+ {
+ /* Only first three modes valid */
+ i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
+ }
+
+ *pu4_sadmin = i4_min_cost;
+
+ if (i4_min_cost == i4_cost[0])
+ {
+ *u4_intra_mode = VERT_I4x4;
+ pu1_pred_val = pu1_ngbr_pels + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ }
+ else if (i4_min_cost == i4_cost[1])
+ {
+ *u4_intra_mode = HORZ_I4x4;
+ memset(pu1_dst, pu1_ngbr_pels[3], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[2], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[1], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[0], 4);
+ }
+ else if (i4_min_cost == i4_cost[2])
+ {
+ *u4_intra_mode = DC_I4x4;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ }
+
+ else if (i4_min_cost == i4_cost[3])
+ {
+ *u4_intra_mode = DIAG_DL_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 1), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 3), 4);
+ }
+ else if (i4_min_cost == i4_cost[4])
+ {
+ *u4_intra_mode = DIAG_DR_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 1), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 3), 4);
+ }
+
+ else if (i4_min_cost == i4_cost[5])
+ {
+ *u4_intra_mode = VERT_R_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_11 + 4;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
+ }
+ else if (i4_min_cost == i4_cost[6])
+ {
+ *u4_intra_mode = HORZ_D_I4x4;
+ pu1_pred_val = u1_pred_vals_horz_d;
+ memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ }
+ else if (i4_min_cost == i4_cost[7])
+ {
+ *u4_intra_mode = VERT_L_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_11 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_11 + 6;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 6;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ }
+ else if (i4_min_cost == i4_cost[8])
+ {
+ *u4_intra_mode = HORZ_U_I4x4;
+ pu1_pred_val = u1_pred_vals_horz_u;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+ pu1_dst += dst_strd;
+ }
+
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Pick the cheapest intra chroma prediction mode among DC, HORZ and VERT for
+*  an 8x8 block of interleaved U/V samples and write out its prediction.
+*
+* @par Description
+*  The SAD between the source and each candidate prediction is computed. HORZ
+*  is evaluated only when the left neighbour is available, VERT only when the
+*  top neighbour is available, DC is always evaluated. Modes that are masked
+*  out by u4_valid_intra_modes are then discarded. When the smallest remaining
+*  SAD is lower than the value already stored in *pu4_sadmin, the SAD, the mode
+*  and the destination block are updated; otherwise nothing is written.
+*  On equal SADs, DC is preferred over HORZ and HORZ over VERT.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source; 8 rows of 8 interleaved (U, V) pairs are read
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels. Bytes 0..15 hold the left (U, V)
+*  pairs with the bottom row first; bytes 18..33 hold the top (U, V) pairs
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination (8 rows of 16 bytes)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels; only the LEFT_MB_AVAILABLE_MASK and
+*  TOP_MB_AVAILABLE_MASK bits are examined
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+*  Pointer to the SAD to beat on entry; receives the new minimum sad when a
+*  cheaper mode is found
+*
+* @param[in] u4_valid_intra_modes
+*  Mode enable mask: bit 0 - DC, bit 1 - HORZ, bit 2 - VERT
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+                                        UWORD8 *pu1_ngbr_pels,
+                                        UWORD8 *pu1_dst,
+                                        UWORD32 src_strd,
+                                        UWORD32 dst_strd,
+                                        WORD32 u4_n_avblty,
+                                        UWORD32 *u4_intra_mode,
+                                        WORD32 *pu4_sadmin,
+                                        UWORD32 u4_valid_intra_modes)
+{
+    /* interleaved top neighbours start at byte 18 of the neighbour array */
+    UWORD8 *pu1_top_pels = pu1_ngbr_pels + 18;
+    UWORD8 *pu1_row;
+
+    /* neighbour availability flags */
+    UWORD8 u1_left_avail = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+    UWORD8 u1_top_avail = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    /* neighbour sums, indexed [half][plane]: half 0 is the first four pels, */
+    /* half 1 the last four; plane 0 is U, plane 1 is V                      */
+    WORD32 ai4_left_sum[2][2] = { { 0, 0 }, { 0, 0 } };
+    WORD32 ai4_top_sum[2][2] = { { 0, 0 }, { 0, 0 } };
+
+    /* one DC predictor per 4x4 quadrant and plane: [blk row][blk col][plane] */
+    WORD32 ai4_dc[2][2][2];
+
+    WORD32 i4_cost_horz = INT_MAX, i4_cost_vert = INT_MAX;
+    WORD32 i4_cost_dc, i4_best, i4_shift;
+    WORD32 y, x, plane;
+    UWORD8 u1_u, u1_v;
+
+    /* ---- HORZ: every row is predicted from its left (U, V) pair ---- */
+    if (u1_left_avail)
+    {
+        /* seed the half sums with the rounding term used by the DC average */
+        ai4_left_sum[0][0] = ai4_left_sum[0][1] = 2;
+        ai4_left_sum[1][0] = ai4_left_sum[1][1] = 2;
+
+        i4_cost_horz = 0;
+        pu1_row = pu1_src;
+        for (y = 0; y < 8; y++, pu1_row += src_strd)
+        {
+            /* left pels are stored bottom-up, so row y lives at 14 - 2y */
+            u1_u = pu1_ngbr_pels[14 - 2 * y];
+            u1_v = pu1_ngbr_pels[15 - 2 * y];
+
+            ai4_left_sum[y >> 2][0] += u1_u;
+            ai4_left_sum[y >> 2][1] += u1_v;
+
+            for (x = 0; x < 16; x += 2)
+            {
+                i4_cost_horz += ABS(u1_u - pu1_row[x]);
+                i4_cost_horz += ABS(u1_v - pu1_row[x + 1]);
+            }
+        }
+    }
+
+    /* ---- VERT: every row is a copy of the top neighbour row ---- */
+    if (u1_top_avail)
+    {
+        /* seed the half sums with the rounding term used by the DC average */
+        ai4_top_sum[0][0] = ai4_top_sum[0][1] = 2;
+        ai4_top_sum[1][0] = ai4_top_sum[1][1] = 2;
+
+        for (x = 0; x < 8; x++)
+        {
+            ai4_top_sum[x >> 2][0] += pu1_top_pels[2 * x];
+            ai4_top_sum[x >> 2][1] += pu1_top_pels[2 * x + 1];
+        }
+
+        i4_cost_vert = 0;
+        pu1_row = pu1_src;
+        for (y = 0; y < 8; y++, pu1_row += src_strd)
+        {
+            for (x = 0; x < 16; x++)
+            {
+                i4_cost_vert += ABS(pu1_top_pels[x] - pu1_row[x]);
+            }
+        }
+    }
+
+    /* ---- DC predictors for the four quadrants ---- */
+    i4_shift = 1 + u1_left_avail + u1_top_avail;
+    for (plane = 0; plane < 2; plane++)
+    {
+        /* Equation 8-128 in spec: diagonal quadrants use left and top */
+        ai4_dc[0][0][plane] = (ai4_left_sum[0][plane] + ai4_top_sum[0][plane]) >> i4_shift;
+        ai4_dc[1][1][plane] = (ai4_left_sum[1][plane] + ai4_top_sum[1][plane]) >> i4_shift;
+
+        /* Equation 8-132 in spec: top-right quadrant prefers the top pels */
+        ai4_dc[0][1][plane] = u1_top_avail
+                        ? (ai4_top_sum[1][plane] >> (1 + u1_top_avail))
+                        : (ai4_left_sum[0][plane] >> (1 + u1_left_avail));
+
+        /* bottom-left quadrant prefers the left pels */
+        ai4_dc[1][0][plane] = u1_left_avail
+                        ? (ai4_left_sum[1][plane] >> (1 + u1_left_avail))
+                        : (ai4_top_sum[0][plane] >> (1 + u1_top_avail));
+    }
+
+    if (!u1_left_avail && !u1_top_avail)
+    {
+        /* no neighbour available: mid-grey everywhere */
+        for (y = 0; y < 2; y++)
+        {
+            for (x = 0; x < 2; x++)
+            {
+                ai4_dc[y][x][0] = ai4_dc[y][x][1] = 128;
+            }
+        }
+    }
+
+    /* ---- SAD of the DC prediction ---- */
+    i4_cost_dc = 0;
+    pu1_row = pu1_src;
+    for (y = 0; y < 8; y++, pu1_row += src_strd)
+    {
+        for (x = 0; x < 8; x++)
+        {
+            u1_u = ai4_dc[y >> 2][x >> 2][0];
+            u1_v = ai4_dc[y >> 2][x >> 2][1];
+
+            i4_cost_dc += ABS(u1_u - pu1_row[2 * x]);
+            i4_cost_dc += ABS(u1_v - pu1_row[2 * x + 1]);
+        }
+    }
+
+    /* drop the modes the caller did not enable */
+    if (!(u4_valid_intra_modes & 0x1))
+    {
+        i4_cost_dc = INT_MAX;
+    }
+    if (!(u4_valid_intra_modes & 0x2))
+    {
+        i4_cost_horz = INT_MAX;
+    }
+    if (!(u4_valid_intra_modes & 0x4))
+    {
+        i4_cost_vert = INT_MAX;
+    }
+
+    i4_best = MIN3(i4_cost_horz, i4_cost_dc, i4_cost_vert);
+
+    /* nothing to do unless the incoming SAD is beaten */
+    if (i4_best >= *pu4_sadmin)
+    {
+        return;
+    }
+
+    *pu4_sadmin = i4_best;
+
+    if (i4_best == i4_cost_dc)
+    {
+        *u4_intra_mode = DC_CH_I8x8;
+        for (y = 0; y < 8; y++, pu1_dst += dst_strd)
+        {
+            for (x = 0; x < 8; x++)
+            {
+                pu1_dst[2 * x] = ai4_dc[y >> 2][x >> 2][0];
+                pu1_dst[2 * x + 1] = ai4_dc[y >> 2][x >> 2][1];
+            }
+        }
+    }
+    else if (i4_best == i4_cost_horz)
+    {
+        *u4_intra_mode = HORZ_CH_I8x8;
+        for (y = 0; y < 8; y++, pu1_dst += dst_strd)
+        {
+            u1_u = pu1_ngbr_pels[14 - 2 * y];
+            u1_v = pu1_ngbr_pels[15 - 2 * y];
+
+            for (x = 0; x < 16; x += 2)
+            {
+                pu1_dst[x] = u1_u;
+                pu1_dst[x + 1] = u1_v;
+            }
+        }
+    }
+    else
+    {
+        *u4_intra_mode = VERT_CH_I8x8;
+        for (y = 0; y < 8; y++)
+        {
+            memcpy(pu1_dst, pu1_top_pels, MB_SIZE);
+            pu1_dst += dst_strd;
+        }
+    }
+}
diff --git a/encoder/ih264e_intra_modes_eval.h b/encoder/ih264e_intra_modes_eval.h
new file mode 100755
index 0000000..c8402e5
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.h
@@ -0,0 +1,418 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval.h
+*
+* @brief
+* This file contains declarations of routines that perform rate distortion
+* analysis on a macroblock if coded as intra.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_INTRA_MODES_EVAL_H_
+#define IH264E_INTRA_MODES_EVAL_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for macroblock availability
+*
+* @par Description
+* Calculates the availability of the left, top, topright and topleft macroblocks.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc context (handle)
+*
+* @remarks Based on section 6.4.5 in H264 spec
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for subblock/partition availability
+*
+* @par Description
+* Calculates the availability of the left, top, topright and topleft subblock
+* or partitions.
+*
+* @param[in] s_ngbr_avbl
+* pointer to the block neighbor availability structure
+*
+* @param[in] i1_pel_pos_x
+* column position of the pel wrt the current block
+*
+* @param[in] i1_pel_pos_y
+* row position of the pel wrt the current block
+*
+* @remarks Assumptions: before calling this function it is assumed that
+* the neighbor availability of the current macroblock is already derived.
+* Based on table 6-3 of H264 specification
+*
+* @return availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions
+ (
+ block_neighbors_t *s_ngbr_avbl,
+ WORD8 i1_pel_pos_x,
+ WORD8 i1_pel_pos_y
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 16x16 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to process context (handle)
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+* the SAD and cost are one and the same.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 8x8 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: TODO: This function needs to be tested
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible chroma intra 8x8 modes and finds
+* the mode that best represents the macroblock (least distortion) and occupies
+* fewer bits in the bitstream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to process context (handle)
+*
+* @remarks
+* For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief
+* Function type shared by the intra 16x16 and intra chroma mode evaluators:
+* evaluate the best mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+* A function of this type evaluates the first three intra modes, computes the
+* corresponding sad and returns the buffer predicted with the best mode.
+* All 16x16 and chroma variants declared below (C, ARM assembly and x86
+* intrinsics) share this argument list.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+* Pointer to the variable in which minimum sad is returned.
+* NOTE(review): the C chroma implementation reads the incoming value and only
+* updates it (together with the mode and the prediction) when a lower sad is
+* found - confirm that the 16x16 variants behave the same way.
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @returns none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_modes_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels_i16,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes);
+
+/* C implementations */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes;
+
+/* ARM assembly variants (_a9q and _av8 suffixes) */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_a9q;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_a9q;
+
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_av8;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_av8;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_ssse3;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_ssse3;
+
+/**
+******************************************************************************
+*
+* @brief
+* Function type of the intra 4x4 mode evaluators: evaluate best intra 4x4
+* mode and perform prediction.
+*
+* @par Description
+* A function of this type evaluates the 4x4 modes, computes the corresponding
+* sad and cost, and returns the buffer predicted with the best mode.
+* In the C implementation the cost of a mode is its sad plus u4_lambda when the
+* mode equals u4_predictd_mode, and its sad plus 4 * u4_lambda otherwise.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+* Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @param[in] u4_lambda
+* Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+* Predicted mode for cost computation
+*
+* @returns none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode);
+
+/* C implementation */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_ssse3;
+
+/* ARM assembly variants (_a9q and _av8 suffixes) */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_a9q;
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_av8;
+
+#endif /* IH264E_INTRA_MODES_EVAL_H_ */
diff --git a/encoder/ih264e_list.h b/encoder/ih264e_list.h
new file mode 100755
index 0000000..782c007
--- /dev/null
+++ b/encoder/ih264e_list.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_list.h
+*
+* @brief
+* The file contains declarations of functions for encoder queue management
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_LIST_H_
+#define IH264E_LIST_H_
+
+
+#endif /* IH264E_LIST_H_ */
diff --git a/encoder/ih264e_master.h b/encoder/ih264e_master.h
new file mode 100755
index 0000000..6c7505a
--- /dev/null
+++ b/encoder/ih264e_master.h
@@ -0,0 +1,132 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_master.h
+*
+* @brief
+* Contains declarations of functions used by master thread
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MASTER_H_
+#define IH264E_MASTER_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* This function joins all the spawned threads after successful completion of
+* their tasks
+*
+* @par Description
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec);
+
+/**
+******************************************************************************
+*
+* @brief This function puts the current thread to sleep for a duration
+* of sleep_us
+*
+* @par Description
+* ithread_yield() causes the calling thread to yield execution to another
+* thread that is ready to run on the current processor. The operating system
+* selects the thread to yield to. ithread_usleep() blocks the current thread
+* for the specified duration. In other words, yield just says: end my
+* timeslice prematurely and look around for other threads to run; if there
+* is nothing better than me, continue. Sleep says: I don't want to run for the
+* given duration; even if no other thread wants to run, don't make me run.
+*
+* @param[in] sleep_us
+* thread sleep duration (the _us suffix suggests microseconds - TODO confirm)
+*
+* @returns error_status
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us);
+
+/**
+******************************************************************************
+*
+* @brief
+* Encodes in synchronous api mode
+*
+* @par Description
+* This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns Status
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
+
+/**
+*******************************************************************************
+*
+* @brief update encoder configuration parameters
+*
+* @par Description:
+* updates encoder configuration parameters from the given config set.
+* Initialize/reinitialize codec parameters according to new configurations.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_cfg
+* Pointer to config param set
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, cfg_params_t *ps_cfg);
+
+#endif /* IH264E_MASTER_H_ */
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
new file mode 100755
index 0000000..2dd0974
--- /dev/null
+++ b/encoder/ih264e_mc.c
@@ -0,0 +1,320 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_mc.c
+*
+* @brief
+* Contains definition of functions for motion compensation
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_motion_comp_luma()
+* - ih264e_motion_comp_chroma()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_mc.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+* For every partition of the mb, the source block is taken either from the
+* full pel reference buffer (offset by the integer mv) or, when a half pel bit
+* of the mv is set, from ps_proc->pu1_best_subpel_buf. If the mb has a single
+* partition nothing is copied: the chosen pointer and its stride are returned
+* through the out params. Otherwise the block is copied into the pred buffer
+* through pf_inter_pred_luma_copy. No interpolation filter is applied here.
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+* pseudo prediction buffer (written only when the mb has one partition)
+*
+* @param[out] pi4_pseudo_pred_strd
+* pseudo pred buffer stride (written only when the mb has one partition)
+*
+* @return none
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+ UWORD8 **pu1_pseudo_pred,
+ WORD32 *pi4_pseudo_pred_strd)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* Pointer to the structure having motion vectors, size and position of curr partitions */
+ enc_pu_t *ps_curr_pu;
+
+ /* candidate source pointers; only [0] (full pel reference) and [1] (best sub pel buffer) are assigned below */
+ UWORD8 *pu1_ref[4];
+
+ /* pred buffer ptr */
+ UWORD8 *pu1_pred;
+
+ /* strides matching pu1_ref[]; only [0] and [1] are read below */
+ WORD32 i4_ref_strd[4];
+
+ /* pred buffer stride */
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* full pel motion vectors (signed, despite the u4_ prefix) */
+ WORD32 u4_mv_x_full, u4_mv_y_full;
+
+ /* half pel bit of the motion vectors (0 or 1) */
+ WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
+
+ /* quarter pel bit of the motion vectors (0 or 1) */
+ WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
+
+ /* width & height of the partition */
+ UWORD32 wd, ht;
+
+ /* partition idx */
+ UWORD32 u4_num_prtn;
+
+ /* packed sub pel bits of the mv (see where it is computed below) */
+ UWORD32 u4_subpel_factor;
+
+ /* index into pu1_ref[] / i4_ref_strd[]: 0 = full pel, 1 = best sub pel buffer */
+ UWORD32 u4_lkup_idx1;
+
+ /* Init: [0] is the recon stride; [1..3] start as the half pel buffer stride ([1] is overwritten per partition below) */
+ i4_ref_strd[0] = ps_proc->i4_rec_strd;
+
+ i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_hp_buf_strd;
+
+ for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+ {
+ /* update ptr to curr partition */
+ ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
+
+
+ /* integer part of the mv (the two low bits are the half / quarter pel bits, hence >> 2) */
+ u4_mv_x_full = ps_curr_pu->s_l0_mv.i2_mvx >> 2;
+ u4_mv_y_full = ps_curr_pu->s_l0_mv.i2_mvy >> 2;
+
+ /* half pel bit of the mv (bit 1) */
+ u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
+ u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+
+ /* quarter pel bit of the mv (bit 0) */
+ u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
+ u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+
+ /* width and height of partition: b4_wd / b4_ht hold (size in 4 pel units) - 1 */
+ wd = (ps_curr_pu->b4_wd + 1) << 2;
+ ht = (ps_curr_pu->b4_ht + 1) << 2;
+
+ /* pack sub pel bits: bit3 = y hpel, bit2 = x hpel, bit1 = y qpel, bit0 = x qpel */
+ u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
+
+ /* [0]: full pel reference displaced by the integer mv */
+ pu1_ref[0] = ps_proc->pu1_ref_buf_luma + (u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full;
+
+ pu1_ref[1] = ps_proc->pu1_best_subpel_buf; /* [1]: best sub pel buffer, used as is (no mv offset applied here) */
+ i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
+
+
+ /* update pred buff ptr: b4_pos_x / b4_pos_y are in units of 4 pels */
+ pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 4 * ps_curr_pu->b4_pos_x;
+
+ /* 1 when either half pel bit is set (selects the sub pel buffer), else 0; the qpel bits are ignored */
+ u4_lkup_idx1 = (u4_subpel_factor >> 2 ) != 0 ;
+
+ {
+ /********************************************************************/
+ /* if the block is P16x16 MB and mv are not quarter pel motion */
+ /* vectors, there is no need to copy 16x16 unit from reference frame*/
+ /* to pred buffer. We might as well send the reference frame buffer */
+ /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
+ /* and inverse transform unit. */
+ /********************************************************************/
+ if (ps_proc->u4_num_sub_partitions == 1)
+ {
+ *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1];
+ *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1];
+
+ }
+ /*
+ * Copying half pel or full pel to prediction buffer
+ * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only support 16x16 in P mbs
+ */
+ else
+ {
+ ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred, i4_ref_strd[u4_lkup_idx1], i4_pred_strd, ht, wd, NULL, 0);
+ }
+
+ }
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for chroma mb
+*
+* @par Description
+* For every partition, calls pf_inter_pred_chroma with the reference pointer
+* displaced by (mv >> 3) and the fractional offsets dx, dy taken from mv bits 0-2
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @return none
+*
+* @remarks Reads only ps_proc->pu1_ref_buf_chroma as reference; the prediction
+* is written into ps_proc->pu1_pred_mb.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* Pointer to the structure having motion vectors, size and position of curr partitions */
+ enc_pu_t *ps_curr_pu;
+
+ /* pointer into the chroma reference buffer for the current partition */
+ UWORD8 *pu1_ref;
+
+ /* pred buffer ptr */
+ UWORD8 *pu1_pred;
+
+ /* strides of full pel reference buffer */
+ WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
+
+ /* pred buffer stride */
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* bit 2 of the mv (a full luma pel; contributes 4 to dx / dy for chroma) */
+ WORD32 u4_mv_x_full, u4_mv_y_full;
+
+ /* bit 1 of the mv (luma half pel bit) */
+ WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
+
+ /* bit 0 of the mv (luma quarter pel bit) */
+ WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
+
+ /* width & height of the partition */
+ UWORD32 wd, ht;
+
+ /* partition idx */
+ UWORD32 u4_num_prtn;
+
+ WORD32 u4_mv_x; /* integer part of the mv for chroma: i2_mvx >> 3 (signed, despite the u4_ prefix) */
+ WORD32 u4_mv_y; /* integer part of the mv for chroma: i2_mvy >> 3 */
+ UWORD8 u1_dx, u1_dy; /* fractional offsets handed to pf_inter_pred_chroma, range 0..7 */
+
+ for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+ {
+ ps_curr_pu =ps_proc->ps_pu + u4_num_prtn;
+
+ u4_mv_x = ps_curr_pu->s_l0_mv.i2_mvx >> 3;
+ u4_mv_y = ps_curr_pu->s_l0_mv.i2_mvy >> 3;
+
+ /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy =4*/
+ u4_mv_x_full = (ps_curr_pu->s_l0_mv.i2_mvx & 0x4) >> 2;
+ u4_mv_y_full = (ps_curr_pu->s_l0_mv.i2_mvy & 0x4) >> 2;
+
+ /* half pel bit of the mv (bit 1) */
+ u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
+ u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+
+ /* quarter pel bit of the mv (bit 0) */
+ u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
+ u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+
+ /* width and height passed to pf_inter_pred_chroma: (b4_wd + 1) * 2 and (b4_ht + 1) * 2 */
+ wd = (ps_curr_pu->b4_wd + 1) << 1;
+ ht = (ps_curr_pu->b4_ht + 1) << 1;
+
+ /* displace the reference pointer by the integer mv; the x offset is doubled, presumably because Cb/Cr samples are interleaved - TODO confirm */
+ pu1_ref = ps_proc->pu1_ref_buf_chroma + (u4_mv_y * i4_ref_strd) + (u4_mv_x << 1);
+
+ pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 2 * ps_curr_pu->b4_pos_x; /* NOTE(review): uses 4 * pos_y and 2 * pos_x; both are 0 for a single 16x16 partition - confirm scaling for other partitions */
+
+ u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel); /* equals (i2_mvx & 0x7) */
+ u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel); /* equals (i2_mvy & 0x7) */
+
+ ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
+ u1_dx, u1_dy, ht, wd);
+ }
+}
diff --git a/encoder/ih264e_mc.h b/encoder/ih264e_mc.h
new file mode 100755
index 0000000..965e1d1
--- /dev/null
+++ b/encoder/ih264e_mc.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_mc.h
+*
+* @brief
+* This file contains declarations of routines that perform motion compensation
+* of luma and chroma macroblocks.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MC_H_
+#define IH264E_MC_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+* For every partition of the mb, the source block is taken either from the
+* full pel reference buffer (offset by the integer mv) or, when a half pel bit
+* of the mv is set, from ps_proc->pu1_best_subpel_buf. If the mb has a single
+* partition nothing is copied: the chosen pointer and its stride are returned
+* through the out params. Otherwise the block is copied into the pred buffer
+* through pf_inter_pred_luma_copy. No interpolation filter is applied here.
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+* pseudo prediction buffer (written only when the mb has one partition)
+*
+* @param[out] pi4_pseudo_pred_strd
+* pseudo pred buffer stride (written only when the mb has one partition)
+*
+* @return none
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+ UWORD8 **pu1_pseudo_pred,
+ WORD32 *pi4_pseudo_pred_strd);
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for chroma mb
+*
+* @par Description
+* For every partition, calls pf_inter_pred_chroma with the reference pointer
+* displaced by (mv >> 3) and the fractional offsets dx, dy taken from mv bits 0-2
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @return none
+*
+* @remarks Reads only ps_proc->pu1_ref_buf_chroma as reference; the prediction
+* is written into ps_proc->pu1_pred_mb.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma
+ (
+ process_ctxt_t *ps_proc
+ );
+
+
+#endif // IH264E_MC_H_
diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c
new file mode 100755
index 0000000..9e8d7a3
--- /dev/null
+++ b/encoder/ih264e_me.c
@@ -0,0 +1,1153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_me.c
+ *
+ * @brief
+ * Contains definition of functions for motion estimation
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_init_mv_bits()
+ * - ih264e_skip_analysis_chroma()
+ * - ih264e_skip_analysis_luma()
+ * - ih264e_analyse_skip()
+ * - ih264e_get_search_candidates()
+ * - ih264e_find_skip_motion_vector()
+ * - ih264e_get_mv_predictor()
+ * - ih264e_mv_pred()
+ * - ih264e_mv_pred_me()
+ * - ih264e_init_me()
+ * - ih264e_compute_me()
+ * - ih264e_compute_me_nmb()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264_macros.h"
+#include "ih264e_me.h"
+#include "ime.h"
+#include "ime_distortion_metrics.h"
+#include "ih264_debug.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+#include "ih264e_debug.h"
+#include "ih264e_half_pel.h"
+#include "ime_statistics.h"
+#include "ih264e_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function populates the length of the codewords for motion vectors in the
+* range (-search range, search range) in pixels
+*
+* @param[in,out] ps_me_ctxt
+* Pointer to me ctxt
+*
+* Output goes to ps_me_ctxt->pu1_mv_bits (a member, not a separate parameter):
+* pu1_mv_bits[i] and pu1_mv_bits[-i] receive the codeword length for value i
+*
+* @remarks The length of the code words are derived from signed exponential
+* Golomb codes. pu1_mv_bits is indexed with negative offsets as well.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
+{
+ /* temp var; codesize starts at 3 because the 1 bit code (value 0) is set separately below */
+ WORD32 i, codesize = 3, diff, limit;
+ UWORD32 u4_code_num, u4_range;
+ UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
+
+ /* max srch range */
+ diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
+ /* sub pel */
+ diff <<= 2;
+ /* delta mv */
+ diff <<= 1;
+
+ /* codeNum for positive integer = 2x-1 : Table9-3 (the code uses 2x, which is >= 2x-1) */
+ u4_code_num = (diff << 1);
+
+ /* u4_range is produced by GETRANGE (macro defined elsewhere) for u4_code_num; presumably its bit length - TODO confirm */
+ GETRANGE(u4_range, u4_code_num);
+
+ limit = 2*u4_range - 1;
+
+ /* init mv bits: value 0 is coded with a single bit */
+ ps_me_ctxt->pu1_mv_bits[0] = 1;
+
+ while (codesize < limit)
+ {
+ u4_uev_min = (1 << (codesize >> 1)); /* first and last unsigned values handled for this codesize */
+ u4_uev_max = 2*u4_uev_min - 1;
+
+ u4_sev_min = u4_uev_min >> 1; /* matching range of signed magnitudes */
+ u4_sev_max = u4_uev_max >> 1;
+
+ DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
+
+ for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
+ {
+ ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize; /* same length for +i and -i */
+ }
+
+ codesize += 2; /* code lengths used here are odd: 3, 5, 7, ... */
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determines the valid candidates for which the initial search shall happen.
+* The best of these candidates is used to center the diamond pixel search.
+*
+* @par Description: The candidates are the (0,0) mv, the left, top and
+* top-right (or, failing that, top-left) neighbouring MBs MVs and the skip mv.
+* The left, top and top-right MBs MVs are used because these are the same MVs
+* that are used to form the MV predictor. A neighbour contributes a candidate
+* only when its availability flag in ps_proc->ps_ngbr_avbl is set.
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt. Fields read here: i4_mb_x, ps_ngbr_avbl,
+* ps_skip_mv, ps_pred_mv, s_left_mb_pu_ME, s_top_left_mb_pu_ME and
+* ps_top_row_pu_ME (indexed with i4_mb_x and i4_mb_x + 1)
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context. Fields read here: i4_srch_range_n/s/e/w.
+* Fields written here: as_mv_init_search[], u4_num_candidates and
+* s_mb_part.s_mv_pred
+*
+* @par Candidate order
+* 1. zero mv (always)
+* 2. left mb mv, when u1_mb_a is set
+* 3. top mb mv, when u1_mb_b is set
+* 4. only when u1_mb_b is set: top right mb mv if u1_mb_c is set,
+* otherwise top left mb mv if u1_mb_d is set
+* 5. skip mv (always), obtained from ih264e_find_skip_motion_vector after
+* ih264e_mv_pred_me has been called
+*
+* Neighbour and skip mvs are rounded to full pel units with (mv + 2) >> 2
+* and clipped with CLIP3 against the search range before being stored.
+* The mv predictor is also copied from ps_proc->ps_pred_mv into
+* ps_me_ctxt->s_mb_part.s_mv_pred.
+*
+* @returns none. The list of MVs to be used for priming the full pel search
+* and the number of such MVs are stored in ps_me_ctxt
+*
+* @remarks
+* Assumptions : 1. Assumes Single reference frame
+* 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
+ me_ctxt_t *ps_me_ctxt)
+{
+ /* curr mb indices */
+ WORD32 i4_mb_x = ps_proc->i4_mb_x;
+
+ /* left mb motion vector */
+ mv_t *ps_left_mv;
+
+ /* top mb motion vector */
+ mv_t *ps_top_mv;
+
+ /* top left mb motion vector */
+ mv_t *ps_top_left_mv;
+
+ /* top right mb motion vector */
+ mv_t *ps_top_right_mv;
+
+ /* skip mv */
+ mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+
+ /* mb part info */
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+ /* num of candidate search candidates */
+ UWORD32 u4_num_candidates = 0;
+
+ /* mvs */
+ WORD32 mvx, mvy;
+
+ /* ngbr availability */
+ block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+ /* srch range*/
+ WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+ WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+ WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+ WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+ ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv;
+ ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv;
+ ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv;
+ ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv; /* only dereferenced below when u1_mb_c is set */
+
+ /************************************************************/
+ /* Taking the Zero motion vector as one of the candidates */
+ /************************************************************/
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0;
+
+ u4_num_candidates++;
+
+ /************************************************************/
+ /* Taking the Left MV Predictor as one of the candidates */
+ /************************************************************/
+ if (ps_ngbr_avbl->u1_mb_a)
+ {
+ mvx = (ps_left_mv->i2_mvx + 2) >> 2; /* round to full pel units */
+ mvy = (ps_left_mv->i2_mvy + 2) >> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx); /* keep the candidate inside the search range */
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+ u4_num_candidates ++;
+ }
+ /*else
+ {
+ ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvy = 0;
+ }*/
+
+ /************************************************************/
+ /* Taking the Top MV Predictor as one of the candidates */
+ /************************************************************/
+ if (ps_ngbr_avbl->u1_mb_b)
+ {
+ mvx = (ps_top_mv->i2_mvx + 2) >> 2;
+ mvy = (ps_top_mv->i2_mvy + 2) >> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+ u4_num_candidates ++;
+
+ /************************************************************/
+ /* Taking the TopRt MV Predictor as one of the candidates */
+ /************************************************************/
+ if (ps_ngbr_avbl->u1_mb_c)
+ {
+ mvx = (ps_top_right_mv->i2_mvx + 2) >> 2;
+ mvy = (ps_top_right_mv->i2_mvy + 2)>> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+ u4_num_candidates ++;
+ }
+ /************************************************************/
+ /* Taking the TopLt MV Predictor as one of the candidates */
+ /************************************************************/
+ else if (ps_ngbr_avbl->u1_mb_d)
+ {
+ mvx = (ps_top_left_mv->i2_mvx + 2) >> 2;
+ mvy = (ps_top_left_mv->i2_mvy + 2) >> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+ u4_num_candidates ++;
+ }
+ /*else
+ {
+ ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
+ }*/
+ }
+ /*else
+ {
+ ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvy = 0;
+
+ ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
+ ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
+ }*/
+
+
+ /********************************************************************/
+ /* MV Prediction */
+ /********************************************************************/
+ ih264e_mv_pred_me(ps_proc);
+
+ ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
+ ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+
+ /************************************************************/
+ /* Get the skip motion vector */
+ /************************************************************/
+ ih264e_find_skip_motion_vector(ps_proc, 1); /* 1: use the ME copies of the neighbour pu info */
+
+ /************************************************************/
+ /* Taking the Skip motion vector as one of the candidates */
+ /************************************************************/
+ mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
+ mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
+
+ mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+ mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+ ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+ u4_num_candidates++;
+
+ ASSERT(u4_num_candidates <= 5); /* at most: zero + left + top + (top right or top left) + skip */
+
+ ps_me_ctxt->u4_num_candidates = u4_num_candidates;
+}
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+* The function gives the skip motion vector
+*
+* @param[in,out] ps_proc
+* process context; the result is written to *ps_proc->ps_skip_mv
+*
+* @param[in] u4_for_me
+* 1: use the ME copies of the left / top pu info (s_left_mb_pu_ME,
+* ps_top_row_pu_ME); any other value: use s_left_mb_pu / ps_top_row_pu
+*
+* Reads ps_proc->ps_pred_mv, which is not computed by this function
+*
+* @returns none (void). The skip mv is stored in *ps_proc->ps_skip_mv.
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+* specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
+{
+ /* left mb motion vector */
+ enc_pu_t *ps_left_mb_pu ;
+
+ /* top mb motion vector */
+ enc_pu_t *ps_top_mb_pu ;
+
+ /* skip mv */
+ mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+
+ if (u4_for_me == 1)
+ {
+ ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+ ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+ }
+ else
+ {
+ ps_left_mb_pu = &ps_proc->s_left_mb_pu ;
+ ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
+ }
+
+ if ( (!ps_proc->ps_ngbr_avbl->u1_mb_a) || /* left mb not available */
+ (!ps_proc->ps_ngbr_avbl->u1_mb_b) || /* top mb not available */
+ ((ps_left_mb_pu->i1_l0_ref_idx | ps_left_mb_pu->s_l0_mv.i2_mvx | ps_left_mb_pu->s_l0_mv.i2_mvy) == 0) || /* left: ref idx 0 and zero mv */
+ ((ps_top_mb_pu->i1_l0_ref_idx | ps_top_mb_pu->s_l0_mv.i2_mvx | ps_top_mb_pu->s_l0_mv.i2_mvy) == 0) ) /* top: ref idx 0 and zero mv */
+ {
+ ps_skip_mv->i2_mvx = 0;
+ ps_skip_mv->i2_mvy = 0;
+ }
+ else
+ {
+ ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx; /* otherwise the skip mv equals the mv predictor */
+ ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+* The routine calculates the motion vector predictor for a given block,
+* given the candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+* pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+* pointer to top & top right mb motion vector info ([0] = top, [1] = top right)
+*
+* @param[out] ps_pred_mv
+* receives the predicted motion vector for the current block
+*
+* @returns none (void). The x & y components of the MV predictor go to *ps_pred_mv.
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+* specification.
+* Assumptions : 1. Assumes Single reference frame
+* 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
+ enc_pu_t *ps_top_row_pu,
+ mv_t *ps_pred_mv)
+{
+ /* curr frame ref idx */
+ /* we are assuming that we are operating on single reference frame
+ * hence the ref idx is insignificant during mv prediction.
+ */
+ WORD32 u4_ref_idx = 0;
+
+ /* pred_algo defaults to 3 (median); a, b, c are 0 when the neighbour's ref idx matches, -1 otherwise */
+ WORD32 pred_algo = 3, a, b, c;
+
+ /* If only one of the candidate blocks has a reference frame equal to
+ * the current block then use the same block as the final predictor */
+ a = (ps_left_mb_pu->i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+ b = (ps_top_row_pu[0].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+ c = (ps_top_row_pu[1].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+
+ if (a == 0 && b == -1 && c == -1)
+ pred_algo = 0; /* LEFT */
+ else if (a == -1 && b == 0 && c == -1)
+ pred_algo = 1; /* TOP */
+ else if (a == -1 && b == -1 && c == 0)
+ pred_algo = 2; /* TOP RIGHT */
+
+ switch (pred_algo)
+ {
+ case 0:
+ /* left */
+ ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx;
+ ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy;
+ break;
+ case 1:
+ /* top */
+ ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx;
+ ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy;
+ break;
+ case 2:
+ /* top right */
+ ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx;
+ ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy;
+ break;
+ case 3:
+ /* median of left, top and top right, taken separately for x and y */
+ MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx,
+ ps_top_row_pu[0].s_l0_mv.i2_mvx,
+ ps_top_row_pu[1].s_l0_mv.i2_mvx,
+ ps_pred_mv->i2_mvx);
+ MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy,
+ ps_top_row_pu[0].s_l0_mv.i2_mvy,
+ ps_top_row_pu[1].s_l0_mv.i2_mvy,
+ ps_pred_mv->i2_mvy);
+
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description: Marks unavailable or intra neighbours (ref idx -1, zero mv),
+* then calls ih264e_get_mv_predictor; the result goes to *ps_proc->ps_pred_mv
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks Overwrites ps_proc->s_left_mb_pu and the ps_proc->ps_top_row_pu
+* entries at i4_mb_x and i4_mb_x + 1 in place. The intra / inter decision of
+* the neighbours should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred(process_ctxt_t *ps_proc)
+{
+
+ /* left mb motion vector */
+ enc_pu_t *ps_left_mb_pu ;
+
+ /* top left mb motion vector */
+ enc_pu_t *ps_top_left_mb_pu ;
+
+ /* top row motion vector info */
+ enc_pu_t *ps_top_row_pu;
+
+ /* predicted motion vector */
+ mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
+
+ /* zero mv */
+ mv_t zero_mv = {0, 0};
+
+ /* mb neighbor availability */
+ block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+ /* mb syntax elements of neighbors */
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+ mb_info_t *ps_top_left_syn;
+ UWORD32 u4_left_is_intra;
+
+ ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
+ u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+ ps_left_mb_pu = &ps_proc->s_left_mb_pu;
+ ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
+ ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x); /* [0] = top mb, [1] = top right mb */
+
+ /* Before performing mv prediction prepare the ngbr information and
+ * reset motion vectors basing on their availability */
+ if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) )
+ {
+ /* left mv */
+ ps_left_mb_pu->i1_l0_ref_idx = -1;
+ ps_left_mb_pu->s_l0_mv = zero_mv;
+ }
+ if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra)
+ {
+ /* top mv */
+ ps_top_row_pu[0].i1_l0_ref_idx = -1;
+ ps_top_row_pu[0].s_l0_mv = zero_mv;
+ }
+ if (!ps_ngbr_avbl->u1_mb_c)
+ {
+ /* top right mv - When top right partition is not available for
+ * prediction if top left is available use it for prediction else
+ * set the mv information to -1 and (0, 0)
+ * */
+ if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra)
+ {
+ ps_top_row_pu[1].i1_l0_ref_idx = -1;
+ ps_top_row_pu[1].s_l0_mv = zero_mv;
+ }
+ else
+ {
+ ps_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx; /* top left stands in for top right */
+ ps_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv;
+ }
+ }
+ else if (ps_top_syn[1].u2_is_intra) /* top right is available but intra */
+ {
+ ps_top_row_pu[1].i1_l0_ref_idx = -1;
+ ps_top_row_pu[1].s_l0_mv = zero_mv;
+ }
+
+ ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, ps_pred_mv);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Approximates the predicted motion vector used during motion estimation
+*
+* @par Description:
+* Works on the ME-stage neighbor data held in the process context
+* (s_left_mb_pu_ME, s_top_left_mb_pu_ME, ps_top_row_pu_ME). A neighbor that is
+* flagged as unavailable gets ref idx -1 and a zero mv; an unavailable top
+* right neighbor is substituted by the top left one when that one is
+* available. The prepared neighbors are then handed to
+* ih264e_get_mv_predictor() together with ps_proc->ps_pred_mv.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks
+* Motion estimation happens at nmb level. For cost calculations, mv is
+* approximated using this function. The top / top right entries are edited on
+* a local copy, whereas the left entry is edited in place inside ps_proc.
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
+{
+    /* availability flags of the neighboring mbs */
+    block_neighbors_t *ps_avail = ps_proc->ps_ngbr_avbl;
+
+    /* left neighbor (modified in place) */
+    enc_pu_t *ps_left = &ps_proc->s_left_mb_pu_ME;
+
+    /* top and top right neighbors of the current mb in the top row */
+    enc_pu_t *ps_top_src = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+    /* private working copy of top ([0]) and top right ([1]) */
+    enc_pu_t as_top[2];
+
+    /* value used for every unavailable neighbor */
+    mv_t s_null_mv = {0, 0};
+
+    as_top[0] = ps_top_src[0];
+    as_top[1] = ps_top_src[1];
+
+    /* left */
+    if (!ps_avail->u1_mb_a)
+    {
+        ps_left->s_l0_mv = s_null_mv;
+        ps_left->i1_l0_ref_idx = -1;
+    }
+
+    /* top */
+    if (!ps_avail->u1_mb_b)
+    {
+        as_top[0].s_l0_mv = s_null_mv;
+        as_top[0].i1_l0_ref_idx = -1;
+    }
+
+    /* top right, falling back to top left when possible */
+    if (!ps_avail->u1_mb_c)
+    {
+        if (ps_avail->u1_mb_d)
+        {
+            enc_pu_t *ps_top_left = &ps_proc->s_top_left_mb_pu_ME;
+
+            as_top[1].i1_l0_ref_idx = ps_top_left->i1_l0_ref_idx;
+            as_top[1].s_l0_mv = ps_top_left->s_l0_mv;
+        }
+        else
+        {
+            as_top[1].s_l0_mv = s_null_mv;
+            as_top[1].i1_l0_ref_idx = -1;
+        }
+    }
+
+    ih264e_get_mv_predictor(ps_left, as_top, ps_proc->ps_pred_mv);
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+* Refreshes the per-mb fields of the me context embedded in the process
+* context: the luma source and reference pointers are taken over from ps_proc
+* and the motion lambda is looked up in gu1_qp0 using the mb qp already stored
+* in the me context.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc)
+{
+    me_ctxt_t *ps_me = &ps_proc->s_me_ctxt;
+
+    /* lagrange multiplier for the motion cost, indexed by mb qp */
+    ps_me->u4_lambda_motion = gu1_qp0[ps_me->u1_mb_qp];
+
+    /* luma reference and source pointers of the current mb */
+    ps_me->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma;
+    ps_me->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The mb that offers least cost is chosen as predicted mb and the
+* displacement of the predicted mb from index location of the current mb is
+* signaled as mv. The list of the mb's that are chosen in the reference frame
+* are dependent on the speed of the ME configured.
+*
+* Sequence followed by the code below:
+* - the search window is derived from the mb position, the picture size in mbs
+* and DEFAULT_MAX_SRCH_RANGE_X / DEFAULT_MAX_SRCH_RANGE_Y (halved), and is
+* shrunk by one on every side when half pel search is enabled
+* - seed candidates are fetched with ih264e_get_search_candidates()
+* - the skip cost is computed with ime_compute_skip_cost()
+* - unless u4_min_sad_reached got set, the initial candidates are evaluated,
+* a full pel search is run and, when u4_enable_hpel is set, the half pel
+* planes are generated and a sub pel search is run
+* - the cheaper of the skip result and the search result is kept
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none. Results are stored through ps_proc: cost / distortion / type
+* of ps_cur_mb (only when the new cost is lower), the single 16x16 entry
+* ps_pu, the partition counts, the best sub pel buffer and the min sad fields.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me(process_ctxt_t *ps_proc)
+{
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+// /* mb syntax elements of neighbors */
+// mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+// mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+
+ /* mb part info */
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt skip_mb_part_info;
+
+ /* temp var */
+ WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad;
+
+ /* Motion vectors in full-pel units */
+ WORD16 mv_x, mv_y;
+
+ /* recon stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* source buffer for half pel generation functions */
+ UWORD8 *pu1_hpel_src;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* SAD thresholds */
+ ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
+
+ /* Best half pel buffer (destination of the copy done at the end) */
+ UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf;
+ UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd;
+
+ /* During evaluation for motion vectors do not search through padded regions */
+ /* Obtain number of rows and columns that are effective for computing for me evaluation */
+ rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+ rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+ columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+ columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+ /* init srch range */
+ /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+ * on all sides.
+ */
+// ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]);
+// ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]);
+// ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]);
+// ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]);
+
+ ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+ ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+ /* this is to facilitate fast sub pel computation with minimal loads:
+ * the window is shrunk by one on each of the four sides */
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ ps_me_ctxt->i4_srch_range_w += 1;
+ ps_me_ctxt->i4_srch_range_e -= 1;
+ ps_me_ctxt->i4_srch_range_n += 1;
+ ps_me_ctxt->i4_srch_range_s -= 1;
+ }
+
+ /*Initialize the min sad option*/
+ ps_me_ctxt->u4_min_sad_reached = 0; /*Not yet found min sad*/
+ ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+ /************************************************************/
+ /* Get the seed motion vector candidates */
+ /************************************************************/
+ ih264e_get_search_candidates(ps_proc, ps_me_ctxt);
+
+ /************************************************************/
+ /* Init the MB part ctxt structure */
+ /************************************************************/
+ ps_mb_part->s_mv_curr.i2_mvx = 0;
+ ps_mb_part->s_mv_curr.i2_mvy = 0;
+ ps_mb_part->i4_mb_cost = INT_MAX;
+ ps_mb_part->i4_mb_distortion = INT_MAX;
+
+ /* With NMB changes this logic will not work as we cannot exit NME in between*/
+ /********************************************************************/
+ /* Analyse skip */
+ /********************************************************************/
+// if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0
+// && u4_frame_level_me == 0)
+// {
+// if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) ||
+// (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) ||
+// (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) )
+// {
+// if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) )
+// {
+// return;
+// }
+// }
+// }
+
+ /********************************************************************/
+ /* compute skip cost */
+ /********************************************************************/
+ /* See if we need to use modified sad */
+ u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1);
+
+ /* init the cost of skip MB; only the cost field is preset here, the other
+ * fields read further below are presumably filled in by
+ * ime_compute_skip_cost() -- TODO confirm */
+ skip_mb_part_info.i4_mb_cost = INT_MAX;
+ ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad);
+
+
+ if (ps_me_ctxt->u4_min_sad_reached == 0)
+ {
+ /************************************************************/
+ /* Evaluate search candidates for initial mv pt. */
+ /************************************************************/
+ ime_evaluate_init_srchposn_16x16(ps_me_ctxt);
+
+ /********************************************************************/
+ /* full pel motion estimation */
+ /********************************************************************/
+ ime_full_pel_motion_estimation_16x16(ps_me_ctxt);
+
+ DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
+ (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+
+ DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
+ /********************************************************************/
+ /* sub pel motion estimation */
+ /********************************************************************/
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ /* motion vectors in terms of full pel values (stored mv >> 2) */
+ mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
+ mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
+
+ /* moving src pointer to the converged motion vector location*/
+ pu1_hpel_src = ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd);
+
+ ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x;
+ ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y;
+ ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy;
+ ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD;
+
+ /* half pel search is done for both sides of full pel,
+ * hence half_x of width x height = 17x16 is created
+ * starting from left half_x of converged full pel */
+ pu1_hpel_src -= 1;
+
+ /* computing half_x */
+ ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
+ ps_proc->pu1_half_x,
+ i4_rec_strd,
+ ps_me_ctxt->u4_hp_buf_strd);
+
+ /*
+ * Halfpel search is done for both sides of full pel,
+ * hence half_y of width x height = 16x17 is created
+ * starting from top half_y of converged full pel
+ * for half_xy top_left is required
+ * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
+ */
+
+ pu1_hpel_src -= i4_rec_strd;
+
+ /* computing half_y , and half_xy*/
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+ pu1_hpel_src, ps_proc->pu1_half_y,
+ ps_proc->pu1_half_xy, i4_rec_strd,
+ ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3,
+ ps_me_ctxt->u4_hp_buf_strd);
+
+ ime_sub_pel_motion_estimation_16x16(ps_me_ctxt);
+ }
+ }
+
+ {
+
+ /* if skip gives a better cost than other search, copy the cost accordingly*/
+ if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost)
+ {
+ ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost;
+ ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion;
+ ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx;
+ ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy;
+ }
+ else
+ {
+ /*
+ * If the current MB has a sub pel component,
+ * we need to copy that to the best subpel buffer.
+ * NOTE(review): pu1_best_hpel_buf and u4_hp_buf_strd are not
+ * written by this function when the search above is bypassed
+ * (u4_min_sad_reached != 0); confirm they cannot be stale here.
+ */
+ if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf)
+ {
+ ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf,
+ pu1_best_subpel_buf,
+ ps_me_ctxt->u4_hp_buf_strd,
+ u4_bst_spel_strd, MB_SIZE,
+ MB_SIZE, NULL, 0);
+ }
+ }
+ }
+
+ DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0);
+
+ /* update the type of the mb if necessary, i.e. only when the inter cost
+ * beats the cost already stored for the current mb */
+ if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+ {
+ /* mb cost */
+ ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost;
+
+ /* mb distortion */
+ ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion;
+
+ /* mb type */
+ ps_proc->ps_cur_mb->u4_mb_type = P16x16;
+ }
+
+ /* number of partitions (always a single one) */
+ ps_proc->u4_num_sub_partitions = 1;
+ *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+ /* position in-terms of PU */
+ ps_proc->ps_pu->b4_pos_x = 0;
+ ps_proc->ps_pu->b4_pos_y = 0;
+
+ /* PU size (presumably coded as size in 4-pel units minus one, so that
+ * 3 stands for 16 pels -- TODO confirm against enc_pu_t) */
+ ps_proc->ps_pu->b4_wd = 3;
+ ps_proc->ps_pu->b4_ht = 3;
+
+ /* ref idx */
+ ps_proc->ps_pu->i1_l0_ref_idx = 0;
+
+ /* motion vector L0 */
+ ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx;
+ ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy;
+
+ /* Update min sad conditions */
+ if (ps_me_ctxt->u4_min_sad_reached == 1)
+ {
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+ ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Initializes input and output pointers required by the function ih264e_compute_me
+* and calls the function ih264e_compute_me in a loop to process NMBs.
+* For every mb of the group the per-mb pointers of ps_proc (skip mv, neighbor
+* availability, pred mv, current mb info, best sub pel buffer) are redirected
+* to the matching ps_nmb_info entry, the mb is estimated, the ME-stage left /
+* top left neighbor data is updated and the mb is flagged as done in the ME
+* map. For rows other than the first one, the loop first spins (yielding the
+* thread) until the required entry of the row above is flagged in the ME map.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+* Number of consecutive mbs, starting at ps_proc->i4_mb_x, to be processed
+*
+* @returns none
+*
+* @remarks
+* ps_proc->i4_mb_x, ps_pu, pu4_mb_pu_cnt and the src / rec / ref buffer
+* pointers are advanced inside the loop and restored to their entry values
+* before returning.
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
+{
+ /* pic pu (saved so that ps_proc->ps_pu can be restored at the end) */
+ enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
+
+ /* ME map row of the current mb row */
+ UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
+
+ /* temp var */
+ UWORD32 u4_i;
+
+ ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+ ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
+
+ for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
+ {
+ /* Wait for ME map */
+ if (ps_proc->i4_mb_y > 0)
+ {
+ /* Wait for top right ME to be done.
+ * NOTE(review): ps_proc->i4_mb_x is itself incremented once per
+ * iteration further below, so the column polled here is
+ * (first mb_x + 2 * u4_i + 1) clamped to the last column, which can
+ * lie to the right of the immediate top right mb -- confirm that
+ * this is intended */
+ UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
+
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf;
+ WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
+
+ idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
+ pu1_buf = pu1_me_map_tp_rw + idx;
+ if(*pu1_buf)
+ break;
+ ithread_yield();
+ }
+ }
+
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv);
+ ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv);
+
+ ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
+
+ ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+ ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
+ ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
+
+ /* Set the best subpel buf to the correct mb so that the buffer can be copied */
+ ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
+ ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
+
+ /* Set the min sad conditions.
+ * NOTE(review): this overwrites the u4_min_sad value stored a few
+ * lines above from ps_proc->u4_min_sad with the codec level value, and
+ * repeats the reset of u4_min_sad_reached */
+ ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+ /* Derive neighbor availability for the current macroblock */
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+
+ /* init me */
+ ih264e_init_me(ps_proc);
+
+ ih264e_compute_me(ps_proc);
+
+ /* update top and left structs: the current top entries become the
+ * top left of the next mb, the pu just produced becomes its left */
+ {
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+ mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+ enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+ enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
+ enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+ *ps_top_left_syn = *ps_top_syn;
+
+ *ps_top_left_mb_pu = *ps_top_mv;
+ *ps_left_mb_pu = *ps_proc->ps_pu;
+ }
+
+ ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+ /* Copy the min sad reached info.
+ * NOTE(review): ps_cur_mb was pointed at ps_nmb_info[u4_i] above, so
+ * both statements assign a field to itself */
+ ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
+ ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+ /*
+ * To make sure that the MV map is properly sync to the
+ * cache we need to do a DDB: DATA_SYNC() is issued before the
+ * done flag of this mb is written to the ME map
+ */
+ {
+ DATA_SYNC();
+
+ pu1_me_map[ps_proc->i4_mb_x] = 1;
+ }
+ ps_proc->i4_mb_x++;
+
+ ps_proc->s_me_ctxt.u4_left_is_intra = 0;
+ ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
+
+ /* update buffers pointers */
+ ps_proc->pu1_src_buf_luma += MB_SIZE;
+ ps_proc->pu1_rec_buf_luma += MB_SIZE;
+ ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma += MB_SIZE;
+ ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+ ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+ ps_proc->pu4_mb_pu_cnt += 1;
+ }
+
+
+ ps_proc->ps_pu = ps_pu_begin;
+ ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
+
+ /* update buffers pointers (rewind to the first mb of the group) */
+ ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count;
+
+ ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
+}
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
new file mode 100755
index 0000000..c4834a1
--- /dev/null
+++ b/encoder/ih264e_me.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_me.h
+ *
+ * @brief
+ * Contains the MEDIAN macro and the declarations of the motion estimation
+ * and motion vector prediction functions of the H264 encoder
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_ME_H_
+#define IH264E_ME_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief compute median of 3 elements (a, b, c) and store the output
+ * in to result. This is used for mv prediction
+******************************************************************************
+ */
+
+#define MEDIAN(a, b, c, result) if (a > b){\
+ if (b > c)\
+ result = b;\
+ else {\
+ if (a > c)\
+ result = c;\
+ else \
+ result = a;\
+ }\
+ }\
+ else {\
+ if (c > b)\
+ result = b;\
+ else {\
+ if (c > a)\
+ result = c;\
+ else \
+ result = a;\
+ }\
+ }
+
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function populates the length of the codewords for motion vectors in the
+* range (-search range, search range) in pixels
+*
+* @param[in] ps_me
+* Pointer to me ctxt
+*
+* @param[out] pu1_mv_bits
+* length of the codeword for all mv's
+*
+* @remarks The length of the code words are derived from signed exponential
+* goloumb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits
+ (
+ me_ctxt_t *ps_me
+ );
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+* The function gives the skip motion vector
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] u4_for_me
+* Selector flag; presumably non-zero when the call is made from the motion
+* estimation stage (to be confirmed against the definition)
+*
+* @returns none
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+* specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector
+ (
+ process_ctxt_t *ps_proc,
+ UWORD32 u4_for_me
+ );
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+* The routine calculates the motion vector predictor for a given block,
+* given the candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+* pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+* pointer to top & top right mb motion vector info
+*
+* @param[out] ps_pred_mv
+* pointer to candidate predictors for the current block
+*
+* @returns none. The x & y components of the MV predictor are written to
+* ps_pred_mv.
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+* specification.
+* Assumptions : 1. Assumes Single reference frame
+* 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor
+ (
+ enc_pu_t *ps_left_mb_pu,
+ enc_pu_t *ps_top_row_pu,
+ mv_t *ps_pred_mv
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector for the current mb
+*
+* @par Description:
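+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The displacement of the best candidate is recorded as the motion
+* vector of the mb, together with its cost and distortion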
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+* Before dispatching the current job to me thread, the me context associated
+* with the job is initialized.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Initializes input and output pointers required by the function ih264e_compute_me
+* and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb
+ (
+ process_ctxt_t *ps_proc,
+ UWORD32 u4_nmb_count
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+* This function will update the MB availability since intra inter decision
+* should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function approximates Pred. MV
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+* Motion estimation happens at nmb level. For cost calculations, mv is
+* approximated using this function
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me
+ (
+ process_ctxt_t *ps_proc
+ );
+
+#endif /* IH264E_ME_H_ */
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
new file mode 100755
index 0000000..bc0e873
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.c
@@ -0,0 +1,240 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_modify_frm_rate.c
+*
+* @brief
+* Functions used to modify frame rate
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_pd_frm_rate_get_init_free_memtab()
+* - ih264e_init_pd_frm_rate()
+* - ih264e_update_pd_frm_rate()
+* - ih264e_get_pd_avg_frm_rate()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+* Handles the single memtab needed by the pull down frame rate module. For
+* every function type other than GET_NUM_MEMTAB the memtab is described with
+* fill_memtab() (sizeof(pd_frm_rate_t), ALIGN_128_BYTE, PERSISTENT, DDR) and
+* then passed to use_or_fill_base() together with the handle.
+*
+* @param[in,out] pps_pd_frm_rate
+* pull down frame rate context. For GET_NUM_MEMTAB and FILL_MEMTAB it is
+* first pointed at a function-static placeholder, because no state memory
+* exists yet at that point
+*
+* @param[in] ps_memtab
+* Handle to memtab (not accessed for GET_NUM_MEMTAB)
+*
+* @param[in] e_func_type
+* Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used (always 1)
+*
+* @remarks None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    /* placeholder state used while the real state memory is not allocated */
+    static pd_frm_rate_t s_dummy_state;
+
+    WORD32 i4_num_memtabs = 0;
+
+    if (GET_NUM_MEMTAB == e_func_type)
+    {
+        /* only counting: hand out the placeholder, leave the memtab alone */
+        *pps_pd_frm_rate = &s_dummy_state;
+    }
+    else
+    {
+        /* Hack for alloc, during which we don't have any state memory.
+         * Dereferencing can cause issues */
+        if (FILL_MEMTAB == e_func_type)
+        {
+            *pps_pd_frm_rate = &s_dummy_state;
+        }
+
+        /* memtab for the pull down frame rate state structure */
+        fill_memtab(ps_memtab + i4_num_memtabs, sizeof(pd_frm_rate_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_memtab, (void **) pps_pd_frm_rate, e_func_type);
+    }
+
+    i4_num_memtabs += 1;
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+* frame rate
+*
+* @par Description
+* Stores the input frame rate, presets the first (u4_input_frm_rate / 1000)
+* entries of the frame rate history with it and resets the write position and
+* the count of recorded frames.
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+* Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+* The history buffer holds MAX_NUM_FRAME entries. The number of preset entries
+* is clamped to that size so that frame rates above MAX_NUM_FRAME fps cannot
+* write past the end of the buffer.
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate)
+{
+    UWORD32 u4_i;
+
+    /* number of history entries covering one second of frames */
+    UWORD32 u4_num_frms = u4_input_frm_rate / 1000;
+
+    /* never run past the end of u4_cur_frm_rate[] */
+    if (u4_num_frms > MAX_NUM_FRAME)
+    {
+        u4_num_frms = MAX_NUM_FRAME;
+    }
+
+    ps_pd_frm_rate->u4_input_frm_rate = u4_input_frm_rate;
+
+    for (u4_i = 0; u4_i < u4_num_frms; u4_i++)
+    {
+        ps_pd_frm_rate->u4_cur_frm_rate[u4_i] = u4_input_frm_rate;
+    }
+
+    ps_pd_frm_rate->u4_frm_num = 0;
+
+    ps_pd_frm_rate->u4_tot_frm_encoded = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+* For each frame a run time frame rate value is sent based on whether a frame
+* is skipped or not. If it is skipped for pull down then the current frame
+* rate for the pull down period is signaled as 4/5th of the original frame
+* rate. Thus when this is averaged the frame rate gradually switches from the
+* input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+* down patterns are detected
+*
+* The value is stored in a circular history of (input frame rate / 1000)
+* entries; the count of recorded frames saturates at that window length.
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+* Frame rate to be recorded for the current frame
+*
+* @returns none
+*
+* @remarks
+* The window length is clamped to the range [1, MAX_NUM_FRAME]: the upper
+* bound keeps the write inside u4_cur_frm_rate[], the lower bound makes sure
+* that at least one sample is counted for input frame rates below 1 fps.
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate)
+{
+    /* length of the circular history, in frames */
+    UWORD32 u4_window = ps_pd_frm_rate->u4_input_frm_rate / 1000;
+
+    if (u4_window > MAX_NUM_FRAME)
+    {
+        u4_window = MAX_NUM_FRAME;
+    }
+    else if (0 == u4_window)
+    {
+        u4_window = 1;
+    }
+
+    /* a write position outside the window must not be used as an index */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window)
+    {
+        ps_pd_frm_rate->u4_frm_num = 0;
+    }
+
+    ps_pd_frm_rate->u4_cur_frm_rate[ps_pd_frm_rate->u4_frm_num] = u4_cur_frm_rate;
+
+    ps_pd_frm_rate->u4_frm_num++;
+
+    /* Count the frame until the window is full */
+    if (ps_pd_frm_rate->u4_tot_frm_encoded < u4_window)
+    {
+        ps_pd_frm_rate->u4_tot_frm_encoded++;
+    }
+
+    /* Wrap the write position around at the end of the window */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window)
+    {
+        ps_pd_frm_rate->u4_frm_num = 0;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+* Averages the last N frame in period(1 sec) and then gives that
+* as the current frames frame rate. Thus this averages out the sudden
+* variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @returns average of the first u4_tot_frm_encoded history entries. When no
+* frame has been recorded yet the input frame rate is returned instead, since
+* there is nothing to average.
+*
+* @remarks
+* The number of entries read is limited to MAX_NUM_FRAME, the size of the
+* history buffer.
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_t *ps_pd_frm_rate)
+{
+    UWORD32 u4_i;
+    UWORD32 u4_sum = 0;
+    UWORD32 u4_num_frms = ps_pd_frm_rate->u4_tot_frm_encoded;
+
+    /* nothing recorded so far: avoid the division by zero */
+    if (0 == u4_num_frms)
+    {
+        return ps_pd_frm_rate->u4_input_frm_rate;
+    }
+
+    /* never read past the end of u4_cur_frm_rate[] */
+    if (u4_num_frms > MAX_NUM_FRAME)
+    {
+        u4_num_frms = MAX_NUM_FRAME;
+    }
+
+    for (u4_i = 0; u4_i < u4_num_frms; u4_i++)
+    {
+        u4_sum += ps_pd_frm_rate->u4_cur_frm_rate[u4_i];
+    }
+
+    return u4_sum / u4_num_frms;
+}
diff --git a/encoder/ih264e_modify_frm_rate.h b/encoder/ih264e_modify_frm_rate.h
new file mode 100755
index 0000000..c301e2c
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_modify_frm_rate.h
+*
+* @brief
+* Functions declarations used to modify frame rate
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MODIFY_FRM_RATE_H_
+#define IH264E_MODIFY_FRM_RATE_H_
+
+/*****************************************************************************/
+/* Constant Definitions */
+/*****************************************************************************/
+
+/* Number of entries in the per frame rate history (pd_frm_rate_t::u4_cur_frm_rate) */
+#define MAX_NUM_FRAME 120
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+/*
+ * Pull down frame rate context: a history of per frame rates together with
+ * the write position and the number of valid entries in that history.
+ */
+typedef struct pd_frm_rate_t
+{
+ /*
+ * The input frame rate set in the encoder (per 1000 sec).
+ * The update routine uses u4_input_frm_rate / 1000 as the number of
+ * history entries in use.
+ */
+ UWORD32 u4_input_frm_rate;
+
+ /*
+ * Frame rate of current frame due to pull down.
+ * History buffer, written at index u4_frm_num on every update.
+ */
+ UWORD32 u4_cur_frm_rate[MAX_NUM_FRAME];
+
+ /*
+ * current frame num in the above buffer (next write index);
+ * reset to zero once it reaches u4_input_frm_rate / 1000
+ */
+ UWORD32 u4_frm_num;
+
+ /*
+ * Total number of frames encoded, i.e. number of valid history entries.
+ * Incremented on every update until it reaches u4_input_frm_rate / 1000
+ * and stays there; used as the divisor when averaging.
+ */
+ UWORD32 u4_tot_frm_encoded;
+
+}pd_frm_rate_t;
+
+/* Handle (pointer) to a pull down frame rate context */
+typedef struct pd_frm_rate_t *pd_frm_rate_handle;
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+* Function to init pull down frame rate memtab
+*
+* @param[in] pps_pd_frm_rate
+* pull down frame rate context
+*
+* @param[in] ps_memtab
+* Handle to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used
+*
+* @remarks None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+* frame rate
+*
+* @par Description
+* Initializes the pull down frame rate state structure based on input frame rate
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+* Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+ UWORD32 u4_input_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+* For each frame a run time frame rate value is sent based on whether a frame
+* is skipped or not. If it is skipped for pull down then the current frame
+* rate for the pull down period is signaled as 4/5th of the original frame
+* rate. Thus when this is averaged the frame rate gradually switches from the
+* input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+* down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+* Run time frame rate signaled for the current frame (see description above)
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+ UWORD32 u4_cur_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+* Averages the last N frame in period(1 sec) and then gives that
+* as the current frames frame rate. Thus this averages out the sudden
+* variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @returns average frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_handle ps_pd_frm_rate);
+
+#endif /* IH264E_MODIFY_FRM_RATE_H_ */
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
new file mode 100755
index 0000000..9a468e9
--- /dev/null
+++ b/encoder/ih264e_process.c
@@ -0,0 +1,2369 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_process.c
+*
+* @brief
+* Contains functions for codec thread
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+* - ih264e_generate_sps_pps()
+* - ih264e_init_entropy_ctxt()
+* - ih264e_entropy()
+* - ih264e_pack_header_data()
+* - ih264e_update_proc_ctxt()
+* - ih264e_init_proc_ctxt()
+* - ih264e_pad_recon_buffer()
+* - ih264e_dblk_pad_hpel_processing_n_mbs()
+* - ih264e_process()
+* - ih264e_set_rc_pic_params()
+* - ih264e_update_rc_post_enc()
+* - ih264e_process_thread()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_platform_macros.h"
+#include "ih264_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_process.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_globals.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_deblk.h"
+#include "ih264e_me.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "ih264e_utils.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_padding.h"
+#include "ime_statistics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates the sps and pps headers on request
+*
+* @par Description
+* Called when the encoder is set in header generation mode. The bitstream of
+* the entropy context selected by the parity of the encode api call count is
+* initialized on the matching output buffer, the sps and pps are populated and
+* written to it, and the number of bytes produced is stored in the output
+* buffer before control returns to the caller.
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @return error code accumulated (OR-ed) over sps and pps generation
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
+{
+    /* ping-pong selector: parity of the encode api call count */
+    WORD32 i4_set = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* output buffer of the selected set */
+    out_buf_t *ps_dst = &ps_codec->as_out_buf[i4_set];
+
+    /* entropy context of the first process context of the selected set */
+    entropy_ctxt_t *ps_ent = &ps_codec->as_process[i4_set * MAX_PROCESS_THREADS].s_entropy;
+
+    /* bitstream the headers are written to */
+    bitstrm_t *ps_strm = ps_ent->ps_bitstrm;
+
+    /* parameter sets, resolved once the ids are wrapped */
+    sps_t *ps_seq_params;
+    pps_t *ps_pic_params;
+
+    /* attach the bitstream to the output buffer */
+    ih264e_bitstrm_init(ps_strm, ps_dst->s_bits_buf.pv_buf, ps_dst->s_bits_buf.u4_bufsize);
+
+    /* wrap the ids into range; they are not advanced here */
+    ps_codec->i4_sps_id %= MAX_SPS_CNT;
+    ps_codec->i4_pps_id %= MAX_PPS_CNT;
+
+    /* fill in the sps */
+    ps_seq_params = &ps_codec->ps_sps_base[ps_codec->i4_sps_id];
+    ih264e_populate_sps(ps_codec, ps_seq_params);
+
+    /* fill in the pps */
+    ps_pic_params = &ps_codec->ps_pps_base[ps_codec->i4_pps_id];
+    ih264e_populate_pps(ps_codec, ps_pic_params);
+
+    /* write both headers, collecting the status of each step */
+    ps_ent->i4_error_code = IH264E_SUCCESS;
+    ps_ent->i4_error_code |= ih264e_generate_sps(ps_strm, ps_seq_params);
+    ps_ent->i4_error_code |= ih264e_generate_pps(ps_strm, ps_pic_params, ps_seq_params);
+
+    /* report the number of bytes generated */
+    ps_dst->s_bits_buf.u4_bytes = ps_strm->u4_strm_buf_offset;
+
+    return ps_ent->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief initialize entropy context.
+*
+* @par Description:
+* Before invoking the call to perform entropy coding, the entropy context
+* associated with the job needs to be initialized. This derives the start mb
+* address, the end mb address and the slice index of the job from the mb
+* position and mb count already stored in the entropy context, raises the
+* start of frame flag when the job starts at mb 0 and, for jobs that start at
+* the beginning of a row, points the packed mb coefficient and mb header
+* pointers at the data of that row.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
+{
+    /* entropy context being prepared */
+    entropy_ctxt_t *ps_ent = &ps_proc->s_entropy;
+
+    /* codec context, source of the per row data sizes */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* first mb of the job in raster order, and one past its last mb */
+    ps_ent->i4_mb_start_add = ps_ent->i4_mb_x + ps_ent->i4_wd_mbs * ps_ent->i4_mb_y;
+    ps_ent->i4_mb_end_add = ps_ent->i4_mb_cnt + ps_ent->i4_mb_start_add;
+
+    /* slice that the first mb of the job belongs to */
+    ps_ent->i4_cur_slice_idx = *(ps_proc->pu1_slice_idx + ps_ent->i4_mb_start_add);
+
+    /* a job starting at mb 0 marks the start of frame */
+    if (0 == ps_ent->i4_mb_start_add)
+    {
+        ps_ent->i4_sof = 1;
+    }
+
+    /* row data pointers are only (re)based for jobs starting at column 0 */
+    if (0 != ps_ent->i4_mb_x)
+    {
+        return IH264E_SUCCESS;
+    }
+
+    /* packed mb coeff data of the current row */
+    ps_ent->pv_mb_coeff_data = &((UWORD8 *)ps_ent->pv_pic_mb_coeff_data)
+                    [ps_ent->i4_mb_y * ps_codec->u4_size_coeff_data];
+
+    /* packed mb header data of the current row */
+    ps_ent->pv_mb_header_data = &((UWORD8 *)ps_ent->pv_pic_mb_header_data)
+                    [ps_ent->i4_mb_y * ps_codec->u4_size_header_data];
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+* This function calls lower level functions to perform entropy coding for a
+* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
+* back the control, updates the ctxt and calls lower level functions again.
+* This process is repeated till all the rows or group of mb's (which ever is
+* minimum) are coded
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+/*
+ * Bits accounted for in a bitstream: bytes at u4_strm_buf_offset times 8, plus
+ * the bits consumed in the current code word (WORD_SIZE - i4_bits_left_in_cw).
+ * The argument is parenthesized so that any pointer expression can be passed.
+ */
+#define GET_NUM_BITS(ps_bitstream) ((((ps_bitstream)->u4_strm_buf_offset) << 3) + WORD_SIZE - ((ps_bitstream)->i4_bits_left_in_cw))
+
+/*
+ * Entropy codes the mbs [i4_mb_start_add, i4_mb_end_add) of the entropy
+ * context of ps_proc. Steps, in order:
+ *  - if i4_sof is set: init the bitstream on the output buffer, optionally
+ *    write sps/pps (i4_gen_header), write the slice header, clear i4_sof
+ *  - per mb: wait until the proc map marks the mb as coded, write the mb
+ *    syntax layer, mark the mb in the entropy map; at the end of a row, in
+ *    IVE_SLICE_MODE_BLOCKS, close the slice and open the next one when the
+ *    slice index changes
+ *  - when the last mb of the picture has been coded: flush a pending skip run,
+ *    write trailing bits, update rate control and report the bytes produced
+ * Returns the error code accumulated in ps_entropy->i4_error_code.
+ */
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* entropy context */
+ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+ /* sps (id wrapped into the sps array) */
+ sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
+
+ /* pps (id wrapped into the pps array) */
+ pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
+
+ /* slice header of the current slice (index wrapped into the header array) */
+ slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
+
+ /* slice type, selects the mb syntax writer below */
+ WORD32 i4_slice_type = ps_proc->i4_slice_type;
+
+ /* Bitstream structure */
+ bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
+
+ /* output buff (local copy, only filled at start of frame) */
+ out_buf_t s_out_buf;
+
+ /* proc map: row of flags polled below until the mb is marked as coded */
+ UWORD8 *pu1_proc_map;
+
+ /* entropy map: row of flags set below once an mb is entropy coded */
+ UWORD8 *pu1_entropy_map_curr;
+
+ /* proc base idx (ping-pong selector) */
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+ /* temp var */
+ WORD32 i4_wd_mbs, i4_ht_mbs;
+ UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
+
+ /********************************************************************/
+ /* BEGIN INIT */
+ /********************************************************************/
+
+ /* entropy encode start address */
+ u4_mb_idx = ps_entropy->i4_mb_start_add;
+
+ /* entropy encode end address (exclusive) */
+ u4_mb_end_idx = ps_entropy->i4_mb_end_add;
+
+ /* width in mbs */
+ i4_wd_mbs = ps_entropy->i4_wd_mbs;
+
+ /* height in mbs */
+ i4_ht_mbs = ps_entropy->i4_ht_mbs;
+
+ /* total mb cnt */
+ u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
+
+ /* proc map of the current row */
+ pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /* entropy map of the current row */
+ pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /********************************************************************/
+ /* @ start of frame / slice, */
+ /* initialize the output buffer, */
+ /* initialize the bit stream buffer, */
+ /* check if sps and pps headers have to be generated, */
+ /* populate and generate slice header */
+ /********************************************************************/
+ if (ps_entropy->i4_sof)
+ {
+ /********************************************************************/
+ /* initialize the output buffer */
+ /********************************************************************/
+ /* NOTE(review): s_out_buf is a by-value copy of as_out_buf[ctxt_sel]. */
+ /* The u4_is_last and timestamp stores below only modify this local */
+ /* copy and are not written back within this function - confirm that */
+ /* the shared output buffer gets these fields elsewhere. */
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ /* is last frame to encode */
+ s_out_buf.u4_is_last = ps_entropy->u4_is_last;
+
+ /* time stamp */
+ s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
+ s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
+
+ /********************************************************************/
+ /* initialize the bit stream buffer */
+ /********************************************************************/
+ ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
+
+ /********************************************************************/
+ /* BEGIN HEADER GENERATION */
+ /********************************************************************/
+ if (1 == ps_entropy->i4_gen_header)
+ {
+ /* generate sps */
+ ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
+
+ /* generate pps */
+ ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
+
+ /* reset i4_gen_header */
+ ps_entropy->i4_gen_header = 0;
+ }
+
+ /* populate slice header */
+ ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+ /* generate slice header */
+ ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+ ps_pps, ps_sps);
+
+ /* once start of frame / slice is done, you can reset it */
+ /* it is the responsibility of the caller to set this flag */
+ ps_entropy->i4_sof = 0;
+ }
+
+ /* begin entropy coding for the mb set */
+ while (u4_mb_idx < u4_mb_end_idx)
+ {
+ /* init ptrs/indices: previous iteration finished a row, move to the next */
+ if (ps_entropy->i4_mb_x == i4_wd_mbs)
+ {
+ ps_entropy->i4_mb_y ++;
+ ps_entropy->i4_mb_x = 0;
+
+ /* packed mb coeff data */
+ ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
+ ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
+
+ /* packed mb header data */
+ ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
+ ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
+
+ /* proc map */
+ pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /* entropy map */
+ pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+ }
+
+ DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
+ ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
+ ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
+
+ /* wait until the curr mb is core coded */
+ /* The wait for curr mb to be core coded is essential when entropy is launched
+ * as a separate job
+ */
+ /* Polls the proc map byte of the current mb through a volatile pointer and
+ * yields the thread between polls until the byte becomes non zero
+ */
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf1;
+ WORD32 idx = ps_entropy->i4_mb_x;
+
+ pu1_buf1 = pu1_proc_map + idx;
+ if(*pu1_buf1)
+ break;
+ ithread_yield();
+ }
+
+ /* write mb layer, writer is selected by slice type */
+ ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy);
+
+ /* set entropy map */
+ pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
+
+ u4_mb_idx ++;
+ ps_entropy->i4_mb_x ++;
+
+ /* end of row: slice boundaries are only checked here */
+ if (ps_entropy->i4_mb_x == i4_wd_mbs)
+ {
+ /* if slices are enabled */
+ if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ /* current slice index */
+ WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
+
+ /* slice map */
+ UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
+
+ /* No need to open a slice at end of frame. The current slice can be closed at the time
+ * of signaling eof flag.
+ */
+ if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
+ {
+ /* mb skip run: flush a pending run before closing the slice */
+ /* NOTE(review): the inner test repeats the second half of the outer condition */
+ if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+ {
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
+ PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
+ }
+
+ /* put rbsp trailing bits for the previous slice */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+ /* update slice header pointer */
+ i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
+ ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
+ ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
+
+ /* populate slice header, the new slice starts at the next mb */
+ ps_entropy->i4_mb_start_add = u4_mb_idx;
+ ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+ /* generate slice header */
+ ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+ ps_pps, ps_sps);
+ }
+ }
+
+ /* Dont execute any further instructions until store synchronization took place */
+ DATA_SYNC();
+ }
+ }
+
+ /* check for eof: all mbs of the picture have been entropy coded */
+ if (u4_mb_idx == u4_mb_cnt)
+ {
+ /* set end of frame flag */
+ ps_entropy->i4_eof = 1;
+ }
+
+ if (ps_entropy->i4_eof)
+ {
+ /* mb skip run: flush a pending run before closing the last slice */
+ /* NOTE(review): the inner test repeats the second half of the outer condition */
+ if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+ {
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
+ PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
+ }
+
+ /* put rbsp trailing bits */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+ /* update current frame stats to rc library */
+ if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+ {
+ /* number of bytes to stuff */
+ WORD32 i4_stuff_bytes;
+
+ /* update */
+ i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt);
+
+ /* cbr rc - house keeping */
+ /* post encode skip: stream offset is reset, so zero bytes are reported below */
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+ {
+ ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
+ }
+ else if (i4_stuff_bytes)
+ {
+ /* add filler nal units */
+ ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
+ }
+ }
+
+ /********************************************************************/
+ /* signal the output */
+ /********************************************************************/
+ /* bytes produced for this picture, stored in the shared output buffer */
+ ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;
+
+ DEBUG("entropy status %x", ps_entropy->i4_error_code);
+ }
+
+ /* allow threads to dequeue entropy jobs */
+ ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
+
+ return ps_entropy->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+* After deciding the mode info of a macroblock, the syntax elements associated
+* with the mb are packed and stored. The entropy thread unpacks this buffer
+* and generates the end bit stream.
+*
+* Layout written at ps_proc->pv_mb_header_data (byte packed, no padding):
+*  - I4x4   : type byte (chroma mode << 6 | mb type), cbp, qp delta, followed by
+*             8 bytes holding two 4x4 luma modes each (low nibble first)
+*  - I16x16 : type byte (chroma mode << 6 | luma mode << 4 | mb type), cbp,
+*             qp delta
+*  - P16x16 : mb type, cbp, qp delta, mv delta x and mv delta y as two WORD16
+*             in native byte order
+*  - PSKIP  : mb type
+* For any other mb type nothing is written. On return pv_mb_header_data points
+* just past the bytes written.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks The mv deltas start 3 bytes into a byte packed record, so they are
+* stored with memcpy instead of through a WORD16 pointer; the bytes produced
+* are identical but no misaligned store is performed.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
+{
+    /* curr mb type */
+    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+    /* pointer to mb header storage space */
+    UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+    /* pack mb syntax layer of curr mb (used for entropy coding) */
+    if (u4_mb_type == I4x4)
+    {
+        /* index of the first 4x4 block of a pair */
+        WORD32 i4_blk;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* sub mb modes, two 4x4 blocks per byte */
+        for (i4_blk = 0; i4_blk < 16; i4_blk += 2)
+        {
+            WORD32 i4_byte = 0;
+            WORD32 i4_half;
+
+            for (i4_half = 0; i4_half < 2; i4_half++)
+            {
+                WORD32 i4_pred = ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4_blk + i4_half];
+                WORD32 i4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[i4_blk + i4_half];
+                WORD32 i4_nibble;
+
+                if (i4_pred == i4_mode)
+                {
+                    /* bit 0 set: mode equals the predicted mode */
+                    i4_nibble = 1;
+                }
+                else
+                {
+                    /*
+                     * bit 0 clear, bits 1..3 carry the mode; modes above the
+                     * predicted one are stored reduced by one
+                     */
+                    i4_nibble = ((i4_mode < i4_pred) ? i4_mode : (i4_mode - 1)) << 1;
+                }
+
+                /* first block of the pair in bits 0..3, second in bits 4..7 */
+                i4_byte |= i4_nibble << (4 * i4_half);
+            }
+
+            *pu1_ptr++ = i4_byte;
+        }
+    }
+    else if (u4_mb_type == I16x16)
+    {
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+    }
+    else if (u4_mb_type == P16x16)
+    {
+        /* motion vector deltas (mv - predicted mv), x then y */
+        WORD16 ai2_mvd[2];
+
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        ai2_mvd[0] = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx;
+        ai2_mvd[1] = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy;
+
+        /* pu1_ptr is not guaranteed to be WORD16 aligned here, copy bytewise */
+        memcpy(pu1_ptr, ai2_mvd, sizeof(ai2_mvd));
+        pu1_ptr += sizeof(ai2_mvd);
+    }
+    else if (u4_mb_type == PSKIP)
+    {
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+    }
+    else
+    {
+        /* other mb types are not packed, header pointer stays untouched */
+        return IH264E_SUCCESS;
+    }
+
+    /* end of mb layer */
+    ps_proc->pv_mb_header_data = pu1_ptr;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+* This function performs house keeping tasks after encoding an mb.
+* After encoding an mb, various elements of the process context needs to be
+* updated to encode the next mb. For instance, the source, recon and reference
+* pointers, mb indices have to be adjusted to the next mb. The slice index of
+* the current mb needs to be updated. If mb qp modulation is enabled, then if
+* the qp changes the quant param structure needs to be updated. Also, while
+* encoding the next mb, the current mb info is used as part of mode prediction
+* or mv prediction. Hence the current mb info has to be preserved at the
+* top/top left/left locations.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
+{
+ /* error status */
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* curr mb indices */
+ WORD32 i4_mb_x = ps_proc->i4_mb_x;
+ WORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+ /* mb syntax elements of neighbors */
+ mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
+ mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
+
+ /* curr mb type */
+ UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+ /* curr mb type */
+ UWORD32 u4_is_intra = ps_proc->u4_is_intra;
+
+ /* width in mbs */
+ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /*height in mbs*/
+ WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+ /* proc map */
+ UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
+
+ /* deblk context */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* deblk bs context */
+ bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+ /* top row motion vector info */
+ enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
+
+ /* top left mb motion vector */
+ enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
+
+ /* left mb motion vector */
+ enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
+
+ /* sub mb modes */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
+
+// /* zero mv */
+// mv_t zero_mv = {0, 0};
+
+ /* Pad the MB to support non standard sizes */
+ UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
+ UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
+
+ /*************************************************************/
+ /* During MV prediction, when top right mb is not available, */
+ /* top left mb info. is used for prediction. Hence the curr */
+ /* top, which will be top left for the next mb needs to be */
+ /* preserved before updating it with curr mb info. */
+ /*************************************************************/
+
+ /* mb type, mb class, csbp */
+ *ps_top_left_syn = *ps_top_syn;
+
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /*****************************************/
+ /* update top left with top info results */
+ /*****************************************/
+
+ /* mv */
+ *ps_top_left_mb_pu = *ps_top_row_pu;
+ }
+
+ /*************************************************/
+ /* update top and left with curr mb info results */
+ /*************************************************/
+
+ /* mb type */
+ ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
+
+ /* mb class */
+ ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
+
+ /* csbp */
+ ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
+
+ /* distortion */
+ ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
+
+ if (u4_is_intra)
+ {
+ /* mb / sub mb modes */
+ if (I16x16 == u4_mb_type)
+ {
+ pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
+ }
+ else if (I4x4 == u4_mb_type)
+ {
+ ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+ ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+ }
+ else if (I8x8 == u4_mb_type)
+ {
+ memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+ memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+ }
+
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /* mv */
+ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+
+// /* reset ngbr mv's */
+// ps_top_row_pu->i1_l0_ref_idx = -1;
+// ps_top_row_pu->s_l0_mv = zero_mv;
+//
+// *ps_left_mb_pu = *ps_top_row_pu;
+ }
+ }
+ else
+ {
+ /* mv */
+ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+ }
+
+ /*
+ * Mark that the MB has been coded intra
+ * So that future AIRs can skip it
+ */
+ ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
+
+ /**************************************************/
+ /* pack mb header info. for entropy coding */
+ /**************************************************/
+ ih264e_pack_header_data(ps_proc);
+
+ /* update previous mb qp */
+ ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
+
+ /* store qp */
+ ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+ /*
+ * We need to sync the cache to make sure that the nmv content of proc
+ * is updated to cache properly
+ */
+ DATA_SYNC();
+
+ /* Just before finishing the row, enqueue the job in to entropy queue.
+ * The master thread depending on its convenience shall dequeue it and
+ * performs entropy.
+ *
+ * WARN !! Placing this block post proc map update can cause queuing of
+ * entropy jobs in out of order.
+ */
+ if (i4_mb_x == i4_wd_mbs - 1)
+ {
+ /* job structures */
+ job_t s_job;
+
+ /* job class */
+ s_job.i4_cmd = CMD_ENTROPY;
+
+ /* number of mbs to be processed in the current job */
+ s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* job start index x */
+ s_job.i2_mb_x = 0;
+
+ /* job start index y */
+ s_job.i2_mb_y = ps_proc->i4_mb_y;
+
+ /* proc base idx */
+ s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
+
+ /* queue the job */
+ error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
+
+ if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
+ ih264_list_terminate(ps_codec->pv_entropy_jobq);
+ }
+
+ /* update proc map */
+ pu1_proc_map[i4_mb_x] = 1;
+
+ /**************************************************/
+ /* update proc ctxt elements for encoding next mb */
+ /**************************************************/
+ /* update indices */
+ i4_mb_x ++;
+ ps_proc->i4_mb_x = i4_mb_x;
+
+ if (ps_proc->i4_mb_x == i4_wd_mbs)
+ {
+ ps_proc->i4_mb_y++;
+ ps_proc->i4_mb_x = 0;
+ }
+
+ /* update slice index */
+ ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
+
+ /* update buffers pointers */
+ ps_proc->pu1_src_buf_luma += MB_SIZE;
+ ps_proc->pu1_rec_buf_luma += MB_SIZE;
+ ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma += MB_SIZE;
+ ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+ ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+ /* pad right edge */
+ if (u4_pad_right_sz && (ps_proc->i4_mb_x == i4_wd_mbs - 1))
+ {
+ ih264_pad_right_luma(
+ ps_proc->pu1_src_buf_luma + MB_SIZE - u4_pad_right_sz,
+ ps_proc->i4_src_strd, MB_SIZE, u4_pad_right_sz);
+
+ ih264_pad_right_chroma(
+ ps_proc->pu1_src_buf_chroma + MB_SIZE - u4_pad_right_sz,
+ ps_proc->i4_src_strd, BLK8x8SIZE, u4_pad_right_sz);
+ }
+
+ /* pad bottom edge */
+ if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == i4_ht_mbs - 1) &&
+ ps_proc->i4_mb_x != 0)
+ {
+ ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz);
+
+ ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
+ ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2));
+ }
+
+ /* Reset cost, distortion params */
+ ps_proc->i4_mb_cost = INT_MAX;
+ ps_proc->i4_mb_distortion = SHRT_MAX;
+
+ ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+ ps_proc->pu4_mb_pu_cnt += 1;
+
+ /* deblk ctxts */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ /* indices */
+ ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+ ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
+ ps_deblk->i4_mb_x ++;
+
+ ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+#endif
+ }
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief initialize process context.
+*
+* @par Description:
+* Before dispatching the current job to process thread, the process context
+* associated with the job is initialized. Usually every job aims to encode one
+* row of mb's. Based on the row indices provided by the job, the process
+* context's buffer ptrs, slice indices and other elements that are necessary
+* during core-coding are initialized.
+*
+* In order, the routine:
+*  - reads the starting mb position from ps_proc->i4_mb_x / ps_proc->i4_mb_y
+*  - sets the n-mb batch sizes (i4_nmb_ntrpy, u4_nmb_me), capped at MAX_NMB
+*  - positions the src / rec / ref luma and chroma pointers at that mb
+*  - for IV_YUV_420P and IV_YUV_422ILE input, calls the codec's conversion
+*    function pointer to fill the src luma / chroma buffers; the two 420SP
+*    formats and any other format are left untouched here
+*  - pads the bottom of the source when this is the last mb row and mb_x is 0
+*  - positions the packed coeff / header data, pu and pu-count pointers at the
+*    start of mb row i4_mb_y
+*  - resets mb type, lambda, distortion and cost
+*  - initializes the deblk, bs and n-mb contexts (deblk lags proc by one row)
+*
+* @param[in] ps_proc
+* Pointer to the current process context. i4_mb_x and i4_mb_y must already
+* hold the position of the first mb of the job.
+*
+* @returns error status (this implementation always returns IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* nmb processing context*/
+ n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+ /* indices */
+ WORD32 i4_mb_x, i4_mb_y;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* quant params (entry 0 of ps_qp_params; only used below to look up lambda) */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* deblk ctxt */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* deblk bs context */
+ bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+ /* Pointer to mv_buffer of current frame */
+ mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+ /* Pointers for color space conversion
+  * (only assigned and used in the IV_YUV_420P / IV_YUV_422ILE cases below) */
+ UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
+
+ /* Pad the MB to support non standard sizes:
+  * number of rows to pad at the bottom = s_cfg.u4_ht - s_cfg.u4_disp_ht */
+ UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
+
+ /********************************************************************/
+ /* BEGIN INIT */
+ /********************************************************************/
+
+ i4_mb_x = ps_proc->i4_mb_x;
+ i4_mb_y = ps_proc->i4_mb_y;
+
+ /* Number of mbs processed in one loop of process function
+  * (both counts are min(i4_wd_mbs, MAX_NMB)) */
+ ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
+ ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
+
+ /* init buffer pointers
+  * luma   : MB_SIZE bytes per mb horizontally, MB_SIZE rows per mb row
+  * chroma : MB_SIZE bytes per mb horizontally, BLK8x8SIZE rows per mb row
+  * src buffers use the src stride; rec and ref buffers both use the rec stride */
+ ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
+ ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE);
+ ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+ ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+ ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+ ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+
+ /*
+ * Do color space conversion
+ * NOTE : We assume here that the number of MB's to process will not span multiple rows
+ * NOTE(review): both conversion calls are passed MB_SIZE and i4_wd_mbs * MB_SIZE as
+ * dimensions, i.e. presumably one full mb row per call - confirm against the
+ * conversion routines' prototypes (the two calls order these arguments differently)
+ */
+ switch (ps_codec->s_cfg.e_inp_color_fmt)
+ {
+ case IV_YUV_420SP_UV:
+ case IV_YUV_420SP_VU:
+ break; /* semi-planar input: no conversion is performed in this routine */
+
+ case IV_YUV_420P :
+ pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
+
+ pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
+
+ pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
+
+ ps_codec->pf_ih264e_conv_420p_to_420sp(
+ pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
+ ps_proc->pu1_src_buf_luma,
+ ps_proc->pu1_src_buf_chroma, MB_SIZE,
+ ps_proc->i4_wd_mbs * MB_SIZE,
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
+ ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1);
+ break;
+
+ case IV_YUV_422ILE :
+ pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
+ + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
+
+ ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
+ ps_proc->pu1_src_buf_luma,
+ ps_proc->pu1_src_buf_chroma,
+ ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
+ ps_proc->i4_wd_mbs * MB_SIZE, MB_SIZE,
+ ps_proc->i4_src_strd, ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd,
+ ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
+ break;
+
+ default:
+ break; /* any other format is silently left unconverted */
+ }
+
+ /* pad bottom edge
+  * done here only for the first mb (mb_x == 0) of the last mb row */
+ if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
+ {
+ ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz);
+
+ ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
+ ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2));
+ }
+
+ /* packed mb coeff data (u4_size_coeff_data bytes are skipped per mb row) */
+ ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
+
+ /* packed mb header data (u4_size_header_data bytes are skipped per mb row) */
+ ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
+
+ /* slice index */
+ ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
+
+ /*********************************************************************/
+ /* ih264e_init_quant_params() routine is called at the pic init level*/
+ /* this would have initialized the qp. */
+ /* TODO_LATER: currently it is assumed that quant params do not change*/
+ /* across mb's. When they do calculate update ps_qp_params accordingly*/
+ /*********************************************************************/
+
+ /* init mv buffer ptr
+  * (MIN_PU_SIZE * MIN_PU_SIZE) pu entries are reserved per mb; the pointer is
+  * placed at the first mb of row i4_mb_y (i4_mb_x is not added here) */
+ ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+
+ if (i4_mb_y == 0)
+ {
+ ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; /* no row above: points at row 0 itself */
+ }
+ else
+ {
+ ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+ }
+
+ ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
+
+ /* mb type */
+ ps_proc->u4_mb_type = I16x16;
+
+ /* lambda (table lookup in gu1_qp0 indexed by the mb qp of ps_qp_params) */
+ ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
+
+ /* mb distortion */
+ ps_proc->i4_mb_distortion = SHRT_MAX;
+
+ if (i4_mb_x == 0)
+ {
+ ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
+
+ ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
+
+ ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
+
+ if (i4_mb_y == 0)
+ {
+ memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); /* clears i4_wd_mbs + 1 entries */
+ }
+ }
+
+ /* mb cost */
+ ps_proc->i4_mb_cost = INT_MAX;
+
+ /**********************/
+ /* init deblk context */
+ /**********************/
+ ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
+ /* deblk lags the current mb proc by 1 row */
+ /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
+ /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
+ /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
+ ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
+
+ /* buffer ptrs
+  * NOTE(review): when ps_proc->i4_mb_y is 0, ps_deblk->i4_mb_y is -1 and these
+  * pointers are computed one mb row before the rec base pointers; the offset
+  * here is row-only (no mb_x term) */
+ ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
+ ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
+
+ /* init deblk bs context */
+ /* mb indices (bs context tracks the proc position, not the lagged deblk row) */
+ ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+ ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+ /* init n_mb_process context
+  * column always restarts at 0; row follows the lagged deblk row */
+ ps_n_mb_ctxt->i4_mb_x = 0;
+ ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
+ ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
+
+ return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*  Pads the reconstructed picture around one mb, depending on which picture
+*  edge(s) the mb touches:
+*   - first column : left padding of i4_pad_ht luma rows (i4_pad_ht / 2 chroma)
+*   - last column  : right padding of the same rows and, when the mb is also in
+*                    the last mb row, bottom padding across the full stride
+*   - first row    : top padding above the mb, widened by PAD_LEFT / PAD_RIGHT
+*                    when the mb is in the first / last column
+*  The column tests are independent, so a picture that is a single mb wide
+*  (i4_mb_x is both the first and the last column) receives left, right and
+*  bottom padding as well as a top padding that spans both side pads.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to luma buffer (top-left sample of the mb in the recon picture)
+*
+* @param[in] pu1_curr_pic_chroma
+*  Pointer to chroma buffer (top-left sample of the mb, interleaved chroma)
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+* @param[in] i4_pad_ht
+*  number of rows to be padded
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
+                                       UWORD8 *pu1_curr_pic_luma,
+                                       UWORD8 *pu1_curr_pic_chroma,
+                                       WORD32 i4_mb_x,
+                                       WORD32 i4_mb_y,
+                                       WORD32 i4_pad_ht)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* picture edge flags; both are set when the picture is one mb wide */
+    WORD32 i4_is_first_col = (i4_mb_x == 0);
+    WORD32 i4_is_last_col = (i4_mb_x == ps_proc->i4_wd_mbs - 1);
+
+    if (i4_is_first_col)
+    {
+        /* padding left luma */
+        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
+
+        /* padding left chroma */
+        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
+    }
+
+    /* Tested independently of the first-column case (not 'else if'): for a
+     * one mb wide picture the same mb is also the last column and still
+     * needs its right and bottom padding */
+    if (i4_is_last_col)
+    {
+        /* padding right luma */
+        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
+
+        /* padding right chroma */
+        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
+
+        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
+        {
+            /* start just past the right pad of the last padded row */
+            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
+            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
+
+            /* padding bottom luma */
+            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+            /* padding bottom chroma */
+            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+        }
+    }
+
+    if (i4_mb_y == 0)
+    {
+        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
+        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
+        WORD32 wd = MB_SIZE;
+
+        if (i4_is_first_col)
+        {
+            /* include the left pad that was just written */
+            pu1_rec_luma -= PAD_LEFT;
+            pu1_rec_chroma -= PAD_LEFT;
+
+            wd += PAD_LEFT;
+        }
+
+        /* independent of the first-column case, see note above */
+        if (i4_is_last_col)
+        {
+            /* include the right pad that was just written */
+            wd += PAD_RIGHT;
+        }
+
+        /* padding top luma */
+        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
+
+        /* padding top chroma */
+        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking, padding and halfpel generation for
+* 'n' MBs
+*
+* @par Description:
+* Called once per coded mb with that mb's recon pointers and indices.
+*  - If deblocking is disabled (u4_disable_deblock_level == 1), the current mb
+*    is left / right padded immediately when it is in the first / last column.
+*  - Nothing further is done for mb row 0.
+*  - For later rows, work is deferred until i4_n_mbs mbs are pending or the
+*    last column is reached. With deblocking enabled, the batch is processed
+*    only if the deblk map of the row above the deblocked row is set for the
+*    batch columns and the one to their right; otherwise the function returns
+*    without advancing any context.
+*  - Each mb of the batch is deblocked (ih264e_deblock_mb) at the position held
+*    in the deblk context, and the mb row above it is left / right padded.
+*  - When i4_mb_y == 2, top padding is applied above the batch columns.
+*  - At the last column of the last mb row, the whole last row is deblocked
+*    (when deblocking is enabled), the last two mb rows are left / right
+*    padded, and bottom padding is applied across the full stride.
+* NOTE(review): no half-pel generation call is present in this function body,
+* although the name and brief mention it.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Current MB being processed(Luma)
+*
+* @param[in] pu1_curr_pic_chroma
+* Current MB being processed(Chroma)
+*
+* @param[in] i4_mb_x
+* Column value of current MB processed
+*
+* @param[in] i4_mb_y
+* Current row processed
+*
+* @returns error status (this implementation always returns IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
+ UWORD8 *pu1_curr_pic_luma,
+ UWORD8 *pu1_curr_pic_chroma,
+ WORD32 i4_mb_x,
+ WORD32 i4_mb_y)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* n_mb processing context */
+ n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+ /* deblk context */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* strides */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* loop variables */
+ WORD32 row, i, j, col;
+
+ /* Padding Width */
+ UWORD32 u4_pad_wd;
+
+ /* deblk_map of the row being deblocked
+  * (indexed by the deblk context row, not by the i4_mb_y argument) */
+ UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
+
+ /* deblk_map_previous row
+  * NOTE(review): for deblk row 0 this points one row before pu1_deblk_map's
+  * row 0 and is read below - presumably the map has a leading guard row; confirm */
+ UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
+
+ WORD32 u4_pad_top = 0; /* signed despite the u4_ prefix: may be set to -PAD_LEFT below */
+
+ WORD32 u4_deblk_prev_row = 0; /* AND of the previous row's deblk map flags */
+
+ /* Number of mbs to be processed */
+ WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
+
+ /* Number of mbs actually processed
+ * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
+ WORD32 i4_n_mb_process_count = 0;
+
+ UWORD8 *pu1_pad_bottom_src = NULL;
+
+ UWORD8 *pu1_pad_src_luma = NULL;
+ UWORD8 *pu1_pad_src_chroma = NULL;
+
+ if (ps_proc->u4_disable_deblock_level == 1) /* deblocking disabled: pad the current mb right away */
+ {
+ /* If left most MB is processed, then pad left */
+ if (i4_mb_x == 0)
+ {
+ /* padding left luma */
+ ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+ /* padding left chroma */
+ ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+ }
+ /*last col (tested independently of the first column case) */
+ if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
+ {
+ /* padding right luma */
+ ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+ /* padding right chroma */
+ ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+ }
+ }
+
+ if (i4_mb_y > 0) /* everything below is skipped for mb row 0 */
+ {
+ /* if number of mb's to be processed are less than 'N', go back.
+ * exception to the above clause is end of row
+ * (i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) is the count of mbs pending since
+ * the last processed batch, current mb included */
+ if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
+ {
+ return IH264E_SUCCESS;
+ }
+ else
+ {
+ i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
+
+ u4_deblk_prev_row = 1;
+
+ /* checking whether the top rows are deblocked */
+ for (col = 0; col < i4_n_mb_process_count; col++)
+ {
+ u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
+ }
+
+ /* checking whether the top right MB is deblocked
+  * (skipped when the batch ends at the last column) */
+ if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
+ {
+ u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
+ }
+
+ /* performing deblocking for required number of MBs */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ /* Top or Top right MBs not deblocked:
+  * return without advancing ps_n_mb_ctxt or ps_deblk, so these mbs stay pending */
+ if (u4_deblk_prev_row != 1)
+ {
+ return IH264E_SUCCESS;
+ }
+
+ for (row = 0; row < i4_n_mb_process_count; row++) /* 'row' counts mbs within the batch, not picture rows */
+ {
+ ih264e_deblock_mb(ps_proc, ps_deblk);
+
+ pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+ if (ps_deblk->i4_mb_y > 0)
+ {
+ if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left (the mb row above the deblocked row is padded) */
+ {
+ /* padding left luma */
+ ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+ /* padding left chroma */
+ ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+ }
+
+ if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column (again the mb row above the deblocked row is padded) */
+ {
+ /* padding right luma */
+ ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+ /* padding right chroma */
+ ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+ }
+ }
+ ps_deblk->i4_mb_x++;
+
+ ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+ ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+
+ }
+ }
+ else /* deblocking disabled: only advance the deblk position past the batch */
+ {
+ ps_deblk->i4_mb_x += i4_n_mb_process_count;
+
+ ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
+ ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
+ }
+
+ if (i4_mb_y == 2) /* top padding is only issued from here while coding mb row 2; NOTE(review): presumably because row 0 is final once row 1 is deblocked - confirm */
+ {
+ u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
+ u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
+
+ if (ps_n_mb_ctxt->i4_mb_x == 0) /* first batch of the row: start inside the left pad */
+ {
+ u4_pad_wd += PAD_LEFT;
+ u4_pad_top = -PAD_LEFT;
+ }
+
+ if (i4_mb_x == ps_proc->i4_wd_mbs - 1) /* last batch of the row: extend over the right pad */
+ {
+ u4_pad_wd += PAD_RIGHT;
+ }
+
+ /* padding top luma */
+ ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
+
+ /* padding top chroma */
+ ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
+ }
+
+ ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
+
+ if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+ {
+ if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
+ {
+ /* Bottom Padding is done in one stretch for the entire width.
+  * With deblocking enabled, the last mb row itself is first deblocked here,
+  * since no further row will arrive to trigger it */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
+
+ ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
+
+ ps_n_mb_ctxt->i4_mb_x = 0;
+ ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
+ ps_deblk->i4_mb_x = 0;
+ ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
+
+ /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
+ ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+ i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; /* leftover mbs after the full batches */
+
+ j = (ps_proc->i4_wd_mbs) / i4_n_mbs; /* number of full batches in the row */
+
+ for (i = 0; i < j; i++)
+ {
+ for (col = 0; col < i4_n_mbs; col++)
+ {
+ ih264e_deblock_mb(ps_proc, ps_deblk);
+
+ pu1_deblk_map[ps_deblk->i4_mb_x] = 1; /* NOTE(review): pu1_deblk_map was computed at entry from the earlier deblk row, not the last row - confirm intent */
+
+ ps_deblk->i4_mb_x++;
+ ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+ ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+ ps_n_mb_ctxt->i4_mb_x++;
+ }
+ }
+
+ for (col = 0; col < i4_n_mb_process_count; col++)
+ {
+ ih264e_deblock_mb(ps_proc, ps_deblk);
+
+ pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+ ps_deblk->i4_mb_x++;
+ ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+ ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+ ps_n_mb_ctxt->i4_mb_x++;
+ }
+
+ pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
+
+ pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
+
+ /* padding left luma (second last mb row) */
+ ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+ /* padding left chroma (second last mb row) */
+ ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+ pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+ pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+ /* padding left luma (last mb row) */
+ ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+ /* padding left chroma (last mb row) */
+ ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+ pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+ pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+ /* padding right luma (second last mb row) */
+ ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+ /* padding right chroma (second last mb row) */
+ ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+ pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+ pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+ /* padding right luma (last mb row) */
+ ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+ /* padding right chroma (last mb row) */
+ ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+ }
+
+ /* padding bottom luma
+  * (done whether or not deblocking is enabled; starts PAD_LEFT before the
+  * first sample below the picture and spans one full stride) */
+ pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
+ ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+ /* padding bottom chroma */
+ pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
+ ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+ }
+ }
+ }
+ }
+
+ return IH264E_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+* The mb to be coded is taken and is evaluated over a predefined set of modes
+* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+* is selected and using intra/inter prediction filters, prediction is carried out.
+* The deviation between src and pred signal constitutes error signal. This error
+* signal is transformed (hierarchical transform if necessary) and quantized. The
+* quantized residue is packed in to entropy buffer for entropy coding. This is
+* repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc)
+{
+ /* error status */
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* cbp luma, chroma */
+ UWORD32 u4_cbp_l, u4_cbp_c;
+
+ /* width in mbs */
+ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /* loop var */
+ WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
+
+ /* valid modes */
+ UWORD32 u4_valid_modes = 0;
+
+ /* gate threshold */
+ WORD32 i4_gate_threshold = 0;
+
+ /* is intra */
+ WORD32 luma_idx, chroma_idx, is_intra;
+
+ /* temp variables */
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+ /* list of modes for evaluation */
+ if (ps_proc->i4_slice_type == ISLICE)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+ /* enable intra 8x8 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+
+ /* enable intra 4x4 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ }
+ else if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+ /* enable intra 4x4 */
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ }
+
+ /* enable inter 16x16 */
+ u4_valid_modes |= (1 << P16x16);
+ }
+
+
+ /* init entropy */
+ ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
+ ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
+ ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
+
+ /* compute recon when :
+ * 1. current frame is to be used as a reference
+ * 2. dump recon for bit stream sanity check
+ */
+ ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
+ ps_codec->s_cfg.u4_enable_recon;
+
+ /* Encode 'n' macroblocks,
+ * 'n' being the number of mbs dictated by current proc ctxt */
+ for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
+ {
+ /* since we have not yet found sad, we have not yet got min sad */
+ /* we need to initialize these variables for each MB */
+ /* TODO how to get the min sad into the codec */
+ ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
+ ps_proc->u4_min_sad_reached = 0;
+
+ /* mb analysis */
+ {
+ /* temp var */
+ WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
+
+ /* force intra refresh ? */
+ WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
+ (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
+ (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
+
+ /* evaluate inter 16x16 modes */
+ if (u4_valid_modes & (1 << P16x16))
+ {
+ /* compute nmb me */
+ if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
+ {
+ ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
+ i4_wd_mbs - ps_proc->i4_mb_x));
+ }
+
+ /* set pointers to ME data appropriately for other modules to use */
+ {
+ UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
+
+ /* get the min sad condition for current mb */
+ ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+ ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
+ ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
+
+ ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
+ ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
+ ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+ ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+ ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
+
+ /* get the best sub pel buffer */
+ ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
+ ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
+ }
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+ }
+ else
+ {
+ /* Derive neighbor availability for the current macroblock */
+ ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
+
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+ }
+
+ /*
+ * If air says intra, we need to force the following code path to evaluate intra
+ * The easy way is just to say that the inter cost is too much
+ */
+ if (!i4_air_enable_inter)
+ {
+ ps_proc->u4_min_sad_reached = 0;
+ ps_proc->i4_mb_cost = INT_MAX;
+ ps_proc->i4_mb_distortion = INT_MAX;
+ }
+ else if (ps_proc->u4_mb_type == PSKIP)
+ {
+ goto UPDATE_MB_INFO;
+ }
+
+ /* wait until the proc of [top + 1] mb is computed.
+ * We wait till the proc dependencies are satisfied */
+ if(ps_proc->i4_mb_y > 0)
+ {
+ /* proc map */
+ UWORD8 *pu1_proc_map_top;
+
+ pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
+
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf;
+ WORD32 idx = i4_mb_idx + 1;
+
+ idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
+ pu1_buf = pu1_proc_map_top + idx;
+ if(*pu1_buf)
+ break;
+ ithread_yield();
+ }
+ }
+
+ /* If we already have the minimum sad, there is no point in searching for sad again */
+ if (ps_proc->u4_min_sad_reached == 0)
+ {
+ /* intra gating in inter slices */
+ /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
+ if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
+ {
+ /* distortion of neighboring blocks */
+ WORD32 i4_distortion[4];
+
+ i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
+
+ i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
+
+ i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
+
+ i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
+
+ i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
+
+ }
+
+ /* If we are going to force intra we need to evaluate intra irrespective of gating */
+ if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
+ {
+ /* evaluate intra 4x4 modes */
+ if (u4_valid_modes & (1 << I4x4))
+ {
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {
+ ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
+ }
+ else
+ {
+ ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ }
+
+ /* evaluate intra 16x16 modes */
+ if (u4_valid_modes & (1 << I16x16))
+ {
+ ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+
+ /* evaluate intra 8x8 modes */
+ if (u4_valid_modes & (1 << I8x8))
+ {
+ ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ }
+
+ }
+ }
+
+ /* is intra */
+ if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
+ {
+ luma_idx = ps_proc->u4_mb_type;
+ chroma_idx = 0;
+ is_intra = 1;
+
+ /* evaluate chroma blocks for intra */
+ ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ else
+ {
+ luma_idx = 3;
+ chroma_idx = 1;
+ is_intra = 0;
+ }
+ ps_proc->u4_is_intra = is_intra;
+
+ /* redo MV pred of neighbors in the case intra mb */
+ /* TODO : currently called unconditionally, needs to be called only in the case of intra
+ * to modify neighbors */
+ if (ps_proc->i4_slice_type != ISLICE)
+ {
+ ih264e_mv_pred(ps_proc);
+ }
+
+ /* Perform luma mb core coding */
+ u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
+
+ /* Perform luma mb core coding */
+ u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
+
+ /* coded block pattern */
+ ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
+
+ /* mb skip */
+ if (is_intra == 0)
+ {
+ if (ps_proc->u4_cbp == 0)
+ {
+ /* get skip mv */
+ UWORD32 u4_for_me = 0;
+ ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
+
+ /* skip ? */
+ if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
+ ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
+ {
+ ps_proc->u4_mb_type = PSKIP;
+ }
+ }
+ }
+
+UPDATE_MB_INFO:
+
+ /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
+ ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
+
+ /**********************************************************************/
+ /* if disable deblock level is '0' this implies enable deblocking for */
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '1' this implies disable deblocking for*/
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '2' this implies enable deblocking for */
+ /* all edges of all macroblocks except edges overlapping with slice */
+ /* boundaries. This option is not currently supported by the encoder */
+ /* hence the slice map should be of no significance to perform debloc */
+ /* king */
+ /**********************************************************************/
+
+ if (ps_proc->u4_compute_recon)
+ {
+ /* deblk context */
+ /* src pointers */
+ UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
+ UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
+
+ /* src indices */
+ UWORD32 i4_mb_x = ps_proc->i4_mb_x;
+ UWORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+ /* compute blocking strength */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ ih264e_compute_bs(ps_proc);
+ }
+
+ /* nmb deblocking and hpel and padding */
+ ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
+ pu1_cur_pic_chroma, i4_mb_x,
+ i4_mb_y);
+ }
+
+ /* update the context after for coding next mb */
+ error_status |= ih264e_update_proc_ctxt(ps_proc);
+
+ /* Once the last row is processed, mark the buffer status appropriately */
+ if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
+ {
+ /* Pointer to current picture buffer structure */
+ pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
+
+ /* Pointer to current picture's mv buffer structure */
+ mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+ /**********************************************************************/
+ /* if disable deblock level is '0' this implies enable deblocking for */
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '1' this implies disable deblocking for*/
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '2' this implies enable deblocking for */
+ /* all edges of all macroblocks except edges overlapping with slice */
+ /* boundaries. This option is not currently supported by the encoder */
+ /* hence the slice map should be of no significance to perform debloc */
+ /* king */
+ /**********************************************************************/
+ error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
+
+ error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
+
+ if (ps_codec->s_cfg.u4_enable_recon)
+ {
+ /* pic cnt */
+ ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
+
+ /* rec buffers */
+ ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
+
+ /* is last? */
+ ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
+
+ /* frame time stamp */
+ ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
+ ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
+ }
+
+ }
+ }
+
+ DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Obtains the picture type and frame qp from rate control before encoding
+*
+* @par Description:
+*  If the application has forced an I or IDR frame, the request is first passed
+*  on to the rc library. The rc pre-encode routine is then called to get the
+*  picture type and the frame qp. The rc picture type is translated to the
+*  codec picture type and promoted to IDR when the pic count is a multiple of
+*  the idr frame interval or when an IDR frame was forced. The frame qp is
+*  stored in the codec context after mapping it through
+*  gau1_mpeg2_to_h264_qmap.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+*  pic count
+*
+* @param[out] pi4_pic_type
+*  pic type
+*
+* @returns skip_src
+*  if the source frame rate and target frame rate are not identical, the encoder
+*  skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks
+*  A forced I/IDR request is cleared by this call (the force tag is not sticky)
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type)
+{
+    /* rate control context */
+    rate_control_ctxt_t *ps_rc = &ps_codec->s_rate_control;
+
+    /* frame qp as reported by the rc library */
+    UWORD8 u1_rc_qp;
+
+    /* codec pic type; stays PIC_NA when the rc pic type is not I, P or B */
+    PIC_TYPE_T e_pic_type = PIC_NA;
+
+    /* return value of the rc pre-encode call */
+    WORD32 i4_skip_src;
+
+    /* time stamp delta passed to the rc library */
+    WORD32 i4_delta_ts = 1;
+
+    /* pass on a forced I/IDR request from the app to the rc library */
+    if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
+    {
+        irc_force_I_frame(ps_rc->pps_rate_control_api);
+    }
+
+    /* ask the rc library for the current pic type and the qp to be used */
+    i4_skip_src = ih264e_rc_pre_enc(ps_rc->pps_rate_control_api,
+                                    ps_rc->pps_pd_frm_rate,
+                                    ps_rc->pps_time_stamp,
+                                    ps_rc->pps_frame_time,
+                                    i4_delta_ts,
+                                    (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs),
+                                    &ps_rc->e_pic_type,
+                                    &u1_rc_qp);
+
+    /* translate the rc pic type to the codec pic type */
+    if (ps_rc->e_pic_type == I_PIC)
+    {
+        e_pic_type = PIC_I;
+    }
+    else if (ps_rc->e_pic_type == P_PIC)
+    {
+        e_pic_type = PIC_P;
+    }
+    else if (ps_rc->e_pic_type == B_PIC)
+    {
+        e_pic_type = PIC_B;
+    }
+
+    /* idr at every idr frame interval, or when the app forced one */
+    if ((cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) == 0 ||
+        ps_codec->force_curr_frame_type == IV_IDR_FRAME)
+    {
+        e_pic_type = PIC_IDR;
+    }
+
+    /* the force frame request applies to the current frame only */
+    if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
+    {
+        ps_codec->force_curr_frame_type = IV_NA_FRAME;
+    }
+
+    /* frame qp, mapped through the mpeg2 to h264 qp table */
+    ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_rc_qp];
+
+    /* hand the pic type back to the caller */
+    *pi4_pic_type = e_pic_type;
+
+    return i4_skip_src;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to update rc context after encoding
+*
+* @par Description
+*  This function updates the rate control context after the frame is encoded.
+*  The per-thread frame statistics (number of mbs, sad and qp sums per mb type,
+*  intra mb cost, activity) and the bits consumed by each thread's entropy
+*  context are accumulated over u4_num_cores process contexts, starting at the
+*  first context of the selected context set, and passed on to the rate
+*  control library for updating the rc model.
+*
+* @param[in] ps_codec
+*  Handle to codec context
+*
+* @param[in] ctxt_sel
+*  frame context selector; a non zero value selects the second half of the
+*  process context array
+*
+* @param[in] pic_cnt
+*  pic count
+*
+* @returns i4_stuffing_byte
+*  number of stuffing bytes (if necessary), as returned by ih264e_rc_post_enc()
+*
+* @remarks
+*  If rate control flags a post encode skip for this context, the codec frame
+*  number is decremented.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt)
+{
+    /* proc set base idx */
+    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+    /* proc ctxt */
+    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
+
+    /* frame qp */
+    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
+
+    /* cbr rc return status */
+    WORD32 i4_stuffing_byte = 0;
+
+    /* current frame stats */
+    frame_info_t s_frame_info;
+
+    /* rc pic type. Initialised so that a build with asserts compiled out */
+    /* never hands an indeterminate value to the rc library when the codec */
+    /* pic type is not one of I/IDR/P/B */
+    picture_type_e rc_pic_type = P_PIC;
+
+    /* temp var */
+    WORD32 i, j;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    /* init frame info */
+    irc_init_frame_info(&s_frame_info);
+
+    /* get frame info */
+    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+    {
+        /*****************************************************************/
+        /* One frame can be encoded by max of u4_num_cores threads       */
+        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
+        /* u4_num_cores threads                                          */
+        /*****************************************************************/
+        for (j = 0; j < MAX_MB_TYPE; j++)
+        {
+            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
+
+            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
+
+            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
+        }
+
+        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
+
+        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
+
+        /*****************************************************************/
+        /* gather number of residue and header bits consumed by the frame*/
+        /*****************************************************************/
+        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
+    }
+
+    /* get pic type */
+    switch (ps_codec->pic_type)
+    {
+        case PIC_I:
+        case PIC_IDR:
+            rc_pic_type = I_PIC;
+            break;
+        case PIC_P:
+            rc_pic_type = P_PIC;
+            break;
+        case PIC_B:
+            rc_pic_type = B_PIC;
+            break;
+        default:
+            /* unexpected pic type; rc_pic_type keeps its initial value */
+            assert(0);
+            break;
+    }
+
+    /* update rc lib with current frame stats */
+    i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
+                                          &(s_frame_info),
+                                          ps_codec->s_rate_control.pps_pd_frm_rate,
+                                          ps_codec->s_rate_control.pps_time_stamp,
+                                          ps_codec->s_rate_control.pps_frame_time,
+                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
+                                          &rc_pic_type,
+                                          pic_cnt,
+                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
+                                          u1_frame_qp,
+                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
+                                          &ps_codec->s_rate_control.i4_avg_activity);
+
+    /* in case the frame needs to be skipped, the frame num should not be incremented */
+    if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+    {
+        ps_codec->i4_frame_num --;
+    }
+
+    return i4_stuffing_byte;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* entry point of a spawned encoder thread
+*
+* @par Description:
+* The encoder thread dequeues a proc/entropy job from the encoder queue and
+* calls necessary routines.
+* Each iteration first tries the entropy job queue, and only while holding the
+* entropy mutex and only if the entropy-active flag of the current context is
+* zero. If no entropy job is taken, a job is dequeued from the process queue.
+* The job command then selects between core coding (CMD_PROCESS) and entropy
+* coding (CMD_ENTROPY); any other command is recorded as IH264_FAIL.
+*
+* @param[in] pv_proc
+* Process context corresponding to the thread
+*
+* @returns status of the last job queue dequeue. The error status accumulated
+* from the jobs that were run is not returned; it is stored in the
+* i4_error_code field of the process context in use when the loop exits.
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc)
+{
+ /* error status
+ * ret          : status of the most recent job queue dequeue (returned)
+ * error_status : OR of the status of every job run by this thread */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* proc ctxt (re-pointed at WORKER to the context selected by each job) */
+ process_ctxt_t *ps_proc = pv_proc;
+
+ /* codec ctxt */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* structure to represent a processing job entry */
+ job_t s_job;
+
+ /* blocking call : entropy dequeue is non-blocking till all
+ * the proc jobs are processed.
+ * The flag is passed as the last argument of the entropy queue dequeue. It
+ * is set to 1 only when the process queue dequeue fails while the current
+ * proc context has i4_id 0 */
+ WORD32 is_blocking = 0;
+
+ /* set affinity */
+ ithread_set_affinity(ps_proc->i4_id);
+
+ while(1)
+ {
+ /* dequeue a job from the entropy queue (attempted only if the entropy
+ * mutex was acquired) */
+ {
+ int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
+
+ /* codec context selector */
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
+
+ /* have the lock */
+ if (error == 0)
+ {
+ if (*pu4_buf == 0)
+ {
+ /* no entropy threads are active, try dequeuing a job from the entropy queue.
+ * On success the active flag is set (it is not cleared in this
+ * function) and the mutex is released before running the job */
+ ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
+ if (IH264_SUCCESS == ret)
+ {
+ *pu4_buf = 1;
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ goto WORKER;
+ }
+ else if(is_blocking) /* dequeue failed with is_blocking set : leave the loop */
+ {
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ break;
+ }
+ }
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ }
+ }
+
+ /* dequeue a job from the process queue (last argument is 1; presumably a
+ * blocking dequeue - TODO confirm against ih264_list_dequeue). On failure
+ * a context with non zero i4_id leaves the loop, while a context with
+ * i4_id 0 sets is_blocking and goes back to the entropy queue */
+ ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
+ if (IH264_SUCCESS != ret)
+ {
+ if(ps_proc->i4_id)
+ break;
+ else
+ {
+ is_blocking = 1;
+ continue;
+ }
+ }
+
+WORKER:
+ /* choose appropriate proc context based on proc_base_idx : the context at
+ * index (i4_id of the current context + base index carried by the job) */
+ ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
+
+ switch (s_job.i4_cmd)
+ {
+ case CMD_PROCESS:
+ ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
+ ps_proc->i4_mb_x = s_job.i2_mb_x;
+ ps_proc->i4_mb_y = s_job.i2_mb_y;
+
+ /* init process context (return value is not checked here) */
+ ih264e_init_proc_ctxt(ps_proc);
+
+ /* core code all mbs enlisted under the current job */
+ error_status |= ih264e_process(ps_proc);
+ break;
+
+ case CMD_ENTROPY:
+ ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
+ ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
+ ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
+
+ /* init entropy (return value is not checked here) */
+ ih264e_init_entropy_ctxt(ps_proc);
+
+ /* entropy code all mbs enlisted under the current job */
+ error_status |= ih264e_entropy(ps_proc);
+ break;
+
+ default:
+ error_status |= IH264_FAIL;
+ break;
+ }
+ }
+
+ /* send error code : the accumulated job status is stored in the proc
+ * context, the function itself returns the last dequeue status */
+ ps_proc->i4_error_code = error_status;
+ return ret;
+}
diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h
new file mode 100755
index 0000000..9715434
--- /dev/null
+++ b/encoder/ih264e_process.h
@@ -0,0 +1,364 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_process.h
+*
+* @brief
+* Contains functions for codec thread
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PROCESS_H_
+#define IH264E_PROCESS_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function generates sps, pps set on request
+*
+* @par Description
+* When the encoder is set in header generation mode, the following function
+* is called. This generates sps and pps headers and returns the control back
+* to caller.
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps
+ (
+ codec_t *ps_codec
+ );
+
+/**
+*******************************************************************************
+*
+* @brief initialize entropy context.
+*
+* @par Description:
+* Before invoking the call to perform entropy coding, the entropy context
+* associated with the job needs to be initialized. This involves the start
+* mb address, end mb address, slice index and the pointer to location at
+* which the mb residue info and mb header info are packed.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+* This function calls lower level functions to perform entropy coding for a
+* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
+* back the control, updates the ctxt and calls lower level functions again.
+* This process is repeated till all the rows or group of mb's (which ever is
+* minimum) are coded
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns error status
+*
+* @remarks
+* NOTE : It is assumed that this routine is invoked at the start of a slice,
+* so the slice header is generated by default.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+* After deciding the mode info of a macroblock, the syntax elements
+* associated with the mb are packed and stored. The entropy thread unpacks
+* this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+* This function performs house keeping tasks after encoding an mb.
+* After encoding an mb, various elements of the process context needs to be
+* updated to encode the next mb. For instance, the source, recon and reference
+* pointers, mb indices have to be adjusted to the next mb. The slice index of
+* the current mb needs to be updated. If mb qp modulation is enabled, then if
+* the qp changes the quant param structure needs to be updated. Also, to encode
+* the next mb, the current mb info is used as part of mode prediction or mv
+* prediction. Hence the current mb info has to be preserved at top/top left/left
+* locations.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief initialize process context.
+*
+* @par Description:
+* Before dispatching the current job to process thread, the process context
+* associated with the job is initialized. Usually every job aims to encode one
+* row of mb's. Basing on the row indices provided by the job, the process
+* context's buffer ptrs, slice indices and other elements that are necessary
+* during core-coding are initialized.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Pointer to luma buffer
+*
+* @param[in] pu1_curr_pic_chroma
+* Pointer to chroma buffer
+*
+* @param[in] i4_mb_x
+* mb index x
+*
+* @param[in] i4_mb_y
+* mb index y
+*
+* @param[in] i4_pad_ht
+* number of rows to be padded
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer
+ (
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_curr_pic_luma,
+ UWORD8 *pu1_curr_pic_chroma,
+ WORD32 i4_mb_x,
+ WORD32 i4_mb_y,
+ WORD32 i4_pad_ht
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma half pel planes generation
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Pointer to luma buffer
+*
+* @param[in] i4_mb_x
+* mb index x
+*
+* @param[in] i4_mb_y
+* mb index y
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_halfpel_generation
+ (
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_curr_pic_luma,
+ WORD32 i4_mb_x,
+ WORD32 i4_mb_y
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+* The mb to be coded is taken and is evaluated over a predefined set of modes
+* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+* is selected and using intra/inter prediction filters, prediction is carried out.
+* The deviation between src and pred signal constitutes error signal. This error
+* signal is transformed (hierarchical transform if necessary) and quantized. The
+* quantized residue is packed in to entropy buffer for entropy coding. This is
+* repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief
+* function to receive frame qp and pic type before encoding
+*
+* @par Description:
+* Before encoding the frame, this function calls the rc library for frame qp
+* and picture type
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+* pic count
+*
+* @param[out] pi4_pic_type
+* pic type
+*
+* @returns skip_src
+* if the source frame rate and target frame rate are not identical, the encoder
+* skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type);
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to update rc context after encoding
+*
+* @par Description
+* This function updates the rate control context after the frame is encoded.
+* Number of bits consumed by the current frame, frame distortion, frame cost,
+* number of intra/inter mb's, ... are passed on to rate control context for
+* updating the rc model.
+*
+* @param[in] ps_codec
+* Handle to codec context
+*
+* @param[in] ctxt_sel
+* frame context selector
+*
+* @param[in] pic_cnt
+* pic count
+*
+* @returns i4_stuffing_byte
+* number of stuffing bytes (if necessary)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt);
+
+/**
+*******************************************************************************
+*
+* @brief
+* entry point of a spawned encoder thread
+*
+* @par Description:
+* The encoder thread dequeues a proc/entropy job from the encoder queue and
+* calls necessary routines.
+*
+* @param[in] pv_proc
+* Process context corresponding to the thread
+*
+* @returns error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc);
+
+#endif /* IH264E_PROCESS_H_ */
diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c
new file mode 100755
index 0000000..1e2fe4f
--- /dev/null
+++ b/encoder/ih264e_rate_control.c
@@ -0,0 +1,801 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_rate_control.c
+*
+* @brief
+* Contains api function definitions for h264 rate control
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_rc_init()
+* - ih264e_rc_get_picture_details()
+* - ih264e_rc_pre_enc()
+* - ih264e_update_rc_mb_info()
+* - ih264e_rc_get_buffer_status()
+* - ih264e_rc_post_enc()
+* - ih264e_update_rc_bits_info()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "irc_trace_support.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief This function does nothing
+*
+* @par Description
+*  Stub with a printf-like signature. The format string and any further
+*  arguments are ignored.
+*
+* @param[in] format
+*  Format string (unused); may be followed by a variable argument list
+*
+* @returns 0 always
+*
+* @remarks This function is used by the rc library for debugging purposes.
+*  However this function was not part of rc library. So this is defined here
+*  to resolve link issues.
+*
+*******************************************************************************
+*/
+int trace_printf(const WORD8 *format, ...)
+{
+    UNUSED(format);
+    return 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initializes the rate control context and its helper modules
+*
+* @par Description
+*  Initializes, in this order, the frame time module, the pull down frame rate
+*  module and the time stamp module, and then the rate control library itself
+*  with the rate control type, bit rates, frame rate, intra/inter frame
+*  intervals, qp limits and the source/target tick counts read back from the
+*  frame time module.
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate; ignored when the rate control type is CBR_NLDRC, in which
+*  case the average bit rate is used as the peak bit rate
+*
+* @param[in] u4_max_delay
+*  Maximum buffer delay
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp array[3]:[I][P][B]
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+*  Min-max qp array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin]
+*
+* @param[in] u1_profile_level
+*  Encoder profile level; selects the level table entry used to size the
+*  VBV/CPB buffer
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+                    void *pv_frame_time,
+                    void *pv_time_stamp,
+                    void *pv_pd_frm_rate,
+                    UWORD32 u4_max_frm_rate,
+                    UWORD32 u4_src_frm_rate,
+                    UWORD32 u4_tgt_frm_rate,
+                    rc_type_e e_rate_control_type,
+                    UWORD32 u4_avg_bit_rate,
+                    UWORD32 u4_peak_bit_rate,
+                    UWORD32 u4_max_delay,
+                    UWORD32 u4_intra_frame_interval,
+                    UWORD8 *pu1_init_qp,
+                    WORD32 i4_max_inter_frm_int,
+                    UWORD8 *pu1_min_max_qp,
+                    UWORD8 u1_profile_level)
+{
+    /* max VBV/CPB buffer size : 1200 times the level table entry */
+    UWORD8 u1_lvl_idx = ih264e_get_lvl_idx(u1_profile_level);
+    UWORD32 u4_cpb_size = 1200 * gas_ih264_lvl_tbl[u1_lvl_idx].u4_max_cpb_size;
+
+    /* peak bit rate : CBR_NLDRC uses the average bit rate as its peak */
+    UWORD32 u4_peak = (e_rate_control_type == CBR_NLDRC) ? u4_avg_bit_rate : u4_peak_bit_rate;
+    UWORD32 au4_peak[2];
+
+    /* fixed settings handed to the rc library */
+    UWORD32 u4_min_rate = 0;
+    WORD32 i4_closed_gop = 0;
+
+    /* ticks read back from the frame time module */
+    UWORD32 u4_ticks_src;
+    UWORD32 u4_ticks_tgt;
+
+    /* same peak bit rate for both entries [I][P] */
+    au4_peak[0] = u4_peak;
+    au4_peak[1] = u4_peak;
+
+    /* frame time computation module */
+    ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* pull down frame rate, initialised with the source frame rate */
+    ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate);
+
+    /* time stamp structure */
+    ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, u4_src_frm_rate);
+
+    u4_ticks_src = ih264e_frame_time_get_src_ticks(pv_frame_time);
+    u4_ticks_tgt = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
+
+    /* rate control library */
+    irc_initialise_rate_control(pv_rc_api,                /* RC handle */
+                                e_rate_control_type,      /* RC algo type */
+                                0,                        /* MB activity on/off */
+                                u4_avg_bit_rate,          /* Avg Bitrate */
+                                au4_peak,                 /* Peak bitrate array[2]:[I][P] */
+                                u4_min_rate,              /* Min Bitrate */
+                                u4_src_frm_rate,          /* Src frame_rate */
+                                u4_max_delay,             /* Max buffer delay */
+                                u4_intra_frame_interval,  /* Intra frm_interval */
+                                pu1_init_qp,              /* Init QP array[3]:[I][P][B] */
+                                u4_cpb_size,              /* Max VBV/CPB Buffer Size */
+                                i4_max_inter_frm_int,     /* Max inter frm_interval */
+                                i4_closed_gop,            /* Open/Closed GOP */
+                                pu1_min_max_qp,           /* Min-max QP array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin] */
+                                0,                        /* How to calc the I-frame estimated_sad */
+                                u4_ticks_src,             /* Src_ticks = LCM(src_frm_rate,tgt_frm_rate)/src_frm_rate */
+                                u4_ticks_tgt);            /* Tgt_ticks = LCM(src_frm_rate,tgt_frm_rate)/tgt_frm_rate */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par Description
+*  Queries the rate control library for the picture details and returns the
+*  picture type (I/P/B). The picture id and display order number obtained in
+*  the same query are discarded.
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @returns
+*  Picture type; the local is preset to P_PIC before the query
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
+{
+    /* picture type, preset to P_PIC before querying the rc library */
+    picture_type_e e_pic_type = P_PIC;
+
+    /* queried together with the picture type but not used by the caller */
+    WORD32 i4_id = 0;
+    WORD32 i4_disp_order = 0;
+
+    irc_get_picture_details(pv_rc_api, &i4_id, &i4_disp_order, &e_pic_type);
+
+    return e_pic_type;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control output before encoding
+*
+* @par Description
+* This function is called before encoding the current frame and gets the qp
+* for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+* Total Macro Blocks in frame
+*
+* @param[in/out] pe_vop_coding_type
+* Picture coding type(I/P/B)
+*
+* @param[in/out] pu1_frame_qp
+* QP for current frame
+*
+* @returns
+* Skip or encode the current frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+                         void * ps_pd_frm_rate,
+                         void * ps_time_stamp,
+                         void * ps_frame_time,
+                         WORD32 i4_delta_time_stamp,
+                         WORD32 i4_total_mb_in_frame,
+                         picture_type_e *pe_vop_coding_type,
+                         UWORD8 *pu1_frame_qp)
+{
+    /*
+     * Skip flag (also the return value) and the counter of frames skipped by
+     * the application. Both are WORD32: the counter is compared against the
+     * WORD32 i4_delta_time_stamp, and an 8 bit counter can never reach
+     * i4_delta_time_stamp - 1 once that bound exceeds the 8 bit range, so the
+     * loop below would not terminate.
+     */
+    WORD32 i4_skip_src = 0, i4_num_app_skips = 0;
+    UWORD32 u4_src_not_skipped_for_dts = 0;
+
+    /* Variables for the update_frm_level_info; all statistics are zero for a
+       pre-encode skip since nothing was encoded */
+    WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
+    WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
+    WORD32 i4_total_frame_bits = 0;
+    WORD32 i4_total_hdr_bits = 0;
+    WORD32 i4_avg_mb_activity = 0;
+    WORD32 i4_intra_frm_cost = 0;
+    UWORD8 u1_is_scd = 0;
+
+    /* Set all the MBs to Intra */
+    ai4_tot_mb_in_type[0] = i4_total_mb_in_frame;
+    ai4_tot_mb_in_type[1] = 0;
+
+    /* If delta time stamp is greater than 1, do rcupdate that many times */
+    for (i4_num_app_skips = 0; (i4_num_app_skips < i4_delta_time_stamp - 1); i4_num_app_skips++)
+    {
+        /* Update the missing frames frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Update the time stamp */
+        ih264e_update_time_stamp(ps_time_stamp);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,      /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,  /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,    /* Header bits for model updation */
+                                    ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+                                    ai4_tot_mb_type_qp,   /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,   /* Total number of mbs in each mb type */
+                                    i4_avg_mb_activity,   /* Average mb activity in frame */
+                                    u1_is_scd,            /* Is a scene change detected at the current frame */
+                                    1,                    /* If it's a pre-encode skip */
+                                    i4_intra_frm_cost,    /* Sum of Intra cost for each frame */
+                                    0);                   /* Is pic handling [irc_update_pic_handling_state] done before update */
+    }
+
+    /* Update the time stamp for the current frame */
+    ih264e_update_time_stamp(ps_time_stamp);
+
+    /* Check if the source frame needs to be skipped */
+    i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time,
+                                               i4_delta_time_stamp,
+                                               &u4_src_not_skipped_for_dts);
+
+    /***********************************************************************
+     Based on difference in source and target frame rate frames are skipped
+    ***********************************************************************/
+    if (i4_skip_src)
+    {
+        /* Update the missing frames frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,      /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,  /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,    /* Header bits for model updation */
+                                    ai4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+                                    ai4_tot_mb_type_qp,   /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,   /* Total number of mbs in each mb type */
+                                    i4_avg_mb_activity,   /* Average mb activity in frame */
+                                    u1_is_scd,            /* Is a scene change detected at the current frame */
+                                    1,                    /* If it's a pre-encode skip */
+                                    i4_intra_frm_cost,    /* Sum of Intra cost for each frame */
+                                    0);                   /* Is pic handling [irc_update_pic_handling_state] done before update */
+
+        /* Set the current frame type to NA */
+        *pe_vop_coding_type = BUF_PIC;
+    }
+    else
+    {
+#define MAX_FRAME_BITS 0x7FFFFFFF
+        WORD32 i4_avg_frm_rate, i4_source_frame_rate;
+
+        i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+        /* The frame is encoded: update the pull down frame rate context with
+           the source frame rate */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate,
+                                  i4_source_frame_rate);
+
+        /* Based on the update get the average frame rate */
+        i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
+
+        /* Call the RC library function to change the frame_rate to the
+           actually achieved frm_rate */
+        irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
+
+        /* --------Rate control related things. Get pic type and frame Qp---------*/
+        /* Add picture to the stack. For IPP encoder we push the variable
+           into the stack and get back the variables by requesting RC.
+           This interface is designed for IPB encoder */
+        irc_add_picture_to_stack(ps_rate_control_api, 1);
+
+        /* Query the picture_type */
+        *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api);
+
+        /* Get current frame Qp; MAX_FRAME_BITS is passed as the frame bit limit */
+        pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api,
+                                                         (picture_type_e)(pe_vop_coding_type[0]),
+                                                         MAX_FRAME_BITS);
+    }
+
+    /* Non-zero when the source frame is skipped, zero when it is encoded */
+    return(i4_skip_src);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par Description
+* After encoding a mb, information such as mb type, qp used, mb distortion
+* resulted in encoding the block and so on needs to be preserved for modeling
+* RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+* Handle Frame info context
+*
+* @param[in] pv_proc
+* Handle to process context
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
+{
+    /* the opaque handle is the process context */
+    process_ctxt_t *ps_ctxt = pv_proc;
+
+    /* accumulator slot: 0 when the mb is intra, 1 otherwise */
+    WORD32 i4_slot = (ps_ctxt->u4_is_intra) ? 0 : 1;
+
+    /* one more mb of this kind */
+    ps_frame_info->num_mbs[i4_slot]++;
+
+    /* distortion of the mb */
+    ps_frame_info->tot_mb_sad[i4_slot] += ps_ctxt->i4_mb_distortion;
+
+    /* qp of the mb, translated through the h264 to mpeg2 qp map */
+    ps_frame_info->qp_sum[i4_slot] += gau1_h264_to_mpeg2_qmap[ps_ctxt->u4_mb_qp];
+
+    /* the mb cost is accumulated for intra mbs only */
+    if (0 == i4_slot)
+    {
+        ps_frame_info->intra_mb_cost_sum += ps_ctxt->i4_mb_cost;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+* This function is used to get buffer status(underflow/overflow) by rate
+* control module
+*
+* @param[in] pv_rc_api
+* Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+* Total frame bits
+*
+* @param[in] e_pic_type
+* Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+* Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+* Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+* Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+                                 WORD32 i4_total_frame_bits,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                 UWORD8 *pu1_is_enc_buf_overflow,
+                                 UWORD8 *pu1_is_enc_buf_underflow)
+{
+    /* flags handed back to the caller; both stay 0 unless the VBV status
+     * says otherwise */
+    UWORD8 u1_underflow = 0;
+    UWORD8 u1_overflow = 0;
+
+    vbv_buf_status_e e_status = irc_get_buffer_status(pv_rc_api,
+                                                      i4_total_frame_bits,
+                                                      e_pic_type,
+                                                      pi4_num_bits_to_prevent_vbv_underflow);
+
+    /* the VBV status is mirrored: VBV overflow is reported as encoder buffer
+     * underflow and VBV underflow as encoder buffer overflow */
+    if (VBV_OVERFLOW == e_status)
+    {
+        u1_underflow = 1;
+    }
+    else if (VBV_UNDERFLOW == e_status)
+    {
+        u1_overflow = 1;
+    }
+
+    *pu1_is_enc_buf_underflow = u1_underflow;
+    *pu1_is_enc_buf_overflow = u1_overflow;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+* This function is used to update the rate control module after the current
+* frame encoding is done with details such as bits consumed, SAD for I/P/B,
+* intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api context
+*
+* @param[in] ps_frame_info
+* Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+* Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+* Picture coding type
+*
+* @param[in] i4_is_first_frame
+* Is first frame
+*
+* @param[in] pi4_is_post_encode_skip
+* Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+* Frame qp
+*
+* @param[in] pi4_num_intra_in_prev_frame
+* Number of intra mbs in previous frame
+*
+* @param[in] pi4_avg_activity
+* Average activity
+*
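+* @returns
+* Number of bytes to stuff (the CBR stuffing bit count shifted right by 3);
+* 0 when no stuffing is computed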
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void * ps_rate_control_api,
+ frame_info_t *ps_frame_info,
+ void * ps_pd_frm_rate,
+ void * ps_time_stamp,
+ void * ps_frame_time,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ WORD32 i4_is_first_frame,
+ WORD32 *pi4_is_post_encode_skip,
+ UWORD8 u1_frame_qp,
+ WORD32 *pi4_num_intra_in_prev_frame,
+ WORD32 *pi4_avg_activity)
+{
+ /* Collects the statistics of the frame from ps_frame_info, decides on
+  * post-encode skip and bit stuffing when the rc type is CBR_NLDRC, flags a
+  * scene change and finally hands everything to irc_update_frame_level_info().
+  * The locals below are the arguments of that update call. */
+ WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
+ WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
+ WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
+ WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
+ WORD32 i4_total_frame_bits = 0;
+ WORD32 i4_total_hdr_bits = 0;
+ WORD32 i4_total_texturebits;
+ WORD32 i4_avg_mb_activity = 0;
+ WORD32 i4_intra_frm_cost = 0;
+ UWORD8 u1_is_scd = 0;
+ WORD32 i4_cbr_bits_to_stuff = 0;
+ UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame; /* snapshot taken before the caller's count is overwritten below */
+ UNUSED(ps_pd_frm_rate);
+ UNUSED(ps_time_stamp);
+ UNUSED(ps_frame_time);
+ UNUSED(u1_frame_qp); /* these four arguments are not used by this function */
+ /* Accumulate RC stats from the frame info collector */
+ ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTRA);
+ ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTER);
+ /* ai4_tot_mb_type_qp[MB_TYPE_INTRA] = 0;
+ ai4_tot_mb_type_qp[MB_TYPE_INTER] = ps_enc->pu1_h264_mpg2quant[u1_frame_qp] * i4_total_mb_in_frame;*/
+ ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTRA);
+ ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTER);
+ ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTRA);
+ ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTER);
+ i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info);
+ i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info);
+ i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info);
+ i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTRA);
+ i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTER);
+ i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits ;
+
+ *pi4_avg_activity = i4_avg_mb_activity;
+
+
+ /* Texture bits are not accumulated per mb type here. Hence subtracting hdr
+    bits from total bits and booking the whole amount under the inter type */
+ ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
+ ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
+
+ /* Set post encode skip to zero */
+ pi4_is_post_encode_skip[0]= 0;
+
+ /* For NLDRC, get the buffer status for stuffing or skipping */
+ if (irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
+ {
+ WORD32 i4_get_num_bit_to_prevent_vbv_overflow; /* NOTE(review): filled through the parameter the callee names "..._to_prevent_vbv_underflow"; never read here */
+ UWORD8 u1_enc_buf_overflow,u1_enc_buf_underflow;
+
+ /* Getting the buffer status */
+ ih264e_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+ pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow,
+ &u1_enc_buf_overflow,&u1_enc_buf_underflow);
+
+ /* We skip the frame if decoder buffer is underflowing. But we never skip first I frame.
+    NOTE(review): the skip is taken only when i4_is_first_frame is non-zero, which
+    matches the comment only if the caller passes 0 for the first frame - confirm
+    against the caller (the previous condition is kept commented out below) */
+ // if((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 1))
+ if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
+ {
+ irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]);
+ // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc);
+ i4_total_frame_bits = 0; /* a skipped frame is reported to RC with zero bits */
+
+ *pi4_is_post_encode_skip = 1;
+
+ /* Adjust the GOP if in case we skipped an I-frame */
+ if (*pe_vop_coding_type == I_PIC)
+ irc_force_I_frame(ps_rate_control_api);
+
+ /* Since this frame is skipped by writing 7 bytes header, we say this is a P frame */
+ // *pe_vop_coding_type = P;
+
+ /* Getting the buffer status again with the zeroed bit count, to check if it underflows.
+    NOTE(review): the returned status is discarded and u1_enc_buf_underflow is not
+    refreshed, so the stuffing decision below still uses the first query */
+ irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+ (picture_type_e)pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow);
+
+ }
+
+ /* In this case we stuff bits: u1_enc_buf_underflow is set by
+    ih264e_rc_get_buffer_status() when the VBV status is VBV_OVERFLOW */
+ if (u1_enc_buf_underflow == 1)
+ {
+ /* The stuffing function is directly pulled out from split controller workspace.
+ encode_vop_data() function makes sure alignment data is dumped at the end of a
+ frame. Split controller was identifying this alignment byte, overwriting it with
+ the stuff data and then finally aligning the buffer. Here every thing is inside
+ the DSP. So, ideally encode_vop_data needn't align, and we can start stuffing directly.
+ But in that case, it'll break the logic for a normal frame.
+ Hence for simplicity, not changing this part since it is ok to align and
+ then overwrite since stuffing is not done for every frame */
+ i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0]);
+
+ /* Just add extra 32 bits to make sure we don't stuff lesser */
+ i4_cbr_bits_to_stuff += 32;
+
+ /* We can not stuff more than the outbuf size.
+    NOTE(review): no such check is performed in this function - confirm the caller bounds it */
+ /* Add stuffed bits to total bits */
+ i4_total_frame_bits += i4_cbr_bits_to_stuff;
+ }
+ }
+
+#define ENABLE_SCD 1
+#if ENABLE_SCD
+ /* Assume a scene change when the frame is a P picture, more than 2/3rd of the
+    total MBs are intra, and the intra count exceeds 11/10 of the intra count
+    of the previous frame */
+ if ((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
+ (*pe_vop_coding_type == P_PIC) &&
+ (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32)u4_num_intra_in_prev_frame) / 10)))
+ {
+ u1_is_scd = 1;
+ }
+#endif
+
+ /* Update num intra mbs of this frame, unless the frame was post-encode skipped */
+ if (pi4_is_post_encode_skip[0] == 0)
+ {
+ *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
+ }
+
+ /* Reset intra count to zero, if the current frame is an I frame */
+ if (*pe_vop_coding_type == I_PIC)
+ {
+ *pi4_num_intra_in_prev_frame = 0;
+ }
+
+ /* Do an update of rate control after post encode */
+ irc_update_frame_level_info(ps_rate_control_api, /* RC state */
+ pe_vop_coding_type[0], /* PIC type */
+ ai4_mb_type_sad, /* SAD for [Intra/Inter] */
+ i4_total_frame_bits, /* Total frame bits, including any stuffing added above */
+ i4_total_hdr_bits, /* Header bits */
+ ai4_mb_type_tex_bits, /* Texture bits for MB[Intra/Inter] */
+ ai4_tot_mb_type_qp, /* QP sum for MB[Intra/Inter] */
+ ai4_tot_mb_in_type, /* MB count for MB[Intra/Inter] */
+ i4_avg_mb_activity, /* Average mb activity in frame */
+ u1_is_scd, /* Is a scene change detected */
+ 0, /* Pre encode skip */
+ (WORD32)i4_intra_frm_cost, /* Intra cost for frame */
+ 0); /* Not done outside */
+
+ return (i4_cbr_bits_to_stuff >> 3); /* stuffing amount converted from bits to bytes */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+* Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+* Frame info context
+*
+* @param[in] pv_entropy
+* Entropy context
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
+{
+    /* the opaque handle is the entropy context */
+    entropy_ctxt_t *ps_ent = pv_entropy;
+
+    /* header bits spent, per mb type */
+    ps_frame_info->mb_header_bits[MB_TYPE_INTRA] += ps_ent->u4_header_bits[MB_TYPE_INTRA];
+    ps_frame_info->mb_header_bits[MB_TYPE_INTER] += ps_ent->u4_header_bits[MB_TYPE_INTER];
+
+    /* residue bits are booked as texture bits, per mb type */
+    ps_frame_info->mb_texture_bits[MB_TYPE_INTRA] += ps_ent->u4_residue_bits[MB_TYPE_INTRA];
+    ps_frame_info->mb_texture_bits[MB_TYPE_INTER] += ps_ent->u4_residue_bits[MB_TYPE_INTER];
+}
+
diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h
new file mode 100755
index 0000000..de9466a
--- /dev/null
+++ b/encoder/ih264e_rate_control.h
@@ -0,0 +1,351 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_rate_control.h
+*
+* @brief
+* This file contains function declarations of api functions for h264 rate
+* control
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_RATE_CONTROL_H_
+#define IH264E_RATE_CONTROL_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function initializes rate control context and variables
+*
+* @par Description
+* This function initializes rate control type, source and target frame rate,
+* average and peak bitrate, intra-inter frame interval and initial
+* quantization parameter
+*
+* @param[in] pv_rc_api
+* Handle to rate control api
+*
+* @param[in] pv_frame_time
+* Handle to frame time context
+*
+* @param[in] pv_time_stamp
+* Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+* Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+* Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+* Target frame rate
+*
+* @param[in] e_rate_control_type
+* Rate control type
+*
+* @param[in] u4_avg_bit_rate
+* Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+* Peak bit rate
+*
+* @param[in] u4_max_delay
+* Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+* Intra frame interval
+*
+* @param[in] pu1_init_qp
+* Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+* Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+* Array of min/max qp
+*
+* @param[in] u1_profile_level
+* Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+ void *pv_frame_time,
+ void *pv_time_stamp,
+ void *pv_pd_frm_rate,
+ UWORD32 u4_max_frm_rate,
+ UWORD32 u4_src_frm_rate,
+ UWORD32 u4_tgt_frm_rate,
+ rc_type_e e_rate_control_type,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 u4_peak_bit_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD8 *pu1_init_qp,
+ WORD32 i4_max_inter_frm_int,
+ UWORD8 *pu1_min_max_qp,
+ UWORD8 u1_profile_level);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par Description
+* This function returns the Picture type(I/P/B)
+*
+* @param[in] pv_rc_api
+* Handle to Rate control api
+*
+* @returns
+* Picture type
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
+
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control output before encoding
+*
+* @par Description
+* This function is called before encoding the current frame and gets the qp
+* for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+* Total Macro Blocks in frame
+*
+* @param[in/out] pe_vop_coding_type
+* Picture coding type(I/P/B)
+*
+* @param[in/out] pu1_frame_qp
+* QP for current frame
+*
+* @returns
+* Skip or encode the current frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+ void * ps_pd_frm_rate,
+ void * ps_time_stamp,
+ void * ps_frame_time,
+ WORD32 i4_delta_time_stamp,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ UWORD8 *pu1_frame_qp);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par Description
+* After encoding a mb, information such as mb type, qp used, mb distortion
+* resulted in encoding the block and so on needs to be preserved for modelling
+* RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+* Handle Frame info context
+*
+* @param[in] pv_proc
+* Handle to process context
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+* This function is used to get buffer status(underflow/overflow) by rate
+* control module
+*
+* @param[in] pv_rc_api
+* Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+* Total frame bits
+*
+* @param[in] e_pic_type
+* Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+* Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+* Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+* Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+ WORD32 i4_total_frame_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+ UWORD8 *pu1_is_enc_buf_overflow,
+ UWORD8 *pu1_is_enc_buf_underflow);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+* This function is used to update the rate control module after the current
+* frame encoding is done with details such as bits consumed, SAD for I/P/B,
+* intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api context
+*
+* @param[in] ps_frame_info
+* Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+* Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+* Picture coding type
+*
+* @param[in] i4_is_first_frame
+* Is first frame
+*
+* @param[in] pi4_is_post_encode_skip
+* Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+* Frame qp
+*
+* @param[in] pi4_num_intra_in_prev_frame
+* Number of intra mbs in previous frame
+*
+* @param[in] pi4_avg_activity
+* Average activity
+*
+* @returns
+* Number of bytes to stuff; 0 when no stuffing is computed
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void *ps_rate_control_api,
+ frame_info_t *ps_frame_info,
+ void *ps_pd_frm_rate,
+ void *ps_time_stamp,
+ void *ps_frame_time,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ WORD32 i4_is_first_frame,
+ WORD32 *pi4_is_post_encode_skip,
+ UWORD8 u1_frame_qp,
+ WORD32 *pi4_num_intra_in_prev_frame,
+ WORD32 *pi4_avg_activity);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+* Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+* Frame info context
+*
+* @param[in] pv_entropy
+* Entropy context
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
+
+#endif /* IH264E_RATE_CONTROL_H_ */
+
diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c
new file mode 100755
index 0000000..e4d5781
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.c
@@ -0,0 +1,395 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_rc_mem_interface.c
+*
+* @brief
+* This file contains api function definitions for rate control memtabs
+*
+* @author
+* ittiam
+*
+* List of Functions
+* - fill_memtab()
+* - use_or_fill_base()
+* - ih264e_map_rc_mem_recs_to_itt_api()
+* - ih264e_map_itt_mem_rec_to_rc_mem_rec()
+* - ih264e_get_rate_control_mem_tab()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264e_error.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_common.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] u4_size
+* size of the record
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab,
+                 WORD32 u4_size,
+                 WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage,
+                 ITT_MEM_REGION_E e_mem_region)
+{
+    /* Mask based round-up: yields the next multiple of the alignment when
+     * the alignment is a power of two */
+    WORD32 i4_mask = i4_alignment - 1;
+    WORD32 i4_rounded = (u4_size + i4_mask) & (~i4_mask);
+
+    /* Record the attributes of the request in the memtab */
+    ps_mem_tab->e_mem_region = e_mem_region;
+    ps_mem_tab->e_usage = e_usage;
+    ps_mem_tab->i4_alignment = i4_alignment;
+    ps_mem_tab->u4_size = i4_rounded;
+}
+
+/**
+******************************************************************************
+*
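+* @brief This function stores or retrieves the base pointer of a memory record
+*
+* @par Description
+* For FILL_BASE the caller's pointer is stored as base of the memory record;
+* for USE_BASE the base of the memory record is handed back to the caller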
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return 0 on success, -1 if the pointer to be copied is null
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab,
+                        void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type)
+{
+    /* 0 is also the result for any function type not handled below */
+    WORD32 i4_status = 0;
+
+    if (e_func_type == FILL_BASE)
+    {
+        /* caller's pointer -> memtab base; a null pointer is an error */
+        if (ptr_to_be_filled[0] == 0)
+        {
+            i4_status = -1;
+        }
+        else
+        {
+            ps_mem_tab->pv_base = ptr_to_be_filled[0];
+        }
+    }
+    else if (e_func_type == USE_BASE)
+    {
+        /* memtab base -> caller's pointer; a null base is an error */
+        if (ps_mem_tab->pv_base == 0)
+        {
+            i4_status = -1;
+        }
+        else
+        {
+            ptr_to_be_filled[0] = ps_mem_tab->pv_base;
+        }
+    }
+
+    return i4_status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Copies rc mem record requirements into encoder lib mem records
+*
+* @par Description
+* For each of the num_mem_recs entries, the size and alignment held in the rc
+* mem record are written to the encoder lib mem record with the same index
+* through FILL_MEMTAB. Every record is requested as
+* IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM.
+*
+* @param[out] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[in] rc_memtab
+* pointer to rc mem records
+*
+* @param[in] num_mem_recs
+* number of memory records
+*
+* @return void
+*
+******************************************************************************
+*/
+void ih264e_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem,
+ itt_memtab_t *rc_memtab,
+ UWORD32 num_mem_recs)
+{
+ UWORD32 u4_idx = 0;
+
+ while (u4_idx < num_mem_recs)
+ {
+ UWORD32 u4_rec_size = rc_memtab[u4_idx].u4_size;
+ UWORD32 u4_rec_align = rc_memtab[u4_idx].i4_alignment;
+
+ /* we always ask for external persistent cacheable memory */
+ FILL_MEMTAB(ps_mem, u4_idx, u4_rec_size, u4_rec_align, IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
+
+ u4_idx++;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Copies encoder lib mem records into rc mem records
+*
+* @par Description
+* For each of the num_mem_recs entries, the alignment, size and base pointer
+* of the encoder lib mem record are copied to the rc mem record with the same
+* index. The region is always set to DDR and the usage to PERSISTENT.
+*
+* @param[in] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[out] rc_memtab
+* pointer to rc mem records
+*
+* @param[in] num_mem_recs
+* Number of memory records
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem,
+ itt_memtab_t *rc_memtab,
+ UWORD32 num_mem_recs)
+{
+ UWORD32 u4_idx;
+
+ for (u4_idx = 0; u4_idx < num_mem_recs; u4_idx++)
+ {
+ itt_memtab_t *ps_dst = &rc_memtab[u4_idx];
+ iv_mem_rec_t *ps_src = &ps_mem[u4_idx];
+
+ /* only DDR memory is available */
+ ps_dst->e_mem_region = DDR;
+ ps_dst->e_usage = PERSISTENT;
+
+ /* attributes taken over from the encoder lib record */
+ ps_dst->pv_base = ps_src->pv_base;
+ ps_dst->u4_size = ps_src->u4_mem_size;
+ ps_dst->i4_alignment = ps_src->u4_mem_alignment;
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief Get memtabs for rate control
+*
+* @par Description
+* Walks four rate control sub-modules in a fixed order (rate control api,
+* frame time, time stamp, pd frame rate). For each one it queries the number
+* of memtabs with GET_NUM_MEMTAB, copies that many of the caller's records
+* into a scratch itt_memtab_t array, calls the module again with e_func_type,
+* and copies the resulting size and alignment back into the caller's records.
+*
+* @param[in] pv_rate_control
+* pointer to rate control context (handle); it is dereferenced only when
+* e_func_type is USE_BASE or FILL_BASE
+*
+* @param[in,out] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or Init memory records
+*
+* @return total number of mem records
+*
+* @remarks The scratch array has static storage, so every call shares it.
+* NOTE(review): no check against NUM_RC_MEMTABS is visible here, and calls
+* made from several threads would share the scratch array - confirm callers.
+*
+******************************************************************************
+*/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
+ iv_mem_rec_t *ps_mem,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ static itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS]; /* scratch rc mem records */
+ WORD32 i4_num_memtab = 0, j = 0;
+ void *refptr2[4];
+ void **refptr1[4];
+ rate_control_ctxt_t *ps_rate_control = pv_rate_control;
+
+ for (j = 0; j < 4; j++) /* default: each handle points at a local placeholder slot */
+ refptr1[j] = &(refptr2[j]);
+
+ j = 0; /* from here on j counts the mem records consumed so far */
+
+ if (e_func_type == USE_BASE || e_func_type == FILL_BASE)
+ {
+ refptr1[1] = &ps_rate_control->pps_frame_time;
+ refptr1[2] = &ps_rate_control->pps_time_stamp;
+ refptr1[3] = &ps_rate_control->pps_pd_frm_rate;
+ refptr1[0] = &ps_rate_control->pps_rate_control_api;
+ }
+
+ /* Get the total number of memtabs used by Rate Controller */
+ i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0], NULL, GET_NUM_MEMTAB);
+ /* Copy the caller's records for this module into the scratch memtabs */
+ ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ /* Run the requested operation (e_func_type) on the Rate Controller memtabs */
+ i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0],as_itt_memtab+j,e_func_type);
+ /* Copy the resulting size and alignment back to the caller's records */
+ ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ j += i4_num_memtab;
+
+ /* Get the total number of memtabs used by Frame time Module */
+ i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], NULL, GET_NUM_MEMTAB);
+ /* Copy the caller's records for this module into the scratch memtabs */
+ ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ /* Run the requested operation (e_func_type) on the Frame time Module memtabs */
+ i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], as_itt_memtab+j, e_func_type);
+ /* Copy the resulting size and alignment back to the caller's records */
+ ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ j += i4_num_memtab;
+
+ /* Get the total number of memtabs used by Time stamp Module */
+ i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], NULL, GET_NUM_MEMTAB);
+ /* Copy the caller's records for this module into the scratch memtabs */
+ ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ /* Run the requested operation (e_func_type) on the Time Stamp Module memtabs */
+ i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], as_itt_memtab+j, e_func_type);
+ /* Copy the resulting size and alignment back to the caller's records */
+ ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ j += i4_num_memtab;
+
+ /* Get the total number of memtabs used by Frame rate Module */
+ i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], NULL, GET_NUM_MEMTAB);
+ /* Copy the caller's records for this module into the scratch memtabs */
+ ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ /* Run the requested operation (e_func_type) on the Frame Rate Module memtabs */
+ i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], as_itt_memtab+j, e_func_type);
+ /* Copy the resulting size and alignment back to the caller's records */
+ ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+ j += i4_num_memtab;
+
+ return j; /* Total MemTabs Needed by Rate Control Module */
+}
diff --git a/encoder/ih264e_rc_mem_interface.h b/encoder/ih264e_rc_mem_interface.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_rc_mem_interface.h
+*
+* @brief
+* This file contains function declaration and structures for rate control
+* memtabs
+*
+* @author
+* ittiam
+*
+* @remarks
+* The rate control library is a global library across various codecs. It
+* anticipates certain structures definitions. Those definitions are to be
+* imported from global workspace. Instead of that, the structures needed for
+* rc library are copied in to this file and exported to rc library. If the
+* structures / enums / ... in the global workspace change, this file also needs
+* to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/* Fills entry m_j of the iv_mem_rec_t array m_pv_mem_rec with the record size,
+ * the requested memory size, alignment and memory type. Every argument is
+ * parenthesised so that expression arguments bind as intended. m_pv_mem_rec
+ * and m_j are evaluated four times, so pass arguments without side effects. */
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type) \
+{ \
+ (m_pv_mem_rec)[(m_j)].u4_size = sizeof(iv_mem_rec_t); \
+ (m_pv_mem_rec)[(m_j)].u4_mem_size = (m_mem_size); \
+ (m_pv_mem_rec)[(m_j)].u4_mem_alignment = (m_align); \
+ (m_pv_mem_rec)[(m_j)].e_mem_type = (m_type); \
+}
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum /* named alignment values; presumably in bytes like itt_memtab_t.i4_alignment - TODO confirm */
+{
+ ALIGN_BYTE = 1,
+ ALIGN_WORD16 = 2,
+ ALIGN_WORD32 = 4,
+ ALIGN_WORD64 = 8,
+ ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+typedef enum /* usage class of a mem record; type of itt_memtab_t.e_usage */
+{
+ SCRATCH = 0,
+ PERSISTENT = 1,
+ WRITEONCE = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+typedef enum /* memory region of a mem record; type of itt_memtab_t.e_mem_region */
+{
+ L1D = 0,
+ SL2 = 1,
+ DDR = 3 /* note: the value 2 is not assigned to any enumerator */
+}ITT_MEM_REGION_E;
+
+typedef enum /* operation selector passed to the memtab helper functions */
+{
+ GET_NUM_MEMTAB = 0,
+ FILL_MEMTAB = 1, /* same spelling as the function-like macro FILL_MEMTAB(...); the macro is only expanded when the name is followed by '(' */
+ USE_BASE = 2, /* use_or_fill_base(): copy the record's pv_base to the caller's pointer */
+ FILL_BASE =3 /* use_or_fill_base(): copy the caller's pointer into the record's pv_base */
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* NOTE: This must stay an exact replica of IALG_MemRec; any change made to
+ IALG_MemRec must be replicated here. */
+typedef struct
+{
+ /* Size in bytes */
+ UWORD32 u4_size;
+
+ /* Alignment in bytes */
+ WORD32 i4_alignment;
+
+ /* Memory region in which the record is to be placed */
+ ITT_MEM_REGION_E e_mem_region;
+
+ /* Usage class of the memory (scratch, persistent or write-once) */
+ ITT_MEM_USAGE_TYPE_E e_usage;
+
+ /* Base pointer for allocated memory */
+ void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* Writes the size, alignment, usage and region fields of one memory record.
+* The pv_base field is not part of the argument list.
+*
+* @param[out] ps_mem_tab
+* pointer to the mem record to fill
+*
+* @param[in] u4_size
+* size of the record (declared as WORD32 despite the u4_ prefix)
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+ ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief Copies a base pointer between a memory record and a caller handle
+*
+* @par Description
+* The direction of the copy is selected by e_func_type (FILL_BASE or USE_BASE)
+*
+* @param[in,out] ps_mem_tab
+* pointer to the mem record
+*
+* @param[in,out] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return WORD32 status code (the function does not return void)
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+ ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/ih264e_statistics.h b/encoder/ih264e_statistics.h
new file mode 100755
index 0000000..0ab33ca
--- /dev/null
+++ b/encoder/ih264e_statistics.h
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_statistics.h
+*
+* @brief
+* Contains macros for generating stats about h264 encoder
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STATISTICS_H_
+#define IH264E_STATISTICS_H_
+
+#if CAVLC_LEVEL_STATS
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief In cavlc encoding, a lut is used for encoding levels. It is not possible
+ * to use look up for all possible levels. The extent to which look up is generated
+ * is based on the statistics that were collected in the following global variables.
+ *
+ * gu4_cavlc_level_bin_lt_4 counts the coefficients with abs(level) < 4
+ * gu4_cavlc_level_bin_lt_16 counts the coefficients with 4 <= abs(level) < 16
+ * gu4_cavlc_level_bin_lt_32 counts the coefficients with 16 <= abs(level) < 32
+ * and so on; gu4_cavlc_level_bin_else_where counts abs(level) >= 128
+ * ******************************************************************************
+ */
+extern UWORD32 gu4_cavlc_level_bin_lt_4;
+extern UWORD32 gu4_cavlc_level_bin_lt_16;
+extern UWORD32 gu4_cavlc_level_bin_lt_32;
+extern UWORD32 gu4_cavlc_level_bin_lt_64;
+extern UWORD32 gu4_cavlc_level_bin_lt_128;
+extern UWORD32 gu4_cavlc_level_bin_else_where;
+extern UWORD32 gu4_cavlc_level_lut_hit_rate; /* incremented once per GATHER_CAVLC_STATS2(), i.e. a count */
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief print cavlc stats; declared only when CAVLC_LEVEL_STATS is non-zero
+******************************************************************************
+*/
+void print_cavlc_level_stats(void);
+
+#define GATHER_CAVLC_STATS1() \
+ if (u4_abs_level < 4)\
+ gu4_cavlc_level_bin_lt_4 ++; \
+ else if (u4_abs_level < 16) \
+ gu4_cavlc_level_bin_lt_16 ++; \
+ else if (u4_abs_level < 32) \
+ gu4_cavlc_level_bin_lt_32 ++; \
+ else if (u4_abs_level < 64) \
+ gu4_cavlc_level_bin_lt_64 ++; \
+ else if (u4_abs_level < 128) \
+ gu4_cavlc_level_bin_lt_128 ++; \
+ else \
+ gu4_cavlc_level_bin_else_where ++;
+
+#define GATHER_CAVLC_STATS2() \
+ gu4_cavlc_level_lut_hit_rate ++; /* the expansion already ends in ';' */
+
+#else
+
+#define GATHER_CAVLC_STATS1() /* expands to nothing when CAVLC_LEVEL_STATS is zero or undefined */
+
+#define GATHER_CAVLC_STATS2() /* expands to nothing when CAVLC_LEVEL_STATS is zero or undefined */
+
+#endif
+
+
+#if GATING_STATS
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief During encoding at the fastest preset, intra analysis is sometimes
+* skipped when the inter threshold is less than the predefined threshold. The
+* variable below keeps track of the number of MBs for which intra analysis is
+* not done
+* ******************************************************************************
+*/
+extern UWORD32 gu4_mb_gated_cnt;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief print gating stats; declared only when GATING_STATS is non-zero
+******************************************************************************
+*/
+void print_gating_stats(void);
+
+#define GATHER_GATING_STATS() \
+ gu4_mb_gated_cnt ++; /* the expansion already ends in ';' */
+
+#else
+
+#define GATHER_GATING_STATS() /* expands to nothing when GATING_STATS is zero or undefined */
+
+#endif
+
+
+#endif /* IH264E_STATISTICS_H_ */
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
new file mode 100755
index 0000000..1043a53
--- /dev/null
+++ b/encoder/ih264e_structs.h
@@ -0,0 +1,2566 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_structs.h
+*
+* @brief
+* Structure definitions used in the encoder
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STRUCTS_H_
+#define IH264E_STRUCTS_H_
+
+/*****************************************************************************/
+/* Extern Function type definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Function pointer type for leaf level intra prediction filters. Takes
+ * a source and a destination pointer, their strides and a neighbour
+ * availability value; returns nothing.
+******************************************************************************
+ */
+typedef void (*pf_intra_pred)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+ WORD32 src_strd, WORD32 dst_strd,
+ WORD32 ui_neighboravailability);
+
+/**
+******************************************************************************
+ * @brief Function pointer type for leaf level inter prediction filters. Takes
+ * two source pointers and one destination pointer, a stride for each of the
+ * three, and the block height and width; returns nothing.
+******************************************************************************
+ */
+
+typedef void (*pf_inter_pred_luma_bilinear)(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
+ WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
+ WORD32 height, WORD32 width);
+
+/**
+******************************************************************************
+ * @brief Function pointer types for the leaf level fwd transform
+ * (pf_trans_quant) and, going by its name, the matching inverse quantisation /
+ * inverse transform (pf_iquant_itrans). Both return nothing.
+******************************************************************************
+ */
+typedef void (*pf_trans_quant)(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
+ WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_dst_stride,
+ const UWORD16 *pu2_scale_mat, const UWORD16 *pu2_thresh_mat,
+ UWORD32 u4_qbit, UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
+
+typedef void (*pf_iquant_itrans)(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
+ WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_out_stride,
+ const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div, WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+ * @brief Function pointer type for leaf level padding routines. Takes a source
+ * pointer, its stride, a width and a pad size; returns nothing.
+******************************************************************************
+ */
+typedef void (*pf_pad)(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 pad_size);
+
+/**
+******************************************************************************
+ * @brief Function pointer types for leaf level memory handling: copy and set
+ * routines, plus "_mul8" variants with identical signatures (presumably for
+ * byte counts that are multiples of 8 - TODO confirm).
+******************************************************************************
+ */
+typedef void (*pf_memcpy)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+typedef void (*pf_memcpy_mul8)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset_mul8)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+/**
+******************************************************************************
+ * @brief Function pointer type for SAD computation. The function returns
+ * nothing; the result is presumably delivered through pi4_mb_distortion -
+ * TODO confirm against an implementation.
+******************************************************************************
+ */
+typedef void (*pf_compute_sad)(UWORD8 *pu1_src, UWORD8 *pu1_est,
+ UWORD32 src_strd, UWORD32 est_strd,
+ WORD32 i4_max_sad, WORD32 *pi4_mb_distortion);
+
+/**
+******************************************************************************
+ * @brief Function pointer types for encoder level intra mode evaluation. The
+ * 4x4 variant takes two extra arguments (u4_lambda, u4_predictd_mode).
+ * Note: u4_n_avblty is declared WORD32, u4_intra_mode is a UWORD32 pointer and
+ * pu4_sadmin is a WORD32 pointer, so the name prefixes do not match the types.
+******************************************************************************
+ */
+typedef void (*pf_evaluate_intra_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
+ UWORD32 src_strd, UWORD32 dst_strd,
+ WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes);
+
+typedef void (*pf_evaluate_intra_4x4_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
+ UWORD32 src_strd, UWORD32 dst_strd,
+ WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode);
+
+/**
+******************************************************************************
+ * @brief Function pointer types for encoder level half_pel generation.
+ * Note: pi16_pred1 is declared as a WORD32 pointer despite its pi16_ prefix.
+******************************************************************************
+ */
+typedef void (*pf_sixtapfilter_horz)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+ WORD32 src_strd, WORD32 dst_strd);
+
+typedef void (*pf_sixtap_filter_2dvh_vert)(UWORD8 *pu1_src, UWORD8 *pu1_dst1, UWORD8 *pu1_dst2,
+ WORD32 src_strd, WORD32 dst_strd,
+ WORD32 *pi16_pred1,
+ WORD32 pi16_pred1_strd);
+/**
+******************************************************************************
+ * @brief Function pointer types for color space conversion routines.
+ * Note: in pf_fmt_conv_422ile_to_420sp the width, height and stride arguments
+ * are declared WORD32 despite their u4_ prefixes.
+******************************************************************************
+ */
+typedef void (*pf_fmt_conv_420p_to_420sp)(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+ UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+ UWORD16 u2_height, UWORD16 u2_width,
+ UWORD16 src_y_strd, UWORD16 src_u_strd, UWORD16 src_v_strd,
+ UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+ UWORD32 convert_uv_only);
+
+typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf, UWORD8 *pu1_v_buf,
+ UWORD8 *pu1_422i_buf,
+ WORD32 u4_y_width, WORD32 u4_y_height, WORD32 u4_y_stride,
+ WORD32 u4_u_stride, WORD32 u4_v_stride,
+ WORD32 u4_422i_stride);
+
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @enum CODEC_STATE_T
+ * @brief codec state; the enumerators take the implicit values 0, 1 and 2
+ ******************************************************************************
+ */
+typedef enum
+{
+ INIT_DONE,
+ HEADER_DONE,
+ FIRST_FRAME_DONE,
+} CODEC_STATE_T;
+
+
+/**
+ ******************************************************************************
+ * @enum JOBQ_CMD_T
+ * @brief list of job commands (used during job instantiation); the enumerators
+ * take the implicit values 0 to 3
+ ******************************************************************************
+ */
+typedef enum
+{
+ CMD_PROCESS,
+ CMD_ENTROPY,
+ CMD_FMTCONV,
+ CMD_ME,
+}JOBQ_CMD_T;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+ * PU information
+ */
+typedef struct
+{
+
+ /**
+ * L0 Motion Vector
+ */
+ mv_t s_l0_mv;
+
+ /**
+ * PU X position in terms of min PU (4x4) units (4-bit field)
+ */
+ UWORD32 b4_pos_x : 4;
+
+ /**
+ * PU Y position in terms of min PU (4x4) units (4-bit field)
+ */
+ UWORD32 b4_pos_y : 4;
+
+ /**
+ * PU width in pixels = (b4_wd + 1) << 2
+ * (2-bit field despite the b4_ prefix, so the width is 4, 8, 12 or 16)
+ */
+ UWORD32 b4_wd : 2;
+
+ /**
+ * PU height in pixels = (b4_ht + 1) << 2
+ * (2-bit field despite the b4_ prefix, so the height is 4, 8, 12 or 16)
+ */
+ UWORD32 b4_ht : 2;
+
+ /**
+ * L0 Ref index
+ */
+ WORD8 i1_l0_ref_idx;
+
+} enc_pu_t;
+
+typedef struct _codec_t codec_t; /* forward declaration; struct _codec_t is not defined at this point */
+
+typedef struct
+{
+ /** Descriptor of raw buffer */
+ iv_raw_buf_t s_raw_buf;
+
+ /** Lower 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is last buffer */
+ UWORD32 u4_is_last;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info; the original comment keyed this on
+ * mb_info_type, presumably pic_info_type was meant - TODO confirm */
+ void *pv_pic_info;
+
+}inp_buf_t;
+
+typedef struct
+{
+ /** Descriptor of bitstream buffer */
+ iv_bits_buf_t s_bits_buf;
+
+ /** Lower 32 bits of the time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of the time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is the last buffer */
+ UWORD32 u4_is_last;
+
+}out_buf_t;
+
+typedef struct
+{
+ /** Descriptor of picture buffer */
+ pic_buf_t s_pic_buf;
+
+ /** Lower 32 bits of the time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of the time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is the last buffer */
+ UWORD32 u4_is_last;
+
+ /** Picture count corresponding to current picture */
+ WORD32 i4_pic_cnt;
+
+}rec_buf_t;
+
+typedef struct
+{
+ /** maximum width for which codec should request memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** maximum height for which codec should request memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+ /** Flag to enable/disable recon - To be used only for debugging/testing */
+ UWORD32 u4_enable_recon;
+
+ /** Recon color format */
+ IV_COLOR_FORMAT_T e_recon_color_fmt;
+
+ /** Encoder Speed preset - Value between 0 (slowest) and 100 (fastest).
+ * NOTE(review): the field is declared as IVE_SPEED_CONFIG; confirm the
+ * stated range against that type */
+ IVE_SPEED_CONFIG u4_enc_speed_preset;
+
+ /** Rate control mode */
+ IVE_RC_MODE_T e_rc_mode;
+
+ /** Maximum frame rate to be supported */
+ UWORD32 u4_max_framerate;
+
+ /** Maximum bitrate to be supported */
+ UWORD32 u4_max_bitrate;
+
+ /** Maximum number of consecutive B frames */
+ UWORD32 u4_max_num_bframes;
+
+ /** Content type Interlaced/Progressive */
+ IV_CONTENT_TYPE_T e_content_type;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+ /** Slice Mode */
+ IVE_SLICE_MODE_T e_slice_mode;
+
+ /** Slice parameter */
+ UWORD32 u4_slice_param;
+
+ /** Processor architecture */
+ IV_ARCH_T e_arch;
+
+ /** SOC details */
+ IV_SOC_T e_soc;
+
+ /** Input width to be sent in bitstream */
+ UWORD32 u4_disp_wd;
+
+ /** Input height to be sent in bitstream */
+ UWORD32 u4_disp_ht;
+
+ /** Input width */
+ UWORD32 u4_wd;
+
+ /** Input height */
+ UWORD32 u4_ht;
+
+ /** Input stride */
+ UWORD32 u4_strd;
+
+ /** Source frame rate */
+ UWORD32 u4_src_frame_rate;
+
+ /** Target frame rate */
+ UWORD32 u4_tgt_frame_rate;
+
+ /** Target bitrate in kilobits per second */
+ UWORD32 u4_target_bitrate;
+
+ /** Force current frame type */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Encoder mode */
+ IVE_ENC_MODE_T e_enc_mode;
+
+ /** Set initial Qp for I pictures */
+ UWORD32 u4_i_qp;
+
+ /** Set initial Qp for P pictures */
+ UWORD32 u4_p_qp;
+
+ /** Set initial Qp for B pictures */
+ UWORD32 u4_b_qp;
+
+ /** Set minimum Qp for I pictures */
+ UWORD32 u4_i_qp_min;
+
+ /** Set maximum Qp for I pictures */
+ UWORD32 u4_i_qp_max;
+
+ /** Set minimum Qp for P pictures */
+ UWORD32 u4_p_qp_min;
+
+ /** Set maximum Qp for P pictures */
+ UWORD32 u4_p_qp_max;
+
+ /** Set minimum Qp for B pictures */
+ UWORD32 u4_b_qp_min;
+
+ /** Set maximum Qp for B pictures */
+ UWORD32 u4_b_qp_max;
+
+ /** Adaptive intra refresh mode */
+ IVE_AIR_MODE_T e_air_mode;
+
+ /** Adaptive intra refresh period in frames */
+ UWORD32 u4_air_refresh_period;
+
+ /** VBV buffer delay */
+ UWORD32 u4_vbv_buffer_delay;
+
+ /** VBV buffer size */
+ UWORD32 u4_vbv_buf_size;
+
+ /** Number of cores to be used */
+ UWORD32 u4_num_cores;
+
+ /** ME speed preset - Value between 0 (slowest) and 100 (fastest) */
+ UWORD32 u4_me_speed_preset;
+
+ /** Flag to enable/disable half pel motion estimation */
+ UWORD32 u4_enable_hpel;
+
+ /** Flag to enable/disable quarter pel motion estimation */
+ UWORD32 u4_enable_qpel;
+
+ /** Flag to enable/disable intra 4x4 analysis */
+ UWORD32 u4_enable_intra_4x4;
+
+ /** Flag to enable/disable intra 8x8 analysis */
+ UWORD32 u4_enable_intra_8x8;
+
+ /** Flag to enable/disable intra 16x16 analysis */
+ UWORD32 u4_enable_intra_16x16;
+
+ /** Flag to enable/disable fast SAD approximation */
+ UWORD32 u4_enable_fast_sad;
+
+ /** Flag to enable/disable alternate reference frames */
+ UWORD32 u4_enable_alt_ref;
+
+ /** Flag to enable/disable computation of SATQD in ME */
+ UWORD32 u4_enable_satqd;
+
+ /** Minimum SAD to search for */
+ WORD32 i4_min_sad;
+
+ /** Maximum search range in X direction for farthest reference */
+ UWORD32 u4_srch_rng_x;
+
+ /** Maximum search range in Y direction for farthest reference */
+ UWORD32 u4_srch_rng_y;
+
+ /** I frame interval */
+ UWORD32 u4_i_frm_interval;
+
+ /** IDR frame interval */
+ UWORD32 u4_idr_frm_interval;
+
+ /** consecutive B frames */
+ UWORD32 u4_num_b_frames;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+ UWORD32 u4_disable_deblock_level;
+
+ /** Profile */
+ IV_PROFILE_T e_profile;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to say if the current config parameter set is valid
+ * Will be zero to start with and will be set to 1, when configured
+ * Once encoder uses the parameter set, this will be set to zero */
+ UWORD32 u4_is_valid;
+
+ /** Command associated with this config param set */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_cmd;
+
+ /** Input width in mbs (declared UWORD32 despite the i4_ prefix) */
+ UWORD32 i4_wd_mbs;
+
+ /** Input height in mbs (declared UWORD32 despite the i4_ prefix) */
+ UWORD32 i4_ht_mbs;
+
+ /** entropy coding mode flag */
+ UWORD32 u4_entropy_coding_mode;
+
+ /** enable weighted prediction */
+ UWORD32 u4_weighted_prediction;
+
+ /** enable constrained intra prediction */
+ UWORD32 u4_constrained_intra_pred;
+
+ /** Pic info type */
+ UWORD32 u4_pic_info_type;
+ /**
+ * MB info type
+ */
+ UWORD32 u4_mb_info_type;
+
+}cfg_params_t;
+
+
+
+/** Structure to hold the format conversion context */
+typedef struct
+{
+ /** Current row for which format conversion should be done */
+ WORD32 i4_cur_row;
+
+ /** Number of rows for which format conversion should be done */
+ WORD32 i4_num_rows;
+
+}fmt_conv_t;
+
+
+/**
+ * Structure to represent a processing job entry
+ */
+typedef struct
+{
+ /**
+ * Command (presumably one of JOBQ_CMD_T - TODO confirm)
+ */
+ WORD32 i4_cmd;
+
+ /**
+ * MB x of the starting MB
+ */
+ WORD16 i2_mb_x;
+
+ /**
+ * MB y of the starting MB
+ */
+
+ WORD16 i2_mb_y;
+
+ /**
+ * Number of MBs that need to be processed in this job
+ */
+ WORD16 i2_mb_cnt;
+
+ /**
+ * Process contexts base index
+ * Will toggle between 0 and MAX_PROCESS_THREADS
+ */
+ WORD16 i2_proc_base_idx;
+
+} job_t;
+
+
+/**
+ * Structure to represent a MV Bank buffer
+ */
+typedef struct
+{
+ /**
+ * Pointer to hold the number of PUs in each MB of a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /**
+ * Pointer to hold enc_pu_t for each PU in a picture
+ */
+ enc_pu_t *ps_pic_pu;
+
+ /**
+ * Pointer to hold PU map for each MB in a picture
+ */
+ UWORD8 *pu1_pic_pu_map;
+
+ /**
+ * Pointer to hold the Slice map
+ * (declared as a UWORD16 pointer despite the pu1_ prefix)
+ */
+ UWORD16 *pu1_pic_slice_map;
+
+ /**
+ * Absolute POC for the current MV Bank
+ */
+ WORD32 i4_abs_poc;
+
+ /**
+ * Buffer Id
+ */
+ WORD32 i4_buf_id;
+
+} mv_buf_t;
+
+
+/**
+ * Reference set: picture count and POC together with pointers to the MV buf and pic buf
+ */
+typedef struct
+{
+ /** Picture count */
+ WORD32 i4_pic_cnt;
+
+ /** POC */
+ WORD32 i4_poc;
+
+ /** Pointer to the picture buffer */
+ pic_buf_t *ps_pic_buf;
+
+ /** Pointer to the mv buffer */
+ mv_buf_t *ps_mv_buf;
+
+}ref_set_t;
+
+typedef struct /* MV context: parameter sets, MB position, PU arrays/maps and MB neighbour availability */
+{
+
+ /**
+ * Pointer to current PPS
+ */
+ pps_t *ps_pps;
+
+ /**
+ * Pointer to current SPS
+ */
+ sps_t *ps_sps;
+
+ /**
+ * Pointer to current slice header structure
+ */
+ slice_header_t *ps_slice_hdr;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+
+ WORD32 i4_mb_y;
+
+ /**
+ * Current PU structure - set to MB enc_pu_t pointer at the start of MB processing and incremented
+ * for every TU (NOTE(review): "TU" is the original wording; possibly "PU" was meant - confirm)
+ */
+ enc_pu_t *ps_pu;
+
+ /**
+ * Pointer to frame level enc_pu_t for the current frame being parsed
+ * where MVs and Intra pred modes will be updated
+ */
+ enc_pu_t *ps_pic_pu;
+
+ /**
+ * Pointer to the number of PUs in each MB of a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /** PU Index map per MB. The indices in this map are w.r.t picture pu array and not
+ * w.r.t MB pu array.
+ * This will be used during mv prediction and since neighbors will have different MB pu map
+ * it will be easier if they all have indices w.r.t picture level PU array rather than MB level
+ * PU array.
+ * pu1_pic_pu_map is map w.r.t MB's enc_pu_t array
+ */
+ UWORD32 *pu4_pic_pu_idx_map;
+
+ /**
+ * Pointer to pu_map for the current frame being parsed
+ * where MVs and Intra pred modes will be updated
+ */
+ UWORD8 *pu1_pic_pu_map;
+
+ /**
+ * PU count in current MB
+ */
+ WORD32 i4_mb_pu_cnt;
+
+ /**
+ * Start PU index of the current MB (NOTE(review): inferred from the field name; the original comment repeated "PU count in current MB")
+ */
+ WORD32 i4_mb_start_pu_idx;
+
+ /**
+ * Top availability for current MB level
+ */
+ UWORD8 u1_top_mb_avail;
+
+ /**
+ * Top right availability for current MB level
+ */
+ UWORD8 u1_top_rt_mb_avail;
+ /**
+ * Top left availability for current MB level
+ */
+ UWORD8 u1_top_lt_mb_avail;
+ /**
+ * Left availability for current MB level
+ */
+ UWORD8 u1_left_mb_avail;
+
+}mv_ctxt_t;
+
+typedef struct /* Boundary strength context: MB position plus picture-level BS and QP arrays */
+{
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB's x position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_x;
+
+ /**
+ * MB's y position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_y;
+
+ /**
+ * Vertical boundary strength, Two bits per edge.
+ * Stored in format. BS[15] | BS[14] | .. |BS[0]
+ */
+ UWORD32 *pu4_pic_vert_bs;
+
+ /**
+ * Horizontal boundary strength (per the field name), Two bits per edge.
+ * Stored in format. BS[15] | BS[14] | .. |BS[0]
+ */
+ UWORD32 *pu4_pic_horz_bs;
+
+ /**
+ * Qp array stored for each mb
+ */
+ UWORD8 *pu1_pic_qp;
+
+}bs_ctxt_t;
+
+typedef struct /* Deblocking context: MB position, BS/QP context, current picture planes and slice index map */
+{
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * Embedded structure that contains BS and QP frame level arrays
+ */
+ bs_ctxt_t s_bs_ctxt;
+
+ /**
+ * Pointer to 0th luma pixel in current pic
+ */
+ UWORD8 *pu1_cur_pic_luma;
+
+ /**
+ * Pointer to 0th chroma pixel in current pic
+ */
+ UWORD8 *pu1_cur_pic_chroma;
+
+ /**
+ * Points to the array of slice indices which is used to identify the slice
+ * to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+}deblk_ctxt_t;
+
+
+/**
+ ******************************************************************************
+ * @brief Structure to hold data and flags for 'n' mb processing for
+ * deblocking, padding and half pel generation.
+ ******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * MB's x position last processed + 1
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position currently being processed.
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * Number of MBs processed in a stretch
+ */
+ WORD32 i4_n_mbs;
+
+}n_mb_process_ctxt_t;
+
+
+/**
+******************************************************************************
+ * @brief Structure to hold coefficient info for a 4x4 subblock.
+ * The following can be used to type-cast coefficient data that is stored
+ * per subblock. Note that though ai2_residue is shown as an array that
+ * holds 16 coefficients, only the first few entries will be valid. Next
+ * subblocks data starts after the valid number of coefficients. Number
+ * of non-zero coefficients will be derived using number of non-zero bits
+ * in sig coeff map
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * significant coefficient map and nnz are packed in
+ * to msb (2 bytes) and lsb (2 bytes) respectively
+ */
+ WORD32 i4_sig_map_nnz;
+
+ /**
+ * array of non zero residue coefficients (see the structure note: only the leading entries are valid)
+ */
+ WORD16 ai2_residue[16];
+
+}tu_sblk_coeff_data_t;
+
+/**
+******************************************************************************
+ * @brief Structure contains few common state variables such as MB indices,
+ * current SPS, PPS etc which are to be used in the entropy thread. By keeping
+ * it a different structure it is being explicitly signaled that these
+ * variables are specific to entropy threads context and other threads should
+ * not update these elements
+******************************************************************************
+ */
+typedef struct
+{
+
+ /**
+ * start of frame / start of slice flag
+ */
+ WORD32 i4_sof;
+
+ /**
+ * end of frame / end of slice flag
+ */
+ WORD32 i4_eof;
+
+ /**
+ * generate header upon request
+ */
+ WORD32 i4_gen_header;
+
+ /**
+ * seq_parameter_set_id
+ */
+ UWORD32 u4_sps_id;
+
+ /**
+ * Pointer to base of sequence parameter set structure array
+ */
+ sps_t *ps_sps_base;
+
+ /**
+ * pic_parameter_set_id
+ */
+ UWORD32 u4_pps_id;
+
+ /**
+ * Pointer to base of Picture parameter set structure array
+ */
+ pps_t *ps_pps_base;
+
+ /**
+ * Current slice idx
+ */
+ WORD32 i4_cur_slice_idx;
+
+ /**
+ * Points to the array of slice indices which is used to identify the independent slice
+ * to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * Pointer to the entropy status map (NOTE(review): granularity, e.g. one byte per MB, is not visible here)
+ */
+ UWORD8 *pu1_entropy_map;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB count (NOTE(review): inferred from the field name; the original comment repeated "MB start address")
+ */
+ WORD32 i4_mb_cnt;
+
+ /**
+ * MB start address
+ */
+ WORD32 i4_mb_start_add;
+
+ /**
+ * MB end address
+ */
+ WORD32 i4_mb_end_add;
+
+ /**
+ * Input width in mbs
+ */
+ WORD32 i4_wd_mbs;
+
+ /**
+ * Input height in mbs
+ */
+ WORD32 i4_ht_mbs;
+
+ /**
+ * Pointer to the bitstream structure
+ */
+ bitstrm_t *ps_bitstrm;
+
+ /**
+ * transform_8x8_mode_flag
+ */
+ WORD8 i1_transform_8x8_mode_flag;
+
+ /**
+ * entropy_coding_mode_flag. NOTE(review): declared signed (WORD8) despite the u1_ prefix - confirm
+ */
+ WORD8 u1_entropy_coding_mode_flag;
+
+ /**
+ * Pointer to the top row nnz for luma (each element is an array of 4 UWORD8)
+ */
+ UWORD8 (*pu1_top_nnz_luma)[4];
+
+ /**
+ * left nnz for luma
+ */
+ UWORD32 u4_left_nnz_luma;
+
+ /**
+ * Array of zero runs before for the mb (16 entries; this is an in-place array, not a pointer)
+ */
+ UWORD8 au1_zero_run[16];
+
+ /**
+ * Pointer to the top row nnz for chroma (each element is an array of 4 UWORD8)
+ */
+ UWORD8 (*pu1_top_nnz_cbcr)[4];
+
+ /**
+ * left nnz for chroma. NOTE(review): declared UWORD8 despite the u4_ prefix, while u4_left_nnz_luma is UWORD32 - confirm the intended width
+ */
+ UWORD8 u4_left_nnz_cbcr;
+
+ /**
+ * Pointer frame level mb subblock coeff data
+ */
+ void *pv_pic_mb_coeff_data;
+
+ /**
+ * Pointer to mb subblock coeff data and number of subblocks and scan idx
+ * Incremented each time a coded subblock is processed
+ */
+ void *pv_mb_coeff_data;
+
+ /**
+ * Pointer frame level mb header data
+ */
+ void *pv_pic_mb_header_data;
+
+ /**
+ * Pointer to mb header data and
+ * incremented each time a coded mb is encoded
+ */
+ void *pv_mb_header_data;
+
+ /**
+ * Error code during parse stage
+ */
+ IH264E_ERROR_T i4_error_code;
+
+ /**
+ * Void pointers to the process job queue and entropy job queue contexts
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Flag to signal end of frame
+ */
+ WORD32 i4_end_of_frame;
+
+ /**
+ * Abs POC count of the frame
+ */
+ WORD32 i4_abs_pic_order_cnt;
+
+ /**
+ * Pointer to the mb skip run
+ */
+ WORD32 *pi4_mb_skip_run;
+
+ /**
+ * Flag to signal end of sequence
+ */
+ UWORD32 u4_is_last;
+
+ /**
+ * Lower 32bits of time-stamp corresponding to the buffer being encoded
+ */
+ UWORD32 u4_timestamp_low;
+
+ /**
+ * Upper 32bits of time-stamp corresponding to the buffer being encoded
+ */
+ UWORD32 u4_timestamp_high;
+
+ /**
+ * Current Picture count - used for synchronization
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Number of bits consumed by header for I and P mb types (MAX_MB_TYPE entries)
+ */
+ UWORD32 u4_header_bits[MAX_MB_TYPE];
+
+ /**
+ * Number of bits consumed by residue for I and P mb types (MAX_MB_TYPE entries)
+ */
+ UWORD32 u4_residue_bits[MAX_MB_TYPE];
+
+} entropy_ctxt_t;
+
+/**
+******************************************************************************
+* @brief macro block info (intra flag, mb type, csbp and distortion).
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * is intra flag (the original comment said "mb type", duplicating the next field)
+ */
+ UWORD16 u2_is_intra;
+
+ /**
+ * mb type
+ */
+ UWORD16 u2_mb_type;
+
+ /**
+ * csbp
+ */
+ UWORD32 u4_csbp;
+
+ /**
+ * mb distortion
+ */
+ WORD32 i4_mb_distortion;
+
+}mb_info_t;
+
+/**
+******************************************************************************
+* @brief structure representing the neighbor availability of a mb
+* or subblk or any other partition (a: left, b: top, c: top right, d: top left)
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * left blk/subblk/partition
+ */
+ UWORD8 u1_mb_a;
+
+ /**
+ * top blk/subblk/partition
+ */
+ UWORD8 u1_mb_b;
+
+ /**
+ * top right blk/subblk/partition
+ */
+ UWORD8 u1_mb_c;
+
+ /**
+ * top left blk/subblk/partition
+ */
+ UWORD8 u1_mb_d;
+
+}block_neighbors_t;
+
+/**
+ ******************************************************************************
+ * @brief MB info related variables used during NMB processing
+ ******************************************************************************
+ */
+typedef struct
+{
+ UWORD32 u4_mb_type; /* mb type */
+ UWORD32 u4_min_sad; /* min sad for this MB (NOTE(review): inferred from the field name) */
+ UWORD32 u4_min_sad_reached; /* presumably set once the min sad has been reached - confirm */
+ WORD32 i4_mb_cost; /* mb cost */
+ WORD32 i4_mb_distortion; /* mb distortion */
+
+
+ mv_t s_skip_mv; /* skip motion vector */
+ mv_t s_pred_mv; /* predicted motion vector */
+
+ block_neighbors_t s_ngbr_avbl; /* neighbor availability of this MB */
+
+ /*
+ * Pointer to the buffer holding the best subpel data for each MB of NMB
+ */
+ UWORD8 *pu1_best_sub_pel_buf;
+
+ /*
+ * Stride for subpel buffer
+ */
+ UWORD32 u4_bst_spel_buf_strd;
+
+}mb_info_nmb_t;
+
+/**
+ ******************************************************************************
+ * @brief Pixel processing thread context
+ ******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * entropy context
+ */
+ entropy_ctxt_t s_entropy;
+
+ /**
+ * me context
+ */
+ me_ctxt_t s_me_ctxt;
+
+ /**
+ * Pointer to codec context
+ */
+ codec_t *ps_codec;
+
+ /**
+ * N mb process context
+ */
+ n_mb_process_ctxt_t s_n_mb_ctxt;
+
+ /**
+ * Source pointer to current MB luma
+ */
+ UWORD8 *pu1_src_buf_luma;
+
+ /**
+ * Source pointer to current MB chroma
+ */
+ UWORD8 *pu1_src_buf_chroma;
+
+ /**
+ * Recon pointer to current MB luma
+ */
+ UWORD8 *pu1_rec_buf_luma;
+
+ /**
+ * Recon pointer to current MB chroma
+ */
+ UWORD8 *pu1_rec_buf_chroma;
+
+ /**
+ * Ref pointer to current MB luma
+ */
+ UWORD8 *pu1_ref_buf_luma;
+
+ /**
+ * Ref pointer to current MB chroma
+ */
+ UWORD8 *pu1_ref_buf_chroma;
+
+ /**
+ * pointer to luma plane of input buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_src_buf_luma_base;
+
+ /**
+ * pointer to luma plane of reconstructed buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_rec_buf_luma_base;
+
+ /**
+ * pointer to luma plane of ref buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_ref_buf_luma_base;
+
+ /**
+ * pointer to chroma plane of input buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_src_buf_chroma_base;
+
+ /*
+ * Buffer for color space conversion of luma
+ */
+ UWORD8 *pu1_y_csc_buf;
+
+ /*
+ * Buffer for color space conversion of chroma (the original comment said "luma" for this uv buffer)
+ */
+
+ UWORD8 *pu1_uv_csc_buf;
+
+ /**
+ * pointer to chroma plane of reconstructed buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_rec_buf_chroma_base;
+
+ /**
+ * pointer to chroma plane of ref buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_ref_buf_chroma_base;
+
+ /**
+ * Pointer to ME NMB info
+ */
+ mb_info_nmb_t *ps_nmb_info;
+
+ mb_info_nmb_t *ps_cur_mb; /* NOTE(review): presumably the current MB's entry in the NMB info - confirm */
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride & ref stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Offset for half pel x plane from the pic buf
+ */
+ UWORD32 u4_half_x_offset;
+
+ /**
+ * Offset for half pel y plane from half x plane
+ */
+ UWORD32 u4_half_y_offset;
+
+ /**
+ * Offset for half pel xy plane from half y plane
+ */
+ UWORD32 u4_half_xy_offset;
+
+ /**
+ * pred buffer pointer (temp buffer 1)
+ */
+ UWORD8 *pu1_pred_mb;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra 16x16)
+ */
+ UWORD8 *pu1_pred_mb_intra_16x16;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra 16x16_plane)
+ */
+ UWORD8 *pu1_pred_mb_intra_16x16_plane;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra chroma)
+ */
+ UWORD8 *pu1_pred_mb_intra_chroma;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra chroma plane)
+ */
+ UWORD8 *pu1_pred_mb_intra_chroma_plane;
+
+ /**
+ * temp. reference buffer ptr for intra 4x4 when rdopt is on
+ */
+ UWORD8 *pu1_ref_mb_intra_4x4;
+
+ /**
+ * prediction buffer stride
+ */
+ WORD32 i4_pred_strd;
+
+ /**
+ * transform buffer pointer (temp buffer 2)
+ */
+ WORD16 *pi2_res_buf;
+
+ /**
+ * temp. transform buffer ptr for intra 4x4 when rdopt is on
+ */
+ WORD16 *pi2_res_buf_intra_4x4;
+
+ /**
+ * transform buffer stride
+ */
+ WORD32 i4_res_strd;
+
+ /**
+ * scratch buffer for inverse transform (temp buffer 3)
+ */
+ void *pv_scratch_buff;
+
+ /**
+ * frame num
+ */
+ WORD32 i4_frame_num;
+
+ /**
+ * start address of frame / sub-frame
+ */
+ WORD32 i4_frame_strt_add;
+
+ /**
+ * IDR pic
+ */
+ UWORD32 u4_is_idr;
+
+ /**
+ * idr_pic_id
+ */
+ UWORD32 u4_idr_pic_id;
+
+ /**
+ * Input width in mbs
+ */
+ WORD32 i4_wd_mbs;
+
+ /**
+ * Input height in mbs
+ */
+ WORD32 i4_ht_mbs;
+
+ /**
+ * slice_type
+ */
+ WORD32 i4_slice_type;
+
+ /**
+ * Current slice idx
+ */
+ WORD32 i4_cur_slice_idx;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB's x position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_x;
+
+ /**
+ * MB's y position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_y;
+
+ /**
+ * mb type
+ */
+ UWORD32 u4_mb_type;
+
+ /**
+ * is intra
+ */
+ UWORD32 u4_is_intra;
+
+ /**
+ * mb neighbor availability pointer
+ */
+ block_neighbors_t *ps_ngbr_avbl;
+
+ /**
+ * lambda (lagrange multiplier for cost computation)
+ */
+ UWORD32 u4_lambda;
+
+ /**
+ * mb distortion
+ */
+ WORD32 i4_mb_distortion;
+
+ /**
+ * mb cost
+ */
+ WORD32 i4_mb_cost;
+
+ /********************************************************************/
+ /* i4_ngbr_avbl_16x16_mb - ngbr avbl of curr mb */
+ /* ai4_neighbor_avail_8x8_subblks - ngbr avbl of all 8x8 sub blocks of curr mb */
+ /* au1_ngbr_avbl_4x4_subblks - ngbr avbl of all 4x4 sub blocks of curr mb */
+ /* i4_chroma_neighbor_avail_8x8_mb - chroma ngbr avbl of curr mb */
+ /********************************************************************/
+ WORD32 i4_ngbr_avbl_16x16_mb;
+ WORD32 ai4_neighbor_avail_8x8_subblks[4];
+ UWORD8 au1_ngbr_avbl_4x4_subblks[16];
+ WORD32 i4_chroma_neighbor_avail_8x8_mb;
+
+ /**
+ * array to store the mode of mb sub blocks
+ */
+ UWORD8 au1_intra_luma_mb_4x4_modes[16];
+
+ /**
+ * array to store the predicted mode of mb sub blks
+ */
+ UWORD8 au1_predicted_intra_luma_mb_4x4_modes[16];
+
+ /**
+ * macro block intra 16x16 mode
+ */
+ UWORD8 u1_l_i16_mode;
+
+ /**
+ * array to store the mode of the macro block intra 8x8 4 modes
+ */
+ UWORD8 au1_intra_luma_mb_8x8_modes[4];
+
+ /**
+ * intra chroma mb mode
+ */
+ UWORD8 u1_c_i8_mode;
+
+ /********************************************************************/
+ /* array to store pixels from the neighborhood for intra prediction */
+ /* i16 - 16 left pels + 1 top left pel + 16 top pels = 33 pels */
+ /* i8 - 8 lpels + 1 tlpels + 8 tpels + 8 tr pels = 25 pels */
+ /* i4 - 4 lpels + 1 tlpels + 4 tpels + 4 tr pels = 13 pels */
+ /* ic - ( 8 left pels + 1 top left pel + 8 top pels ) * 2 = 34 pels */
+ /********************************************************************/
+ UWORD8 au1_ngbr_pels[34];
+
+ /**
+ * array for 8x8 intra pels filtering (temp buff 4)
+ */
+ UWORD8 au1_neighbor_pels_i8x8_unfiltered[25];
+
+ /**
+ * Number of sub partitions in the inter pred MB
+ */
+ UWORD32 u4_num_sub_partitions;
+
+ /**
+ * Pointer to the number of PUs in each MB of a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /**
+ * Pointer to the array of structures having motion vectors, size
+ * and position of sub partitions
+ */
+ enc_pu_t *ps_pu;
+
+ /**
+ * predicted motion vector
+ */
+ mv_t *ps_pred_mv;
+
+ /**
+ * top row mb syntax information base
+ * In normal working scenarios, for a given context set,
+ * the mb syntax info pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ mb_info_t *ps_top_row_mb_syntax_ele_base;
+
+ /**
+ * top row mb syntax information
+ */
+ mb_info_t *ps_top_row_mb_syntax_ele;
+
+ /**
+ * left mb syntax information
+ */
+ mb_info_t s_left_mb_syntax_ele;
+
+ /**
+ * top left mb syntax information
+ */
+ mb_info_t s_top_left_mb_syntax_ele;
+
+ /**
+ * top left mb syntax information, _ME variant (NOTE(review): presumably the copy used by ME - confirm)
+ */
+
+ mb_info_t s_top_left_mb_syntax_ME;
+
+ /**
+ * left mb motion vector, _ME variant (NOTE(review): presumably the copy used by ME - confirm)
+ */
+ enc_pu_t s_left_mb_pu_ME;
+
+ /**
+ * top left mb motion vector, _ME variant (NOTE(review): presumably the copy used by ME - confirm)
+ */
+ enc_pu_t s_top_left_mb_pu_ME;
+
+
+ /**
+ * mb neighbor availability (embedded structure, not a pointer; see ps_ngbr_avbl for the pointer)
+ */
+ block_neighbors_t s_ngbr_avbl;
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the left mb are stored in the array below
+ */
+ UWORD8 au1_left_mb_intra_modes[16];
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the top mb are stored in the array below
+ *
+ * In normal working scenarios, for a given context set,
+ * the mb syntax info pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ UWORD8 *pu1_top_mb_intra_modes_base;
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the top mb are stored in the array below
+ */
+ UWORD8 *pu1_top_mb_intra_modes;
+
+ /**
+ * skip motion vector info
+ */
+ mv_t *ps_skip_mv;
+
+ /**
+ * left mb motion vector
+ */
+ enc_pu_t s_left_mb_pu;
+
+ /**
+ * top left mb motion vector
+ */
+ enc_pu_t s_top_left_mb_pu;
+
+ /**
+ * top row motion vector info
+ *
+ * In normal working scenarios, for a given context set,
+ * the top row pu pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ enc_pu_t *ps_top_row_pu_base;
+
+ /**
+ * top row motion vector info
+ */
+ enc_pu_t *ps_top_row_pu;
+
+ enc_pu_t *ps_top_row_pu_ME; /* NOTE(review): presumably the top row motion vector info used by ME - confirm */
+
+ /**
+ * coded block pattern
+ */
+ UWORD32 u4_cbp;
+
+ /**
+ * csbp
+ */
+ UWORD32 u4_csbp;
+
+ /**
+ * number of non zero coeffs
+ */
+ UWORD32 au4_nnz[5];
+
+ /**
+ * number of non zero coeffs for intra 4x4 when rdopt is on
+ */
+ UWORD32 au4_nnz_intra_4x4[4];
+
+ /**
+ * frame qp & mb qp
+ */
+ UWORD32 u4_frame_qp, u4_mb_qp;
+
+ /**
+ * mb qp previous
+ */
+ UWORD32 u4_mb_qp_prev;
+
+ /**
+ * quantization parameters for luma & chroma planes
+ */
+ quant_params_t *ps_qp_params[3];
+
+ /**
+ * Pointer frame level mb subblock coeff data
+ */
+ void *pv_pic_mb_coeff_data;
+
+ /**
+ * Pointer to mb subblock coeff data and number of subblocks and scan idx
+ * Incremented each time a coded subblock is processed
+ */
+ void *pv_mb_coeff_data;
+
+ /**
+ * Pointer frame level mb header data
+ */
+ void *pv_pic_mb_header_data;
+
+ /**
+ * Pointer to mb header data and
+ * incremented each time a coded mb is encoded
+ */
+ void *pv_mb_header_data;
+
+ /**
+ * Signal that pic_init is called first time
+ */
+ WORD32 i4_first_pic_init;
+
+ /**
+ * Current MV Bank's buffer ID
+ */
+ WORD32 i4_cur_mv_bank_buf_id;
+
+ /**
+ * Void pointers to the process job queue and entropy job queue contexts
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Number of MBs to be processed in the current Job
+ */
+ WORD32 i4_mb_cnt;
+
+ /**
+ * ID for the current context - Used for debugging
+ */
+ WORD32 i4_id;
+
+ /**
+ * Pointer to current picture buffer structure
+ */
+ pic_buf_t *ps_cur_pic;
+
+ /**
+ * Pointer to current picture's mv buffer structure
+ */
+ mv_buf_t *ps_cur_mv_buf;
+
+ /**
+ * Flag to indicate if ps_proc was initialized at least once in a frame.
+ * This is needed to handle cases where a core starts to handle format
+ * conversion jobs directly
+ */
+ WORD32 i4_init_done;
+
+ /**
+ * Process status: one byte per MB
+ */
+ UWORD8 *pu1_proc_map;
+
+ /**
+ * Deblk status: one byte per MB
+ */
+ UWORD8 *pu1_deblk_map;
+
+ /**
+ * ME status: one byte per MB (NOTE(review): "ME" inferred from the field name; the original comment repeated "Process status")
+ */
+ UWORD8 *pu1_me_map;
+
+ /*
+ * Intra refresh mask.
+ * Indicates if an Mb is coded in intra mode within the current AIR interval
+ * NOTE Refreshes after each AIR period
+ * NOTE The map is shared between process
+ */
+ UWORD8 *pu1_is_intra_coded;
+
+ /**
+ * Disable deblock level (0: Enable completely, 3: Disable completely)
+ */
+ UWORD32 u4_disable_deblock_level;
+
+ /**
+ * Embedded structure that contains the deblock context (not a pointer)
+ */
+ deblk_ctxt_t s_deblk_ctxt;
+
+ /**
+ * Points to the array of slice indices which is used to identify the independent
+ * slice to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * Number of mb's to process in one loop (NOTE(review): presumably for entropy, per the _ntrpy suffix - confirm)
+ */
+ WORD32 i4_nmb_ntrpy;
+
+ /**
+ * Number of mb's to process in one loop (NOTE(review): presumably for ME, per the _me suffix - confirm)
+ */
+ UWORD32 u4_nmb_me;
+
+ /**
+ * Structure for current input buffer
+ */
+ inp_buf_t s_inp_buf;
+
+ /**
+ * api call cnt
+ */
+ WORD32 i4_encode_api_call_cnt;
+
+ /**
+ * Current Picture count - used for synchronization
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Intermediate buffer for interpred leaf level functions. NOTE(review): elements are WORD32 despite the ai16_ prefix - confirm
+ */
+ WORD32 ai16_pred1[HP_BUFF_WD * HP_BUFF_HT];
+
+ /**
+ * Reference picture for the current picture
+ * TODO: Only 1 reference assumed currently
+ */
+ pic_buf_t *ps_ref_pic;
+
+ /**
+ * frame info used by RC
+ */
+ frame_info_t s_frame_info;
+
+ /*
+ * NOTE NOT PERSISTENT INSIDE FUNCTIONS
+ * Min sad for current MB
+ * will be populated initially
+ * Once a sad less than or equal to u4_min_sad is reached, the value will be copied to the variable
+ */
+ UWORD32 u4_min_sad;
+
+ /*
+ * indicates whether we have reached minimum sad or not
+ */
+ UWORD32 u4_min_sad_reached;
+
+ /**
+ * Current error code
+ */
+ WORD32 i4_error_code;
+
+ /*
+ * Enables or disables computation of recon
+ */
+ UWORD32 u4_compute_recon;
+
+ /*
+ * Buffer for holding half_x (1/2,1 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_x;
+
+ /*
+ * Buffer for holding half_y (1,1/2 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_y;
+
+ /*
+ * Buffer for holding half_xy (1/2,1/2 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ *
+ */
+ UWORD8 *pu1_half_xy;
+
+ /*
+ * Buffer holding best sub pel values
+ */
+ UWORD8 *pu1_best_subpel_buf;
+
+ /*
+ * Stride for buffer holding best sub pel
+ */
+ UWORD32 u4_bst_spel_buf_strd;
+
+} process_ctxt_t;
+
+/**
+ ******************************************************************************
+ * @brief Rate control related variables
+ ******************************************************************************
+ */
+typedef struct
+{
+ void *pps_rate_control_api; /* opaque pointer; NOTE(review): presumably the rate control API state - confirm */
+
+ void *pps_frame_time; /* opaque pointer; NOTE(review): presumably frame time state - confirm */
+
+ void *pps_time_stamp; /* opaque pointer; NOTE(review): presumably time stamp state - confirm */
+
+ void *pps_pd_frm_rate; /* opaque pointer; NOTE(review): presumably pull-down frame rate state - confirm */
+
+ /**
+ * frame rate pull down (pre-encode skip), MAX_CTXT_SETS entries
+ */
+ WORD32 pre_encode_skip[MAX_CTXT_SETS];
+
+ /**
+ * skip frame (cbr) (post-encode skip), MAX_CTXT_SETS entries
+ */
+ WORD32 post_encode_skip[MAX_CTXT_SETS];
+
+ /**
+ * rate control type
+ */
+ rc_type_e e_rc_type;
+
+ /**
+ * pic type
+ */
+ picture_type_e e_pic_type;
+
+ /**
+ * intra cnt in previous frame
+ */
+ WORD32 num_intra_in_prev_frame;
+
+ /**
+ * avg activity of prev frame
+ */
+ WORD32 i4_avg_activity;
+
+}rate_control_ctxt_t;
+
+/**
+ * Codec context
+ */
+struct _codec_t
+{
+ /**
+ * Number of coded pictures
+ */
+ WORD32 i4_coded_pic_cnt;
+
+ /**
+ * Number of encode frame API calls made
+ */
+ WORD32 i4_encode_api_call_cnt;
+
+ /**
+ * Number of pictures encoded
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Number of threads created
+ */
+ WORD32 i4_proc_thread_cnt;
+
+ /**
+ * Mutex used to keep the control calls thread-safe
+ */
+ void *pv_ctl_mutex;
+
+ /**
+ * Current active config parameters
+ */
+ cfg_params_t s_cfg;
+
+ /**
+ * Array containing the config parameter sets
+ */
+ cfg_params_t as_cfg[MAX_ACTIVE_CONFIG_PARAMS];
+
+ /**
+ * Color format used by encoder internally
+ */
+ IV_COLOR_FORMAT_T e_codec_color_format;
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Flag to enable/disable deblocking of a frame
+ */
+ WORD32 i4_disable_deblk_pic;
+
+ /**
+ * Number of continuous frames where deblocking was disabled
+ */
+ WORD32 i4_disable_deblk_pic_cnt;
+
+ /**
+ * frame type
+ */
+ PIC_TYPE_T pic_type;
+
+ /**
+ * frame qp
+ */
+ UWORD32 u4_frame_qp;
+
+ /**
+ * frame num
+ */
+ WORD32 i4_frame_num;
+
+ /**
+ * slice_type
+ */
+ WORD32 i4_slice_type;
+
+ /*
+ * Force current frame to specific type
+ */
+ IV_PICTURE_CODING_TYPE_T force_curr_frame_type;
+
+ /**
+ * IDR pic
+ */
+ UWORD32 u4_is_idr;
+
+ /**
+ * idr_pic_id
+ */
+ WORD32 i4_idr_pic_id;
+
+ /**
+ * Flush mode
+ */
+ WORD32 i4_flush_mode;
+
+ /**
+ * Encode header mode
+ */
+ WORD32 i4_header_mode;
+
+ /**
+ * Flag to indicate if header has already
+ * been generated when i4_api_call_cnt 0
+ */
+ UWORD32 u4_header_generated;
+
+ /**
+ * Encode generate header
+ */
+ WORD32 i4_gen_header;
+
+ /**
+ * To signal successful completion of init
+ */
+ WORD32 i4_init_done;
+
+ /**
+ * To signal that at least one picture was decoded
+ */
+ WORD32 i4_first_pic_done;
+
+ /**
+ * Reset flag - Codec is reset if this flag is set
+ */
+ WORD32 i4_reset_flag;
+
+ /**
+ * Current error code
+ */
+ WORD32 i4_error_code;
+
+ /**
+ * threshold residue
+ */
+ WORD32 u4_thres_resi;
+
+ /**
+ * disable intra inter gating
+ */
+ UWORD32 u4_inter_gate;
+
+ /**
+ * Holds mem records passed during init.
+ * This will be used to return the mem records during retrieve call
+ */
+ iv_mem_rec_t *ps_mem_rec_backup;
+
+ /**
+ * Flag to determine if the entropy thread is active
+ */
+ volatile UWORD32 au4_entropy_thread_active[MAX_CTXT_SETS];
+
+ /**
+ * Mutex used to keep the entropy calls thread-safe
+ */
+ void *pv_entropy_mutex;
+
+ /**
+ * Job queue buffer base
+ */
+ void *pv_proc_jobq_buf, *pv_entropy_jobq_buf;
+
+ /**
+ * Job Queue mem tab size
+ */
+ WORD32 i4_proc_jobq_buf_size, i4_entropy_jobq_buf_size;
+
+ /**
+ * Memory for MV Bank buffer manager
+ */
+ void *pv_mv_buf_mgr_base;
+
+ /**
+ * MV Bank buffer manager
+ */
+ void *pv_mv_buf_mgr;
+
+ /**
+ * Pointer to MV Buf structure array
+ */
+ void *ps_mv_buf;
+
+ /**
+ * Base address for Motion Vector bank buffer
+ */
+ void *pv_mv_bank_buf_base;
+
+ /**
+ * MV Bank size allocated
+ */
+ WORD32 i4_total_mv_bank_size;
+
+ /**
+ * Memory for Picture buffer manager for reference pictures
+ */
+ void *pv_ref_buf_mgr_base;
+
+ /**
+ * Picture buffer manager for reference pictures
+ */
+ void *pv_ref_buf_mgr;
+
+ /**
+ * Number of reference buffers added to the buffer manager
+ */
+ WORD32 i4_ref_buf_cnt;
+
+ /**
+ * Pointer to Pic Buf structure array
+ */
+ void *ps_pic_buf;
+
+ /**
+ * Base address for Picture buffer
+ */
+ void *pv_pic_buf_base;
+
+ /**
+ * Total pic buffer size allocated
+ */
+ WORD32 i4_total_pic_buf_size;
+
+ /**
+ * Memory for Buffer manager for output buffers
+ */
+ void *pv_out_buf_mgr_base;
+
+ /**
+ * Buffer manager for output buffers
+ */
+ void *pv_out_buf_mgr;
+
+ /**
+ * Current output buffer's buffer ID
+ */
+ WORD32 i4_out_buf_id;
+
+ /**
+ * Number of output buffers added to the buffer manager
+ */
+ WORD32 i4_out_buf_cnt;
+
+ /**
+ * Memory for Picture buffer manager for input buffers
+ */
+ void *pv_inp_buf_mgr_base;
+
+ /**
+ * Picture buffer manager for input buffers
+ */
+ void *pv_inp_buf_mgr;
+
+ /**
+ * Current input buffer's buffer ID
+ */
+ WORD32 i4_inp_buf_id;
+
+ /**
+ * Number of input buffers added to the buffer manager
+ */
+ WORD32 i4_inp_buf_cnt;
+
+ /**
+ * Current input buffer
+ */
+ pic_buf_t *ps_inp_buf;
+
+ /**
+ * Pointer to dpb manager structure
+ */
+ void *pv_dpb_mgr;
+
+ /**
+ * Pointer to base of Sequence parameter set structure array
+ */
+ sps_t *ps_sps_base;
+
+ /**
+ * Pointer to base of Picture parameter set structure array
+ */
+ pps_t *ps_pps_base;
+
+ /**
+ * seq_parameter_set_id
+ */
+ WORD32 i4_sps_id;
+
+ /**
+ * pic_parameter_set_id
+ */
+ WORD32 i4_pps_id;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * packed residue coeff data size for 1 row of mbs
+ */
+ UWORD32 u4_size_coeff_data;
+
+ /**
+ * packed header data size for 1 row of mbs
+ */
+ UWORD32 u4_size_header_data;
+
+ /**
+ * Processing context - One for each processing thread
+ * Create two sets, each set used for alternate frames
+ */
+ process_ctxt_t as_process[MAX_PROCESS_CTXT];
+
+ /**
+ * Thread handle for each of the processing threads
+ */
+ void *apv_proc_thread_handle[MAX_PROCESS_THREADS];
+
+ /**
+ * Thread created flag for each of the processing threads
+ */
+ WORD32 ai4_process_thread_created[MAX_PROCESS_THREADS];
+
+ /**
+ * Void pointer to process job context
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Number of MBs processed together for better instruction cache handling
+ */
+ WORD32 i4_proc_nmb;
+
+ /**
+ * Previous POC lsb
+ */
+ WORD32 i4_prev_poc_lsb;
+
+ /**
+ * Previous POC msb
+ */
+ WORD32 i4_prev_poc_msb;
+
+ /**
+ * Max POC lsb that has arrived till now
+ */
+ WORD32 i4_max_prev_poc_lsb;
+
+ /**
+ * Context for format conversion
+ */
+ fmt_conv_t s_fmt_conv;
+
+ /**
+ * Absolute pic order count
+ */
+ WORD32 i4_abs_pic_order_cnt;
+
+ /**
+ * Pic order count of lsb
+ */
+ WORD32 i4_pic_order_cnt_lsb;
+
+ /**
+ * Array giving current picture being processed in each context set
+ */
+ WORD32 ai4_pic_cnt[MAX_CTXT_SETS];
+
+ /*
+ * Min sad to search for
+ */
+ UWORD32 u4_min_sad;
+
+ /**
+ * Reference picture set
+ */
+ ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS];
+
+ /*
+ * Air pic cnt
+ * Contains the number of pictures that have been encoded with air
+ * This value is modulo air refresh period
+ */
+ WORD32 i4_air_pic_cnt;
+
+ /*
+ * Intra refresh map
+ * Stores the frames at which intra refresh should occur for a MB
+ */
+ UWORD16 *pu2_intr_rfrsh_map;
+
+ /*
+ * Alternate reference frames
+ * Indicates if the current frame is used as a reference frame
+ */
+ UWORD32 u4_is_curr_frm_ref;
+
+ /*
+ * Memory for color space conversion for luma plane
+ */
+ UWORD8 *pu1_y_csc_buf_base;
+
+ /*
+ * Memory for color space conversion for chroma plane
+ */
+ UWORD8 *pu1_uv_csc_buf_base;
+
+ /**
+ * Function pointers for intra pred leaf level functions luma
+ */
+ pf_intra_pred apf_intra_pred_16_l[MAX_I16x16];
+ pf_intra_pred apf_intra_pred_8_l[MAX_I8x8];
+ pf_intra_pred apf_intra_pred_4_l[MAX_I4x4];
+
+ /**
+ * Function pointers for intra pred leaf level functions chroma
+ */
+ pf_intra_pred apf_intra_pred_c[MAX_CH_I8x8];
+
+ /**
+ * luma core coding function pointer
+ */
+ UWORD8 (*luma_energy_compaction[4])(process_ctxt_t *ps_proc);
+
+ /**
+ * chroma core coding function pointer
+ */
+ UWORD8 (*chroma_energy_compaction[2])(process_ctxt_t *ps_proc);
+
+ /**
+ * forward transform for intra blk of mb type 16x16
+ */
+ ih264_luma_16x16_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_16x16;
+
+ /**
+ * inverse transform for intra blk of mb type 16x16
+ */
+ ih264_luma_16x16_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_16x16;
+
+ /**
+ * forward transform for 4x4 blk luma
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_4x4;
+
+ /**
+ * forward transform for 4x4 blk chroma
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_chroma_4x4;
+
+ /*
+ * hadamard transform and quant for a 4x4 block
+ */
+ ih264_hadamard_quant_ft *pf_hadamard_quant_4x4;
+
+ /*
+ * hadamard transform and quant for a 2x2 chroma (uv) block
+ */
+ ih264_hadamard_quant_ft *pf_hadamard_quant_2x2_uv;
+
+ /**
+ * inverse transform for 4x4 blk
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4;
+
+ /**
+ * inverse transform for chroma 4x4 blk
+ */
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
+
+ /**
+ * inverse transform for 4x4 blk with only single dc coeff
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4_dc;
+
+ /**
+ * inverse transform for chroma 4x4 blk with only single dc coeff
+ */
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
+
+ /*
+ * Inverse hadamard transform and iquant for a 4x4 block
+ */
+ ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
+
+ /*
+ * Inverse hadamard transform and iquant for a 2x2 chroma (uv) block
+ */
+ ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_2x2_uv;
+
+ /*
+ * Function for interleave copy
+ */
+ ih264_interleave_copy_ft *pf_interleave_copy;
+
+ /**
+ * forward transform for 8x8 blk
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_8x8;
+
+ /**
+ * inverse transform for 8x8 blk
+ */
+ /**
+ * inverse transform for 4x4 blk
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_8x8;
+
+ /**
+ * forward transform for chroma MB
+ */
+ ih264_chroma_8x8_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_8x8_chroma;
+
+ /**
+ * inverse transform for chroma MB
+ */
+ ih264_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_8x8_chroma;
+
+ /**
+ * deblock vertical luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
+
+ /**
+ * deblock vertical luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
+
+ /**
+ * deblock horizontal luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
+
+ /**
+ * deblock horizontal luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
+
+
+ /**
+ * functions for padding
+ */
+ pf_pad pf_pad_top;
+ pf_pad pf_pad_bottom;
+ pf_pad pf_pad_left_luma;
+ pf_pad pf_pad_left_chroma;
+ pf_pad pf_pad_right_luma;
+ pf_pad pf_pad_right_chroma;
+
+ /**
+ * Inter pred leaf level functions
+ */
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_copy;
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_horz;
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_vert;
+ pf_inter_pred_luma_bilinear pf_inter_pred_luma_bilinear;
+ ih264_inter_pred_chroma_ft *pf_inter_pred_chroma;
+
+ /**
+ * fn ptrs for compute sad routines
+ */
+ ime_compute_sad_ft *apf_compute_sad_16x16[2];
+ ime_compute_sad_ft *pf_compute_sad_16x8;
+
+ /**
+ * fn ptrs for memory handling operations
+ */
+ pf_memcpy pf_mem_cpy;
+ pf_memset pf_mem_set;
+ pf_memcpy_mul8 pf_mem_cpy_mul8;
+ pf_memset_mul8 pf_mem_set_mul8;
+
+ /**
+ * intra mode eval -encoder level function
+ */
+ pf_evaluate_intra_modes pf_ih264e_evaluate_intra16x16_modes;
+ pf_evaluate_intra_modes pf_ih264e_evaluate_intra_chroma_modes;
+ pf_evaluate_intra_4x4_modes pf_ih264e_evaluate_intra_4x4_modes;
+
+ /* Half pel generation function - encoder level
+ *
+ */
+ pf_sixtapfilter_horz pf_ih264e_sixtapfilter_horz;
+ pf_sixtap_filter_2dvh_vert pf_ih264e_sixtap_filter_2dvh_vert;
+
+ /**
+ * color space conversion from YUV 420P to YUV 420SP
+ */
+ pf_fmt_conv_420p_to_420sp pf_ih264e_conv_420p_to_420sp;
+
+
+ /**
+ * color space conversion from YUV 422 interleaved (422ILE) to YUV 420SP
+ */
+ pf_fmt_conv_422ile_to_420sp pf_ih264e_fmt_conv_422i_to_420sp;
+
+ /**
+ * write mb layer for a given slice I, P, B
+ */
+ IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt );
+
+
+ /**
+ * Output buffer
+ */
+ out_buf_t as_out_buf[MAX_CTXT_SETS];
+
+ /**
+ * recon buffer
+ */
+ rec_buf_t as_rec_buf[MAX_CTXT_SETS];
+
+ /**
+ * rate control context
+ */
+ rate_control_ctxt_t s_rate_control;
+};
+#endif /* IH264E_STRUCTS_H_ */
diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c
new file mode 100755
index 0000000..a6a7f3c
--- /dev/null
+++ b/encoder/ih264e_time_stamp.c
@@ -0,0 +1,748 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_time_stamp.c
+*
+* @brief
+* This file contains functions used for source and target time stamp management
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - gcd()
+* - ih264e_get_range()
+* - ih264e_frame_time_get_init_free_memtab()
+* - ih264e_init_frame_time()
+* - ih264e_should_src_be_skipped()
+* - ih264e_time_stamp_get_init_free_memtab()
+* - ih264e_init_time_stamp()
+* - ih264e_update_time_stamp()
+* - ih264e_frame_time_get_src_frame_rate()
+* - ih264e_frame_time_get_tgt_frame_rate()
+* - ih264e_frame_time_get_src_ticks()
+* - ih264e_frame_time_get_tgt_ticks()
+* - ih264e_frame_time_get_src_time()
+* - ih264e_frame_time_get_tgt_time()
+* - ih264e_frame_time_update_src_frame_rate()
+* - ih264e_frame_time_update_tgt_frame_rate()
+* - ih264_time_stamp_update_frame_rate()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* user include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_rate_control_api.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to compute gcd of two numbers
+*
+* @par Description
+*  Computes the greatest common divisor of two values with the iterative
+*  remainder (Euclidean) method.
+*
+* @param[in] i4_x
+*  value 1
+*
+* @param[in] i4_y
+*  value 2
+*
+* @returns
+*  GCD(value 1, value 2). For non-negative inputs: if one value is 0 the other
+*  value is returned, and 0 is returned when both are 0.
+*
+* @remarks
+*  The operands are ordered with a temporary rather than with the
+*  add/subtract swap idiom, since that idiom can overflow a signed integer
+*  for large inputs.
+*
+*******************************************************************************
+*/
+static WORD32 gcd(WORD32 i4_x, WORD32 i4_y)
+{
+    WORD32 i4_rem;
+
+    /* start with the smaller operand in i4_x */
+    if (i4_x > i4_y)
+    {
+        i4_rem = i4_x;
+        i4_x = i4_y;
+        i4_y = i4_rem;
+    }
+
+    /* replace (x, y) by (y, x mod y) until the remainder vanishes */
+    while (i4_y != 0)
+    {
+        i4_rem = i4_x % i4_y;
+        i4_x = i4_y;
+        i4_y = i4_rem;
+    }
+
+    return i4_x;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to determine number of bits required to represent a given
+* value
+*
+* @par Description
+*  This function determines the number of bits required to represent the given
+*  value. The word is searched in two halves: if the value does not fit in the
+*  lower half of the word, the upper half is scanned from the top bit down,
+*  otherwise the lower half is scanned.
+*
+* @param[in] u4_value
+*  Value for which the number of bits required to represent is to be determined
+*
+* @param[in] u1_no_of_bits
+*  Represents the value's word type = 8/16/32
+*
+* @returns
+*  The number of bits required to represent the given number. A value of 0 or 1
+*  yields 1. 0 is returned only when the value exceeds the lower half of the
+*  word but has no bit set within the scanned upper half (i.e. the value does
+*  not fit in u1_no_of_bits bits).
+*
+* @remarks
+*  All bit masks are built from an unsigned 1 so that selecting bit 31 of a
+*  32 bit word does not left-shift into the sign bit of a signed int.
+*
+*******************************************************************************
+*/
+static UWORD8 ih264e_get_range(UWORD32 u4_value, UWORD8 u1_no_of_bits)
+{
+    UWORD8 count;
+    UWORD32 temp;
+
+    if (u4_value > ((((UWORD32) 1) << (u1_no_of_bits >> 1)) - 1))
+    {
+        /* value needs more than half the word: scan the upper half, MSB first */
+        temp = (((UWORD32) 1) << (u1_no_of_bits - 1));
+        for (count = 0; count < (u1_no_of_bits >> 1); count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) (u1_no_of_bits - count);
+            }
+            else
+            {
+                temp >>= 1;
+            }
+        }
+        return 0;
+    }
+    else
+    {
+        /* value fits in the lower half: scan it from its top bit down to bit 1 */
+        temp = (((UWORD32) 1) << ((u1_no_of_bits >> 1) - 1));
+        for (count = 0; count < ((u1_no_of_bits >> 1) - 1); count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) ((u1_no_of_bits >> 1) - count);
+            }
+            else
+            {
+                temp >>= 1;
+            }
+        }
+        /* only bit 0 (or nothing) is set */
+        return 1;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to get/init frame time memtabs
+*
+* @par Description
+*  Describes the single memory record needed for a frame_time_t and, depending
+*  on the function type, hands that record and the context handle to
+*  use_or_fill_base().
+*
+* @param[in] pps_frame_time
+*  Pointer to frame time context handle
+*
+* @param[in] ps_memtab
+*  Pointer to memtab (not accessed when e_func_type is GET_NUM_MEMTAB)
+*
+* @param[in] e_func_type
+*  Function type (get memtabs/init memtabs)
+*
+* @returns
+*  Number of memtabs used (always 1)
+*
+* @remarks
+*  For GET_NUM_MEMTAB and FILL_MEMTAB the handle is pointed at a function-local
+*  static dummy, since no state memory exists yet at that stage.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    static frame_time_t s_dummy_frame_time;
+    WORD32 i4_num_memtabs = 0;
+
+    /* No state memory is available during these calls; give the handle a
+     * valid object to point at so that it can be dereferenced safely */
+    if ((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_frame_time = &s_dummy_frame_time;
+    }
+
+    /* memtab entry for the frame time state structure */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_entry = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_entry, sizeof(frame_time_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_entry, (void **) pps_frame_time, e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to init frame time context
+*
+* @par Description
+*  Frame time structure stores the time of the source and the target frames to
+*  be encoded. Based on the time we decide whether or not to encode the source
+*  frame. The common time base is the LCM of the two frame rates, and each
+*  per-frame increment is the common time base divided by the respective rate.
+*
+* @param[in] ps_frame_time
+*  Pointer Frame time context
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate (must be non-zero, it is used as a divisor)
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate (must be non-zero, it is used as a divisor)
+*
+* @returns
+*  none
+*
+* @remarks
+*  The LCM is computed as (src / gcd) * tgt. Dividing first keeps the
+*  intermediate value no larger than the LCM itself, whereas src * tgt can
+*  wrap around in 32 bits before the division is applied.
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+                            UWORD32 u4_src_frm_rate,
+                            UWORD32 u4_tgt_frm_rate)
+{
+    /* Initialise the common time base based on which the source and target
+     * frame times increase */
+    WORD32 i4_gcd = gcd(u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* gcd divides the source rate exactly, so this equals (src * tgt) / gcd
+     * without forming the full product */
+    ps_frame_time->common_time_base = (u4_src_frm_rate / i4_gcd)
+                    * u4_tgt_frm_rate;
+
+    /* The source and target increment per frame is initialized */
+    ps_frame_time->u4_src_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_src_frm_rate;
+    ps_frame_time->u4_tgt_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_tgt_frm_rate;
+
+    /* Initialise the source and target times to 0 (RESET) */
+    ps_frame_time->u4_src_frm_time = 0;
+    ps_frame_time->u4_tgt_frm_time = 0;
+
+    /* Initialize the number of frms not to be skipped to 0 */
+    ps_frame_time->u4_num_frms_dont_skip = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to check if frame can be skipped
+*
+* @par Description
+*  Based on the source and target frame time and the delta time stamp
+*  we decide whether to code the source or not.
+*  This is based on the assumption
+*  that the source frame rate is greater than the target frame rate.
+*  Updates the frame time structure: the source time advances on every call,
+*  the target time advances only when the frame is not marked for skipping,
+*  and both are reset to 0 when they reach the common time base together.
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+*  Time stamp difference between frames; (delta - 1) is added to the count of
+*  frames that must not be skipped
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+*  Set to 1 when a frame that would have been skipped is retained because of
+*  the pending do-not-skip count, 0 otherwise
+*
+* @returns
+*  Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+                                    UWORD32 u4_delta_time_stamp,
+                                    UWORD32 *pu4_frm_not_skipped_for_dts)
+{
+    /* The decision uses the times as they stand before this frame is
+     * accounted for: skip when the target time is ahead of the source time by
+     * at least one source increment */
+    UWORD8 u1_skip = (UWORD8) ((ps_frame_time->u4_tgt_frm_time > ps_frame_time->u4_src_frm_time) &&
+                               (ps_frame_time->u4_tgt_frm_time >= (ps_frame_time->u4_src_frm_time +
+                                               ps_frame_time->u4_src_frm_time_incr)));
+
+    /* every source frame advances the source time */
+    ps_frame_time->u4_src_frm_time += ps_frame_time->u4_src_frm_time_incr;
+
+    /* only a coded source frame advances the target time */
+    if (0 == u1_skip)
+    {
+        ps_frame_time->u4_tgt_frm_time += ps_frame_time->u4_tgt_frm_time_incr;
+    }
+
+    /* When both times land on the common time base together, one full
+     * period has elapsed and both are wrapped back to zero */
+    if (((WORD32) ps_frame_time->u4_src_frm_time == ps_frame_time->common_time_base) &&
+        ((WORD32) ps_frame_time->u4_tgt_frm_time == ps_frame_time->common_time_base))
+    {
+        ps_frame_time->u4_src_frm_time = 0;
+        ps_frame_time->u4_tgt_frm_time = 0;
+    }
+
+    /* Accumulate how many frames need not be skipped in order to take care
+     * of the delta time stamp */
+    ps_frame_time->u4_num_frms_dont_skip += (u4_delta_time_stamp - 1);
+
+    if (!(u1_skip && ps_frame_time->u4_num_frms_dont_skip))
+    {
+        pu4_frm_not_skipped_for_dts[0] = 0;
+    }
+    else
+    {
+        /* The frame was due to be skipped to maintain the target frame rate,
+         * but a frame has already been dropped by the application; keep this
+         * one and use up one unit of the do-not-skip count */
+        u1_skip = 0;
+        pu4_frm_not_skipped_for_dts[0] = 1;
+        ps_frame_time->u4_num_frms_dont_skip -= 1;
+    }
+
+    return u1_skip;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to get/init time stamp memtabs
+*
+* @par Description
+*  Describes the single memory record needed for a time_stamp_t and, depending
+*  on the function type, hands that record and the context handle to
+*  use_or_fill_base().
+*
+* @param[in] pps_time_stamp
+*  Pointer to time stamp context handle
+*
+* @param[in] ps_memtab
+*  Pointer to memtab (not accessed when e_func_type is GET_NUM_MEMTAB)
+*
+* @param[in] e_func_type
+*  Function type (Get memtab/ init memtab)
+*
+* @returns
+*  number of memtabs used (always 1)
+*
+* @remarks
+*  For GET_NUM_MEMTAB and FILL_MEMTAB the handle is pointed at a function-local
+*  static dummy, since no state memory exists yet at that stage.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    static time_stamp_t s_dummy_time_stamp;
+    WORD32 i4_num_memtabs = 0;
+
+    /* No state memory is available during these calls; give the handle a
+     * valid object to point at so that it can be dereferenced safely */
+    if ((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_time_stamp = &s_dummy_time_stamp;
+    }
+
+    /* memtab entry for the time stamp state structure */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_entry = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_entry, sizeof(time_stamp_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_entry, (void **) pps_time_stamp, e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize time stamp context
+*
+* @par Description
+*  Sets the time increment resolution from the max frame rate (halved, and
+*  flagged as scaled, when it exceeds 60000), derives the number of bits
+*  needed to represent that resolution and the per-frame time increment for
+*  the given source frame rate, and resets all running times to 0.
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate (must be non-zero, it is used as a divisor)
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_t *ps_time_stamp,
+                            UWORD32 u4_max_frm_rate,
+                            UWORD32 u4_src_frm_rate)
+{
+    /* The max frame rate is expected to be at most 60000; a larger value is
+     * divided by two before use and the scaling is recorded */
+    const WORD32 i4_is_scaled = (u4_max_frm_rate > 60000) ? 1 : 0;
+
+    u4_max_frm_rate >>= i4_is_scaled;
+    ps_time_stamp->is_max_frame_rate_scaled = i4_is_scaled;
+
+    /* all running times start from zero */
+    ps_time_stamp->u4_vop_time = 0;
+    ps_time_stamp->u4_cur_tgt_vop_time = 0;
+    ps_time_stamp->u4_prev_tgt_vop_time = 0;
+
+    /* resolution, and the number of bits needed to represent it */
+    ps_time_stamp->u4_vop_time_incr_res = u4_max_frm_rate;
+    ps_time_stamp->u4_vop_time_incr_range = ih264e_get_range(u4_max_frm_rate, 32);
+
+    /* ticks added per source frame; the factor of 1000 is there since the
+     * frame rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr = (1000 * ps_time_stamp->u4_vop_time_incr_res) / u4_src_frm_rate;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+*  The current time is first saved as the current target time. The time is
+*  then advanced by the increment value; when it reaches or exceeds the time
+*  increment resolution it is reduced by the resolution, so that the excess
+*  stays in the running time and accumulates over subsequent calls.
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_t *ps_time_stamp)
+{
+    /* The time stamp is fetched after this update, so keep a copy of the
+     * time as it was before advancing */
+    ps_time_stamp->u4_cur_tgt_vop_time = ps_time_stamp->u4_vop_time;
+
+    /* advance by one frame worth of ticks */
+    ps_time_stamp->u4_vop_time += ps_time_stamp->u4_vop_time_incr;
+
+    /* wrap around at the resolution, carrying the remainder forward */
+    if (!(ps_time_stamp->u4_vop_time < ps_time_stamp->u4_vop_time_incr_res))
+    {
+        ps_time_stamp->u4_vop_time -= ps_time_stamp->u4_vop_time_incr_res;
+    }
+}
+
+/****************************************************************************
+ Run-Time Modifying functions
+****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+* Function to get source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time)
+{
+    /* the rate is recovered as common time base / ticks per source frame */
+    WORD32 i4_src_frm_rate;
+
+    i4_src_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_src_frm_time_incr;
+
+    return i4_src_frm_rate;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+* Function to get target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time)
+{
+    /* the rate is recovered as common time base / ticks per target frame */
+    WORD32 i4_tgt_frm_rate;
+
+    i4_tgt_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_tgt_frm_time_incr;
+
+    return i4_tgt_frm_rate;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+* Function to get source time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time)
+{
+    /* number of ticks between two source frames */
+    WORD32 i4_src_ticks = ps_frame_time->u4_src_frm_time_incr;
+
+    return i4_src_ticks;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+* Function to get target time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time)
+{
+    /* number of ticks between two target frames */
+    WORD32 i4_tgt_ticks = ps_frame_time->u4_tgt_frm_time_incr;
+
+    return i4_tgt_ticks;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+* Function to get src frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* src frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time)
+{
+    /* running source frame time, in ticks */
+    WORD32 i4_src_time = frame_time->u4_src_frm_time;
+
+    return i4_src_time;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+* Function to get tgt frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* tgt frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time)
+{
+    /* running target frame time, in ticks */
+    WORD32 i4_tgt_time = frame_time->u4_tgt_frm_time;
+
+    return i4_tgt_time;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+* Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 src_frm_rate)
+{
+    /* The target frame rate is unchanged, so recover it from the existing
+     * common time base and target increment before they are overwritten */
+    WORD32 i4_old_tgt_rate = ih264e_frame_time_get_tgt_frame_rate(ps_frame_time);
+
+    /* Rebuild the whole frame time state from the new source rate and the
+     * old target rate; this also resets the running times */
+    ih264e_init_frame_time(ps_frame_time, src_frm_rate, i4_old_tgt_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update frame time with a new target frame rate
+*
+* @par Description
+*  Re-initialises the frame time context with the new target frame rate while
+*  retaining the source frame rate it was previously initialised with
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+*  target frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*  The running source/target times and the do-not-skip count are reset as part
+*  of the re-initialisation
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 tgt_frm_rate)
+{
+    /* The source frame rate is unchanged, so recover it from the existing
+     * common time base and source increment before they are overwritten */
+    WORD32 i4_old_src_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+    /* Rebuild the whole frame time state from the old source rate and the
+     * new target rate */
+    ih264e_init_frame_time(ps_frame_time, i4_old_src_rate, tgt_frm_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update the time stamp increment for a new source frame
+* rate
+*
+* @par Description
+*  When the frame rate changes the time increment is modified by appropriate
+*  ticks; the time increment resolution is left as it is
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+*  source frame rate (must be non-zero, it is used as a divisor)
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp,
+                                        UWORD32 src_frm_rate)
+{
+    /* ticks per source frame; the factor of 1000 is there since the frame
+     * rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr =
+                    (1000 * ps_time_stamp->u4_vop_time_incr_res) / src_frm_rate;
+}
diff --git a/encoder/ih264e_time_stamp.h b/encoder/ih264e_time_stamp.h
new file mode 100755
index 0000000..1ee559d
--- /dev/null
+++ b/encoder/ih264e_time_stamp.h
@@ -0,0 +1,498 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_time_stamp.h
+*
+* @brief
+* This file contains function declarations used for managing input and output
+* frame time stamps
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TIME_STAMP_H_
+#define IH264E_TIME_STAMP_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+ * Frame time context: tracks the time of source and target frames, in ticks
+ * of a time base common to both frame rates. ih264e_should_src_be_skipped()
+ * takes this context when deciding whether a source frame is coded or skipped
+ */
+typedef struct frame_time_t
+{
+ /* common time base(=LCM) between source and target frame rate (in ticks).
+    Note: this field is signed while the tick fields below are unsigned */
+ WORD32 common_time_base;
+
+ /* number of ticks between two source frames */
+ UWORD32 u4_src_frm_time_incr;
+
+ /* number of ticks between two target frames */
+ UWORD32 u4_tgt_frm_time_incr;
+
+ /* Source frame time - measured as modulo of common time base
+ and incremented by src_frm_time_incr */
+ UWORD32 u4_src_frm_time;
+
+ /* Target frame time - measured as modulo of common time base
+ and incremented by tgt_frm_time_incr */
+ UWORD32 u4_tgt_frm_time;
+
+ /* Number of frames not to be skipped while maintaining
+ tgt_frm_rate due to delta_time_stamp */
+ UWORD32 u4_num_frms_dont_skip;
+}frame_time_t;
+
+typedef struct frame_time_t *frame_time_handle;
+
+/**
+ * Parameters that go in the bitstream based on tgt_frm_rate
+ * 1) Initialize the vop_time_incr_res with the max_frame_rate
+ *    (NOTE(review): the original text said "in frames per 1000 bits"; this is
+ *    presumably frames per 1000 seconds - confirm)
+ *    - To represent all kinds of frame rates
+ * 2) Decide the vop_time_incr based on the source frame rate
+ *    - The decoder would like to know which source frame is encoded, i.e. the
+ *      source time id of the target frame encoded, thereby adjusting its
+ *      time of delay
+ * 3) vop_time increments every source frame and whenever a frame is encoded
+ *    (target frame), the encoder queries the vop time of the source frame and
+ *    sends it in the bit stream.
+ * 4) Since the source frame skip logic is taken care of by the frame_time
+ *    module, whenever the encoder queries the time stamp module (which gets
+ *    updated outside the encoder) the time stamp module would have the
+ *    source time
+ */
+typedef struct time_stamp_t
+{
+ /* vop_time_incr_res is an integer that indicates
+ the number of evenly spaced subintervals, called ticks,
+ within one modulo time. */
+ UWORD32 u4_vop_time_incr_res;
+
+ /* number of bits to represent vop_time_incr_res */
+ UWORD32 u4_vop_time_incr_range;
+
+ /* The number of ticks elapsed between two source vops */
+ UWORD32 u4_vop_time_incr;
+
+ /* incremented by vop_time_incr for every source frame.
+ Represents the time offset after a modulo_time_base = 1 is sent
+ in bit stream*/
+ UWORD32 u4_vop_time;
+
+ /* Temporary copies of the vop time. They are needed because update is
+ called before query (get time stamp), so the value to report has to be
+ kept separately from u4_vop_time */
+ UWORD32 u4_cur_tgt_vop_time;
+
+ UWORD32 u4_prev_tgt_vop_time; /* presumably the previous target frame's copy - TODO confirm */
+
+ /* This variable is set to 1 if we scale max frame rate by a factor of 2.
+ For mpeg4 standard, we just have 16bits and we can't accommodate more than 60000 as frame rate.
+ So we scale it and work with it */
+ WORD32 is_max_frame_rate_scaled;
+} time_stamp_t;
+
+typedef struct time_stamp_t *time_stamp_handle;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time context
+*
+* @par Description
+* Frame time structure stores the time of the source and the target frames to
+* be encoded. Based on the time we decide whether or not to encode the source
+* frame
+*
+* @param[in] ps_frame_time
+* Pointer Frame time context
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+* Target frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+ UWORD32 u4_src_frm_rate,
+ UWORD32 u4_tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to check if frame can be skipped
+*
+* @par Description
+* Based on the source and target frame time and the delta time stamp
+* we decide whether to code the source or not.
+* This is based on the assumption
+* that the source frame rate is greater than the target frame rate.
+* Updates the time_stamp structure
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+* Flag to indicate if frame is already skipped by application
+*
+* @returns
+* Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+ UWORD32 u4_delta_time_stamp,
+ UWORD32 *pu4_frm_not_skipped_for_dts);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp context
+*
+* @par Description
+* Time stamp structure stores the time stamp data that
+* needs to be sent in to the header of MPEG4. Based on the
+* max target frame rate the vop_time increment resolution is set
+* so as to support all the frame rates below max frame rate.
+* A support till the third decimal point is assumed.
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] u4_max_frm_rate
+* Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_handle time_stamp,
+ UWORD32 max_frm_rate,
+ UWORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+* Vop time is incremented by increment value. When vop time goes
+* more than the vop time resolution set the modulo time base to
+* 1 and reduce the vop time by vop time resolution so that the
+* excess value is present in vop time and get accumulated over time
+* so that the corresponding frame rate is achieved at an average of
+* 1000 seconds
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_handle time_stamp);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time memtabs
+*
+* @par Description
+* Function to init frame time memtabs
+*
+* @param[in] pps_frame_time
+* Pointer to frame time contexts
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtabs/init memtabs)
+*
+* @returns
+* WORD32 (presumably the number of memtabs used; confirm against the definition)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp memtabs
+*
+* @par Description
+* Function to initialize time stamp memtabs
+*
+* @param[in] pps_time_stamp
+* Pointer to time stamp context
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtab / init memtab)
+*
+* @returns
+* number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/****************************************************************************
+ Run-Time Modifying functions
+****************************************************************************/
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+* Function to get source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+* Function to get target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+* Function to get source time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+* Function to get target time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+* Function to get src frame time
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* src frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+* Function to get tgt frame time
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* tgt frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+* Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time, WORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update target frame time with a new target frame rate
+*
+* @par Description
+* Function to update target frame time with a new target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+* target frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time, WORD32 tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update the time stamp increment with a new source frame rate
+*
+* @par Description
+* When the frame rate changes the time increment is modified by appropriate ticks
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp, UWORD32 src_frm_rate);
+
+#endif /*IH264E_TIME_STAMP_H_*/
+
diff --git a/encoder/ih264e_trace.h b/encoder/ih264e_trace.h
new file mode 100755
index 0000000..8134524
--- /dev/null
+++ b/encoder/ih264e_trace.h
@@ -0,0 +1,161 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_trace.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TRACE_H_
+#define IH264E_TRACE_H_
+
+#if ENABLE_TRACE
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Data for the trace functionality. Only defined when ENABLE_TRACE is
+ * non-zero
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * fp : stream that the ENTROPY_TRACE / AEV_TRACE macros print to. The
+ * macros emit nothing while this is NULL
+ */
+ FILE *fp;
+}enc_trace_t;
+
+/*****************************************************************************/
+/* Extern variable declarations */
+/*****************************************************************************/
+extern enc_trace_t g_enc_trace;
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief defines flag used for enabling trace
+******************************************************************************
+ */
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to print trace messages
+******************************************************************************
+ */
+
+#define ENTROPY_TRACE(syntax_string, value) \
+ { \
+ if(g_enc_trace.fp) \
+ { \
+ fprintf( g_enc_trace.fp, "%-40s : %d\n", syntax_string, value ); \
+ fflush ( g_enc_trace.fp); \
+ } \
+ }
+
+
+/**
+******************************************************************************
+ * @brief Macro to print CABAC trace messages
+******************************************************************************
+ */
+
+#define AEV_TRACE(string, value, range) \
+ if(range && g_enc_trace.fp) \
+ { \
+ fprintf( g_enc_trace.fp, "%-40s:%8d R:%d\n", string, value, range); \
+ fflush ( g_enc_trace.fp); \
+ }
+
+#else
+
+/* Dummy macros when trace is disabled */
+#define ENTROPY_TRACE(syntax_string, value)
+
+#define AEV_TRACE(string, value, range)
+
+#endif
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace init when trace is disabled in encoder
+*
+* @par Description
+* This routine needs to be called at start of trace
+*
+* @param[in] pu1_file_name
+* Name of file where trace outputs need to be stored (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32 ih264e_trace_init
+ (
+ const char *pu1_file_name
+ );
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace de-init function when trace is disabled
+*
+* @par Description
+* This routine needs to be called at end of trace
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32 ih264e_trace_deinit
+ (
+ void
+ );
+
+#endif // IH264E_TRACE_H_
diff --git a/encoder/ih264e_trace_support.h b/encoder/ih264e_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/ih264e_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_trace_support.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+typedef struct /* trace buffer descriptor; field meanings are inferred from the names - TODO confirm */
+{
+ WORD8 * pu1_buf; /* presumably the buffer that trace output is written into */
+ WORD32 i4_offset; /* presumably the current write offset into pu1_buf */
+ WORD32 i4_max_size; /* presumably the capacity of pu1_buf */
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+
+int trace_printf(const WORD8 *format, ...);
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
new file mode 100755
index 0000000..f0086cb
--- /dev/null
+++ b/encoder/ih264e_utils.c
@@ -0,0 +1,1804 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_utils.c
+*
+* @brief
+* Contains miscellaneous utility functions used by the encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_get_min_level()
+* - ih264e_get_lvl_idx()
+* - ih264e_get_dpb_size()
+* - ih264e_get_total_pic_buf_size()
+* - ih264e_get_pic_mv_bank_size()
+* - ih264e_pic_buf_mgr_add_bufs()
+* - ih264e_mv_buf_mgr_add_bufs()
+* - ih264e_init_quant_params()
+* - ih264e_init_air_map()
+* - ih264e_codec_init()
+* - ih264e_pic_init()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_common_tables.h"
+#include "ih264_debug.h"
+#include "ih264_trans_data.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264_list.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_me.h"
+#include "ime_defs.h"
+#include "ime.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "irc_rate_control_api.h"
+#include "ime_statistics.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get the minimum level that supports a given picture size
+*
+* @par Description:
+*  Finds the first entry of gai4_ih264_max_luma_pic_size that is not smaller
+*  than pic_size and returns the entry of gai4_ih264_levels at that index
+*
+* @param[in] pic_size
+*  Picture size to be supported, in the units used by
+*  gai4_ih264_max_luma_pic_size
+*
+* @returns Entry of gai4_ih264_levels for the first level that fits
+*
+* @remarks
+*  NOTE(review): when no level fits, gai4_ih264_levels is read at index
+*  MAX_LEVEL - confirm that the table has an entry at that index
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size)
+{
+    WORD32 idx = 0;
+
+    /* Advance past every level whose luma picture size limit is too small */
+    while ((idx < MAX_LEVEL) && (pic_size > gai4_ih264_max_luma_pic_size[idx]))
+    {
+        idx++;
+    }
+
+    /* idx is MAX_LEVEL here when none of the levels can hold pic_size */
+    return gai4_ih264_levels[idx];
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get level index for a given level
+*
+* @par Description:
+*  Converts a level, expressed on the same scale as the IH264_LEVEL_*
+*  constants, to an index that can be used for table lookups. A value that
+*  lies between two defined levels maps to the index of the lower one, and a
+*  value below IH264_LEVEL_11 maps to index 0
+*
+* @param[in] level
+*  Level of the stream
+*
+* @returns Level index for a given level: 0 for levels below IH264_LEVEL_11,
+*  increasing by one per defined level, 13 for IH264_LEVEL_50 and 14 for
+*  IH264_LEVEL_51 and above
+*
+* @remarks
+*  Levels at or above IH264_LEVEL_50 previously fell through every comparison
+*  and returned index 0, i.e. the limits of the smallest level.
+*  NOTE(review): assumes the tables indexed with this value have entries at
+*  indices 13 and 14 - confirm
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level)
+{
+    /* Exclusive upper bound on level for each index, in ascending order.
+     * Index i is returned for the first bound that level is below; a level
+     * at or above the last bound gets the index one past the table */
+    static const WORD32 ai4_lvl_upper_bound[] =
+    {
+        IH264_LEVEL_11, IH264_LEVEL_12, IH264_LEVEL_13,
+        IH264_LEVEL_20, IH264_LEVEL_21, IH264_LEVEL_22,
+        IH264_LEVEL_30, IH264_LEVEL_31, IH264_LEVEL_32,
+        IH264_LEVEL_40, IH264_LEVEL_41, IH264_LEVEL_42,
+        IH264_LEVEL_50, IH264_LEVEL_51
+    };
+    const WORD32 num_bounds = (WORD32)(sizeof(ai4_lvl_upper_bound)
+                    / sizeof(ai4_lvl_upper_bound[0]));
+    WORD32 lvl_idx = 0;
+
+    while ((lvl_idx < num_bounds) && (level >= ai4_lvl_upper_bound[lvl_idx]))
+    {
+        lvl_idx++;
+    }
+
+    return (lvl_idx);
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns maximum number of pictures allowed in dpb for a given level
+*
+* @par Description:
+*  Looks up the maximum DPB size of the level in gas_ih264_lvl_tbl and
+*  converts it to a number of frames of pic_size luma samples, capped at
+*  MAX_DPB_SIZE, as per Annex A.3.1
+*
+* @param[in] level
+*  level of the bit-stream, compared against u4_level_idc of the table
+*
+* @param[in] pic_size
+*  width * height
+*
+* @returns Number of buffers in DPB
+*
+* @remarks
+*  From annexure A.3.1 of H264 specification,
+*  max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+*  Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+*  MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+*  presented in the look up table gas_ih264_lvl_tbl is in units of 512
+*  bytes, and pic_size is in luma samples (256 per MB), which turns the
+*  expression into 1024 * max_dpb_size / ( pic_size * 3 ).
+*  NOTE(review): pic_size is used as a divisor and is not checked for zero; a
+*  level that is not found in the table yields 0
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size)
+{
+    /* max dpb size of the level, in the units of gas_ih264_lvl_tbl */
+    WORD32 lvl_max_dpb = 0;
+
+    /* resulting number of frames */
+    WORD32 num_frames;
+
+    WORD32 idx;
+
+    /* pick up the max dpb size of the matching level; the scan is not cut
+     * short, so the last matching entry is the one that is kept */
+    for (idx = 0; idx < 16; idx++)
+    {
+        if ((WORD32)gas_ih264_lvl_tbl[idx].u4_level_idc == level)
+        {
+            lvl_max_dpb = gas_ih264_lvl_tbl[idx].u4_max_dpb_size;
+        }
+    }
+
+    /* from Annexure A.3.1 h264 specification */
+    num_frames = MIN( 1024 * lvl_max_dpb / ( pic_size * 3 ), MAX_DPB_SIZE );
+
+    return num_frames;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get reference picture buffer size for a given level and
+*  padding used
+*
+* @par Description:
+*  Computes the size needed for
+*  (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS) pictures of the
+*  largest size allowed at the given level, each padded on all four sides
+*
+* @param[in] pic_size
+*  Number of luma samples (Width * Height). Not used; the maximum for the
+*  level is used instead
+*
+* @param[in] level
+*  Level
+*
+* @param[in] horz_pad
+*  Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+*  Total padding used in vertical direction
+*
+* @param[in] num_ref_frames
+*  Number of reference frames
+*
+* @param[in] num_reorder_frames
+*  Number of reorder frames
+*
+* @returns Total picture buffer size
+*
+* @remarks
+*  For one picture, (Wd + horz_pad) * (Ht + vert_pad) expands to
+*  (Wd * Ht) + (horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad.
+*  (Wd * Ht) is covered by the maximum luma sample count of the level, scaled
+*  by 3/2 for chroma. For the rest, pad = MAX(horz_pad, vert_pad) is used for
+*  both directions, giving (pad * pad) + pad * (Wd + Ht). Since width and
+*  height can vary, the worst case of Wd + Ht is taken as the maximum width
+*  plus the minimum height of the level
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
+                                     WORD32 level,
+                                     WORD32 horz_pad,
+                                     WORD32 vert_pad,
+                                     WORD32 num_ref_frames,
+                                     WORD32 num_reorder_frames)
+{
+    /* single pad value used for both directions */
+    const WORD32 pad = MAX(horz_pad, vert_pad);
+
+    /* number of picture buffers to account for */
+    const WORD32 num_bufs = (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS);
+
+    /* index of the level into the level limit tables */
+    const WORD32 lvl_idx = ih264e_get_lvl_idx(level);
+
+    /* maximum luma samples of a picture at this level, plus chroma */
+    const WORD32 samples_per_pic = gai4_ih264_max_luma_pic_size[lvl_idx] * 3 / 2;
+
+    /* worst case Wd + Ht : maximum width plus minimum height at this level */
+    const WORD32 wd_plus_ht = (gai4_ih264_max_wd_ht[lvl_idx]
+                    + gai4_ih264_min_wd_ht[lvl_idx]);
+
+    WORD32 total;
+
+    UNUSED(pic_size);
+
+    /* unpadded picture area of all buffers */
+    total = samples_per_pic * num_bufs;
+
+    /* padding area of all buffers */
+    total += ((pad * pad) + pad * wd_plus_ht) * num_bufs;
+
+    return total;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+*  For given number of luma samples one MV bank size is computed.
+*  The size is the sum of one WORD32 per MB, one byte of pu_map per minimum
+*  PU (MIN_PU_SIZE x MIN_PU_SIZE) and one enc_pu_t per minimum PU
+*
+* @param[in] num_luma_samples
+*  Max number of luma pixels in the frame
+*
+* @returns Total MV Bank size
+*
+* @remarks
+*  NOTE(review): the original comment stated that one extra start index entry
+*  is needed to compute the number of PUs in the last MB, but only num_mb
+*  entries are counted - confirm which is intended
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
+{
+    /* number of minimum sized PUs in the picture */
+    const WORD32 pu_cnt = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+
+    /* number of MBs in the picture */
+    const WORD32 mb_cnt = num_luma_samples / (MB_SIZE * MB_SIZE);
+
+    /* running total */
+    WORD32 bank_bytes = 0;
+
+    /* enc_pu_t start index of each MB */
+    bank_bytes += mb_cnt * sizeof(WORD32);
+
+    /* pu_map */
+    bank_bytes += pu_cnt;
+
+    /* enc_pu_t of each PU */
+    bank_bytes += pu_cnt * sizeof(enc_pu_t);
+
+    return bank_bytes;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize ps_pic_buf structs and add pic buffers to the
+* reference buffer manager
+*
+* @par Description:
+* The memory at ps_codec->ps_pic_buf starts with an array of
+* BUF_MGR_MAX_CNT pic_buf_t descriptors; picture data follows it.
+* For each of ps_codec->i4_ref_buf_cnt buffers this function points
+* pu1_luma / pu1_chroma of the next descriptor into the picture data
+* (offset past the top and left padding) and registers the descriptor with
+* ps_codec->pv_ref_buf_mgr using the loop index as buffer id.
+* To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns IH264E_SUCCESS on success,
+* IH264E_INSUFFICIENT_MEM_PICBUF if the remaining byte budget goes negative,
+* IH264E_BUF_MGR_ERROR if ih264_buf_mgr_add() returns non-zero.
+* On failure the same code is also stored in ps_codec->i4_error_code.
+*
+* @remarks
+* NOTE(review): the byte budget is reduced by (luma + chroma) per buffer,
+* while the data pointer advances by HPEL_PLANES_CNT * (luma + chroma) per
+* buffer. Confirm that i4_total_pic_buf_size accounts for the extra planes
+* when HPEL_PLANES_CNT > 1.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+ /* error status */
+ IH264E_ERROR_T ret = IH264E_SUCCESS;
+
+ /* max ref buffer cnt */
+ WORD32 max_num_bufs = ps_codec->i4_ref_buf_cnt;
+
+ /* bytes left for picture data once the descriptor array is excluded */
+ WORD32 pic_buf_size_allocated = ps_codec->i4_total_pic_buf_size
+ - BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+ /* pu1_buf walks the picture data, ps_pic_buf walks the descriptor array */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_codec->ps_pic_buf;
+ pic_buf_t *ps_pic_buf = (pic_buf_t *) ps_codec->ps_pic_buf;
+ WORD32 i;
+
+ pu1_buf += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+ /* In case of non-shared mode, add picture buffers to buffer manager
+ * In case of shared mode, buffers are added in the run-time
+ * (no shared-mode check is made here; this block always runs)
+ */
+ {
+ WORD32 buf_ret;
+
+ /* one padded luma plane: recon stride times padded height */
+ WORD32 luma_samples = (ps_codec->i4_rec_strd)
+ * (ps_codec->s_cfg.u4_ht + PAD_HT);
+
+ /* chroma takes half the luma size (presumably interleaved 4:2:0 - confirm) */
+ WORD32 chroma_samples = luma_samples >> 1;
+
+ /* Try and add as many buffers as possible for the memory that is allocated */
+ /* If the number of buffers that can be added is less than max_num_bufs
+ * return with an error */
+ for (i = 0; i < max_num_bufs; i++)
+ {
+ pic_buf_size_allocated -= (luma_samples + chroma_samples);
+
+ if (pic_buf_size_allocated < 0)
+ {
+ ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF;
+ return IH264E_INSUFFICIENT_MEM_PICBUF;
+ }
+
+ ps_pic_buf->pu1_luma = pu1_buf + ps_codec->i4_rec_strd * PAD_TOP
+ + PAD_LEFT;
+ pu1_buf += luma_samples;
+
+ ps_pic_buf->pu1_chroma = pu1_buf
+ + ps_codec->i4_rec_strd * (PAD_TOP / 2)+ PAD_LEFT;
+ pu1_buf += chroma_samples;
+
+ buf_ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+ ps_pic_buf, i);
+
+ if (0 != buf_ret)
+ {
+ ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+ return IH264E_BUF_MGR_ERROR;
+ }
+ pu1_buf += (HPEL_PLANES_CNT - 1) * (chroma_samples + luma_samples);
+ ps_pic_buf++;
+ }
+ }
+
+ return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to add buffers to MV Bank buffer manager
+*
+* @par Description:
+* Function to add buffers to MV Bank buffer manager. To be called once per
+* stream or for every reset.
+* The memory at ps_codec->pv_mv_bank_buf_base starts with an array of
+* BUF_MGR_MAX_CNT mv_buf_t descriptors; per-picture MV bank data follows it.
+* Each picture's data is laid out as: num_mb WORD32 entries (pu4_mb_pu_cnt),
+* then num_pu bytes (pu1_pic_pu_map), then the enc_pu_t array (ps_pic_pu),
+* matching the size returned by ih264e_get_pic_mv_bank_size().
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns IH264E_SUCCESS on success,
+* IH264E_INSUFFICIENT_MEM_MVBANK if the remaining byte budget goes negative,
+* IH264E_BUF_MGR_ERROR if ih264_buf_mgr_add() fails.
+* On failure the same code is also stored in ps_codec->i4_error_code.
+*
+* @remarks
+* No memory is allocated here; the function only partitions memory that
+* ps_codec already owns.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+ IH264_ERROR_T ret;
+
+ /* number of MV banks to register (taken from i4_ref_buf_cnt below) */
+ WORD32 max_dpb_size = 0;
+
+ /* bytes still available for per-picture mv bank data */
+ WORD32 mv_bank_size_allocated = 0;
+
+ /* mv bank size per pic */
+ WORD32 pic_mv_bank_size = 0;
+
+ /* mv buffer ptr */
+ mv_buf_t *ps_mv_buf = NULL;
+
+ /* num of luma samples, with width and height each rounded up via ALIGN16 */
+ WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
+ * ALIGN16(ps_codec->s_cfg.u4_ht);
+
+ /* number of mb's & frame partitions */
+ WORD32 num_pu, num_mb;
+
+ /* temp var */
+ UWORD8 *pu1_buf = NULL;
+ WORD32 i;
+
+ /* Number of MV bank buffers needed */
+ max_dpb_size = ps_codec->i4_ref_buf_cnt;
+
+ /* the mv_buf_t descriptor array sits at the start of the mv bank memory;
+  * skip past it to reach the per-picture data */
+ ps_codec->ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
+ pu1_buf = ps_codec->pv_mv_bank_buf_base;
+ pu1_buf += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+ /********************************************************************/
+ /* set up the individual elements of each mv buffer descriptor */
+ /********************************************************************/
+ mv_bank_size_allocated = ps_codec->i4_total_mv_bank_size
+ - (BUF_MGR_MAX_CNT * sizeof(mv_buf_t));
+
+ /* compute MV bank size per picture */
+ pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples);
+
+ num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+ num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
+ i = 0;
+ ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
+
+ while (i < max_dpb_size)
+ {
+ mv_bank_size_allocated -= pic_mv_bank_size;
+
+ if (mv_bank_size_allocated < 0)
+ {
+ ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_MVBANK;
+
+ error_status = IH264E_INSUFFICIENT_MEM_MVBANK;
+
+ return error_status;
+ }
+
+ ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf;
+
+ ps_mv_buf->pu1_pic_pu_map = (pu1_buf + num_mb * sizeof(WORD32));
+
+ ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf + num_mb * sizeof(WORD32)
+ + num_pu);
+
+ ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
+ ps_mv_buf, i);
+
+ if (IH264_SUCCESS != ret)
+ {
+ ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+ error_status = IH264E_BUF_MGR_ERROR;
+ return error_status;
+ }
+
+ pu1_buf += pic_mv_bank_size;
+ ps_mv_buf++;
+ i++;
+ }
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to initialize quant params structure
+*
+* @par Description:
+* The forward quantization modules depends on qp/6, qp mod 6, forward scale
+* matrix, forward threshold matrix, weight list. The inverse quantization
+* modules depends on qp/6, qp mod 6, inverse scale matrix, weight list.
+* These params are initialized in this function for each of the planes
+* Y, U and V, writing through ps_proc->ps_qp_params[plane].
+* Luma uses qp directly; both chroma planes use gu1_qpc_fqpi[qp].
+*
+* @param[in] ps_proc
+* pointer to process context
+*
+* @param[in] qp
+* quantization parameter (luma)
+*
+* @returns none
+*
+* @remarks
+* qp is used unchecked as an index into gu1_qpc_fqpi.
+* The threshold matrix is shifted right by (8 - qp/6); this assumes
+* qp/6 <= 8 for every plane - TODO confirm the valid qp range with callers.
+* The SATQD thresholds (pu2_sad_thrsh) are written only when
+* s_cfg.u4_enable_satqd is non-zero.
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp)
+{
+ /* quant params */
+ quant_params_t *ps_qp_params;
+
+ /* ptr to forward quant threshold matrix */
+ const UWORD16 *pu2_thres_mat = NULL;
+
+ /* ptr to forward scale matrix */
+ const UWORD16 *pu2_scale_mat = gu2_quant_scale_matrix_4x4;
+
+ /* ptr to inverse scale matrix */
+ const UWORD16 *pu2_iscale_mat = gau2_ih264_iquant_scale_matrix_4x4;
+
+ /* temp var */
+ UWORD32 u4_qp[3], u4_qp_div6, u4_qp_mod6;
+ COMPONENT_TYPE plane;
+ WORD32 i;
+ UWORD32 u4_satdq_t;
+ const UWORD16 *pu2_smat;
+
+ /********************************************************************/
+ /* init quant params for all planes Y, U and V */
+ /********************************************************************/
+ /* luma qp */
+ u4_qp[Y] = qp;
+
+ /* chroma qp
+ * TODO_LATER : just in case if the chroma planes use different qp's this
+ * needs to be corrected accordingly.
+ */
+ u4_qp[U] = gu1_qpc_fqpi[qp];
+ u4_qp[V] = gu1_qpc_fqpi[qp];
+
+ plane = Y;
+ while (plane <= V)
+ {
+ u4_qp_div6 = (u4_qp[plane] / 6);
+ u4_qp_mod6 = (u4_qp[plane] % 6);
+
+ ps_qp_params = ps_proc->ps_qp_params[plane];
+
+ /* mb qp */
+ ps_qp_params->u1_mb_qp = u4_qp[plane];
+
+ /* mb qp / 6 */
+ ps_qp_params->u1_qp_div = u4_qp_div6;
+
+ /* mb qp % 6 */
+ ps_qp_params->u1_qp_rem = u4_qp_mod6;
+
+ /* QP bits */
+ ps_qp_params->u1_qbits = QP_BITS_h264_4x4 + u4_qp_div6;
+
+ /* forward scale matrix: 16 entries per (qp % 6) value */
+ ps_qp_params->pu2_scale_mat = pu2_scale_mat + (u4_qp_mod6 * 16);
+
+ /* threshold matrix & weight for quantization; the threshold is copied
+  * scaled down by (8 - qp/6) bits, the weight is a flat 16 */
+ pu2_thres_mat = gu2_forward_quant_threshold_4x4 + (u4_qp_mod6 * 16);
+ for (i = 0; i < 16; i++)
+ {
+ ps_qp_params->pu2_thres_mat[i] = pu2_thres_mat[i]
+ >> (8 - u4_qp_div6);
+ ps_qp_params->pu2_weigh_mat[i] = 16;
+ }
+
+ /* qp dependent rounding constant */
+ ps_qp_params->u4_dead_zone =
+ gu4_forward_quant_round_factor_4x4[u4_qp_div6];
+
+ /* slice dependent rounding constant: halved unless the slice is I or SI */
+ if (ps_proc->i4_slice_type != ISLICE
+ && ps_proc->i4_slice_type != SISLICE)
+ {
+ ps_qp_params->u4_dead_zone >>= 1;
+ }
+
+ /* SATQD threshold for zero block prediction */
+ if (ps_proc->ps_codec->s_cfg.u4_enable_satqd)
+ {
+ pu2_smat = ps_qp_params->pu2_scale_mat;
+
+ u4_satdq_t = ((1 << (ps_qp_params->u1_qbits)) - ps_qp_params->u4_dead_zone);
+
+ ps_qp_params->pu2_sad_thrsh[0] = u4_satdq_t / MAX(pu2_smat[3], pu2_smat[11]);
+ ps_qp_params->pu2_sad_thrsh[1] = u4_satdq_t / MAX(pu2_smat[1], pu2_smat[9]);
+ ps_qp_params->pu2_sad_thrsh[2] = u4_satdq_t / pu2_smat[15];
+ ps_qp_params->pu2_sad_thrsh[3] = u4_satdq_t / pu2_smat[7];
+ ps_qp_params->pu2_sad_thrsh[4] = u4_satdq_t / MAX(pu2_smat[12], pu2_smat[14]);
+ ps_qp_params->pu2_sad_thrsh[5] = u4_satdq_t / MAX(pu2_smat[4], pu2_smat[6]);
+ ps_qp_params->pu2_sad_thrsh[6] = u4_satdq_t / pu2_smat[13];
+ ps_qp_params->pu2_sad_thrsh[7] = u4_satdq_t / pu2_smat[5];
+ ps_qp_params->pu2_sad_thrsh[8] = u4_satdq_t / MAX(MAX3(pu2_smat[0], pu2_smat[2], pu2_smat[8]), pu2_smat[10]);
+ }
+
+ /* inverse scale matrix: 16 entries per (qp % 6) value */
+ ps_qp_params->pu2_iscale_mat = pu2_iscale_mat + (u4_qp_mod6 * 16);
+
+ plane += 1;
+ }
+ return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Initialize AIR mb frame Map
+*
+* @par Description:
+* Initialize AIR (adaptive intra refresh) mb frame map.
+* For every MB the map stores a value in [0, air_refresh_period), i.e. the
+* frame within the refresh period in which that MB should be coded as intra.
+*   - IVE_AIR_MODE_CYCLIC: map[mb] = mb % air_refresh_period
+*   - IVE_AIR_MODE_RANDOM: map[mb] = r % air_refresh_period, where r comes
+*     from the recurrence r = (r * 32719 + 3) % 32749 started at r = 1
+*   - any other mode: the map is left untouched
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns IH264E_SUCCESS (always)
+*
+* @remarks
+* The random sequence uses a fixed seed, so the generated map is the same on
+* every call for a given MB count and refresh period.
+* s_cfg.u4_air_refresh_period is used as a modulus without a zero check here;
+* presumably it is validated when the configuration is set - TODO confirm.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec)
+{
+ /* intra refresh map */
+ UWORD16 *pu2_intr_rfrsh_map = ps_codec->pu2_intr_rfrsh_map;
+
+ /* air mode */
+ IVE_AIR_MODE_T air_mode = ps_codec->s_cfg.e_air_mode;
+
+ /* refresh period */
+ UWORD32 air_period = ps_codec->s_cfg.u4_air_refresh_period;
+
+ /* mb cnt */
+ UWORD32 u4_mb_cnt = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs;
+
+ /* mb iterator and state of the pseudo random sequence (fixed seed 1) */
+ UWORD32 curr_mb, seed_rand = 1;
+
+ switch (air_mode)
+ {
+ case IVE_AIR_MODE_CYCLIC:
+
+ for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
+ {
+ pu2_intr_rfrsh_map[curr_mb] = curr_mb % air_period;
+ }
+ break;
+
+ case IVE_AIR_MODE_RANDOM:
+
+ for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
+ {
+ seed_rand = (seed_rand * 32719 + 3) % 32749;
+ pu2_intr_rfrsh_map[curr_mb] = seed_rand % air_period;
+ }
+ break;
+
+ default:
+
+ break;
+ }
+
+ return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Codec level initializations
+*
+* @par Description:
+* Initializes the codec with parameters that needs to be set before encoding
+* first frame:
+*   - unless the speed preset is IVE_CONFIG, overrides the ME, fast SAD,
+*     intra 4x4, half pel, deblock level and inter gate settings per preset
+*   - builds the AIR map when an AIR mode is selected
+*   - maps the configured qp values through gau1_h264_to_mpeg2_qmap and
+*     initializes rate control
+*   - derives source stride, recon stride and the reference buffer count
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns IH264E_SUCCESS (always)
+*
+* @remarks
+* The return value of ih264e_init_air_map() is not checked.
+* If s_cfg.e_rc_mode matches none of the handled modes,
+* s_rate_control.e_rc_type keeps its previous value.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
+{
+ /********************************************************************
+ * INITIALIZE CODEC CONTEXT *
+ ********************************************************************/
+ /* encoder presets (with IVE_CONFIG the configured values are kept as-is) */
+ if (ps_codec->s_cfg.u4_enc_speed_preset != IVE_CONFIG)
+ {
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {/* slowest: high quality */
+ /* diamond search, fast sad off */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4, coded with the rdopt-on variant */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+ ps_codec->luma_energy_compaction[1] =
+ ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+ /* half pel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblock level 0: deblocking enabled for every picture */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* intra inter gating flag for Inter slices cleared */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_NORMAL)
+ {/* normal */
+ /* diamond search, fast sad off */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+
+ /* half pel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblock level 0: deblocking enabled for every picture */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* intra inter gating flag for Inter slices cleared */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+ {/* fast */
+ /* diamond search, fast sad off */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* half pel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblock level 0: deblocking enabled for every picture */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* intra inter gating flag for Inter slices set */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_HIGH_SPEED)
+ {/* high speed */
+ /* diamond search, fast sad off */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* half pel off */
+ ps_codec->s_cfg.u4_enable_hpel = 0;
+
+ /* deblock level 4: ih264e_pic_init leaves deblocking disabled */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* intra inter gating flag for Inter slices cleared */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
+ {/* fastest */
+ /* diamond search (u4_enable_fast_sad is left as configured) */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* half pel off */
+ ps_codec->s_cfg.u4_enable_hpel = 0;
+
+ /* deblock level 4: ih264e_pic_init leaves deblocking disabled */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* intra inter gating flag for Inter slices set */
+ ps_codec->u4_inter_gate = 1;
+ }
+ }
+
+ /*****************************************************************
+ * Initialize AIR inside codec
+ * The picture counter starts at -1 so that the increment-and-wrap
+ * in ih264e_pic_init yields 0 for the first picture.
+ *****************************************************************/
+ if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+ {
+ ih264e_init_air_map(ps_codec);
+
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+
+ /****************************************************/
+ /* INITIALIZE RATE CONTROL */
+ /****************************************************/
+ {
+ /* init qp */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* min max qp, stored as (min, max) pairs indexed by 2 * pic type */
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ /* init i,p,b qp, mapped through gau1_h264_to_mpeg2_qmap */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ /* init min max qp */
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ /* translate the configured rc mode into the rate control type */
+ switch (ps_codec->s_cfg.e_rc_mode)
+ {
+ case IVE_RC_STORAGE:
+ ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+ break;
+ case IVE_RC_CBR_NON_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+ break;
+ case IVE_RC_CBR_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+ break;
+ case IVE_RC_NONE:
+ ps_codec->s_rate_control.e_rc_type = CONST_QP;
+ break;
+ default:
+ break;
+ }
+
+ /* init rate control */
+ ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_rate_control.pps_pd_frm_rate,
+ ps_codec->s_cfg.u4_max_framerate,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ ps_codec->s_cfg.u4_tgt_frame_rate,
+ ps_codec->s_rate_control.e_rc_type,
+ ps_codec->s_cfg.u4_target_bitrate,
+ ps_codec->s_cfg.u4_max_bitrate,
+ ps_codec->s_cfg.u4_vbv_buffer_delay,
+ ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+ H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_max_level);
+ }
+
+ /* src stride */
+ ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+
+ /* recon stride: max width rounded up via ALIGN16, plus horizontal padding */
+ ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD;
+
+ /* ref buffer count: max ref cnt + max reorder cnt + MAX_CTXT_SETS */
+ ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt
+ + ps_codec->s_cfg.u4_max_reorder_cnt;
+ ps_codec->i4_ref_buf_cnt += MAX_CTXT_SETS;
+
+ DEBUG_HISTOGRAM_INIT();
+
+ return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Picture level initializations
+*
+* @par Description:
+* Before beginning to encode the frame, the current function initializes all
+* the ctxts (proc, entropy, me, ...) basing on the input configured params.
+* It locates space for storing recon in the encoder picture buffer set, fetches
+* reference frame from encoder picture buffer set. Calls RC pre-enc to get
+* qp and pic type for the current frame. Queues proc jobs so that
+* the other threads can begin encoding. In brief, this function sets up the
+* tone for the entire encoder.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_inp_buf
+* Pointer to input buffer context
+*
+* @returns error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* mv buff bank */
+ mv_buf_t *ps_mv_buf = NULL;
+ WORD32 cur_mv_bank_buf_id;
+
+ /* recon buffer set */
+ pic_buf_t *ps_cur_pic;
+ WORD32 cur_pic_buf_id;
+ UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma;
+
+ /* ref buffer set */
+ pic_buf_t *ps_ref_pic;
+ WORD32 ref_set_id;
+
+ /* pic time stamp */
+ UWORD32 u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+ UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+ /* indices to access curr/prev frame info */
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ /* curr pic type */
+ PIC_TYPE_T *pic_type = &ps_codec->pic_type;
+
+ /* should src be skipped */
+ WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel];
+
+ /* Diamond search Iteration Max Cnt */
+ UWORD32 u4_num_layers =
+ (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ?
+ (NUM_LAYERS >> 2) : NUM_LAYERS;
+
+ /* enable fast sad */
+ UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad;
+
+ /********************************************************************/
+ /* INITIALIZE CODEC CONTEXT */
+ /********************************************************************/
+
+ /* pre enc rc call */
+ *skip_src = ih264e_set_rc_pic_params(ps_codec,
+ ps_codec->i4_encode_api_call_cnt,
+ (WORD32 *) pic_type);
+ if (*skip_src == 1)
+ {
+ ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf =
+ *ps_inp_buf;
+
+ /* inform output bytes generated as zero */
+ ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0;
+
+ return error_status;
+ }
+
+ /********************************************************************/
+ /* Alternate reference frame */
+ /********************************************************************/
+ if (ps_codec->s_cfg.u4_enable_alt_ref)
+ {
+ if (PIC_IDR == *pic_type || PIC_I == *pic_type)
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+ }
+ else
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+ if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1))
+ ps_codec->u4_is_curr_frm_ref = 0;
+ }
+
+ if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0))
+ {
+ ps_codec->i4_frame_num++;
+ }
+ }
+ else
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+
+ ps_codec->i4_frame_num++;
+ }
+
+ /* slice_type */
+ ps_codec->i4_slice_type = PSLICE;
+
+ if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
+ {
+ ps_codec->i4_slice_type = ISLICE;
+ }
+ else if (PIC_P == *pic_type)
+ {
+ ps_codec->i4_slice_type = PSLICE;
+ }
+
+ /* is this an IDR pic */
+ ps_codec->u4_is_idr = 0;
+
+ if (PIC_IDR == *pic_type)
+ {
+ /* set idr flag */
+ ps_codec->u4_is_idr = 1;
+
+ /* reset frame num */
+ ps_codec->i4_frame_num = 0;
+
+ /* idr_pic_id */
+ ps_codec->i4_idr_pic_id++;
+ }
+
+ /* set deblock disable flags based on disable deblock level */
+ ps_codec->i4_disable_deblk_pic = 1;
+
+ if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0)
+ {
+ /* enable deblocking */
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2)
+ {
+ /* enable deblocking after a period of frames */
+ if (ps_codec->i4_disable_deblk_pic_cnt == DISABLE_DEBLOCK_INTERVAL
+ || ps_codec->i4_slice_type == ISLICE)
+ {
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ }
+ else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3)
+ {
+ if (ps_codec->i4_slice_type == ISLICE)
+ {
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ }
+
+ if (ps_codec->i4_disable_deblk_pic)
+ {
+ ps_codec->i4_disable_deblk_pic_cnt++;
+ }
+ else
+ {
+ ps_codec->i4_disable_deblk_pic_cnt = 0;
+ }
+
+ /* In slice mode - lets not deblk mb edges that lie along slice boundaries */
+ if (ps_codec->i4_disable_deblk_pic == 0)
+ {
+ if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+ {
+ ps_codec->i4_disable_deblk_pic = 2;
+ }
+ }
+
+ /* error status */
+ ps_codec->i4_error_code = IH264E_SUCCESS;
+
+ /* populate header */
+ if (ps_codec->i4_gen_header)
+ {
+ /* sps */
+ sps_t *ps_sps = NULL;
+
+ /* pps */
+ pps_t *ps_pps = NULL;
+
+ /*ps_codec->i4_pps_id ++;*/
+ ps_codec->i4_pps_id %= MAX_PPS_CNT;
+
+ /*ps_codec->i4_sps_id ++;*/
+ ps_codec->i4_sps_id %= MAX_SPS_CNT;
+
+ /* populate sps header */
+ ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
+ ih264e_populate_sps(ps_codec, ps_sps);
+
+ /* populate pps header */
+ ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
+ ih264e_populate_pps(ps_codec, ps_pps);
+ }
+
+ /* Reference and MV bank Buffer Manager */
+ {
+ /* min pic cnt among the list of pics stored in ref list */
+ WORD32 min_pic_cnt;
+
+ /* max pic cnt among the list of pics stored in ref list */
+ WORD32 max_pic_cnt;
+
+ /* temp var */
+ WORD32 i;
+
+ ps_ref_pic = NULL;
+
+ /* get reference picture when necessary */
+ /* Only nearest picture encoded (max pic cnt) is used as reference */
+ if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+ {
+ max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+ ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf;
+
+ /* loop through to get the max pic cnt among the list of pics stored in ref list */
+ for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+ ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+ }
+ }
+ }
+
+ /* get a location at which the curr pic info can be stored for future reference */
+ ref_set_id = -1;
+
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ ref_set_id = i;
+ break;
+ }
+ }
+
+ /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */
+ if (ref_set_id == -1)
+ {
+ /* pic info */
+ pic_buf_t *ps_cur_pic;
+
+ /* mv info */
+ mv_buf_t *ps_cur_mv_buf;
+
+ ref_set_id = 0;
+ min_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+ /* loop through to get the min pic cnt among the list of pics stored in ref list */
+ for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+ ref_set_id = i;
+ }
+ }
+
+ ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf;
+
+ ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf;
+
+ /* release this frame from reference list */
+ ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
+ ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
+
+ ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
+ ps_cur_pic->i4_buf_id, BUF_MGR_REF);
+ }
+
+ if (ps_codec->s_cfg.u4_enable_recon)
+ {
+ ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr);
+
+ if (ret != IH264_SUCCESS)
+ {
+ return IH264E_NO_FREE_RECONBUF;
+ }
+ }
+ }
+
+ {
+ /*****************************************************************/
+ /* Get free MV Bank to hold current picture's motion vector data */
+ /* If there are no free buffers then return with an error code. */
+ /* If the buffer is to be freed by another thread, change the */
+ /* following to call thread yield and wait for buffer to be freed*/
+ /*****************************************************************/
+ ps_mv_buf = (mv_buf_t *) ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
+ &cur_mv_bank_buf_id);
+
+ if (NULL == ps_mv_buf)
+ {
+ ps_codec->i4_error_code = IH264E_NO_FREE_MVBANK;
+ return IH264E_NO_FREE_MVBANK;
+ }
+
+ /* mark the buffer as needed for reference if the curr pic is available for ref */
+ if (ps_codec->u4_is_curr_frm_ref)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_mv_buf_mgr,
+ cur_mv_bank_buf_id, BUF_MGR_REF);
+ }
+
+ /* Set current ABS poc to ps_mv_buf, so that while freeing a reference buffer
+ * corresponding mv buffer can be found by looping through ps_codec->ps_mv_buf array
+ * and getting a buffer id to free
+ */
+ ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+
+ ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id;
+ }
+
+ {
+ /*****************************************************************/
+ /* Get free pic buf to hold current picture's recon data */
+ /* If there are no free buffers then return with an error code. */
+ /* If the buffer is to be freed by another thread, change the */
+ /* following to call thread yield and wait for buffer to be freed*/
+ /*****************************************************************/
+ ps_cur_pic = (pic_buf_t *) ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+ &cur_pic_buf_id);
+
+ if (NULL == ps_cur_pic)
+ {
+ ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF;
+ return IH264E_NO_FREE_PICBUF;
+ }
+
+ /* mark the buffer as needed for reference if the curr pic is available for ref */
+ if (1 == ps_codec->u4_is_curr_frm_ref)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+ BUF_MGR_REF);
+ }
+
+ /* Mark the current buffer as needed for IO if recon is enabled */
+ if (1 == ps_codec->s_cfg.u4_enable_recon)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+ BUF_MGR_IO);
+ }
+
+ /* Associate input timestamp with current buffer */
+ ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+ ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+ ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+ ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb;
+
+ ps_cur_pic->i4_buf_id = cur_pic_buf_id;
+
+ pu1_cur_pic_luma = ps_cur_pic->pu1_luma;
+ pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma;
+ }
+
+ /* in case the current picture is used for reference then add it to the reference set */
+ if (ps_codec->u4_is_curr_frm_ref
+ && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I)
+ || (*pic_type == PIC_P)))
+ {
+ ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */
+ ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt;
+
+ ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf;
+
+ ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE PROCESS CONTEXT */
+ /********************************************************************/
+ {
+ /* temp var */
+ WORD32 i, j = 0;
+
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+
+ j = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* begin init */
+ for (i = j; i < (j + MAX_PROCESS_THREADS); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+
+ /* luma src buffer */
+ if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
+ {
+ ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
+ }
+ else
+ {
+ ps_proc->pu1_src_buf_luma_base =
+ ps_inp_buf->s_raw_buf.apv_bufs[0];
+ }
+
+ /* chroma src buffer */
+ if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE
+ || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P)
+ {
+ ps_proc->pu1_src_buf_chroma_base =
+ ps_codec->pu1_uv_csc_buf_base;
+ }
+ else
+ {
+ ps_proc->pu1_src_buf_chroma_base =
+ ps_inp_buf->s_raw_buf.apv_bufs[1];
+ }
+
+ /* luma rec buffer */
+ ps_proc->pu1_rec_buf_luma_base = pu1_cur_pic_luma;
+
+ /* chroma rec buffer */
+ ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma;
+
+ /* src stride */
+ ps_proc->i4_src_strd = ps_codec->i4_src_strd;
+
+ /* rec stride */
+ ps_proc->i4_rec_strd = ps_codec->i4_rec_strd;
+
+ /* frame num */
+ ps_proc->i4_frame_num = ps_codec->i4_frame_num;
+
+ /* is idr */
+ ps_proc->u4_is_idr = ps_codec->u4_is_idr;
+
+ /* idr pic id */
+ ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id;
+
+ /* slice_type */
+ ps_proc->i4_slice_type = ps_codec->i4_slice_type;
+
+ /* Input width in mbs */
+ ps_proc->i4_wd_mbs = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* Input height in mbs */
+ ps_proc->i4_ht_mbs = ps_codec->s_cfg.i4_ht_mbs;
+
+ /* Half x plane offset from pic buf */
+ ps_proc->u4_half_x_offset = 0;
+
+ /* Half y plane offset from half x plane */
+ ps_proc->u4_half_y_offset = 0;
+
+ /* Half x plane offset from half y plane */
+ ps_proc->u4_half_xy_offset = 0;
+
+ /* top row syntax elements */
+ ps_proc->ps_top_row_mb_syntax_ele =
+ ps_proc->ps_top_row_mb_syntax_ele_base;
+
+ ps_proc->pu1_top_mb_intra_modes =
+ ps_proc->pu1_top_mb_intra_modes_base;
+
+ ps_proc->ps_top_row_pu = ps_proc->ps_top_row_pu_base;
+
+ /* initialize quant params */
+ ps_proc->u4_frame_qp = ps_codec->u4_frame_qp;
+ ps_proc->u4_mb_qp = ps_codec->u4_frame_qp;
+ ih264e_init_quant_params(ps_proc, ps_proc->u4_frame_qp);
+
+ /* previous mb qp*/
+ ps_proc->u4_mb_qp_prev = ps_proc->u4_frame_qp;
+
+ /* Reset frame info */
+ memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t));
+
+ /* initialize proc, deblk and ME map */
+ if (i == j)
+ {
+ /* row '-1' */
+ memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_proc_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* row '-1' */
+ memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_deblk_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* row '-1' */
+ memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_me_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* at the start of air refresh period, reset intra coded map */
+ if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+ {
+ ps_codec->i4_air_pic_cnt = (ps_codec->i4_air_pic_cnt + 1)
+ % ps_codec->s_cfg.u4_air_refresh_period;
+
+ if (!ps_codec->i4_air_pic_cnt)
+ {
+ memset(ps_proc->pu1_is_intra_coded, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+ }
+ }
+
+ /* deblock level */
+ ps_proc->u4_disable_deblock_level = ps_codec->i4_disable_deblk_pic;
+
+ /* slice index map */
+ /* no slice */
+ if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE)
+ {
+ memset(ps_proc->pu1_slice_idx, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+ /* generate slices for every 'n' rows, 'n' is given through slice param */
+ else if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ /* slice idx map */
+ UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+ /* temp var */
+ WORD32 i4_mb_y = 0, slice_idx = 0, cnt;
+
+ while (i4_mb_y < ps_proc->i4_ht_mbs)
+ {
+ if (i4_mb_y +(WORD32)ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs)
+ {
+ cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs;
+ i4_mb_y += ps_codec->s_cfg.u4_slice_param;
+ }
+ else
+ {
+ cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs;
+ i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y);
+ }
+ memset(pu1_slice_idx, slice_idx, cnt);
+ slice_idx++;
+ pu1_slice_idx += cnt;
+ }
+ }
+
+ /* Current MV Bank's buffer ID */
+ ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id;
+
+ /* Pointer to current picture buffer structure */
+ ps_proc->ps_cur_pic = ps_cur_pic;
+
+ /* Pointer to current pictures mv buffers */
+ ps_proc->ps_cur_mv_buf = ps_mv_buf;
+
+ /* pointer to ref picture */
+ ps_proc->ps_ref_pic = ps_ref_pic;
+
+ if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+ {
+ /* ref pointer luma */
+ ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma;
+
+ /* ref pointer chroma */
+ ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma;
+ }
+
+ /* Structure for current input buffer */
+ ps_proc->s_inp_buf = *ps_inp_buf;
+
+ /* Number of encode frame API calls made */
+ ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt;
+
+ /* Current Picture count */
+ ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* error status */
+ ps_proc->i4_error_code = 0;
+
+ /********************************************************************/
+ /* INITIALIZE ENTROPY CONTEXT */
+ /********************************************************************/
+ {
+ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+ /* start of frame */
+ ps_entropy->i4_sof = 0;
+
+ /* end of frame */
+ ps_entropy->i4_eof = 0;
+
+ /* generate header */
+ ps_entropy->i4_gen_header = ps_codec->i4_gen_header;
+
+ /* sps ref_set_id */
+ ps_entropy->u4_sps_id = ps_codec->i4_sps_id;
+
+ /* sps base */
+ ps_entropy->ps_sps_base = ps_codec->ps_sps_base;
+
+ /* pps id */
+ ps_entropy->u4_pps_id = ps_codec->i4_pps_id;
+
+ /* pps base */
+ ps_entropy->ps_pps_base = ps_codec->ps_pps_base;
+
+ /* slice map */
+ ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+ /* slice hdr base */
+ ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base;
+
+ /* initialize entropy map */
+ if (i == j)
+ {
+ /* row '-1' */
+ memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+
+ /* wd in mbs */
+ ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /* ht in mbs */
+ ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+ /* transform_8x8_mode_flag */
+ ps_entropy->i1_transform_8x8_mode_flag = 0;
+
+ /* entropy_coding_mode_flag */
+ ps_entropy->u1_entropy_coding_mode_flag =
+ ps_codec->s_cfg.u4_entropy_coding_mode;
+
+ /* error code */
+ ps_entropy->i4_error_code = IH264E_SUCCESS;
+
+ /* mb skip run */
+ *(ps_proc->s_entropy.pi4_mb_skip_run) = 0;
+
+ /* last frame to encode */
+ ps_proc->s_entropy.u4_is_last = ps_inp_buf->u4_is_last;
+
+ /* Current Picture count */
+ ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* time stamps */
+ ps_entropy->u4_timestamp_low = u4_timestamp_low;
+ ps_entropy->u4_timestamp_high = u4_timestamp_high;
+
+ /* init frame statistics */
+ ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0;
+ ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0;
+ ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0;
+ ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE DEBLOCK CONTEXT */
+ /********************************************************************/
+ {
+ /* deblk ctxt */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* slice idx map */
+ ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE ME CONTEXT */
+ /********************************************************************/
+ {
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* srch range x */
+ ps_me_ctxt->ai2_srch_boundaries[0] =
+ ps_codec->s_cfg.u4_srch_rng_x;
+
+ /* srch range y */
+ ps_me_ctxt->ai2_srch_boundaries[1] =
+ ps_codec->s_cfg.u4_srch_rng_y;
+
+ /* src stride */
+ ps_me_ctxt->i4_src_strd = ps_codec->i4_src_strd;
+
+ /* rec stride */
+ ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd;
+
+ /* Half x plane offset from pic buf */
+ ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset;
+
+ /* Half y plane offset from half x plane */
+ ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset;
+
+ /* Half x plane offset from half y plane */
+ ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset;
+
+ /* enable fast sad */
+ ps_me_ctxt->u4_enable_fast_sad = u4_enable_fast_sad;
+
+ /* half pel */
+ ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel;
+
+ /* Diamond search Iteration Max Cnt */
+ ps_me_ctxt->u4_num_layers = u4_num_layers;
+
+ /* me speed preset */
+ ps_me_ctxt->u4_me_speed_preset =
+ ps_codec->s_cfg.u4_me_speed_preset;
+
+ /* qp */
+ ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp;
+
+ if ((i == 0) && (0 == ps_codec->i4_pic_cnt))
+ {
+ /* init mv bits tables */
+ ih264e_init_mv_bits(ps_me_ctxt);
+ }
+ }
+
+ ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl);
+
+ }
+
+ /* reset encoder header */
+ ps_codec->i4_gen_header = 0;
+ }
+
+ /********************************************************************/
+ /* ADD JOBS TO THE QUEUE */
+ /********************************************************************/
+ {
+ /* job structures */
+ job_t s_job;
+
+ /* temp var */
+ WORD32 i;
+
+ /* job class */
+ s_job.i4_cmd = CMD_PROCESS;
+
+ /* number of mbs to be processed in the current job */
+ s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* job start index x */
+ s_job.i2_mb_x = 0;
+
+ /* proc base idx */
+ s_job.i2_proc_base_idx = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+ for (i = 0; i < (WORD32)ps_codec->s_cfg.i4_ht_mbs; i++)
+ {
+ /* job start index y */
+ s_job.i2_mb_y = i;
+
+ /* queue the job */
+ ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1);
+ if (ret != IH264_SUCCESS)
+ {
+ ps_codec->i4_error_code = ret;
+ return IH264E_FAIL;
+ }
+ }
+
+ /* Once all the jobs are queued, terminate the queue */
+ /* Since the threads are created and deleted in each call, terminating
+ here is not an issue */
+ ih264_list_terminate(ps_codec->pv_proc_jobq);
+ }
+
+ return error_status;
+}
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
new file mode 100755
index 0000000..651dad9
--- /dev/null
+++ b/encoder/ih264e_utils.h
@@ -0,0 +1,327 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_utils.h
+*
+* @brief
+* Contains declarations of miscellaneous utility functions used by the encoder
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+* -ih264e_get_min_level()
+* -ih264e_get_lvl_idx()
+* -ih264e_get_dpb_size()
+* -ih264e_get_total_pic_buf_size()
+* -ih264e_get_pic_mv_bank_size()
+* -ih264e_pic_buf_mgr_add_bufs()
+* -ih264e_mv_buf_mgr_add_bufs()
+* -ih264e_init_quant_params()
+* -ih264e_init_air_map()
+* -ih264e_codec_init()
+* -ih264e_pic_init()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_UTILS_H_
+#define IH264E_UTILS_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get the minimum level for a given picture size
+*
+* @par Description:
+*  Gets the minimum level index and then gets the corresponding level.
+*  Also used to ignore invalid levels like 2.3, 3.3 etc
+*
+* @param[in] pic_size
+*  Picture size
+*  NOTE(review): presumably width * height in luma samples, as documented for
+*  ih264e_get_dpb_size() - confirm against the definition
+*
+* @returns Level derived from the minimum level index
+*  NOTE(review): the original text said "level index"; confirm whether the
+*  level or its index is returned
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get level index for a given level
+*
+* @par Description:
+*  Converts from level_idc (which is multiplied by 30) to an index that can be
+*  used as a lookup. Also used to ignore invalid levels like 2.2 , 3.2 etc
+*  NOTE(review): in H.264 level_idc is normally 10 times the level number; the
+*  factor 30 quoted above should be confirmed against the definition
+*
+* @param[in] level
+*  Level of the stream
+*
+* @returns Level index for a given level
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns maximum number of pictures allowed in dpb for a given level
+*
+* @par Description:
+*  For a given picture size (width * height) and level, the number of pictures
+*  allowed in the decoder picture buffer is computed as per Annex A.3.1
+*
+* @param[in] level
+*  level of the bit-stream
+*
+* @param[in] pic_size
+*  width * height
+*
+* @returns Number of buffers in DPB
+*
+* @remarks
+*  From annexure A.3.1 of H264 specification,
+*  max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+*  Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+*  MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+*  presented in the look up table gas_ih264_lvl_tbl is in units of 512
+*  bytes. Hence the expression is modified accordingly.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get reference picture buffer size for a given level and
+*  padding used
+*
+* @par Description:
+*  Used to get reference picture buffer size for a given level and padding used
+*  Each picture is padded on all four sides
+*
+* @param[in] pic_size
+*  Number of luma samples (Width * Height)
+*
+* @param[in] level
+*  Level
+*
+* @param[in] horz_pad
+*  Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+*  Total padding used in vertical direction
+*
+* @param[in] num_ref_frames
+*  NOTE(review): not documented in the original header; presumably the number
+*  of reference frames to provision buffers for - confirm against the definition
+*
+* @param[in] num_reorder_frames
+*  NOTE(review): not documented in the original header; presumably the number
+*  of reorder frames to provision buffers for - confirm against the definition
+*
+* @returns Total picture buffer size
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, WORD32 level,
+ WORD32 horz_pad, WORD32 vert_pad,
+ WORD32 num_ref_frames,
+ WORD32 num_reorder_frames);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+*  For the given number of luma samples, the size of one MV bank is computed.
+*  Each MV bank includes pu_map and enc_pu_t for all the min PUs (4x4) in a
+*  picture
+*
+* @param[in] num_luma_samples
+*  Max number of luma pixels in the frame
+*
+* @returns Total MV Bank size
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize ps_pic_buf structs and add pic buffers to the
+*  buffer manager in case of non-shared mode
+*
+* @par Description:
+*  Function to initialize ps_pic_buf structs and add pic buffers to the
+*  buffer manager in case of non-shared mode.
+*  To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns error status
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to add buffers to MV Bank buffer manager
+*
+* @par Description:
+*  Function to add buffers to MV Bank buffer manager. To be called once per
+*  stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns error status
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize quant params structure
+*
+* @par Description:
+*  The forward quantization modules depend on qp/6, qp mod 6, forward scale
+*  matrix, forward threshold matrix and weight list. The inverse quantization
+*  modules depend on qp/6, qp mod 6, inverse scale matrix and weight list.
+*  These params are initialized in this function.
+*
+* @param[in] ps_proc
+*  pointer to process context
+*
+* @param[in] qp
+*  quantization parameter
+*
+* @returns none
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize AIR mb frame map
+*
+* @par Description:
+*  Initialize AIR mb frame map.
+*  The MB frame map indicates in which frame an MB should be coded as intra
+*  according to AIR
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns error_status
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec level initializations
+*
+* @par Description:
+*  Initializes the codec with parameters that need to be set before encoding
+*  the first frame
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns error_status
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Picture level initializations
+*
+* @par Description:
+*  Before beginning to encode the frame, the current function initializes all
+*  the ctxts (proc, entropy, me, ...) based on the input configured params.
+*  It locates space for storing recon in the encoder picture buffer set and
+*  fetches the reference frame from the encoder picture buffer set. Calls RC
+*  pre-enc to get qp and pic type for the current frame. Queues proc jobs so
+*  that the other threads can begin encoding. In brief, this function sets the
+*  stage for the entire encoder.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_inp_buf
+*  Pointer to input buffer context
+*
+* @returns error_status
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf);
+
+#endif /* IH264E_UTILS_H_ */
diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c
new file mode 100755
index 0000000..3dcba8d
--- /dev/null
+++ b/encoder/ih264e_version.c
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_version.c
+*
+* @brief
+* Contains version info for H264 encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_get_version()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+ * Name of the codec and target platform (All Cortex A processors in this case)
+ */
+#define CODEC_NAME "H264ENC"
+/**
+ * Codec release type, production or evaluation
+ */
+#define CODEC_RELEASE_TYPE "production"
+/**
+ * Version string. First two digits signify major version and last two minor
+ */
+#define CODEC_RELEASE_VER "01.00"
+/**
+ * Vendor name
+ */
+#define CODEC_VENDOR "ITTIAM"
+
+#define MAX_STRLEN 511
+/**
+*******************************************************************************
+* Formats the version string
+*   "@(#)Id:<name>_<type> Ver:<ver> Released by <vendor> Build: <date> @ <time>"
+* into version_string, which must be at least MAX_STRLEN bytes long.
+*
+* A single bounded snprintf() is used: the result is always NUL terminated and
+* never exceeds MAX_STRLEN bytes. The macro expands to one expression, so it
+* can be used safely as a statement (including inside an unbraced if/else).
+*******************************************************************************
+*/
+#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \
+    snprintf((version_string), MAX_STRLEN,                                  \
+             "@(#)Id:%s_%s Ver:%s Released by %s Build: %s @ %s",           \
+             (codec_name), (codec_release_type), (codec_release_ver),       \
+             (codec_vendor), __DATE__, __TIME__)
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the version info in the given char pointer
+*
+* @par Description:
+*  Builds the version string in a local buffer and copies it, including the
+*  terminating NUL, to the caller's buffer if that buffer is large enough
+*
+* @param[out] pc_version
+*  Buffer that receives the NUL terminated version string
+*
+* @param[in] u4_version_bufsize
+*  Size in bytes of the buffer passed
+*
+* @returns IV_SUCCESS when the string was copied; IV_FAIL when pc_version is
+*  NULL or the buffer is too small (pc_version is left untouched in that case)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize)
+{
+    CHAR ac_version_tmp[MAX_STRLEN];
+
+    /* length of the version string, including the terminating NUL */
+    UWORD32 u4_version_len;
+
+    if (NULL == pc_version)
+    {
+        return IV_FAIL;
+    }
+
+    VERSION(ac_version_tmp, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    /* guarantee termination so that the length below never exceeds the */
+    /* size of the local buffer                                          */
+    ac_version_tmp[MAX_STRLEN - 1] = '\0';
+
+    u4_version_len = (UWORD32)strnlen(ac_version_tmp, MAX_STRLEN) + 1;
+
+    if (u4_version_bufsize < u4_version_len)
+    {
+        return IV_FAIL;
+    }
+
+    memcpy(pc_version, ac_version_tmp, u4_version_len);
+
+    return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_version.h b/encoder/ih264e_version.h
new file mode 100755
index 0000000..303a1e2
--- /dev/null
+++ b/encoder/ih264e_version.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_version.h
+*
+* @brief
+* Contains the declaration of ih264e_get_version(), which reports the encoder version string
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_VERSION_H_
+#define IH264E_VERSION_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the version info in the given char pointer
+*
+* @par Description:
+*  Fills the version info in the given char pointer
+*
+* @param[out] pc_version
+*  Buffer that receives the NUL terminated version string
+*
+* @param[in] u4_version_bufsize
+*  Size of the buffer passed
+*
+* @returns IV_SUCCESS when the buffer is large enough for the version string
+*  and its terminating NUL, IV_FAIL otherwise
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize);
+
+#endif /* IH264E_VERSION_H_ */
diff --git a/encoder/ime.c b/encoder/ime.c
new file mode 100755
index 0000000..c89aaab
--- /dev/null
+++ b/encoder/ime.c
@@ -0,0 +1,836 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ime.c
+ *
+ * @brief
+ *
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * -
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime.h"
+#include "ime_statistics.h"
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+*  Refines the current 16x16 motion vector on a diamond grid of radius 1. In
+*  every layer the sad at the four vertices (left, right, top, bottom) of the
+*  diamond centred on the current mv is computed with one call to
+*  pf_ime_compute_sad4_diamond and converted to a cost
+*  (sad + u4_lambda_motion * mv bits). The centre then moves to the cheapest
+*  vertex. The search stops when no vertex is cheaper than the centre, when a
+*  vertex would fall outside the search range, or after u4_num_layers layers.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. Lambda, search range, source/reference pointers,
+*  strides, mv bits table and layer count are read from it. In s_mb_part,
+*  i4_mb_cost, i4_mb_distortion and s_mv_curr are updated when a cheaper mv is
+*  found, and u4_exit is set to 1 when the centre of the diamond is the best
+*  position.
+*
+* @returns none (results are returned through ps_me_ctxt->s_mb_part)
+*
+* @remarks Diamond Srch, radius is 1
+*
+*******************************************************************************
+*/
+void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
+{
+    /* offsets of the diamond vertices from the centre; vertex k is paired
+     * with i4_sad[k] : left, right, top, bottom */
+    static const WORD32 ai4_dia_x[4] = { -1, 1, 0, 0 };
+    static const WORD32 ai4_dia_y[4] = { 0, 0, -1, 1 };
+
+    /* MB partition info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* lagrange parameter */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* srch range */
+    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+    /* pointers to src and ref macro blocks */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* least cost and the distortion that goes with it */
+    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* centre of the diamond, starts at the mv with best sad during initial
+     * evaluation */
+    WORD16 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /* best position found so far */
+    WORD16 i2_mv_u_x = i2_mvx;
+    WORD16 i2_mv_u_y = i2_mvy;
+
+    /* Diamond search Iteration Max Cnt */
+    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
+
+    /* temp var */
+    WORD32 i4_sad[4];
+    UWORD8 *pu1_ref;
+    WORD32 i;
+
+    while (u4_num_layers--)
+    {
+        /* FIXME : is this the right way to check for out of bounds ? */
+        if ( (i2_mvx - 1 < i4_srch_range_w) ||
+             (i2_mvx + 1 > i4_srch_range_e) ||
+             (i2_mvy - 1 < i4_srch_range_n) ||
+             (i2_mvy + 1 > i4_srch_range_s) )
+        {
+            break;
+        }
+
+        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
+
+        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
+                                                pu1_curr_mb,
+                                                i4_ref_strd,
+                                                i4_src_strd,
+                                                i4_sad);
+
+        for (i = 0; i < 4; i++)
+        {
+            /* candidate mv at this vertex */
+            WORD32 i4_cand_x = i2_mvx + ai4_dia_x[i];
+            WORD32 i4_cand_y = i2_mvy + ai4_dia_y[i];
+            WORD32 i4_cost;
+
+            DEBUG_SAD_HISTOGRAM_ADD(i4_sad[i], 2);
+
+            /* compute cost. The mv is scaled by 4 with a multiplication, as */
+            /* left shifting a negative value is undefined behaviour in C    */
+            i4_cost = i4_sad[i] + u4_lambda_motion * ( pu1_mv_bits[(i4_cand_x * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                            + pu1_mv_bits[(i4_cand_y * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+
+            if (i4_cost_least > i4_cost)
+            {
+                i4_cost_least = i4_cost;
+                i4_distortion_least = i4_sad[i];
+
+                i2_mv_u_x = (WORD16)i4_cand_x;
+                i2_mv_u_y = (WORD16)i4_cand_y;
+            }
+        }
+
+        if ((i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
+        {
+            /* no vertex is cheaper than the centre */
+            ps_mb_part->u4_exit = 1;
+            break;
+        }
+
+        /* move the centre of the diamond to the best vertex */
+        i2_mvx = i2_mv_u_x;
+        i2_mvy = i2_mv_u_y;
+    }
+
+    if (i4_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+*  candidates chosen.
+*
+* @par Description:
+*  This function determines the position in the search window at which the
+*  motion estimation should begin in order to minimise the number of search
+*  iterations. Every distinct candidate in as_mv_init_search is evaluated
+*  (sad + u4_lambda_motion * mv bits) and the cheapest one is retained.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. Candidate list and count, lambda, source/reference
+*  pointers, strides, fast sad flag and mv bits table are read from it. In
+*  s_mb_part, u4_srch_pos_idx, i4_mb_cost, i4_mb_distortion and s_mv_curr are
+*  updated when the best candidate is cheaper than the current i4_mb_cost.
+*
+* @returns none (results are returned through ps_me_ctxt->s_mb_part)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_evaluate_init_srchposn_16x16(me_ctxt_t *ps_me_ctxt)
+{
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* candidate mv cnt */
+    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates;
+
+    /* list of candidate mvs */
+    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search;
+
+    /* pointers to src and ref macro blocks */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD (distortion metric) of the 16x16 block */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* temp var */
+    UWORD32 i, j, u4_srch_pos_idx = 0;
+    UWORD8 *pu1_ref = NULL;
+
+    /* Carry out a search using each of the candidate motion vectors as predictors. */
+    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
+    for (i = 0; i < u4_num_candidates; i++)
+    {
+        /* set to 0 when this candidate repeats an earlier one, whose sad */
+        /* has already been computed                                      */
+        WORD32 c_sad = 1;
+
+        for (j = 0; j < i; j++)
+        {
+            if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
+                 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
+            {
+                c_sad = 0;
+                break;
+            }
+        }
+
+        if (c_sad)
+        {
+            /* adjust ref pointer */
+            pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
+
+            /* compute distortion */
+            /* NOTE(review): the least cost so far is passed as the 5th argument, */
+            /* presumably as an early-exit threshold - confirm against the sad fn */
+            ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
+            DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
+
+            /* compute cost. The mv is scaled by 4 with a multiplication, as */
+            /* left shifting a negative value is undefined behaviour in C    */
+            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[(ps_mv_list[i].i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                            + pu1_mv_bits[(ps_mv_list[i].i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+
+            if (i4_mb_cost < i4_mb_cost_least)
+            {
+                i4_mb_cost_least = i4_mb_cost;
+
+                i4_distortion_least = i4_mb_distortion;
+
+                u4_srch_pos_idx = i;
+            }
+        }
+    }
+
+    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx;
+        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+*  The search window stored in the me context (i4_srch_range_w/e/n/s) is first
+*  restricted so that it does not extend more than ai2_srch_boundaries[0]
+*  horizontally and ai2_srch_boundaries[1] vertically from the current best mv
+*  (s_mb_part.s_mv_curr). The search selected by u4_me_speed_preset is then run;
+*  only DMND_SRCH is handled, any other value hits assert(0). Finally both
+*  components of s_mb_part.s_mv_curr are multiplied by 4.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context; the search range and s_mb_part.s_mv_curr are updated
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_full_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* mb part info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /******************************************************************/
+    /* Modify Search range about initial candidate instead of zero mv */
+    /******************************************************************/
+    /*
+     * FIXME: The motion vectors in a way can become unbounded. It may so happen that
+     * MV might exceed the limit of the profile configured.
+     */
+    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
+                                      -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
+                                      ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
+                                      -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
+                                      ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+
+    /************************************************************/
+    /* Traverse about best initial candidate for mv             */
+    /************************************************************/
+    switch (ps_me_ctxt->u4_me_speed_preset)
+    {
+        case DMND_SRCH:
+            ime_diamond_search_16x16(ps_me_ctxt);
+            break;
+
+        default:
+            /* no other search algorithm is wired in here */
+            assert(0);
+            break;
+    }
+
+    /*
+     * Scale the resulting mv by 4. A multiplication replaces '<< 2' because
+     * left shifting a negative mv component is undefined behaviour in C.
+     */
+    ps_mb_part->s_mv_curr.i2_mvx = ps_mb_part->s_mv_curr.i2_mvx * 4;
+    ps_mb_part->s_mv_curr.i2_mvy = ps_mb_part->s_mv_curr.i2_mvy * 4;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+*  The SADs of the eight half pel positions surrounding the current mv are
+*  obtained with a single call to pf_ime_sub_pel_compute_sad_16x16. Each of
+*  them is converted to a cost (SAD + lambda * mv bits) and compared with the
+*  cost already stored in s_mb_part. The least cost, its distortion, the
+*  corresponding mv and (unless HP_PL is defined) the pointer to the winning
+*  half pel buffer are written back to s_mb_part.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. Unless HP_PL is defined, pu1_half_x, pu1_half_y and
+*  pu1_half_xy are moved to the buffer position that last improved the cost in
+*  the respective plane.
+*
+* @returns none
+*
+* @remarks s_mb_part.pu1_best_hpel_buf is set to NULL when no half pel position
+*  improves on the incoming cost.
+*
+*******************************************************************************
+*/
+void ime_sub_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* mv predictor, subtracted from a candidate mv to index pu1_mv_bits */
+    WORD32 i4_pred_x = ps_mb_part->s_mv_pred.i2_mvx;
+    WORD32 i4_pred_y = ps_mb_part->s_mv_pred.i2_mvy;
+
+    /**************************************************************/
+    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
+    /* left side of full pel                                      */
+    /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
+    /* top side of full pel                                       */
+    /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
+    /* on the top left side of full pel                           */
+    /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
+    /* default positions are                                      */
+    /*     pu1_half_x  = right half pel                           */
+    /*     pu1_half_y  = bottom half pel                          */
+    /*     pu1_half_xy = bottom right half pel                    */
+    /* Hence corresponding adjustments made here                  */
+    /**************************************************************/
+    UWORD8 *pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1;
+    UWORD8 *pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd;
+    UWORD8 *pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd;
+
+    /* best result so far, seeded with what the earlier search stages left */
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
+    WORD16 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /* Best half pel buffer */
+    UWORD8 *pu1_best_hpel_buf = NULL;
+
+    /*
+     * Current mv with its two fractional bits cleared. The multiplication
+     * replaces '<< 2', which is undefined behaviour for negative values.
+     */
+    WORD32 i4_fpel_x = (ps_mb_part->s_mv_curr.i2_mvx >> 2) * 4;
+    WORD32 i4_fpel_y = (ps_mb_part->s_mv_curr.i2_mvy >> 2) * 4;
+
+    /* SAD(distortion metric) and cost of the candidate being examined */
+    WORD32 i4_mb_distortion;
+    WORD32 i4_mb_cost;
+
+    /* temp var */
+    WORD32 i, j;
+
+    /**************************************/
+    /* SADs of the half pel candidates    */
+    /* [0] - half x =  1, half y =  0     */
+    /* [1] - half x = -1, half y =  0     */
+    /* [2] - half x =  0, half y =  1     */
+    /* [3] - half x =  0, half y = -1     */
+    /* [4] - half x =  1, half y =  1     */
+    /* [5] - half x = -1, half y =  1     */
+    /* [6] - half x =  1, half y = -1     */
+    /* [7] - half x = -1, half y = -1     */
+    /**************************************/
+    WORD32 ai4_sad[8];
+
+    /************************************************************/
+    /* Evaluate half pel                                        */
+    /************************************************************/
+    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
+                                                 pu1_ref_mb_half_y,
+                                                 pu1_ref_mb_half_xy,
+                                                 i4_src_strd, i4_ref_strd,
+                                                 ai4_sad);
+
+    /* Half x plane : i = 0 is the right neighbour, i = 1 the left one */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = i4_fpel_x + 2 - (i * 4);
+        WORD32 mv_y_tmp = i4_fpel_y;
+
+        i4_mb_distortion = ai4_sad[i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - i4_pred_x]
+                        + pu1_mv_bits[mv_y_tmp - i4_pred_y] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = i4_mb_distortion;
+            i2_mv_u_x = mv_x_tmp;
+            i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL /*choosing whether left or right half_x*/
+            ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x - i;
+            pu1_best_hpel_buf = pu1_ref_mb_half_x - i;
+#endif
+        }
+    }
+
+    /* Half y plane : i = 0 is the bottom neighbour, i = 1 the top one */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = i4_fpel_x;
+        WORD32 mv_y_tmp = i4_fpel_y + 2 - (i * 4);
+
+        i4_mb_distortion = ai4_sad[2 + i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - i4_pred_x]
+                        + pu1_mv_bits[mv_y_tmp - i4_pred_y] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = i4_mb_distortion;
+            i2_mv_u_x = mv_x_tmp;
+            i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL/*choosing whether top or bottom half_y*/
+            ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y - i*(i4_ref_strd);
+            pu1_best_hpel_buf = pu1_ref_mb_half_y - i*(i4_ref_strd);
+#endif
+        }
+    }
+
+    /* Half xy plane : j selects bottom(0)/top(1), i selects right(0)/left(1) */
+    for(j = 0; j < 2; j++)
+    {
+        for(i = 0; i < 2; i++)
+        {
+            WORD32 mv_x_tmp = i4_fpel_x + 2 - (i * 4);
+            WORD32 mv_y_tmp = i4_fpel_y + 2 - (j * 4);
+
+            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
+
+            /* compute cost */
+            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - i4_pred_x]
+                            + pu1_mv_bits[mv_y_tmp - i4_pred_y] );
+
+            if (i4_mb_cost < i4_mb_cost_least)
+            {
+                i4_mb_cost_least = i4_mb_cost;
+                i4_distortion_least = i4_mb_distortion;
+                i2_mv_u_x = mv_x_tmp;
+                i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL /*choosing between four half_xy */
+                ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy - j*(i4_ref_strd) - i;
+                pu1_best_hpel_buf = pu1_ref_mb_half_xy - j*(i4_ref_strd) - i;
+#endif
+            }
+        }
+    }
+
+    /* write back the best candidate (unchanged values if nothing improved) */
+    ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+    ps_mb_part->i4_mb_distortion = i4_distortion_least;
+    ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
+    ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
+    ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+*  The skip mv is evaluated only when it lies inside the current search range
+*  and both of its components are multiples of 4; otherwise the function
+*  returns without modifying anything. The SAD at that position is computed,
+*  the min sad bookkeeping of the me context is updated, and the SAD reduced by
+*  (lambda * SKIP_BIAS) is taken as the skip cost. When this cost does not
+*  exceed the cost stored in ps_smb_part_info, the cost, distortion and mv in
+*  ps_smb_part_info are replaced.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me ctxt; i4_min_sad and u4_min_sad_reached may be updated
+*
+* @param[in] pv_skip_mv
+*  pointer to skip mv (read as ime_mv_t)
+*
+* @param[in,out] ps_smb_part_info
+*  mb partition info that receives the skip cost, distortion and mv
+*
+* @param[in] u4_use_stat_sad
+*  1 : use pf_ime_compute_sad_stat_luma_16x16 (also reports whether the
+*  residue is non zero); any other value : use pf_ime_compute_sad_16x16
+*
+* @returns none
+*
+* @remarks
+* NOTE: while computing the skip cost, do not enable early exit from compute
+* sad function because, a negative bias gets added later
+*
+*******************************************************************************
+*/
+void ime_compute_skip_cost
+    (
+        me_ctxt_t *ps_me_ctxt,
+        void *pv_skip_mv,
+        mb_part_ctxt *ps_smb_part_info,
+        UWORD32 u4_use_stat_sad
+    )
+{
+    /* pointers to src & ref macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* selects the entry of pf_ime_compute_sad_16x16[] that is called */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD(distortion metric) of an mb */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion - u4_lambda_motion * SKIP_BIAS */
+    WORD32 i4_mb_cost;
+
+    /* skip mv divided by 4 */
+    WORD16 mv_x, mv_y;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* skip mv and its copy clipped to the search range */
+    ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv;
+
+    /* temp var */
+    UWORD8 *pu1_ref;
+    UWORD32 u4_is_nonzero;
+
+    s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx);
+    s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy);
+
+    if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) ||
+        (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy))
+    {
+        /* skip motion vector not with in bounds */
+        /* it is possible that mv is already evaluated */
+        return ;
+    }
+
+    mv_x = (ps_skip_mv->i2_mvx + 2) >> 2;
+    mv_y = (ps_skip_mv->i2_mvy + 2) >> 2;
+
+    /*
+     * Only mvs that are exact multiples of 4 are evaluated. The check uses a
+     * multiplication instead of '<< 2' because left shifting a negative value
+     * is undefined behaviour in C.
+     */
+    if (((mv_x * 4) != ps_skip_mv->i2_mvx) || ((mv_y * 4) != ps_skip_mv->i2_mvy))
+    {
+        return ;
+    }
+
+    /* adjust ref pointer */
+    pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd);
+
+    if(u4_use_stat_sad == 1)
+    {
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd,
+                                                       ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion, &u4_is_nonzero);
+
+        /*
+         * NOTE The check here is two fold
+         * One is checking if the sad has been reached, ie min sad, which is a
+         * configurable parameter. If that is reached, we need not do any mode
+         * evaluation.
+         * Similarly if u4_is_nonzero reports zero there is no point in doing
+         * any further mode evaluation, as sad is a non negative quantity;
+         * the min sad is then recorded as 0.
+         */
+        /*
+         * NOTE in case we need to disable the zero check using satdq,
+         * we need only to set u4_is_nonzero to a non zero value
+         */
+        if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/
+            ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
+        }
+    }
+    else
+    {
+        /* INT_MAX as the SAD limit: no early exit, see the remark in the header */
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion);
+
+        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
+            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad*/
+        }
+    }
+
+    /* for skip mode cost & distortion are identical
+     * But we shall add a bias to favor skip mode.
+     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
+     * TODO : Empirical analysis of SKIP_BIAS is necessary */
+#define SKIP_BIAS 8
+    i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS);
+#undef SKIP_BIAS
+
+    /* '<=' : skip wins a tie against the cost already recorded */
+    if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
+    {
+        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
+        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
+        ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx;
+        ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy;
+    }
+}
+
diff --git a/encoder/ime.h b/encoder/ime.h
new file mode 100755
index 0000000..5c039e8
--- /dev/null
+++ b/encoder/ime.h
@@ -0,0 +1,209 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ime.h
+ *
+ * @brief
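+ * Contains the constant macros and extern declarations of the motion
+ * estimation routines (diamond search, initial search position evaluation,
+ * full pel and sub pel estimation, skip cost) of the H264 encoder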
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IME_H_
+#define IME_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Number of iterations before exiting during diamond search
+ *
+ * NOTE(review): not referenced anywhere in this header; presumably consumed by
+ * the definition of ime_diamond_search_16x16 - confirm.
+******************************************************************************
+ */
+#define NUM_LAYERS 16
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+* This function computes the sad at vertices of several layers of diamond grid
+* at a time. The number of layers of diamond grid that would be evaluated is
+* configurable.The function computes the sad at vertices of a diamond grid. If
+* the sad at the center of the diamond grid is lesser than the sad at any other
+* point of the diamond grid, the function marks the candidate Mb partition as
+* mv.
+*
+* @param ps_me_ctxt
+* pointer to me context. This is the only argument; NOTE(review): the mb
+* partition info, lambda and fast sad flag are presumably read from it and the
+* resulting mv, distortion and cost written back to it - confirm against the
+* definition.
+*
+* @returns none (the function is declared void)
+*
+* @remarks NOTE(review): this routine has been described as unsuitable for the
+* final product because of its computational cost and meant for quality
+* evaluation only. It is the search invoked for DMND_SRCH by
+* ime_full_pel_motion_estimation_16x16, so confirm whether that still holds.
+*
+*******************************************************************************
+*/
+extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt);
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+* This function determines the position in the search window at which the motion
+* estimation should begin in order to minimise the number of search iterations.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context. It is the only argument: the candidate list, lambda
+* and fast sad selection are taken from it, and the best mv with its distortion
+* and cost is stored in its mb partition info.
+*
+* @returns none (the function is declared void)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_evaluate_init_srchposn_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+* The search range held in the me context is restricted about the current best
+* mv, the search algorithm chosen by the speed preset is run about that mv, and
+* the resulting mv is scaled by 4.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context (the only argument); the search range and the mv of
+* its mb partition info are updated
+*
+* @returns none (the function is declared void)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_full_pel_motion_estimation_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+* This function evaluates the half pel sample points around the current motion
+* vector. The vector with least cost is chosen as the mv for the current mb;
+* the mv is left unchanged when no half pel point has a lower cost.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context (the only argument); the cost, distortion, mv and best
+* half pel buffer of its mb partition info are updated
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_sub_pel_motion_estimation_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+* The skip mv is evaluated only if it lies within the current search range and
+* both components are multiples of 4. Its sad, reduced by a bias that favours
+* skip, is stored in ps_smb_part_info when it does not exceed the cost already
+* stored there.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me ctxt
+*
+* @param[in] pv_skip_mv
+* pointer to skip mv
+*
+* @param[in,out] ps_smb_part_info
+* mb partition info that receives the skip cost, distortion and mv
+*
+* @param[in] u4_use_stat_sad
+* 1 selects the sad routine that also reports whether the residue is non zero
+*
+* @returns none
+*
+* @remarks
+* NOTE: while computing the skip cost, do not enable early exit from compute
+* sad function because, a negative bias gets added later
+*
+*******************************************************************************
+*/
+extern void ime_compute_skip_cost
+ (
+ me_ctxt_t *ps_me_ctxt,
+ void *pv_skip_mv,
+ mb_part_ctxt *ps_smb_part_info,
+ UWORD32 u4_use_stat_sad
+ );
+
+
+#endif /* IME_H_ */
diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h
new file mode 100755
index 0000000..14d9c55
--- /dev/null
+++ b/encoder/ime_defs.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_defs.h
+*
+* @brief
+* Constant macros (candidate labels, direction labels, mb size and search
+* algorithm identifiers) used by the motion estimation code
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_DEFS_H_
+#define _IME_DEFS_H_
+
+
+/* Macros to Label candidates.
+ * SKIP_CAND is used as an index into the initial search candidate mv list of
+ * the me context. NOTE(review): the remaining labels presumably index the same
+ * list (zero mv, left, top and top right neighbours) - confirm against the
+ * code that fills the list. */
+#define SKIP_CAND 0
+#define ZERO_CAND 1
+#define LEFT_CAND 2
+#define TOP_CAND 3
+#define TOPR_CAND 4
+/* Direction labels. NOTE(review): usage is not visible in this header;
+ * presumably the direction of the last move made by a search - confirm. */
+#define NONE 0
+#define LEFT 1
+#define RIGHT 2
+#define TOP 3
+#define BOTTOM 4
+/* Width and height of a macroblock in pixels; bounds the row and column
+ * loops of the 16x16 SAD routines. */
+#define MB_SIZE 16
+/* Identifiers of the full pel search algorithms, compared with the me speed
+ * preset. ime_full_pel_motion_estimation_16x16 handles DMND_SRCH only; every
+ * other value reaches assert(0) there. */
+#define FULL_SRCH 0
+#define DMND_SRCH 100
+#define NSTEP_SRCH 50
+#define HEX_SRCH 75
+
+#endif /*_IME_DEFS_H_*/
+
diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c
new file mode 100755
index 0000000..23a1fbc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.c
@@ -0,0 +1,1262 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.c
+*
+* @brief
+* This file contains definitions of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ime_sub_pel_compute_sad_16x16()
+* - ime_calculate_sad4_prog()
+* - ime_calculate_sad3_prog()
+* - ime_calculate_sad2_prog()
+* - ime_compute_sad_16x16()
+* - ime_compute_sad_16x16_fast()
+* - ime_compute_sad_16x16_ea8()
+* - ime_compute_sad_8x8()
+* - ime_compute_sad_4x4()
+* - ime_compute_sad_16x8()
+* - ime_compute_satqd_16x16_lumainter()
+* - ime_compute_satqd_8x16_chroma()
+* - ime_compute_satqd_16x16_lumaintra()
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) at all subpel points about the src location
+*
+* @par Description
+*  Accumulates, over a MB_SIZE x MB_SIZE block, the sum of absolute differences
+*  between the source and eight reference blocks: the three half pel blocks
+*  passed in and their neighbours one column to the left and/or one row above.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ref_half_x
+*  UWORD8 pointer to half pel buffer; column -1 is read as well
+*
+* @param[in] pu1_ref_half_y
+*  UWORD8 pointer to half pel buffer; the row above is read as well
+*
+* @param[in] pu1_ref_half_xy
+*  UWORD8 pointer to half pel buffer; column -1 and the row above are read as
+*  well
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ref_strd
+*  integer ref stride (shared by the three half pel buffers)
+*
+* @param[out] pi4_sad
+*  integer evaluated sad (8 entries, cleared before accumulation)
+*  pi4_sad[0] - half x
+*  pi4_sad[1] - half x - 1
+*  pi4_sad[2] - half y
+*  pi4_sad[3] - half y - strd
+*  pi4_sad[4] - half xy
+*  pi4_sad[5] - half xy - 1
+*  pi4_sad[6] - half xy - strd
+*  pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
+                                   UWORD8 *pu1_ref_half_x,
+                                   UWORD8 *pu1_ref_half_y,
+                                   UWORD8 *pu1_ref_half_xy,
+                                   WORD32 src_strd,
+                                   WORD32 ref_strd,
+                                   WORD32 *pi4_sad)
+{
+    WORD32 i4_row, i4_col;
+
+    memset(pi4_sad, 0, 8 * sizeof(WORD32));
+
+    for(i4_row = 0; i4_row < MB_SIZE; i4_row++)
+    {
+        /* neighbours of the current row of each half pel plane */
+        UWORD8 *pu1_x_left = pu1_ref_half_x - 1;
+        UWORD8 *pu1_y_top = pu1_ref_half_y - ref_strd;
+        UWORD8 *pu1_xy_left = pu1_ref_half_xy - 1;
+        UWORD8 *pu1_xy_top = pu1_ref_half_xy - ref_strd;
+        UWORD8 *pu1_xy_top_left = pu1_xy_top - 1;
+
+        for(i4_col = 0; i4_col < MB_SIZE; i4_col++)
+        {
+            WORD32 i4_src_pel = pu1_src[i4_col];
+            WORD32 i4_diff;
+
+            /* half x plane : same column, then one column to the left */
+            i4_diff = i4_src_pel - pu1_ref_half_x[i4_col];
+            pi4_sad[0] += ABS(i4_diff);
+
+            i4_diff = i4_src_pel - pu1_x_left[i4_col];
+            pi4_sad[1] += ABS(i4_diff);
+
+            /* half y plane : same row, then one row above */
+            i4_diff = i4_src_pel - pu1_ref_half_y[i4_col];
+            pi4_sad[2] += ABS(i4_diff);
+
+            i4_diff = i4_src_pel - pu1_y_top[i4_col];
+            pi4_sad[3] += ABS(i4_diff);
+
+            /* half xy plane : same position, left, top, top left */
+            i4_diff = i4_src_pel - pu1_ref_half_xy[i4_col];
+            pi4_sad[4] += ABS(i4_diff);
+
+            i4_diff = i4_src_pel - pu1_xy_left[i4_col];
+            pi4_sad[5] += ABS(i4_diff);
+
+            i4_diff = i4_src_pel - pu1_xy_top[i4_col];
+            pi4_sad[6] += ABS(i4_diff);
+
+            i4_diff = i4_src_pel - pu1_xy_top_left[i4_col];
+            pi4_sad[7] += ABS(i4_diff);
+        }
+
+        /* step to the next row of the source and of every plane */
+        pu1_src += src_strd;
+        pu1_ref_half_x += ref_strd;
+        pu1_ref_half_y += ref_strd;
+        pu1_ref_half_xy += ref_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad at vertices of diamond grid
+* centered at reference pointer and at unit distance from it.
+*
+* @param[in] pu1_ref
+*  UWORD8 pointer to the reference (centre of the diamond); the column to the
+*  left, the column to the right, the row above and the row below the block are
+*  read
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad (4 entries, cleared first)
+*  pi4_sad[0] - left, pi4_sad[1] - right, pi4_sad[2] - top, pi4_sad[3] - bottom
+*
+* @returns sad at all evaluated vertexes
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+
+    /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
+    UWORD8 *left_ptr = pu1_ref - 1;
+    UWORD8 *right_ptr = pu1_ref + 1;
+    UWORD8 *top_ptr = pu1_ref - ref_strd;
+    UWORD8 *bot_ptr = pu1_ref + ref_strd;
+
+    /* temp var */
+    WORD32 count2, count3;
+
+    /*
+     * Distance from the end of one row of the block to the start of the next.
+     * Kept signed: an unsigned offset would wrap for a stride smaller than
+     * MB_SIZE and move the pointers forward instead of backward on targets
+     * whose pointers are wider than 32 bits.
+     */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    memset(pi4_sad, 0, 4 * sizeof(WORD32));
+
+    for(count2 = MB_SIZE; count2 > 0; count2--)
+    {
+        for(count3 = MB_SIZE; count3 > 0 ; count3--)
+        {
+            WORD32 src;
+            WORD32 diff;
+
+            src = *pu1_src++;
+
+            diff = src - *left_ptr++;
+            pi4_sad[0] += ABS(diff);
+
+            diff = src - *right_ptr++;
+            pi4_sad[1] += ABS(diff);
+
+            diff = src - *top_ptr++;
+            pi4_sad[2] += ABS(diff);
+
+            diff = src - *bot_ptr++;
+            pi4_sad[3] += ABS(diff);
+        }
+
+        bot_ptr += i4_ref_buf_offset;
+        left_ptr += i4_ref_buf_offset;
+        right_ptr += i4_ref_buf_offset;
+        top_ptr += i4_ref_buf_offset;
+
+        pu1_src += i4_cur_buf_offset;
+    }
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function evaluates the source block against three
+* reference blocks at a time, e.g. three vertices of a diamond grid.
+*
+* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
+*  UWORD8 pointers to the three reference blocks
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride (shared by the three references)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in,out] pi4_sad
+*  pointer to integer array evaluated sad; entry k belongs to reference k + 1.
+*  NOTE(review): the array is not cleared here and USADA8 presumably adds to
+*  its third argument, so the caller is expected to initialise it - confirm
+*  against the USADA8 definition.
+*
+* @returns sad at all evaluated vertexes
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_ref3,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i, j;
+
+    /*
+     * Distance from the end of one row of the block to the start of the next.
+     * Kept signed: an unsigned offset would wrap for a stride smaller than
+     * MB_SIZE and move the pointers forward instead of backward on targets
+     * whose pointers are wider than 32 bits.
+     */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    for(i = MB_SIZE; i > 0; i--)
+    {
+        /* a row is consumed in groups of 4 pixels, one USADA8 per reference */
+        for(j = 0; j < MB_SIZE; j += 4)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+            pu1_ref3 += 4;
+        }
+
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+        pu1_ref3 += i4_ref_buf_offset;
+    }
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function evaluates the source block against two
+* reference blocks at a time, e.g. two vertices of a diamond grid.
+*
+* @param[in] pu1_ref1, pu1_ref2
+*  UWORD8 pointers to the two reference blocks
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride (shared by both references)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in,out] pi4_sad
+*  pointer to integer array evaluated sad; entry k belongs to reference k + 1.
+*  NOTE(review): the array is not cleared here and USADA8 presumably adds to
+*  its third argument, so the caller is expected to initialise it - confirm
+*  against the USADA8 definition.
+*
+* @returns sad at all evaluated vertexes
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i, j;
+
+    /*
+     * Distance from the end of one row of the block to the start of the next.
+     * Kept signed: an unsigned offset would wrap for a stride smaller than
+     * MB_SIZE and move the pointers forward instead of backward on targets
+     * whose pointers are wider than 32 bits.
+     */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    for(i = MB_SIZE; i > 0; i--)
+    {
+        /* a row is consumed in groups of 4 pixels, one USADA8 per reference */
+        for(j = 0; j < MB_SIZE; j += 4)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+        }
+
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+    }
+
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par Description
+* Accumulates the sum of absolute differences row by row over 16 rows of 16
+* bytes. After every row the running sum is compared with i4_max_sad; as
+* soon as it is larger, the partial sum is reported and the remaining rows
+* are skipped. Pass a sufficiently large i4_max_sad to always get the SAD of
+* the whole block.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (early exit threshold)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial when the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16(UWORD8 *pu1_src,
+                           UWORD8 *pu1_est,
+                           WORD32 src_strd,
+                           WORD32 est_strd,
+                           WORD32 i4_max_sad,
+                           WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+
+    /*
+     * Row-to-row pointer adjustment after 16 bytes were consumed. Kept
+     * signed so that adding it to a pointer wider than 32 bits stays
+     * correct even for a stride smaller than the block width.
+     */
+    WORD32 i4_src_row_skip = src_strd - 16;
+    WORD32 i4_est_row_skip = est_strd - 16;
+    UWORD32 i, j;
+
+    GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
+
+    for(i = 16; i > 0; i--)
+    {
+        /* one row is consumed as four groups of 4 bytes */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        /* early exit: the partial sum already exceeds the allowed maximum */
+        if(i4_max_sad < i4_sad)
+        {
+            GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
+            break;
+        }
+
+        pu1_src += i4_src_row_skip;
+        pu1_est += i4_est_row_skip;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par Description
+* Approximates the SAD of a 16x16 block by visiting only every second row
+* (8 rows of 16 bytes, starting with the first) and doubling the
+* accumulated sum. There is no early exit in this variant.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* unused; present only to match the common SAD function signature
+*
+* @param[out] pi4_mb_distortion
+* twice the SAD accumulated over the visited rows
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+
+    /*
+     * After 16 bytes of a row were consumed, jump to the start of the row
+     * two lines further down. Kept signed so that adding it to a pointer
+     * wider than 32 bits stays correct even for small strides.
+     */
+    WORD32 i4_src_row_skip = 2 * src_strd - 16;
+    WORD32 i4_est_row_skip = 2 * est_strd - 16;
+    UWORD32 i, j;
+
+    UNUSED(i4_max_sad);
+
+    /* 8 iterations: every other row of the 16 row block */
+    for(i = 0; i < 8; i++)
+    {
+        /* one row is consumed as four groups of 4 bytes */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        pu1_src += i4_src_row_skip;
+        pu1_est += i4_est_row_skip;
+    }
+
+    /* scale up to compensate for the skipped rows */
+    *pi4_mb_distortion = (i4_sad << 1);
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 8x8 blocks
+*
+* @par Description
+* Accumulates the sum of absolute differences over 8 rows of 8 bytes. After
+* every row the running sum is compared with i4_max_sad; as soon as it is
+* larger, the partial sum is reported and the remaining rows are skipped.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (early exit threshold)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial when the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+ */
+
+void ime_compute_sad_8x8(UWORD8 *pu1_src,
+                         UWORD8 *pu1_est,
+                         WORD32 src_strd,
+                         WORD32 est_strd,
+                         WORD32 i4_max_sad,
+                         WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_diff;
+    WORD32 row, col;
+
+    for(row = 0; row < 8; row++)
+    {
+        for(col = 0; col < 8; col++)
+        {
+            /* keep the difference in a variable: ABS evaluates its argument twice */
+            i4_diff = pu1_src[col] - pu1_est[col];
+            i4_sad += ABS(i4_diff);
+        }
+
+        /* early exit: the partial sum already exceeds the allowed maximum */
+        if(i4_max_sad < i4_sad)
+        {
+            break;
+        }
+
+        /*
+         * Step by the signed strides directly instead of through an
+         * unsigned "stride - width" offset, which would be wrong on targets
+         * with pointers wider than 32 bits when a stride is below 8.
+         */
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 4x4 blocks
+*
+* @par Description
+* Accumulates the sum of absolute differences over 4 rows, one USADA8
+* invocation per row. The whole block is always evaluated; there is no
+* early exit.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* unused; present only to match the common SAD function signature
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_4x4
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_acc = 0;
+    WORD32 rows_left;
+
+    UNUSED(i4_max_sad);
+
+    /* first row */
+    USADA8(pu1_src, pu1_est, i4_acc);
+
+    /* remaining three rows: step down one line, then accumulate */
+    for(rows_left = 3; rows_left > 0; rows_left--)
+    {
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+
+        USADA8(pu1_src, pu1_est, i4_acc);
+    }
+
+    *pi4_mb_distortion = i4_acc;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x8 blocks
+*
+* @par Description
+* Accumulates the sum of absolute differences over 8 rows of 16 bytes.
+* After every row the running sum is compared with i4_max_sad; as soon as
+* it is larger, the partial sum is reported and the remaining rows are
+* skipped.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (early exit threshold)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial when the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_diff;
+    WORD32 col;
+    UWORD32 i;
+
+    GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
+
+    for(i = 8; i > 0; i--)
+    {
+        for(col = 0; col < 16; col++)
+        {
+            /* keep the difference in a variable: ABS evaluates its argument twice */
+            i4_diff = pu1_src[col] - pu1_est[col];
+            i4_sad += ABS(i4_diff);
+        }
+
+        /* early exit: the partial sum already exceeds the allowed maximum */
+        if(i4_max_sad < i4_sad)
+        {
+            GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
+            break;
+        }
+
+        /*
+         * Step by the signed strides directly instead of through an
+         * unsigned "stride - width" offset, which would be wrong on targets
+         * with pointers wider than 32 bits when a stride is below 16.
+         */
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks with a single
+* early-exit check after half of the rows
+*
+* @par Description
+* The SAD is accumulated in two passes. The first pass covers rows
+* 0, 2, ..., 14. If the sum of that pass already exceeds i4_max_sad it is
+* reported as is and the second pass is skipped. Otherwise the second pass
+* adds rows 1, 3, ..., 15, giving the SAD of the whole block.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (early exit threshold)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (even rows only when the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
+                               UWORD8 *pu1_est,
+                               WORD32 src_strd,
+                               WORD32 est_strd,
+                               WORD32 i4_max_sad,
+                               WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_diff;
+    WORD32 pass, row, col;
+    UWORD8 *pu1_src_row;
+    UWORD8 *pu1_est_row;
+
+    /* pass 0: even rows, pass 1: odd rows */
+    for(pass = 0; pass < 2; pass++)
+    {
+        pu1_src_row = pu1_src + pass * src_strd;
+        pu1_est_row = pu1_est + pass * est_strd;
+
+        for(row = 0; row < 8; row++)
+        {
+            for(col = 0; col < 16; col++)
+            {
+                /* keep the difference in a variable: ABS evaluates its argument twice */
+                i4_diff = pu1_src_row[col] - pu1_est_row[col];
+                i4_sad += ABS(i4_diff);
+            }
+
+            /*
+             * Two lines down. The step is computed from the signed strides
+             * rather than from an unsigned "stride - width" offset, which
+             * would be wrong on targets with pointers wider than 32 bits
+             * when a stride is below 16.
+             */
+            pu1_src_row += 2 * src_strd;
+            pu1_est_row += 2 * est_strd;
+        }
+
+        /* the only early-exit check sits between the two passes */
+        if((pass == 0) && (i4_max_sad < i4_sad))
+        {
+            break;
+        }
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief computes the SAD between two 16x16 blocks and flags whether any of
+* its 4x4 sub-blocks reaches one of the supplied thresholds
+*
+* @par Description
+* The block is processed as a 4x4 grid of 4x4 sub-blocks. For every
+* sub-block four partial sums of absolute differences are formed (outer and
+* inner columns of the outer and inner rows). Their total is added to the
+* reported distortion. Until the first sub-block trips a threshold, nine
+* values derived from the partial sums are compared against pu2_thrsh[0..8];
+* if any threshold is less than or equal to its value the non-zero flag is
+* set and no further threshold checks are made.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated (reference) block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] pu2_thrsh
+* array of nine thresholds (entries 0 to 8 are read)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad of the whole 16x16 block
+*
+* @param[out] pu4_is_non_zero
+* set to 1 if any checked sub-block reached a threshold, 0 otherwise
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
+                                       UWORD8 *pu1_est,
+                                       WORD32 src_strd,
+                                       WORD32 est_strd,
+                                       UWORD16 *pu2_thrsh,
+                                       WORD32 *pi4_mb_distortion,
+                                       UWORD32 *pu4_is_non_zero)
+{
+    UWORD32 blk_row, blk_col, k;
+    WORD32 row;
+    UWORD32 u4_non_zero = 0;
+
+    /* per 4x4 sub-block: column-pair sums of each row, and their groupings */
+    WORD16 outer[4], inner[4];
+    WORD16 s1, s2, s3, s4, sad_1, sad_2;
+
+    /* value compared against pu2_thrsh[k], for k = 0..8 */
+    WORD16 level[9];
+
+    UWORD8 *pu1_src_blk, *pu1_est_blk;
+    UWORD8 *pu1_src_row, *pu1_est_row;
+
+    (*pi4_mb_distortion) = 0;
+
+    for(blk_row = 0; blk_row < 4; blk_row++)
+    {
+        for(blk_col = 0; blk_col < 4; blk_col++)
+        {
+            pu1_src_blk = pu1_src + 4 * blk_col;
+            pu1_est_blk = pu1_est + 4 * blk_col;
+
+            for(row = 0; row < 4; row++)
+            {
+                pu1_src_row = pu1_src_blk + row * src_strd;
+                pu1_est_row = pu1_est_blk + row * est_strd;
+
+                /* columns 0 and 3 */
+                outer[row] = ABS((WORD16)pu1_src_row[0] - (WORD16)pu1_est_row[0])
+                           + ABS((WORD16)pu1_src_row[3] - (WORD16)pu1_est_row[3]);
+
+                /* columns 1 and 2 */
+                inner[row] = ABS((WORD16)pu1_src_row[1] - (WORD16)pu1_est_row[1])
+                           + ABS((WORD16)pu1_src_row[2] - (WORD16)pu1_est_row[2]);
+            }
+
+            /* rows 0 and 3 feed s1/s4, rows 1 and 2 feed s2/s3 */
+            s1 = outer[0] + outer[3];
+            s4 = inner[0] + inner[3];
+            s2 = outer[1] + outer[2];
+            s3 = inner[1] + inner[2];
+
+            sad_1 = s1 + s2 + s3 + s4;
+
+            /* thresholds are only examined until the flag has been raised */
+            if(u4_non_zero == 0)
+            {
+                sad_2 = sad_1 << 1;
+
+                level[0] = sad_2 - (s1 + s4);
+                level[1] = sad_2 - (s2 + s3);
+                level[2] = sad_2 - (s1 - (s3 << 1));
+                level[3] = sad_2 - (s4 - (s2 << 1));
+                level[4] = sad_2 - (s1 + s2);
+                level[5] = sad_2 - (s3 + s4);
+                level[6] = sad_2 - (s2 - (s4 << 1));
+                level[7] = sad_2 - (s3 - (s1 << 1));
+                level[8] = sad_1;
+
+                for(k = 0; k < 9; k++)
+                {
+                    if(pu2_thrsh[k] <= level[k])
+                    {
+                        u4_non_zero = 1;
+                        break;
+                    }
+                }
+            }
+
+            (*pi4_mb_distortion) += sad_1;
+        }
+
+        /* next band of four lines */
+        pu1_src += (src_strd * 4);
+        pu1_est += (est_strd * 4);
+    }
+
+    *pu4_is_non_zero = u4_non_zero;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief threshold check on absolute differences between 2 interleaved
+* chroma blocks
+*
+*
+* @par Description
+* For each of the two interleaved planes (byte offset 0, then byte offset 1)
+* the function walks a 4x4 grid of sub-blocks. Within a sub-block it reads
+* the bytes at offsets 0, 2, 4 and 6 of four consecutive lines and builds
+* four partial sums of absolute differences. Values derived from those
+* sums are compared with thrsh[1..15]; the function returns as soon as one
+* threshold is not greater than its value.
+* After a whole plane, the accumulated sad is compared with thrsh[0] << 1;
+* the function returns if it is not smaller, otherwise the sum is reset
+* and the second plane is processed.
+* There is no provision for early exit based on max_sad.
+*
+* NOTE(review): the function returns void and has no output parameter, so
+* none of the decisions made here is visible to the caller.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] max_sad
+* unused
+*
+* @param[in] thrsh
+* array of sixteen thresholds (entries 0 to 15 are read)
+*
+* @remarks
+* Function code is not updated.
+* Will require debugging and minor modifications
+*
+******************************************************************************
+*/
+void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 max_sad,
+ UWORD16 *thrsh)
+{
+ WORD32 i,j,plane;
+ WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+ UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
+ WORD32 sad =0;
+ UNUSED(max_sad);
+
+ pu1_src_plane = pu1_src; /* remembered so the second plane can restart from here */
+ pu1_est_plane = pu1_est;
+
+ for(plane =0;plane<2;plane++)
+ {
+ for(i=0;i<4;i++)
+ {
+ for(j=0;j<4;j++)
+ {
+ pu1_src_lp = pu1_src + 8*j; /* sub-blocks are 8 bytes apart in the interleaved row */
+ pu1_est_lp = pu1_est + 8*j;
+
+ s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ sad_1 = s1+s2+s3+s4; /* sum of absolute differences of this sub-block */
+ sad_2 = sad_1<<1;
+
+ ls1 = sad_2 -(s2 + s3);
+ ls2 = sad_2 -(s1 + s4);
+ ls3 = sad_2 -(s3 + s4);
+ ls4 = sad_2 -(s3 - (s1<<1));
+ ls5 = sad_2 -(s4 - (s2<<1));
+ ls6 = sad_2 -(s1 + s2);
+ ls7 = sad_2 -(s2 - (s4<<1));
+ ls8 = sad_2 -(s1 - (s3<<1));
+
+ if(
+ //thrsh[0] > sad_1 && (skipped here: thrsh[0] is compared once per plane below)
+ thrsh[1] > ls1 &&
+ thrsh[2] > sad_1 &&
+ thrsh[3] > ls2 &&
+
+ thrsh[4] > ls3 &&
+ thrsh[5] > ls4 &&
+ thrsh[6] > ls3 &&
+ thrsh[7] > ls5 &&
+
+ thrsh[8] > sad_1 &&
+ thrsh[9] > ls1 &&
+ thrsh[10]> sad_1 &&
+ thrsh[11]> ls2 &&
+
+ thrsh[12]> ls6 &&
+ thrsh[13]> ls7 &&
+ thrsh[14]> ls6 &&
+ thrsh[15]> ls8
+ )
+ {
+ /* every threshold is above its value: nothing to do, keep going */
+ }
+ else
+ return ; /* at least one threshold reached: stop without reporting anything */
+
+ sad += sad_1;
+ }
+ pu1_src += (src_strd *4); /* next band of four lines */
+ pu1_est += (est_strd *4);
+ }
+ if(sad < (thrsh[0]<<1))sad = 0; /* per-plane check against thrsh[0]; restart the sum for the next plane */
+ else return ;
+
+ pu1_src = pu1_src_plane+1; /* second interleaved plane starts one byte in */
+ pu1_est = pu1_est_plane+1;
+ }
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes the SAD between 2 16x16 blocks and, per 4x4 sub-block,
+* a flag telling whether the sub-block reaches any of the supplied
+* thresholds
+*
+* @par Description
+* The block is processed as four bands of four 4x4 sub-blocks. For every
+* sub-block four partial sums of absolute differences are formed and
+* values derived from them are compared with thrsh[1..15]. The sub-block's
+* flag is cleared only when every threshold is greater than its value.
+* The SAD of every sub-block is added to the reported distortion.
+* Finally the total is compared with thrsh[0] << 2 to produce the first
+* flag; if that flag and all sub-block flags are zero, the reported
+* distortion is forced to zero.
+* There is no provision for early exit.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] max_sad
+* unused
+*
+* @param[in] thrsh
+* array of sixteen thresholds (entries 0 to 15 are read)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @param[out] sig_nz_sad
+* array of 17 flags: entry 0 is the result of the thrsh[0] comparison on
+* the whole block, entries 1 to 16 belong to the sub-blocks in raster order
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
+                                       UWORD8 *pu1_est,
+                                       WORD32 src_strd,
+                                       WORD32 est_strd,
+                                       WORD32 max_sad,
+                                       UWORD16 *thrsh,
+                                       WORD32 *pi4_mb_distortion,
+                                       UWORD8 *sig_nz_sad)
+{
+    UWORD32 band, blk, k;
+    WORD32 row;
+
+    /* partial sums and doubled SAD of the four sub-blocks of one band */
+    WORD16 s1[4], s2[4], s3[4], s4[4], sad_x2[4];
+    WORD16 outer[4], inner[4];
+
+    /* value compared against thrsh[k]; entry 0 is not used */
+    WORD32 level[16];
+
+    UWORD8 *pu1_src_blk, *pu1_est_blk;
+    UWORD8 *pu1_src_row, *pu1_est_row;
+
+    /* entry 0 holds the whole-block flag, sub-block flags follow it */
+    UWORD8 *pu1_dc_flag = sig_nz_sad;
+    UWORD8 *pu1_blk_flag = sig_nz_sad + 1;
+    UWORD32 u4_any_non_zero = 0;
+
+    UNUSED(max_sad);
+    *pi4_mb_distortion = 0;
+
+    for(band = 0; band < 4; band++)
+    {
+        /* first gather the partial sums of all four sub-blocks of the band */
+        for(blk = 0; blk < 4; blk++)
+        {
+            pu1_src_blk = pu1_src + 4 * blk;
+            pu1_est_blk = pu1_est + 4 * blk;
+
+            for(row = 0; row < 4; row++)
+            {
+                pu1_src_row = pu1_src_blk + row * src_strd;
+                pu1_est_row = pu1_est_blk + row * est_strd;
+
+                /* columns 0 and 3 */
+                outer[row] = ABS((WORD16)pu1_src_row[0] - (WORD16)pu1_est_row[0])
+                           + ABS((WORD16)pu1_src_row[3] - (WORD16)pu1_est_row[3]);
+
+                /* columns 1 and 2 */
+                inner[row] = ABS((WORD16)pu1_src_row[1] - (WORD16)pu1_est_row[1])
+                           + ABS((WORD16)pu1_src_row[2] - (WORD16)pu1_est_row[2]);
+            }
+
+            /* rows 0 and 3 feed s1/s4, rows 1 and 2 feed s2/s3 */
+            s1[blk] = outer[0] + outer[3];
+            s4[blk] = inner[0] + inner[3];
+            s2[blk] = outer[1] + outer[2];
+            s3[blk] = inner[1] + inner[2];
+
+            sad_x2[blk] = ((s1[blk] + s2[blk] + s3[blk] + s4[blk]) << 1);
+        }
+
+        /* then classify each sub-block and accumulate its SAD */
+        for(blk = 0; blk < 4; blk++)
+        {
+            UWORD8 u1_non_zero = 0;
+
+            level[1]  = sad_x2[blk] - (s2[blk] + s3[blk]);
+            level[2]  = sad_x2[blk] >> 1;
+            level[3]  = sad_x2[blk] - (s1[blk] + s4[blk]);
+
+            level[4]  = sad_x2[blk] - (s3[blk] + s4[blk]);
+            level[5]  = sad_x2[blk] - (s3[blk] - (s1[blk] << 1));
+            level[6]  = level[4];
+            level[7]  = sad_x2[blk] - (s4[blk] - (s2[blk] << 1));
+
+            level[8]  = level[2];
+            level[9]  = level[1];
+            level[10] = level[2];
+            level[11] = level[3];
+
+            level[12] = sad_x2[blk] - (s1[blk] + s2[blk]);
+            level[13] = sad_x2[blk] - (s2[blk] - (s4[blk] << 1));
+            level[14] = level[12];
+            level[15] = sad_x2[blk] - (s1[blk] - (s3[blk] << 1));
+
+            /* thrsh[0] is handled once for the whole block further down */
+            for(k = 1; k < 16; k++)
+            {
+                if(thrsh[k] <= level[k])
+                {
+                    u1_non_zero = 1;
+                    break;
+                }
+            }
+
+            pu1_blk_flag[blk] = u1_non_zero;
+            if(u1_non_zero)
+            {
+                u4_any_non_zero = 1;
+            }
+
+            (*pi4_mb_distortion) += (sad_x2[blk] >> 1);
+        }
+
+        /* next band: four flags and four lines further */
+        pu1_blk_flag += 4;
+        pu1_src += (src_strd * 4);
+        pu1_est += (est_strd * 4);
+    }
+
+    if((*pi4_mb_distortion) < (thrsh[0] << 2))
+    {
+        *pu1_dc_flag = 0;
+
+        /* nothing flagged anywhere: report zero distortion */
+        if(u4_any_non_zero == 0)
+        {
+            (*pi4_mb_distortion) = 0;
+        }
+    }
+    else
+    {
+        *pu1_dc_flag = 1;
+    }
+}
+
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
new file mode 100755
index 0000000..a30e1fc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.h
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.h
+*
+* @brief
+* This file contains declarations of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IME_DISTORTION_METRICS_H_
+#define IME_DISTORTION_METRICS_H_
+
+
+/*****************************************************************************/
+/* Type definitions for function prototypes */
+/*****************************************************************************/
+
+/* SAD between a source block and one estimated block, with an early exit limit */
+typedef void ime_compute_sad_ft(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion);
+
+/* SADs of a source block against positions derived from a single reference pointer */
+typedef void ime_compute_sad4_diamond(UWORD8 *pu1_ref,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+/* SADs of a source block against three explicitly supplied reference blocks */
+typedef void ime_compute_sad3_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_ref3,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+/* SADs of a source block against two explicitly supplied reference blocks */
+typedef void ime_compute_sad2_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+
+/* SADs of a source block against half-pel x, y and xy reference planes */
+typedef void ime_sub_pel_compute_sad_16x16_ft(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_ref_half_x,
+                                              UWORD8 *pu1_ref_half_y,
+                                              UWORD8 *pu1_ref_half_xy,
+                                              WORD32 src_strd,
+                                              WORD32 ref_strd,
+                                              WORD32 *pi4_sad);
+
+/*
+ * SAD plus a threshold based flag.
+ * NOTE(review): the flag is named pu4_is_zero here; confirm its polarity
+ * against the implementations of this type.
+ */
+typedef void ime_compute_sad_stat(UWORD8 *pu1_src,
+                                  UWORD8 *pu1_est,
+                                  WORD32 src_strd,
+                                  WORD32 est_strd,
+                                  UWORD16 *pu2_thrsh,
+                                  WORD32 *pi4_mb_distortion,
+                                  UWORD32 *pu4_is_zero);
+
+/*
+ * SAD plus a flag that is set to 1 when a threshold is reached. The last
+ * parameter is named after what ime_compute_satqd_16x16_lumainter stores
+ * through it (1 = non zero).
+ */
+typedef void ime_compute_satqd_16x16_lumainter_ft(UWORD8 *pu1_src,
+                                                  UWORD8 *pu1_est,
+                                                  WORD32 src_strd,
+                                                  WORD32 est_strd,
+                                                  UWORD16 *pu2_thrsh,
+                                                  WORD32 *pi4_mb_distortion,
+                                                  UWORD32 *pu4_is_non_zero);
+
+/* threshold check on interleaved chroma blocks; produces no output */
+typedef void ime_compute_satqd_8x16_chroma_ft(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_est,
+                                              WORD32 src_strd,
+                                              WORD32 est_strd,
+                                              WORD32 i4_max_sad,
+                                              UWORD16 *thrsh);
+
+/* SAD plus one flag for the whole block and one per 4x4 sub-block */
+typedef void ime_compute_satqd_16x16_lumaintra_ft(UWORD8 *pu1_src,
+                                                  UWORD8 *pu1_est,
+                                                  WORD32 src_strd,
+                                                  WORD32 est_strd,
+                                                  WORD32 i4_max_sad,
+                                                  UWORD16 *thrsh,
+                                                  WORD32 *pi4_mb_distortion,
+                                                  UWORD8 *sig_nz_sad);
+
+/*****************************************************************************/
+/* Extern Function Declarations (each name is declared with one of the */
+/* function types defined above, so all variants of a kernel share one */
+/* signature) */
+/*****************************************************************************/
+
+ime_compute_sad_ft ime_compute_sad_16x16;
+ime_compute_sad_ft ime_compute_sad_16x16_fast;
+ime_compute_sad_ft ime_compute_sad_16x8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8;
+ime_compute_sad_ft ime_compute_sad_8x8;
+ime_compute_sad_ft ime_compute_sad_4x4;
+ime_compute_sad4_diamond ime_calculate_sad4_prog;
+ime_compute_sad3_diamond ime_calculate_sad3_prog;
+ime_compute_sad2_diamond ime_calculate_sad2_prog;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16;
+ime_compute_sad_stat ime_compute_16x16_sad_stat;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter;
+ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma;
+ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra;
+
+/* SSE4.2 declarations (_sse42 suffix) */
+ime_compute_sad_ft ime_compute_sad_16x16_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42;
+ime_compute_sad_ft ime_compute_sad_16x8_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_sse42;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_sse42;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_sse42;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_sse42;
+
+/* ARM assembly declarations (_a9q suffix; sources live under encoder/arm) */
+ime_compute_sad_ft ime_compute_sad_16x16_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_a9q;
+ime_compute_sad_ft ime_compute_sad_16x8_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_a9q;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_a9q;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_a9q;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_a9q;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_a9q;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_a9; /* NOTE(review): suffix is _a9, not _a9q like its neighbours; confirm against the assembly symbol */
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_a9q;
+
+
+/* ARMv8 assembly declarations (_av8 suffix; sources live under encoder/armv8) */
+ime_compute_sad_ft ime_compute_sad_16x16_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_av8;
+ime_compute_sad_ft ime_compute_sad_16x8_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_av8;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_av8;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_av8;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_av8;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_av8;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8;
+
+
+#endif /* IME_DISTORTION_METRICS_H_ */
+
+
diff --git a/encoder/ime_macros.h b/encoder/ime_macros.h
new file mode 100755
index 0000000..a7b8c65
--- /dev/null
+++ b/encoder/ime_macros.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_macros.h
+*
+* @brief
+* Generic helper macros (ABS, MAX, MIN, CLIP3, UNUSED) used in the code
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_MACROS_H_
+#define _IME_MACROS_H_
+
+/*
+ * Note: these are plain function-like macros, so an argument may be
+ * evaluated more than once; do not pass expressions with side effects.
+ * Every argument is parenthesised so that operators of lower precedence
+ * than the comparison (e.g. a & b, a ? b : c) expand as intended.
+ */
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+/* clamp y to the closed range [miny, maxy] */
+#define CLIP3(miny, maxy, y) (((y) < (miny)) ? (miny) : (((y) > (maxy)) ? (maxy) : (y)))
+
+/* silences "unused parameter/variable" warnings */
+#define UNUSED(x) ((void)(x))
+
+#endif /*_IME_MACROS_H_*/
diff --git a/encoder/ime_statistics.h b/encoder/ime_statistics.h
new file mode 100755
index 0000000..eeacaf2
--- /dev/null
+++ b/encoder/ime_statistics.h
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_statistics.h
+*
+* @brief
+* Compile-time switches and macros for SAD early exit statistics and debug histograms
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_STATISTICS_H_
+#define _IME_STATISTICS_H_
+#define DEBUG_HISTOGRAM_ENABLE 0 /* 1 maps the DEBUG_*HISTOGRAM* macros to debug_* calls, 0 makes them expand to nothing */
+#define SAD_EXIT_STATS 0 /* 1 makes the GATHER_*_SAD_EE_STATS macros count, 0 makes them expand to nothing */
+
+
+#if SAD_EXIT_STATS
+
+/**
+******************************************************************************
+* @brief Counters for SAD early exit statistics. While computing sad, how
+* often we should check whether the sad computed till now has exceeded the
+* min sad param is chosen statistically; these arrays hold the counts that
+* the GATHER macros below increment.
+******************************************************************************
+*/
+extern UWORD32 gu4_16x16_sad_ee_stats[16+1];
+extern UWORD32 gu4_16x8_sad_ee_stats[8+1];
+
+/**
+******************************************************************************
+* @brief print sad early exit stats
+******************************************************************************
+*/
+extern void print_sad_ee_stats(void);
+
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) \
+ gu4_16x16_sad_ee_stats[i]++; /* increments entry i of the array passed in; the macro already ends in ';' */
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) \
+ gu4_16x8_sad_ee_stats[i]++; /* increments entry i of the array passed in; the macro already ends in ';' */
+
+#else
+
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) /* statistics disabled: expands to nothing */
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) /* statistics disabled: expands to nothing */
+
+#endif
+
+
+#if DEBUG_HISTOGRAM_ENABLE
+#define DEBUG_HISTOGRAM_INIT() debug_histogram_init()
+#define DEBUG_HISTOGRAM_DUMP(condition) if(condition) debug_histogram_dump()
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y) debug_mv_histogram_add(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level) debug_sad_histogram_add(sad, level)
+#else
+#define DEBUG_HISTOGRAM_INIT()
+#define DEBUG_HISTOGRAM_DUMP(condition)
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level)
+#endif
+
+
+
+#endif /*_IME_STATISTICS_H_*/
diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h
new file mode 100755
index 0000000..7819b91
--- /dev/null
+++ b/encoder/ime_structs.h
@@ -0,0 +1,305 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
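+ * ime_structs.h
+ *
+ * @brief
+ * Structure definitions for motion estimation (motion vector, MB partition
+ * info and the motion estimation context)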
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * -
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IME_STRUCTS_H_
+#define _IME_STRUCTS_H_
+
+/**
+ * Motion vector
+ */
+typedef struct
+{
+ /**
+ * Horizontal component of the motion vector
+ */
+ WORD16 i2_mvx;
+
+ /**
+ * Vertical component of the motion vector
+ */
+ WORD16 i2_mvy;
+} ime_mv_t;
+
+
+/**
+**************************************************************************
+* @brief mb_part_ctxt
+*
+* Structure that would hold the information for individual MB partitions
+* gathered during the full pel ME stage
+**************************************************************************
+*/
+typedef struct
+{
+ /**
+ * best mvs
+ */
+ ime_mv_t s_mv_curr;
+
+ /**
+ * mv predictor
+ */
+ ime_mv_t s_mv_pred;
+
+ /**
+ * SAD associated with the MB partition
+ */
+ WORD32 i4_mb_distortion;
+
+ /**
+ * cost for the MB partition
+ */
+ WORD32 i4_mb_cost;
+
+ /**
+ * Search position for least cost among the list of candidates
+ */
+ UWORD32 u4_srch_pos_idx;
+
+ /**
+ * NOTE(review): the comment here used to be a copy of the one on
+ * u4_srch_pos_idx. Presumably an exit flag for the search - confirm
+ * against the code that sets it.
+ */
+ UWORD32 u4_exit;
+
+ /*
+ * Buffer corresponding to best half pel cost
+ */
+ UWORD8 *pu1_best_hpel_buf;
+
+} mb_part_ctxt;
+
+
+/**
+**************************************************************************
+* @brief me_ctxt_t
+*
+* Structure encapsulating the parameters used in the motion estimation
+* context
+**************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Ref pointer to current MB luma
+ */
+ UWORD8 *pu1_ref_buf_luma;
+
+ /**
+ * Src pointer to current MB luma
+ */
+ UWORD8 *pu1_src_buf_luma;
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Offset for half pel x plane from the pic buf
+ */
+ UWORD32 u4_half_x_offset;
+
+ /**
+ * Offset for half pel y plane from half x plane
+ */
+ UWORD32 u4_half_y_offset;
+
+ /**
+ * Offset for half pel xy plane from half y plane
+ */
+ UWORD32 u4_half_xy_offset;
+
+ /**
+ * Search range in the X, Y axis in terms of pixels
+ * (NOTE(review): elements are WORD32 although the name carries an i2 prefix)
+ */
+ WORD32 ai2_srch_boundaries[2];
+
+ /**
+ * Search range in the north direction in terms of pixels
+ */
+ WORD32 i4_srch_range_n;
+
+ /**
+ * Search range in the south direction in terms of pixels
+ */
+ WORD32 i4_srch_range_s;
+
+ /**
+ * Search range in the east direction in terms of pixels
+ */
+ WORD32 i4_srch_range_e;
+
+ /**
+ * Search range in the west direction in terms of pixels
+ */
+ WORD32 i4_srch_range_w;
+
+ /**
+ * left mb motion vector
+ */
+ ime_mv_t s_left_mv;
+
+ /**
+ * top left mb motion vector
+ */
+ ime_mv_t s_top_left_mv;
+
+ /**
+ * Number of valid candidates for the Initial search position
+ */
+ UWORD32 u4_num_candidates;
+
+ /**
+ * Motion vector candidates for the initial search position (5 entries).
+ * NOTE(review): the earlier wording spoke of predictors for "each of the
+ * six block partitions", which does not match the array size - confirm
+ */
+ ime_mv_t as_mv_init_search[5];
+
+ /**
+ * mv bits
+ */
+ UWORD8 *pu1_mv_bits;
+
+ /**
+ * lambda (lagrange multiplier for cost computation)
+ */
+ UWORD32 u4_lambda_motion;
+
+ /**
+ * enabled fast sad computation
+ */
+ UWORD32 u4_enable_fast_sad;
+
+ /*
+ * Enable SKIP block prediction based on SATQD
+ */
+ UWORD32 u4_enable_stat_sad;
+
+ /*
+ * Minimum distortion to search for
+ * */
+ WORD32 i4_min_sad;
+
+ /*
+ * Signal that minimum sad has been reached in ME
+ * */
+ UWORD32 u4_min_sad_reached;
+
+ /**
+ * Flag to enable/disable half pel motion estimation
+ */
+ UWORD32 u4_enable_hpel;
+
+ /**
+ * Diamond search Iteration Max Cnt
+ */
+ UWORD32 u4_num_layers;
+
+ /**
+ * encoder me speed
+ */
+ UWORD32 u4_me_speed_preset;
+
+ /* NOTE(review): presumably non-zero when the left MB is intra coded - confirm */
+ UWORD32 u4_left_is_intra;
+
+ /* NOTE(review): presumably non-zero when the left MB is a skip MB - confirm */
+ UWORD32 u4_left_is_skip;
+
+ /**
+ * Structure to store the MB partition info
+ */
+ mb_part_ctxt s_mb_part;
+ /*
+ * Threshold to compare the sad with
+ */
+ UWORD16 *pu2_sad_thrsh;
+
+ /**
+ * fn ptrs for compute sad routines
+ */
+ ime_compute_sad_ft *pf_ime_compute_sad_16x16[2];
+ ime_compute_sad_ft *pf_ime_compute_sad_16x8;
+ ime_compute_sad4_diamond *pf_ime_compute_sad4_diamond;
+ ime_compute_sad3_diamond *pf_ime_compute_sad3_diamond;
+ ime_compute_sad2_diamond *pf_ime_compute_sad2_diamond;
+ ime_sub_pel_compute_sad_16x16_ft *pf_ime_sub_pel_compute_sad_16x16;
+
+ /*
+ * Function pointer for SATQD
+ */
+ ime_compute_sad_stat *pf_ime_compute_sad_stat_luma_16x16;
+
+ /**
+ * Qp
+ */
+ UWORD8 u1_mb_qp;
+
+ /*
+ * Buffers for holding half_x , half_y and half_xy
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_x;
+ UWORD8 *pu1_half_y;
+ UWORD8 *pu1_half_xy;
+
+
+ /*
+ * Buffers to store the best halfpel plane*
+ */
+ UWORD8 *pu1_hpel_buf;
+
+ /*
+ * Stride for hpel buffer
+ */
+ UWORD32 u4_hpel_buf_strd;
+
+ /*
+ * NOTE(review): declared WORD32 (signed) despite the u4_ prefix; how this
+ * differs from u4_hpel_buf_strd is not visible in this header - confirm
+ */
+ WORD32 u4_hp_buf_strd;
+
+} me_ctxt_t;
+
+
+#endif // _IME_STRUCTS_H_
+
diff --git a/encoder/ime_typedefs.h b/encoder/ime_typedefs.h
new file mode 100755
index 0000000..d36632d
--- /dev/null
+++ b/encoder/ime_typedefs.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_typedefs.h
+*
+* @brief
+* Type definitions used in the code
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_TYPEDEFS_H_
+#define _IME_TYPEDEFS_H_
+
+
+/*
+* Fixed-width integer aliases.
+* The 64-bit aliases use 'long long': plain 'long' is only guaranteed to be
+* 32 bits wide and is 32 bits on ILP32 (e.g. 32-bit ARM) and LLP64 targets.
+*/
+typedef unsigned char UWORD8;
+typedef unsigned short UWORD16;
+typedef unsigned int UWORD32;
+typedef unsigned long long UWORD64;
+
+typedef signed char WORD8;
+typedef short WORD16;
+typedef int WORD32;
+typedef long long WORD64;
+
+typedef char CHAR;
+
+#endif /*_IME_TYPEDEFS_H_*/
diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c
new file mode 100755
index 0000000..1dfd9de
--- /dev/null
+++ b/encoder/irc_bit_allocation.c
@@ -0,0 +1,859 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/** Includes */
+#include <stdio.h>
+#include <string.h>
+#include "irc_datatypes.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_trace_support.h"
+
+/** Macros **/
+/*
+ * Smaller of x and y. The whole expansion is parenthesised so that it can be
+ * used inside larger expressions; note that each argument may be evaluated
+ * twice, so do not pass expressions with side effects.
+ */
+#define MIN(x,y) (((x) < (y)) ? (x) : (y))
+
+/*
+ * Remaining-bits-in-period state: the running bit budget together with the
+ * inputs it was last computed from (see init_rbip).
+ */
+typedef struct
+{
+ /* using var_q number as it can cross 31 bits for large intra frameinterval */
+ number_t vq_rem_bits_in_period;
+
+ /* Storing inputs */
+ /* Total frames in a GOP, summed over all picture types */
+ WORD32 i4_tot_frms_in_gop;
+
+ /* Number of intra frame intervals (GOPs) the period spans */
+ WORD32 i4_num_intra_frm_interval;
+
+ /* Average bits per frame the budget was computed with */
+ WORD32 i4_bits_per_frm;
+
+} rem_bit_in_prd_t;
+
+/* Bit allocation module state */
+typedef struct bit_allocation_t
+{
+ /* Remaining bits in period bookkeeping */
+ rem_bit_in_prd_t s_rbip;
+
+ /*
+ * A universal constant giving the relative complexity between pictures.
+ * The I_PIC entry is initialised to (1 << K_Q). Elements are WORD32
+ * although the name carries an i2 prefix.
+ */
+ WORD32 i2_K[MAX_PIC_TYPE];
+
+ /* To get a estimate of the header bits consumed */
+ WORD32 i4_prev_frm_header_bits[MAX_PIC_TYPE];
+
+ /* Average bits per frame */
+ WORD32 i4_bits_per_frm;
+
+ /* Num gops currently used for allocation; raised on scene change */
+ WORD32 i4_num_gops_in_period;
+
+ /* Num gops as set by rate control module */
+ WORD32 i4_actual_num_gops_in_period;
+
+ /* Initialised to 0 by irc_ba_init_bit_allocation */
+ number_t vq_saved_bits;
+
+ /* Per-frame bit limit derived from each peak bit rate */
+ WORD32 i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+
+ /* Per-frame bits derived from the minimum bit rate */
+ WORD32 i4_min_bits_per_frm;
+
+ /* Error bits module */
+ error_bits_handle ps_error_bits;
+
+ /* Storing frame rate */
+ WORD32 i4_frame_rate;
+
+ /* Storing bit rate */
+ WORD32 i4_bit_rate;
+
+ /* Storing peak bit rates */
+ WORD32 ai4_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+
+} bit_allocation_t;
+
+/* Returns the total number of frames in a GOP, summed over every picture type */
+static WORD32 get_number_of_frms_in_a_gop(pic_handling_handle ps_pic_handling)
+{
+ WORD32 ai4_frms_per_pic_type[MAX_PIC_TYPE];
+ WORD32 i4_frm_total = 0;
+ WORD32 i4_pic_type = 0;
+
+ /* Ask pic handling for the per-picture-type frame counts of a GOP */
+ irc_pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_per_pic_type);
+
+ /* Accumulate the counts in picture-type order */
+ while(i4_pic_type < MAX_PIC_TYPE)
+ {
+ i4_frm_total += ai4_frms_per_pic_type[i4_pic_type];
+ i4_pic_type++;
+ }
+
+ return i4_frm_total;
+}
+
+/*
+ * Initialises the remaining-bits-in-period state:
+ * rem_bits_in_period = bits_per_frm * tot_frms_in_gop * num_intra_frm_interval
+ * where tot_frms_in_gop is queried from pic handling. The three factors are
+ * also stored in ps_rbip.
+ */
+static void init_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bits_per_frm,
+ WORD32 i4_num_intra_frm_interval)
+{
+ WORD32 i4_tot_frms_in_gop = get_number_of_frms_in_a_gop(ps_pic_handling);
+
+ /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop * num_intra_frm_interval */
+ {
+ number_t vq_bits_per_frm, vq_tot_frms_in_gop, vq_num_intra_frm_interval;
+ number_t *pvq_rem_bits_in_period = &ps_rbip->vq_rem_bits_in_period;
+
+ SET_VAR_Q(vq_bits_per_frm, i4_bits_per_frm, 0);
+ SET_VAR_Q(vq_tot_frms_in_gop, i4_tot_frms_in_gop, 0);
+ SET_VAR_Q(vq_num_intra_frm_interval, i4_num_intra_frm_interval, 0);
+
+ /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop */
+ mult32_var_q(vq_bits_per_frm, vq_tot_frms_in_gop,
+ pvq_rem_bits_in_period);
+
+ /* rem_bits_in_period *= num_intra_frm_interval */
+ mult32_var_q(vq_num_intra_frm_interval, pvq_rem_bits_in_period[0],
+ pvq_rem_bits_in_period);
+ }
+
+ /*
+ * Store the inputs. check_update_rbip() and irc_ba_change_rbip() compare
+ * new values against these to work out how much the budget must change.
+ */
+ ps_rbip->i4_tot_frms_in_gop = i4_tot_frms_in_gop;
+ ps_rbip->i4_num_intra_frm_interval = i4_num_intra_frm_interval;
+ ps_rbip->i4_bits_per_frm = i4_bits_per_frm;
+}
+
+/*
+ * Re-syncs the stored GOP size with pic handling. When the total number of
+ * frames in a GOP has changed, rem_bits_in_period is adjusted by
+ * bits_per_frm * num_intra_frm_interval * (new_total - old_total).
+ */
+static void check_update_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling)
+{
+ /*
+ * NOTE: Intra frame interval changes after the first I frame that is
+ * encoded in a GOP
+ */
+ WORD32 i4_new_tot_frms_in_gop = get_number_of_frms_in_a_gop(
+ ps_pic_handling);
+
+ if(i4_new_tot_frms_in_gop != ps_rbip->i4_tot_frms_in_gop)
+ {
+ /* Signed change in frame count over the whole period */
+ WORD32 i4_rem_frames_in_period =
+ ps_rbip->i4_num_intra_frm_interval
+ * (i4_new_tot_frms_in_gop
+ - ps_rbip->i4_tot_frms_in_gop);
+
+ number_t vq_rem_frms_in_period, s_bits_per_frm, vq_delta_bits_in_period;
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frames_in_period, 0);
+ SET_VAR_Q(s_bits_per_frm, ps_rbip->i4_bits_per_frm, 0);
+
+ /* delta_bits_in_period = bits_per_frm * rem_frms_in_period */
+ mult32_var_q(s_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+ /* Store the new GOP size (a no-op when it did not change) */
+ ps_rbip->i4_tot_frms_in_gop = i4_new_tot_frms_in_gop;
+}
+
+/*
+ * Adds i4_num_of_bits to rem_bits_in_period after first re-syncing the GOP
+ * size through check_update_rbip(). Callers in this file pass negative values
+ * to consume bits and 0 when only the re-sync is wanted.
+ */
+static void irc_ba_update_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_of_bits)
+{
+ number_t vq_num_bits;
+
+ check_update_rbip(ps_rbip, ps_pic_handling);
+
+ /* rem_bits_in_period += num_of_bits */
+ SET_VAR_Q(vq_num_bits, i4_num_of_bits, 0);
+ add32_var_q(vq_num_bits, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+}
+
+/*
+ * Adjusts rem_bits_in_period for a change in bits per frame and/or in the
+ * number of intra frame intervals, then stores the new values in ps_rbip.
+ * The two adjustments are applied one after the other; the second uses the
+ * new bits per frame.
+ */
+static void irc_ba_change_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_new_bits_per_frm,
+ WORD32 i4_new_num_intra_frm_interval)
+{
+ WORD32 ai4_rem_frms_in_period[MAX_PIC_TYPE], i4_rem_frms_in_gop, i;
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period);
+
+ /* Remaining frames in the current GOP, summed over all picture types */
+ i4_rem_frms_in_gop = 0;
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ i4_rem_frms_in_gop += ai4_rem_frms_in_period[i];
+
+ if(i4_new_bits_per_frm != ps_rbip->i4_bits_per_frm)
+ {
+ /* Frames left in the period: the full GOPs still to come plus the
+ rest of the current GOP */
+ WORD32 i4_rem_frms_in_period = (ps_rbip->i4_num_intra_frm_interval - 1)
+ * ps_rbip->i4_tot_frms_in_gop + i4_rem_frms_in_gop;
+
+ number_t vq_rem_frms_in_period, vq_delta_bits_per_frm,
+ vq_delta_bits_in_period;
+
+ /* delta_bits_per_frm = new_bits_per_frm - old_bits_per_frm */
+ SET_VAR_Q(vq_delta_bits_per_frm,
+ (i4_new_bits_per_frm - ps_rbip->i4_bits_per_frm), 0);
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+ /* delta_bits_in_period = delta_bits_per_frm * rem_frms_in_period */
+ mult32_var_q(vq_delta_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+
+ if(i4_new_num_intra_frm_interval != ps_rbip->i4_num_intra_frm_interval)
+ {
+ /* Signed number of frames added to (or removed from) the period */
+ WORD32 i4_rem_frms_in_period = ps_rbip->i4_tot_frms_in_gop
+ * (i4_new_num_intra_frm_interval
+ - ps_rbip->i4_num_intra_frm_interval);
+
+ number_t vq_rem_frms_in_period, vq_new_bits_per_frm,
+ vq_delta_bits_in_period;
+
+ /* The new bits per frame itself (not a difference) is used here */
+ SET_VAR_Q(vq_new_bits_per_frm, i4_new_bits_per_frm, 0);
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+ /* delta_bits_in_period = new_bits_per_frm * rem_frms_in_period */
+ mult32_var_q(vq_new_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+ /* Update the new value */
+ ps_rbip->i4_num_intra_frm_interval = i4_new_num_intra_frm_interval;
+ ps_rbip->i4_bits_per_frm = i4_new_bits_per_frm;
+}
+
+/*
+ * Memtab handling for the bit allocation module and the error bits module
+ * nested inside it.
+ *
+ * For every e_func_type other than GET_NUM_MEMTAB, one memtab entry is filled
+ * for the bit_allocation_t state and passed to use_or_fill_base() together
+ * with pps_bit_allocation. The call is then forwarded to the error bits
+ * module with the following memtab entries.
+ *
+ * NOTE: for GET_NUM_MEMTAB and FILL_MEMTAB *pps_bit_allocation is pointed at
+ * a function-local static placeholder, so the caller's pointer is overwritten
+ * in those modes.
+ *
+ * Returns 1 plus the value returned by
+ * irc_error_bits_num_fill_use_free_memtab().
+ */
+WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_t **pps_bit_allocation,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0;
+ static bit_allocation_t s_bit_allocation_temp;
+
+ /*
+ * Hack for all alloc, during which we don't have any state memory.
+ * Dereferencing can cause issues
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ {
+ (*pps_bit_allocation) = &s_bit_allocation_temp;
+ }
+
+ /*for src rate control state structure*/
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ /* Both calls address the same entry; index it the same way */
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(bit_allocation_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[i4_mem_tab_idx],
+ (void**)pps_bit_allocation, e_func_type);
+ }
+ i4_mem_tab_idx++;
+
+ i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+ &(*pps_bit_allocation)->ps_error_bits,
+ &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_init_bit_allocation
+ Description : Initialize the bit_allocation structure.
+ Derives the average, maximum and minimum bits per frame from
+ the bit rates and the frame rate, initialises the remaining
+ bits in period, the K factors, the saved bits, the error bits
+ module and the stored header bits.
+ i4_peak_bit_rate points to MAX_NUM_DRAIN_RATES entries.
+ NOTE(review): X_PROD_Y_DIV_Z(x, y, z, out) presumably computes
+ out = x * y / z; i4_frm_rate is used as the divisor, so it must
+ not be 0 - confirm against the macro definition.
+ ******************************************************************************/
+void irc_ba_init_bit_allocation(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frm_rate,
+ WORD32 *i4_peak_bit_rate,
+ WORD32 i4_min_bitrate)
+{
+ WORD32 i;
+ WORD32 i4_bits_per_frm, i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+
+ /* Calculate the bits per frame */
+ X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frm_rate, i4_bits_per_frm);
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frm_rate,
+ i4_max_bits_per_frm[i]);
+ }
+ /* Initialize the bits_per_frame */
+ ps_bit_allocation->i4_bits_per_frm = i4_bits_per_frm;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_bit_allocation->i4_max_bits_per_frm[i] = i4_max_bits_per_frm[i];
+ }
+ X_PROD_Y_DIV_Z(i4_min_bitrate, 1000, i4_frm_rate,
+ ps_bit_allocation->i4_min_bits_per_frm);
+
+ /*
+ * Initialize the rem_bits in period
+ * The first gop in case of an OPEN GOP may have fewer B_PICs,
+ * That condition is not taken care of
+ */
+ init_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_bits_per_frm,
+ i4_num_intra_frm_interval);
+
+ /* Initialize the num_gops_in_period */
+ ps_bit_allocation->i4_num_gops_in_period = i4_num_intra_frm_interval;
+ ps_bit_allocation->i4_actual_num_gops_in_period = i4_num_intra_frm_interval;
+
+ /* Relative complexity between I, P and B frames */
+ ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q);
+ ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO;
+ ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q;
+
+ /* Initialize the saved bits to 0*/
+ SET_VAR_Q(ps_bit_allocation->vq_saved_bits, 0, 0);
+
+ /* Update the error bits module with average bits */
+ irc_init_error_bits(ps_bit_allocation->ps_error_bits, i4_frm_rate,
+ i4_bit_rate);
+ /* Store the input for implementing change in values */
+ ps_bit_allocation->i4_frame_rate = i4_frm_rate;
+ ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+
+ /* No header bits have been observed yet for any picture type */
+ memset(ps_bit_allocation->i4_prev_frm_header_bits, 0, sizeof(ps_bit_allocation->i4_prev_frm_header_bits));
+ for(i=0;i<MAX_NUM_DRAIN_RATES;i++)
+ ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_texture_bits
+ Description : Based on remaining bits in period and rd_model
+ the number of texture bits required for the current frame is
+ estimated. pps_rd_model is indexed by picture type. The value
+ returned is never negative.
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_t *ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type)
+{
+ WORD32 i, j;
+ WORD32 i4_est_texture_bits_for_frm;
+ number_t vq_rem_texture_bits;
+ number_t vq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE], i4_frms_in_period[MAX_PIC_TYPE];
+ number_t vq_max_consumable_bits;
+ number_t vq_rem_frms_in_period[MAX_PIC_TYPE], vq_est_texture_bits_for_frm;
+ number_t vq_prev_hdr_bits[MAX_PIC_TYPE];
+
+ WORD32 complexity_est = 0;
+
+ /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+ irc_pic_type_get_frms_in_gop(ps_pic_handling, i4_frms_in_period);
+
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ i4_rem_frms_in_period[j] += (i4_frms_in_period[j]
+ * (ps_bit_allocation->i4_num_gops_in_period - 1));
+ i4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+ }
+
+ /* Remove the header bits from the remaining bits to find how many bits you
+ can transfer.*/
+ /* Adding 0 bits only re-syncs the stored GOP size with pic handling */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ SET_VAR_Q(vq_rem_frms_in_period[i], i4_rem_frms_in_period[i], 0);
+ SET_VAR_Q(vq_prev_hdr_bits[i],
+ ps_bit_allocation->i4_prev_frm_header_bits[i], 0);
+ }
+ {
+ /*
+ *rem_texture_bits = rem_bits_in_period -
+ *(rem_frms_in_period[I_PIC] * prev_frm_header_bits[I_PIC]) -
+ *(rem_frms_in_period[P_PIC] * prev_frm_header_bits[P_PIC]) -
+ *(rem_frms_in_period[B_PIC] * prev_frm_header_bits[B_PIC]);
+ */
+ number_t vq_rem_hdr_bits;
+ vq_rem_texture_bits = ps_bit_allocation->s_rbip.vq_rem_bits_in_period;
+
+ mult32_var_q(vq_prev_hdr_bits[I_PIC], vq_rem_frms_in_period[I_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+ mult32_var_q(vq_prev_hdr_bits[P_PIC], vq_rem_frms_in_period[P_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+ mult32_var_q(vq_prev_hdr_bits[B_PIC], vq_rem_frms_in_period[B_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+ }
+ {
+ /* max_consumable_bits =
+ *(rem_frms_in_period[I_PIC] * max_bits_per_frm[0] ) +
+ *(rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC] ) * max_bits_per_frm[1];
+ *
+ * NOTE(review): this comment used to name frms_in_period, but the code
+ * below multiplies the remaining frame counts - confirm which is intended
+ */
+ number_t vq_max_bits, vq_max_bits_per_frm[2];
+
+ SET_VAR_Q(vq_max_bits_per_frm[0],
+ ps_bit_allocation->i4_max_bits_per_frm[0], 0);
+ SET_VAR_Q(vq_max_bits_per_frm[1],
+ ps_bit_allocation->i4_max_bits_per_frm[1], 0);
+
+ mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_max_bits_per_frm[0],
+ &vq_max_bits);
+ vq_max_consumable_bits = vq_max_bits;
+
+ mult32_var_q(vq_rem_frms_in_period[P_PIC], vq_max_bits_per_frm[1],
+ &vq_max_bits);
+ add32_var_q(vq_max_bits, vq_max_consumable_bits,
+ &vq_max_consumable_bits);
+
+ mult32_var_q(vq_rem_frms_in_period[B_PIC], vq_max_bits_per_frm[1],
+ &vq_max_bits);
+ add32_var_q(vq_max_bits, vq_max_consumable_bits,
+ &vq_max_consumable_bits);
+ }
+
+ /* rem_texture_bits = MIN(rem_texture_bits, max_consumable_bits) */
+ MIN_VARQ(vq_max_consumable_bits, vq_rem_texture_bits, vq_rem_texture_bits);
+
+ /* The bits are then allocated based on the relative complexity of the
+ current frame with respect to that of the rest of the frames in period */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ number_t vq_lin_mod_coeff, vq_est_sad, vq_K;
+
+ /* Getting the linear model coefficient */
+ vq_lin_mod_coeff = irc_get_linear_coefficient(pps_rd_model[i]);
+
+ /* Getting the estimated SAD */
+ SET_VAR_Q(vq_est_sad, irc_get_est_sad(ps_est_sad,i), 0);
+
+ /* Making K factor a var Q format */
+ SET_VAR_Q(vq_K, ps_bit_allocation->i2_K[i], K_Q);
+
+ /* Complexity_estimate = [ (lin_mod_coeff * estimated_sad) / K factor ] */
+ mult32_var_q(vq_lin_mod_coeff, vq_est_sad, &vq_lin_mod_coeff);
+ div32_var_q(vq_lin_mod_coeff, vq_K, &vq_complexity_estimate[i]);
+ }
+
+ /*
+ * For simple cases, one of the complexities go to zero and in those cases
+ * distribute the bits evenly among frames based on I_TO_P_BIT_RATIO
+ */
+
+ /* Also check the B-pictures complexity only in case they are present*/
+ /* The number_t estimates are used directly as truth values here:
+ complexity_est is 1 only when every estimate checked is non-zero */
+ if(i4_frms_in_period[B_PIC] == 0)
+ {
+ complexity_est = (vq_complexity_estimate[I_PIC]
+ && vq_complexity_estimate[P_PIC]);
+ }
+ else
+ {
+ complexity_est = (vq_complexity_estimate[I_PIC]
+ && vq_complexity_estimate[P_PIC]
+ && vq_complexity_estimate[B_PIC]);
+ }
+
+ if(complexity_est)
+ {
+ /*
+ * Estimated texture bits =
+ * (remaining bits) * (cur frm complexity)
+ * ---------------------------------------
+ * (num_i_frm*i_frm_complexity) + (num_p_frm*pfrm_complexity)
+ * + (b_frm * b_frm_cm)
+ */
+ mult32_var_q(vq_rem_texture_bits, vq_complexity_estimate[e_pic_type],
+ &vq_rem_texture_bits);
+
+ /* vq_rem_frms_in_period[] is overwritten in place with
+ (remaining frames * complexity) for each picture type */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ mult32_var_q(vq_rem_frms_in_period[i], vq_complexity_estimate[i],
+ &vq_rem_frms_in_period[i]);
+ }
+
+ /* The I_PIC slot accumulates the denominator */
+ add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[P_PIC],
+ &vq_rem_frms_in_period[I_PIC]);
+
+ add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[B_PIC],
+ &vq_rem_frms_in_period[I_PIC]);
+
+ div32_var_q(vq_rem_texture_bits, vq_rem_frms_in_period[I_PIC],
+ &vq_est_texture_bits_for_frm);
+
+ number_t_to_word32(vq_est_texture_bits_for_frm,
+ &i4_est_texture_bits_for_frm);
+ }
+ else
+ {
+ number_t vq_i_to_p_bit_ratio, vq_rem_frms;
+
+ SET_VAR_Q(vq_i_to_p_bit_ratio, I_TO_P_BIT_RATIO, 0);
+
+ /* rem_frms = ((I_TO_P_BIT_RATIO * rem_frms_in_period[I_PIC]) +
+ * rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC]);
+ */
+ mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_i_to_p_bit_ratio,
+ &vq_rem_frms);
+ add32_var_q(vq_rem_frms_in_period[P_PIC], vq_rem_frms, &vq_rem_frms);
+ add32_var_q(vq_rem_frms_in_period[B_PIC], vq_rem_frms, &vq_rem_frms);
+
+ /* est_texture_bits_for_frm = rem_texture_bits / rem_frms */
+ div32_var_q(vq_rem_texture_bits, vq_rem_frms,
+ &vq_est_texture_bits_for_frm);
+ number_t_to_word32(vq_est_texture_bits_for_frm,
+ &i4_est_texture_bits_for_frm);
+
+ /* An I picture gets I_TO_P_BIT_RATIO times the per-frame share */
+ i4_est_texture_bits_for_frm =
+ (I_PIC == e_pic_type) ?
+ (i4_est_texture_bits_for_frm
+ * I_TO_P_BIT_RATIO) :
+ i4_est_texture_bits_for_frm;
+ }
+
+ /*
+ * If the remaining bits in the period becomes negative then the estimated
+ * texture bits would also become negative. This would send a feedback to
+ * the model which may go for a toss. Thus sending the minimum possible
+ * value = 0
+ */
+ if(i4_est_texture_bits_for_frm < 0)
+ {
+ i4_est_texture_bits_for_frm = 0;
+ }
+
+ return (i4_est_texture_bits_for_frm);
+}
+
+/******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_header_bits
+ Description : Returns the header bits estimate for the current frame, which
+ is the header bit count last stored for the given picture type.
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_t *ps_bit_allocation,
+ picture_type_e e_pic_type)
+{
+ /* Header bits recorded per picture type by the last update */
+ const WORD32 *pi4_hdr_bits = ps_bit_allocation->i4_prev_frm_header_bits;
+
+ return pi4_hdr_bits[e_pic_type];
+}
+
+/*
+ * Returns the remaining bits in period converted to a WORD32, after
+ * re-syncing the stored GOP size with pic handling.
+ */
+WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling)
+{
+ WORD32 i4_rem_bits_in_gop = 0;
+ /* Adding 0 bits only triggers the GOP size re-sync */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ number_t_to_word32(ps_bit_allocation->s_rbip.vq_rem_bits_in_period,
+ &i4_rem_bits_in_gop);
+ return (i4_rem_bits_in_gop);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_update_cur_frm_consumed_bits
+ Description : Updates the state after a frame has been encoded: removes the
+ bits consumed by the frame (corrected by the error bits) from
+ the remaining bits in period, stores the header bits as the
+ estimate for the next frame, handles a scene change (u1_is_scd)
+ and, on the last frame of a GOP, adds one GOP worth of bits.
+ ******************************************************************************/
+void irc_ba_update_cur_frm_consumed_bits(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_gop)
+{
+ WORD32 i4_error_bits = irc_get_error_bits(ps_bit_allocation->ps_error_bits);
+
+ /* Update the remaining bits in period */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ (-i4_total_frame_bits + i4_error_bits));
+
+ /*
+ * Update the header bits so that it can be used as an estimate to the next
+ * frame
+ */
+ if(u1_is_scd)
+ {
+ /*
+ * In case of SCD, even though the frame type is P, it is equivalent to
+ * a I frame and so the corresponding header bits is updated
+ */
+ ps_bit_allocation->i4_prev_frm_header_bits[I_PIC] =
+ i4_model_updation_hdr_bits;
+
+/* Defined here, inside the function body; it remains visible to the rest of the file */
+#define MAX_NUM_GOPS_IN_PERIOD (3)
+ if(ps_bit_allocation->i4_num_gops_in_period < MAX_NUM_GOPS_IN_PERIOD)
+ {
+ /*
+ * Whenever there is a scene change increase the number of gops by
+ * 2 so that the number of bits allocated is not very constrained
+ */
+ ps_bit_allocation->i4_num_gops_in_period += 2;
+ /* Add the extra bits in GOP to remaining bits in period */
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ }
+ }
+ else
+ {
+ ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] =
+ i4_model_updation_hdr_bits;
+ }
+
+ if(i4_last_frm_in_gop)
+ {
+ /* Bits for one GOP at the current average bits per frame */
+ WORD32 i4_num_bits_in_a_gop = get_number_of_frms_in_a_gop(
+ ps_pic_handling) * ps_bit_allocation->i4_bits_per_frm;
+ /*
+ * If the number of gops in period has been increased due to scene
+ * change, slowly bring in down across the gops
+ */
+ if(ps_bit_allocation->i4_num_gops_in_period
+ > ps_bit_allocation->i4_actual_num_gops_in_period)
+ {
+ ps_bit_allocation->i4_num_gops_in_period--;
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ }
+ /*
+ * If rem_bits_in_period < 0 decrease the number of bits allocated for
+ * the next period else increase it
+ */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ i4_num_bits_in_a_gop);
+ }
+ /* Update the lower modules */
+ irc_update_error_bits(ps_bit_allocation->ps_error_bits);
+}
+
+/*
+ * Applies a new bit rate, frame rate and set of peak bit rates: recomputes
+ * the average and maximum bits per frame, adjusts the remaining bits in
+ * period, informs the error bits module and stores the new values.
+ * i4_peak_bit_rate points to MAX_NUM_DRAIN_RATES entries.
+ */
+void irc_ba_change_remaining_bits_in_period(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frame_rate,
+ WORD32 *i4_peak_bit_rate)
+{
+ WORD32 i4_new_avg_bits_per_frm;
+ WORD32 i4_new_peak_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE];
+ int i;
+
+ /* Calculate the new per frame bits */
+ X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frame_rate, i4_new_avg_bits_per_frm);
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frame_rate,
+ i4_new_peak_bits_per_frm[i]);
+ }
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_bit_allocation->i4_max_bits_per_frm[i] = i4_new_peak_bits_per_frm[i];
+ }
+
+ /*
+ * Get the rem_frms_in_prd & the frms_in_prd from the pic_type state
+ * struct
+ * NOTE(review): i4_rem_frms_in_period is not read again in this function;
+ * irc_ba_change_rbip() queries the remaining frames itself
+ */
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+
+ /*
+ * If the difference > 0(/ <0), the remaining bits in period needs to be
+ * increased(/decreased) based on the remaining number of frames
+ */
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ i4_new_avg_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+
+ /* Update the new average bits per frame */
+ ps_bit_allocation->i4_bits_per_frm = i4_new_avg_bits_per_frm;
+ /* change the lower modules state */
+ irc_change_bitrate_in_error_bits(ps_bit_allocation->ps_error_bits,
+ i4_bit_rate);
+ irc_change_frm_rate_in_error_bits(ps_bit_allocation->ps_error_bits,
+ i4_frame_rate);
+
+ /* Store the modified frame_rate, bit rate and peak bit rates */
+ ps_bit_allocation->i4_frame_rate = i4_frame_rate;
+ ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+
+/*
+ * Stores new peak bit rates and recomputes the maximum bits per frame for
+ * each of them using the stored frame rate. ai4_peak_bit_rate points to
+ * MAX_NUM_DRAIN_RATES entries.
+ */
+void irc_ba_change_ba_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+ WORD32 *ai4_peak_bit_rate)
+{
+ WORD32 i;
+
+ /* Calculate the maximum bits per frame for every peak bit rate */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(ai4_peak_bit_rate[i], 1000,
+ ps_bit_allocation->i4_frame_rate,
+ ps_bit_allocation->i4_max_bits_per_frm[i]);
+ ps_bit_allocation->ai4_peak_bit_rate[i] = ai4_peak_bit_rate[i];
+ }
+}
+
+/******************************************************************************
+ * @brief Modifies the remaining bits in period for the gop which has a fif
+ * (presumably "forced I frame", going by the function name).
+ * Since a fif would cause a new gop to be created, we need to add the number
+ * of encoded frames in the fif GOP worth of bits to remaining bits in
+ * period
+ ******************************************************************************/
+void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling)
+{
+ WORD32 i4_frms_in_period;
+ /* Frame count reported by pic handling for the forced I frame GOP */
+ i4_frms_in_period = irc_pic_type_get_frms_in_gop_force_I_frm(
+ ps_pic_handling);
+ /* Add bits_per_frm * that frame count to the remaining bits in period */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm * i4_frms_in_period);
+}
+
+ /*
+  * Keeps the remaining bits in period inside the range the period can carry,
+  * banking the difference in vq_saved_bits:
+  *
+  *  - rem_bits > max_drain_bits      : rem_bits is clipped to max_drain_bits and
+  *                                     the excess is added to saved bits
+  *  - rem_bits < min_bits_for_period : rem_bits is raised to the minimum and the
+  *                                     (negative) difference is added to saved
+  *                                     bits
+  *  - otherwise, when saved bits > 0 : MIN(saved bits, max_drain_bits - rem_bits)
+  *                                     is moved from saved bits to rem_bits
+  *
+  * where
+  *   max_drain_bits      = num_gops_in_period * frms_in_gop *
+  *                         i4_max_bits_inflow_per_frm
+  *   min_bits_for_period = num_gops_in_period * frms_in_gop * i4_min_bits_per_frm
+  *
+  * i4_cur_buf_size, i4_max_buf_size and i4_tot_frame_bits are accepted for
+  * interface compatibility but are not used.
+  */
+ void irc_ba_check_and_update_bit_allocation(bit_allocation_t *ps_bit_allocation,
+                                             pic_handling_handle ps_pic_handling,
+                                             WORD32 i4_cur_buf_size,
+                                             WORD32 i4_max_buf_size,
+                                             WORD32 i4_max_bits_inflow_per_frm,
+                                             WORD32 i4_tot_frame_bits)
+ {
+
+     number_t vq_max_drain_bits, vq_extra_bits, vq_less_bits,
+                     vq_allocated_saved_bits, vq_min_bits_for_period;
+     WORD32 i4_num_frms_in_period = get_number_of_frms_in_a_gop(ps_pic_handling);
+     number_t vq_rem_bits_in_period, vq_num_frms_in_period, vq_zero;
+     WORD32 b_rem_bits_gt_max_drain, b_rem_bits_lt_min_bits,
+                     b_saved_bits_gt_zero;
+     rem_bit_in_prd_t *ps_rbip = &ps_bit_allocation->s_rbip;
+
+     UNUSED(i4_cur_buf_size);
+     UNUSED(i4_max_buf_size);
+     UNUSED(i4_tot_frame_bits);
+
+     /*
+      * max_drain_bits = num_gops_in_period * num_frms_in_period *
+      *                  max_bits_inflow_per_frm
+      */
+     SET_VAR_Q(vq_num_frms_in_period,
+               (ps_bit_allocation->i4_num_gops_in_period * i4_num_frms_in_period),
+               0);
+     SET_VAR_Q(vq_max_drain_bits, i4_max_bits_inflow_per_frm, 0);
+     SET_VAR_Q(vq_zero, 0, 0);
+     mult32_var_q(vq_max_drain_bits, vq_num_frms_in_period, &vq_max_drain_bits);
+
+     /*
+      * min_bits_for_period = num_gops_in_period * num_frms_in_period *
+      *                       min_bits_per_frm
+      */
+     SET_VAR_Q(vq_min_bits_for_period, ps_bit_allocation->i4_min_bits_per_frm,
+               0);
+     mult32_var_q(vq_min_bits_for_period, vq_num_frms_in_period,
+                  &vq_min_bits_for_period);
+
+     /* Snapshot taken before any branch below modifies the stored value */
+     vq_rem_bits_in_period = ps_rbip->vq_rem_bits_in_period;
+
+     /* Evaluate rem_bits_in_period > max_drain_bits */
+     VQ_A_GT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                  b_rem_bits_gt_max_drain);
+
+     /* Evaluate rem_bits_in_period < min_bits_for_period */
+     VQ_A_LT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                  b_rem_bits_lt_min_bits);
+
+     /*
+      * Evaluate saved_bits > 0. This must be a greater-than test: the branch
+      * it guards hands out bits from the saved pool, which is only meaningful
+      * when the pool actually holds bits.
+      */
+     VQ_A_GT_VQ_B(ps_bit_allocation->vq_saved_bits, vq_zero,
+                  b_saved_bits_gt_zero);
+
+     /* (i4_rem_bits_in_period > i4_max_drain_bits) */
+     if(b_rem_bits_gt_max_drain)
+     {
+         /* extra_bits = rem_bits_in_period - max_drain_bits */
+         sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                     &vq_extra_bits);
+
+         /* saved_bits += extra_bits */
+         add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                     &ps_bit_allocation->vq_saved_bits);
+
+         /* rem_bits_in_period = vq_max_drain_bits */
+         ps_rbip->vq_rem_bits_in_period = vq_max_drain_bits;
+     }
+     else if(b_rem_bits_lt_min_bits)
+     {
+         /* extra_bits(-ve) = rem_bits_in_period - i4_min_bits_for_period */
+         sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                     &vq_extra_bits);
+
+         /* saved_bits += extra_bits(-ve) */
+         add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                     &ps_bit_allocation->vq_saved_bits);
+
+         /* rem_bits_in_period = min_bits_for_period */
+         ps_rbip->vq_rem_bits_in_period = vq_min_bits_for_period;
+     }
+     else if(b_saved_bits_gt_zero)
+     {
+         /* less_bits = max_drain_bits - rem_bits_in_period (headroom) */
+         sub32_var_q(vq_max_drain_bits, vq_rem_bits_in_period, &vq_less_bits);
+
+         /* allocated_saved_bits = MIN (less_bits, saved_bits) */
+         MIN_VARQ(ps_bit_allocation->vq_saved_bits, vq_less_bits,
+                  vq_allocated_saved_bits);
+
+         /* rem_bits_in_period += allocated_saved_bits */
+         add32_var_q(ps_rbip->vq_rem_bits_in_period, vq_allocated_saved_bits,
+                     &ps_rbip->vq_rem_bits_in_period);
+
+         /* saved_bits -= allocated_saved_bits */
+         sub32_var_q(ps_bit_allocation->vq_saved_bits, vq_allocated_saved_bits,
+                     &ps_bit_allocation->vq_saved_bits);
+     }
+     return;
+ }
+
+ /* Returns the frame rate currently stored in the bit allocation state. */
+ WORD32 irc_ba_get_frame_rate(bit_allocation_t *ps_bit_allocation)
+ {
+     WORD32 i4_stored_frame_rate = ps_bit_allocation->i4_frame_rate;
+
+     return i4_stored_frame_rate;
+ }
+
+ /* Returns the (average) bit rate currently stored in the bit allocation state. */
+ WORD32 irc_ba_get_bit_rate(bit_allocation_t *ps_bit_allocation)
+ {
+     WORD32 i4_stored_bit_rate = ps_bit_allocation->i4_bit_rate;
+
+     return i4_stored_bit_rate;
+ }
+
+ /*
+  * Copies the stored peak bit rates into the caller's array.
+  *
+  * pi4_peak_bit_rate : output; must have room for MAX_NUM_DRAIN_RATES entries
+  */
+ void irc_ba_get_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+                               WORD32 *pi4_peak_bit_rate)
+ {
+     WORD32 i4_rate_idx = 0;
+
+     while(i4_rate_idx < MAX_NUM_DRAIN_RATES)
+     {
+         pi4_peak_bit_rate[i4_rate_idx] =
+                         ps_bit_allocation->ai4_peak_bit_rate[i4_rate_idx];
+         i4_rate_idx++;
+     }
+ }
diff --git a/encoder/irc_bit_allocation.h b/encoder/irc_bit_allocation.h
new file mode 100755
index 0000000..19ba0df
--- /dev/null
+++ b/encoder/irc_bit_allocation.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+ #ifndef _BIT_ALLOCATION_H_
+ #define _BIT_ALLOCATION_H_
+
+ typedef struct bit_allocation_t *bit_allocation_handle; /* Handle type: pointer
+ * to struct bit_allocation_t, which this header leaves undefined. */
+
+ WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_handle *pps_bit_allocation,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type); /* Memtab entry point selected by e_func_type;
+ * presumably returns the number of memtabs used - confirm in the .c file. */
+
+ void irc_ba_init_bit_allocation(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frm_rate,
+ WORD32 *u4_peak_bit_rate,
+ WORD32 i4_min_bitrate); /* NOTE(review): u4_peak_bit_rate is a WORD32
+ * pointer despite its u4_ prefix. */
+
+ /* Estimates the number of texture bits required by the current frame */
+ WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_handle ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+ /* Estimate the number of header bits required by the current frame */
+ WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_handle ps_bit_allocation,
+ picture_type_e e_pic_type);
+
+ /* Get the remaining bits allocated in the period */
+ WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling);
+
+ WORD32 irc_ba_get_frame_rate(bit_allocation_handle ps_bit_allocation); /* Returns
+ * the stored i4_frame_rate. */
+
+ WORD32 irc_ba_get_bit_rate(bit_allocation_handle ps_bit_allocation); /* Returns
+ * the stored i4_bit_rate. */
+ void irc_ba_get_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+ WORD32 *pi4_peak_bit_rate); /* Copies MAX_NUM_DRAIN_RATES stored peak bit
+ * rates into pi4_peak_bit_rate. */
+
+ /* Updates the bit allocation module with the actual encoded values */
+ void irc_ba_update_cur_frm_consumed_bits(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_gop);
+
+ void irc_ba_check_and_update_bit_allocation(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_cur_buf_size,
+ WORD32 i4_max_buf_size,
+ WORD32 i4_max_bits_inflow_per_frm,
+ WORD32 i4_tot_frame_bits); /* Clips the remaining bits in period between
+ * the minimum and the maximum drainable bits and moves the difference to or
+ * from the saved bits. The definition marks i4_cur_buf_size, i4_max_buf_size
+ * and i4_tot_frame_bits as UNUSED. */
+
+ /* Based on the change in frame/bit rate update the remaining bits in period */
+ void irc_ba_change_remaining_bits_in_period(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frame_rate,
+ WORD32 *i4_peak_bit_rate);
+
+ /* Change the gop size in the middle of a current gop.
+ * NOTE(review): unlike its neighbours this name has no irc_ba_ prefix, and no
+ * definition is visible alongside the other irc_ba_* functions - confirm it
+ * is still implemented. */
+ void change_gop_size(bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_inter_frm_interval,
+ WORD32 i4_num_intra_frm_interval);
+
+ void update_rem_frms_in_period(bit_allocation_handle ps_bit_allocation,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_first_frm,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_num_intra_frm_interval); /* NOTE(review): also lacks the irc_ba_
+ * prefix; confirm a definition exists. */
+
+ void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling); /* On a forced I frame, passes
+ * i4_bits_per_frm * (frames in the forced-I GOP) to irc_ba_update_rbip(). */
+
+ void irc_ba_change_ba_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+ WORD32 *ai4_peak_bit_rate); /* Stores the new peak bit rates and recomputes
+ * i4_max_bits_per_frm[] from them and the stored frame rate. */
+ #endif /* _BIT_ALLOCATION_H_ */
diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c
new file mode 100755
index 0000000..c179a28
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.c
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_trace_support.h"
+
+ typedef struct cbr_buffer_t
+ {
+ /* Buffer size = Delay * Bitrate in CBR mode; in the non-CBR case it is the
+ * sum over the two drain rates of (pics in delay period * drain bits per
+ * frame). Always clamped to u4_max_vbv_buf_size. */
+ WORD32 i4_buffer_size;
+
+ /* Constant drain rate in bits per frame. Index 0 is used for I_PIC, index 1
+ * for every other picture type. */
+ WORD32 i4_drain_bits_per_frame[MAX_NUM_DRAIN_RATES];
+
+ /* Encoder Buffer Fullness, kept within [0, i4_buffer_size] by
+ * irc_update_cbr_buffer() */
+ WORD32 i4_ebf;
+
+ /* Upper threshold of the Buffer, per picture type. Every place in this file
+ * that sets it uses i4_buffer_size - (i4_buffer_size >> 3). */
+ WORD32 i4_upr_thr[MAX_PIC_TYPE];
+
+ /* Lower threshold of the Buffer, per picture type; set to the bits per frame
+ * of the matching drain rate */
+ WORD32 i4_low_thr[MAX_PIC_TYPE];
+
+ /* Stuffing threshold equal to error bits per second in the drain bits
+ * fixed point computation */
+ WORD32 i4_stuffing_threshold;
+
+ /* For error due to bits per frame calculation */
+ error_bits_handle aps_bpf_error_bits[MAX_NUM_DRAIN_RATES];
+
+ /* Whether the buffer model is used for CBR or VBR streaming: 1 when the two
+ * drain bit rates are equal, 0 otherwise */
+ WORD32 i4_is_cbr_mode;
+
+ /* Input parameters stored for initialization */
+ WORD32 ai4_bit_rate[MAX_NUM_DRAIN_RATES];
+
+ WORD32 i4_max_delay; /* buffer delay; in ms per the buffer size comments */
+
+ WORD32 ai4_num_pics_in_delay_period[MAX_PIC_TYPE]; /* sizes the non-CBR buffer */
+
+ WORD32 i4_tgt_frm_rate; /* appears to be frames per 1000 s - TODO confirm */
+
+ UWORD32 u4_max_vbv_buf_size; /* upper clamp applied to i4_buffer_size */
+
+ } cbr_buffer_t;
+
+ /*
+  * Memtab entry point for the CBR buffer state and the error-bits module of
+  * each drain rate.
+  *
+  * For GET_NUM_MEMTAB only the entry count is produced; for every other
+  * e_func_type memtab entry 0 is filled for a cbr_buffer_t and passed to
+  * use_or_fill_base() together with the state pointer.
+  *
+  * Returns 1 plus the counts returned by the
+  * irc_error_bits_num_fill_use_free_memtab() calls.
+  */
+ WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_t **pps_cbr_buffer,
+                                                itt_memtab_t *ps_memtab,
+                                                ITT_FUNC_TYPE_E e_func_type)
+ {
+     /*
+      * Stand-in state for the query and fill passes, when no real state memory
+      * exists yet and the handle must still be safe to dereference below.
+      */
+     static cbr_buffer_t s_cbr_buffer_temp;
+     WORD32 i4_num_memtabs;
+     WORD32 i4_rate_idx;
+
+     if(e_func_type == GET_NUM_MEMTAB)
+     {
+         *pps_cbr_buffer = &s_cbr_buffer_temp;
+     }
+     else
+     {
+         if(e_func_type == FILL_MEMTAB)
+         {
+             *pps_cbr_buffer = &s_cbr_buffer_temp;
+         }
+
+         fill_memtab(&ps_memtab[0], sizeof(cbr_buffer_t), ALIGN_128_BYTE,
+                     PERSISTENT, DDR);
+         use_or_fill_base(&ps_memtab[0], (void**)pps_cbr_buffer, e_func_type);
+     }
+
+     /* Entry 0 belongs to the cbr_buffer_t itself */
+     i4_num_memtabs = 1;
+
+     for(i4_rate_idx = 0; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+     {
+         i4_num_memtabs += irc_error_bits_num_fill_use_free_memtab(
+                         &(*pps_cbr_buffer)->aps_bpf_error_bits[i4_rate_idx],
+                         &ps_memtab[i4_num_memtabs], e_func_type);
+     }
+
+     return i4_num_memtabs;
+ }
+
+/******************************************************************************
+ * @brief Initialize the CBR VBV buffer state.
+ * This could however be used for VBR streaming VBV also
+ *
+ ******************************************************************************/
+void irc_init_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_buffer_delay,
+ WORD32 i4_tgt_frm_rate,
+ WORD32 *i4_bit_rate,
+ UWORD32 *u4_num_pics_in_delay_prd,
+ UWORD32 u4_vbv_buf_size)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Initialize the bits per frame error bits calculation */
+ irc_init_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_tgt_frm_rate, i4_bit_rate[i]);
+ }
+
+ /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+ /* This would mean CBR mode */
+ if(i4_bit_rate[0] == i4_bit_rate[1])
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[0], i4_buffer_delay, 1000,
+ ps_cbr_buffer->i4_buffer_size);
+ ps_cbr_buffer->i4_is_cbr_mode = 1;
+ }
+ else
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ ps_cbr_buffer->i4_buffer_size = u4_num_pics_in_delay_prd[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + u4_num_pics_in_delay_prd[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ ps_cbr_buffer->i4_is_cbr_mode = 0;
+ }
+
+ if(ps_cbr_buffer->i4_buffer_size > (WORD32)u4_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = u4_vbv_buf_size;
+ }
+
+ /* Initially Encoder buffer fullness is zero */
+ ps_cbr_buffer->i4_ebf = 0;
+
+ /* tgt_frame_rate is divided by 1000 because, an approximate value is fine
+ * as this is just a threshold below which stuffing is done to avoid buffer
+ * underflow due to fixed point error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+ - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold for
+ * I frame = 1 * bits per frame
+ * P Frame = 4 * bits per frame.
+ * The threshold for I frame is only 1 * bits per frame as the threshold
+ * should only account for error in estimated bits.
+ * In P frame it should account for difference bets bits consumed by
+ * I(Scene change) and P frame I to P complexity is assumed to be 5.
+ */
+ WORD32 i4_index;
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /*
+ * For both I and P frame Lower threshold is equal to drain rate.Even if
+ * the encoder consumes zero bits it should have enough bits to drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[i] =
+ u4_num_pics_in_delay_prd[i];
+ }
+ ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+ ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+ ps_cbr_buffer->u4_max_vbv_buf_size = u4_vbv_buf_size;
+}
+
+/******************************************************************************
+ * @brief Condition check for constraining the number of bits allocated based on
+ * bufer size
+ ******************************************************************************/
+WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_max_tgt_bits, i4_min_tgt_bits;
+ WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ?
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] :
+ ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ /* Max tgt bits = Upper threshold - current encoder buffer fullness */
+ i4_max_tgt_bits = ps_cbr_buffer->i4_upr_thr[e_pic_type]
+ - ps_cbr_buffer->i4_ebf;
+ /* Max tgt bits cannot be negative */
+ if(i4_max_tgt_bits < 0)
+ i4_max_tgt_bits = 0;
+
+ /*
+ * Min tgt bits , least number of bits in the Encoder after
+ * draining such that it is greater than lower threshold
+ */
+ i4_min_tgt_bits = ps_cbr_buffer->i4_low_thr[e_pic_type]
+ - (ps_cbr_buffer->i4_ebf - i4_drain_bits_per_frame);
+ /* Min tgt bits cannot be negative */
+ if(i4_min_tgt_bits < 0)
+ i4_min_tgt_bits = 0;
+
+ /* Current tgt bits should be between max and min tgt bits */
+ CLIP(i4_tgt_bits, i4_max_tgt_bits, i4_min_tgt_bits);
+ return i4_tgt_bits;
+}
+
+/* *****************************************************************************
+ * @brief constaints the bit allocation based on buffer size
+ *
+ ******************************************************************************/
+WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_max_tgt_bits;
+
+ /* Max tgt bits = Upper threshold - current encoder buffer fullness */
+ i4_max_tgt_bits = ps_cbr_buffer->i4_upr_thr[e_pic_type]
+ - ps_cbr_buffer->i4_ebf;
+
+ /* Max tgt bits cannot be negative */
+ if(i4_max_tgt_bits < 0)
+ i4_max_tgt_bits = 0;
+
+ if(i4_tgt_bits > i4_max_tgt_bits)
+ i4_tgt_bits = i4_max_tgt_bits;
+
+ return i4_tgt_bits;
+}
+
+/* *****************************************************************************
+ * @brief Verifies the buffer state and returns whether it is overflowing,
+ * underflowing or normal
+ *
+ ******************************************************************************/
+vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ WORD32 *pi4_num_bits_to_prevent_overflow,
+ picture_type_e e_pic_type)
+{
+ vbv_buf_status_e e_buf_status;
+ WORD32 i4_cur_enc_buf;
+ WORD32 i4_error_bits = (e_pic_type == I_PIC) ?
+ irc_get_error_bits(ps_cbr_buffer
+ ->aps_bpf_error_bits[0]) :
+ irc_get_error_bits(ps_cbr_buffer
+ ->aps_bpf_error_bits[1]);
+
+ WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ?
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] :
+ ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ /* Add the tot consumed bits to the Encoder Buffer*/
+ i4_cur_enc_buf = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+
+ /* If the Encoder exceeds the Buffer Size signal an Overflow*/
+ if(i4_cur_enc_buf > ps_cbr_buffer->i4_buffer_size)
+ {
+ e_buf_status = VBV_OVERFLOW;
+ i4_cur_enc_buf = ps_cbr_buffer->i4_buffer_size;
+ }
+ else
+ {
+ /*
+ * Subtract the constant drain bits and error bits due to fixed point
+ * implementation
+ */
+ i4_cur_enc_buf -= (i4_drain_bits_per_frame + i4_error_bits);
+
+ /*
+ * If the buffer is less than stuffing threshold an Underflow is
+ * signaled else its NORMAL
+ */
+ if(i4_cur_enc_buf < ps_cbr_buffer->i4_stuffing_threshold)
+ {
+ e_buf_status = VBV_UNDERFLOW;
+ }
+ else
+ {
+ e_buf_status = VBV_NORMAL;
+ }
+
+ if(i4_cur_enc_buf < 0)
+ i4_cur_enc_buf = 0;
+ }
+
+ /*
+ * The RC lib models the encoder buffer, but the VBV buffer characterizes
+ * the decoder buffer
+ */
+ if(e_buf_status == VBV_OVERFLOW)
+ {
+ e_buf_status = VBV_UNDERFLOW;
+ }
+ else if(e_buf_status == VBV_UNDERFLOW)
+ {
+ e_buf_status = VBV_OVERFLOW;
+ }
+
+ pi4_num_bits_to_prevent_overflow[0] = (ps_cbr_buffer->i4_buffer_size
+ - i4_cur_enc_buf);
+
+ return e_buf_status;
+}
+
+/*******************************************************************************
+ * @brief Based on the bits consumed the buffer model is updated
+ ******************************************************************************/
+void irc_update_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_error_bits = (e_pic_type == I_PIC) ?
+ irc_get_error_bits(ps_cbr_buffer->
+ aps_bpf_error_bits[0]) :
+ irc_get_error_bits( ps_cbr_buffer->
+ aps_bpf_error_bits[1]);
+
+ WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ?
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] :
+ ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ /* Update the Encoder buffer with the total consumed bits*/
+ ps_cbr_buffer->i4_ebf += i4_tot_consumed_bits;
+
+ /*
+ * Subtract the drain bits and error bits due to fixed point
+ * implementation
+ */
+ ps_cbr_buffer->i4_ebf -= (i4_drain_bits_per_frame + i4_error_bits);
+
+ if(ps_cbr_buffer->i4_ebf < 0)
+ ps_cbr_buffer->i4_ebf = 0;
+
+ /*SS - Fix for lack of stuffing*/
+ if(ps_cbr_buffer->i4_ebf > ps_cbr_buffer->i4_buffer_size)
+ {
+ trace_printf(
+ (const WORD8*)"Error: Should not be coming here with stuffing\n");
+ ps_cbr_buffer->i4_ebf = ps_cbr_buffer->i4_buffer_size;
+ }
+}
+
+/*******************************************************************************
+ * @brief If the buffer underflows then return the number of bits to prevent
+ * underflow
+ *
+ ******************************************************************************/
+WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_bits_to_stuff;
+ WORD32 i4_error_bits = (e_pic_type == I_PIC) ?
+ irc_get_error_bits(ps_cbr_buffer
+ ->aps_bpf_error_bits[0]) :
+ irc_get_error_bits(ps_cbr_buffer
+ ->aps_bpf_error_bits[1]);
+
+ WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC) ?
+ ps_cbr_buffer->i4_drain_bits_per_frame[0] :
+ ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ /*
+ * Stuffing bits got from the following equation
+ * Stuffing_threshold = ebf + tcb - drain bits - error bits + stuff_bits
+ */
+ i4_bits_to_stuff = i4_drain_bits_per_frame + i4_error_bits
+ + ps_cbr_buffer->i4_stuffing_threshold
+ - (ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits);
+
+ return i4_bits_to_stuff;
+}
+
+/*******************************************************************************
+ * @brief Update the state for change in number of pics in the delay period
+ *
+ ******************************************************************************/
+void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_t *ps_cbr_buffer,
+ UWORD32 *u4_num_pics_in_delay_prd)
+{
+ WORD32 i;
+
+ if(!ps_cbr_buffer->i4_is_cbr_mode)
+ {
+ ps_cbr_buffer->i4_buffer_size =
+ u4_num_pics_in_delay_prd[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + u4_num_pics_in_delay_prd[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ if(ps_cbr_buffer->i4_buffer_size
+ > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+ }
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_cbr_buffer->i4_upr_thr[i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+ }
+
+ /* Re-initialize the number of pics in delay period */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[i] =
+ u4_num_pics_in_delay_prd[i];
+ }
+ }
+}
+
+/******************************************************************************
+ * @brief update the state for change in target frame rate
+ *
+ ******************************************************************************/
+void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tgt_frm_rate)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[i], 1000, i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Initialize the bits per frame error bits calculation */
+ irc_change_frm_rate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_tgt_frm_rate);
+ }
+
+ /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+ if(!ps_cbr_buffer->i4_is_cbr_mode)
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ ps_cbr_buffer->i4_buffer_size =
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+ }
+
+ if(ps_cbr_buffer->i4_buffer_size
+ > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+ }
+
+ /*
+ * Tgt_frame_rate is divided by 1000 because an approximate value is fine as
+ * this is just a threshold below which stuffing is done to avoid buffer
+ * underflow due to fixed point error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (ps_cbr_buffer->ai4_bit_rate[0]
+ - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold for
+ * I frame = 1 * bits per frame
+ * P Frame = 4 * bits per frame.
+ * The threshold for I frame is only 1 * bits per frame as the threshold should
+ * only account for error in estimated bits.
+ * In P frame it should account for difference bets bits consumed by I(Scene change)
+ * and P frame I to P complexity is assumed to be 5.
+ */
+ WORD32 i4_index;
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /*
+ * For both I and P frame Lower threshold is equal to drain rate.
+ * Even if the encoder consumes zero bits it should have enough bits to
+ * drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the state for change in bit rate
+ *
+ ******************************************************************************/
+void irc_change_cbr_vbv_bit_rate(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 *i4_bit_rate)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, ps_cbr_buffer->i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Initialize the bits per frame error bits calculation */
+ irc_change_bitrate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_bit_rate[i]);
+ }
+
+ /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+ if(i4_bit_rate[0] == i4_bit_rate[1]) /* This would mean CBR mode */
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[0], ps_cbr_buffer->i4_max_delay, 1000,
+ ps_cbr_buffer->i4_buffer_size);
+ ps_cbr_buffer->i4_is_cbr_mode = 1;
+ }
+ else
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ ps_cbr_buffer->i4_buffer_size =
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ ps_cbr_buffer->i4_is_cbr_mode = 0;
+ }
+
+ if(ps_cbr_buffer->i4_buffer_size
+ > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+ }
+
+ /*
+ * tgt_frame_rate is divided by 1000 because
+ * an approximate value is fine as this is just a threshold below which
+ * stuffing is done to avoid buffer underflow due to fixed point
+ * error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+ - (i4_bits_per_frm[0]
+ * (ps_cbr_buffer->i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold for
+ * I frame = 1 * bits per frame
+ * P Frame = 4 * bits per frame.
+ * The threshold for I frame is only 1 * bits per frame as the threshold
+ * should only account for error in estimated bits.
+ * In P frame it should account for difference bets bits consumed by
+ * I(Scene change) and P frame I to P complexity is assumed to be 5.
+ */
+
+ WORD32 i4_index;
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /* For both I and P frame Lower threshold is equal to drain rate.
+ * Even if the encoder consumes zero bits it should have enough bits to
+ * drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+ }
+}
+
+ /*
+  * Updates the state for a change in buffer delay. In CBR mode the buffer size
+  * is recomputed as bitrate * delay / 1000 (delay in ms); in VBR mode the size
+  * is left untouched. In both modes the size is re-clamped to the maximum VBV
+  * size, the upper thresholds are refreshed and the new delay is stored.
+  */
+ void irc_change_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer,
+                                  WORD32 i4_buffer_delay)
+ {
+     WORD32 i4_pic_type;
+     WORD32 i4_upper_limit;
+
+     if(ps_cbr_buffer->i4_is_cbr_mode)
+     {
+         X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[0], i4_buffer_delay, 1000,
+                        ps_cbr_buffer->i4_buffer_size);
+     }
+
+     if(ps_cbr_buffer->i4_buffer_size
+                     > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+     {
+         ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+     }
+
+     /* Upper threshold: buffer size less one eighth, for every picture type */
+     i4_upper_limit = ps_cbr_buffer->i4_buffer_size
+                     - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+     for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+     {
+         ps_cbr_buffer->i4_upr_thr[i4_pic_type] = i4_upper_limit;
+     }
+
+     /* Kept for the other change_* functions */
+     ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+ }
+
+ /* Returns the buffer delay last given to the init or change-delay function. */
+ WORD32 irc_get_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer)
+ {
+     WORD32 i4_stored_delay = ps_cbr_buffer->i4_max_delay;
+
+     return i4_stored_delay;
+ }
+
+ /* Returns the current (already clamped) buffer size of the model. */
+ WORD32 irc_get_cbr_buffer_size(cbr_buffer_t *ps_cbr_buffer)
+ {
+     WORD32 i4_current_size = ps_cbr_buffer->i4_buffer_size;
+
+     return i4_current_size;
+ }
diff --git a/encoder/irc_cbr_buffer_control.h b/encoder/irc_cbr_buffer_control.h
new file mode 100755
index 0000000..2534961
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : irc_cbr_buffer_control.h */
+/* */
+/* Description : This file contains all the necessary declarations */
+/* for cbr_buffer_control functions */
+/* */
+/* */
+/* List of Functions : <List the functions defined in this file> */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2008 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef CBR_BUFFER_CONTROL_H
+#define CBR_BUFFER_CONTROL_H
+
+/* Macro for clipping a number between to extremes */
+#define CLIP(Number,Max,Min) if((Number) > (Max)) (Number) = (Max); \
+ else if((Number) < (Min)) (Number) = (Min);
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+ typedef struct cbr_buffer_t *cbr_buffer_handle; /* Handle to the CBR buffer state; the structure is not defined in this header */
+
+ WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_handle *pps_cbr_buffer,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type); /* NOTE(review): presumably the same memtab helper pattern as the other rate control modules - confirm against the definition */
+
+ /* Initialize the cbr Buffer*/
+ void irc_init_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_buffer_delay,
+ WORD32 i4_tgt_frm_rate,
+ WORD32 *i4_bit_rate,
+ UWORD32 *u4_num_pics_in_delay_prd,
+ UWORD32 u4_vbv_buf_size);
+
+ /* Check for tgt bits with in CBR buffer*/
+ WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type);
+
+ /* Get the buffer status with the current consumed bits*/
+ vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ WORD32 *pi4_num_bits_to_prevent_overflow,
+ picture_type_e e_pic_type);
+
+ /* Update the CBR buffer at the end of the VOP*/
+ void irc_update_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+ /*Get the bits needed to stuff in case of Underflow*/
+ WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+ WORD32 irc_get_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer); /* Returns the buffer delay stored in the state */
+
+ WORD32 irc_get_cbr_buffer_size(cbr_buffer_handle ps_cbr_buffer); /* Returns the buffer size stored in the state */
+
+ WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type); /* NOTE(review): presumably the VBR streaming counterpart of irc_cbr_buffer_constraint_check - confirm */
+
+ void irc_change_cbr_vbv_bit_rate(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 *i4_bit_rate); /* i4_bit_rate is an array; the stored copy has MAX_NUM_DRAIN_RATES entries */
+
+ void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_frm_rate);
+
+ void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_handle ps_cbr_buffer,
+ UWORD32 *u4_num_pics_in_delay_prd);
+
+ void irc_change_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_buffer_delay); /* Stores the new delay (ms); in CBR mode also recomputes the buffer size
+ * from bit rate and delay, limits it to the maximum VBV buffer size
+ * and refreshes the upper thresholds */
+#endif /* CBR_BUFFER_CONTROL_H */
+
diff --git a/encoder/irc_cntrl_param.h b/encoder/irc_cntrl_param.h
new file mode 100755
index 0000000..82235f7
--- /dev/null
+++ b/encoder/irc_cntrl_param.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_CNTRL_PARAM_H_
+#define _RC_CNTRL_PARAM_H_
+
+/* This file should contain only enumerations exported to codec by RC */
+
+ /*
+  * Rate control algorithm selector exported to the codec. The numeric values
+  * are spelled out explicitly and must stay as they are.
+  */
+ typedef enum
+ {
+     VBR_STORAGE          = 0,
+     VBR_STORAGE_DVD_COMP = 1,
+     VBR_STREAMING        = 2,
+     CONST_QP             = 3,
+     CBR_LDRC             = 4,
+     CBR_NLDRC            = 5
+ } rc_type_e;
+
+ /*
+  * Picture type. I_PIC, P_PIC and B_PIC count up from zero and MAX_PIC_TYPE
+  * is the number of such types, so they can index arrays of MAX_PIC_TYPE
+  * entries. BUF_PIC is negative and therefore not a valid array index.
+  */
+ typedef enum
+ {
+     BUF_PIC      = -1,
+     I_PIC        = 0,
+     P_PIC        = 1,
+     B_PIC        = 2,
+     MAX_PIC_TYPE = 3
+ } picture_type_e;
+
+ /*
+  * Macroblock type. MAX_MB_TYPE is the number of types and sizes the per-type
+  * arrays, so it grows automatically when a type is added before it.
+  */
+ typedef enum
+ {
+     MB_TYPE_INTRA = 0,
+     MB_TYPE_INTER = 1,
+     MAX_MB_TYPE   = 2
+ } mb_type_e;
+
+ /* Buffer status values reported by the buffer model */
+ typedef enum
+ {
+     VBV_NORMAL    = 0,
+     VBV_UNDERFLOW = 1,
+     VBV_OVERFLOW  = 2,
+     VBR_CAUTION   = 3
+ } vbv_buf_status_e;
+
+#endif
+
diff --git a/encoder/irc_common.h b/encoder/irc_common.h
new file mode 100755
index 0000000..c341de4
--- /dev/null
+++ b/encoder/irc_common.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_COMMON_H_
+#define _RC_COMMON_H_
+
+/****************************************************************************
+ NOTE : Put only those things into this file which are common across many
+ files, say I_TO_P_BIT_RATIO macro is used across irc_bit_allocation.c
+ and irc_rate_control_api.c.If anything is exclusive only to one file,
+ define it in the same file
+
+ This file is an RC private file. It should not be exported to Codec
+ ****************************************************************************/
+
+ /* Marks a parameter or variable as intentionally unused */
+ #define UNUSED(x) ((void)(x))
+
+ /* Numeric type behind the *_var_q helpers below; currently plain float */
+ typedef float number_t;
+
+ /*
+  * Arithmetic helpers on number_t. The last argument of each helper is a
+  * pointer to the destination. Every argument and every expansion is
+  * parenthesised so that compound expressions can be passed safely.
+  */
+ #define mult32_var_q(a,b,c) (*(c) = (a) * (b))
+
+ /* Division; when the divisor is zero the dividend is stored unchanged */
+ #define div32_var_q(a,b,c) (*(c) = (((b) == 0) ? (a) : ((a) / (b))))
+
+ #define add32_var_q(a,b,c) (*(c) = (a) + (b))
+
+ #define sub32_var_q(a,b,c) (*(c) = (a) - (b))
+
+ /* Uses sqrt(), so <math.h> must be included wherever this is expanded */
+ #define sqrt32_var_q(a, c) (*(c) = sqrt(a))
+
+ /* Conversions to WORD32 use a plain cast, i.e. they truncate towards zero */
+ #define number_t_to_word32(num_a, a) (*(a) = (WORD32)(num_a))
+
+ #define convert_float_to_fix(a_f, a) (*(a) = (WORD32)(a_f))
+
+ #define convert_fix_to_float(a, a_f) (*(a_f) = (float)(a))
+
+ /*
+  * Loads the value b into the number_t a. The third argument is ignored in
+  * this float implementation. Kept as a brace block, as before.
+  */
+ #define SET_VAR_Q(a,b,c) {(a) = (float)(b);}
+
+
+ /* Defines the maximum quantizer allowed in the stream (no minimum is defined here) */
+ #define MAX_MPEG2_QP 255 /* 127 - NOTE(review): presumably an earlier limit kept for reference; confirm */
+
+ /* Bits ratio between I and P frame */
+ #define I_TO_P_BIT_RATIO 5
+
+ /* Calculates P1 = (X1 * Y1) / Z1 for integer inputs.
+ *
+ * The calculation is carried out in number_t (float) temporaries, so the
+ * product is rounded to float precision rather than wrapping like a 32-bit
+ * integer product. When Z1 is zero, div32_var_q stores the undivided product.
+ * The result is converted back with a cast to WORD32 (no range check).
+ *
+ * P1 must be a WORD32 lvalue because its address is taken. The macro expands
+ * to a brace block that declares vq_a, vq_b and vq_c, so the arguments must
+ * not refer to variables with those names. */
+ #define X_PROD_Y_DIV_Z(X1,Y1,Z1,P1)\
+ {\
+ number_t vq_a,vq_b,vq_c;\
+ SET_VAR_Q(vq_a,(X1),0);\
+ SET_VAR_Q(vq_b,(Y1),0);\
+ SET_VAR_Q(vq_c,(Z1),0);\
+ mult32_var_q(vq_a,vq_b,&vq_a);\
+ div32_var_q(vq_a,vq_c,&vq_a);\
+ number_t_to_word32(vq_a,&(P1));\
+ }
+ /*
+  * Z = (A < B) and Z = (A > B). Arguments are parenthesised so that compound
+  * expressions compare as a whole. As before, the expansion already ends in a
+  * semicolon.
+  */
+ #define VQ_A_LT_VQ_B(A,B, Z) (Z) = ((A) < (B));
+ #define VQ_A_GT_VQ_B(A,B, Z) (Z) = ((A) > (B));
+
+ /*
+  * Z=MAX(A,B) where A, B and Z are var_q variables.
+  * A and B are evaluated more than once; the block declares a_gt_b, so the
+  * arguments must not use that name.
+  */
+ #define MAX_VARQ(A,B, Z)\
+ {\
+     WORD32 a_gt_b;\
+     VQ_A_GT_VQ_B((A), (B), a_gt_b);\
+     (Z) = (a_gt_b) ? (A) : (B);\
+ }
+
+ /*
+  * Z=MIN(A,B) where A, B and Z are var_q variables.
+  * A and B are evaluated more than once; the block declares a_lt_b, so the
+  * arguments must not use that name.
+  */
+ #define MIN_VARQ(A,B, Z)\
+ {\
+     WORD32 a_lt_b;\
+     VQ_A_LT_VQ_B((A), (B), a_lt_b);\
+     (Z) = (a_lt_b) ? (A) : (B);\
+ }
+
+ /* Maximum number of drain-rates supported. Currently only 2 drain-rates are
+ supported: one for I pictures and the other for P & B pictures */
+ #define MAX_NUM_DRAIN_RATES 2
+
+ /* The ratios between I to P and P to B Qp are specified here.
+ * NOTE(review): "In K_Q Q factor" presumably means fixed point with K_Q
+ * fractional bits (value / (1 << K_Q), e.g. 19 / 16) - confirm against the
+ * code that uses these macros */
+ #define K_Q 4
+ #define I_TO_P_RATIO (19) /* In K_Q Q factor */
+ #define P_TO_B_RATIO (21) /* In K_Q Q factor */
+ #define P_TO_I_RATIO (13) /* In K_Q Q factor */
+
+#endif /* _RC_COMMON_H_ */
+
diff --git a/encoder/irc_datatypes.h b/encoder/irc_datatypes.h
new file mode 100755
index 0000000..8e4685a
--- /dev/null
+++ b/encoder/irc_datatypes.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+ * irc_datatypes.h
+*
+* @brief
+* Type definitions used in the code
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_TYPEDEFS_H_
+#define _IH264_TYPEDEFS_H_
+
+
+/*****************************************************************************/
+/* Unsigned data types */
+/*****************************************************************************/
+typedef unsigned char UWORD8;
+typedef unsigned short UWORD16;
+typedef unsigned int UWORD32;
+typedef unsigned long long UWORD64;
+
+
+/*****************************************************************************/
+/* Signed data types */
+/*****************************************************************************/
+typedef signed char WORD8;
+typedef short WORD16;
+typedef int WORD32;
+
+
+/*****************************************************************************/
+/* Miscellaneous data types */
+/*****************************************************************************/
+typedef char CHAR;
+typedef double DOUBLE;
+
+#endif /* _IH264_TYPEDEFS_H_ */
diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c
new file mode 100755
index 0000000..0d8abc2
--- /dev/null
+++ b/encoder/irc_est_sad.c
@@ -0,0 +1,260 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_est_sad.h"
+#include "irc_common.h"
+
+ typedef struct est_sad_t /* State of the SAD estimator */
+ {
+ WORD32 i4_use_est_intra_sad; /* Non-zero: the I frame SAD is estimated from the P frame SAD history */
+
+ /* Previous frame SAD, one entry per picture type */
+ UWORD32 au4_prev_frm_sad[MAX_PIC_TYPE];
+
+ /* Current (nth) ifi P frame SAD. Despite the name this holds the running
+ * SUM of the non-I frame SADs of the current ifi; it is divided by
+ * i4_num_p_frm_in_cur_ifi only when the average is needed */
+ UWORD32 u4_n_p_frm_ifi_avg_sad;
+
+ /* (n-1)th ifi average P frame SAD */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad;
+
+ /* (n-2)th ifi average P frame SAD */
+ UWORD32 u4_n_2_p_frm_ifi_avg_sad;
+
+ /* number of ifi encoded till now; the update code stops counting at 2 */
+ WORD32 i4_num_ifi_encoded;
+
+ /* number of non-I frames accumulated in the current IFI */
+ WORD32 i4_num_p_frm_in_cur_ifi;
+
+ } est_sad_t;
+
+ /*
+  * Memory-table helper for the SAD estimator state.
+  *
+  * For GET_NUM_MEMTAB and FILL_MEMTAB no state memory exists yet, so
+  * *pps_est_sad is pointed at a static stand-in to keep later dereferences
+  * valid. For every request other than GET_NUM_MEMTAB the single memtab entry
+  * is described via fill_memtab() and handed to use_or_fill_base().
+  *
+  * Returns the number of memtab entries used by this module (always 1).
+  */
+ WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_t **pps_est_sad,
+                                             itt_memtab_t *ps_memtab,
+                                             ITT_FUNC_TYPE_E e_func_type)
+ {
+     static est_sad_t s_est_sad;
+     const WORD32 i4_num_memtabs = 1;
+
+     if(e_func_type == GET_NUM_MEMTAB)
+     {
+         (*pps_est_sad) = &s_est_sad;
+         return (i4_num_memtabs);
+     }
+
+     if(e_func_type == FILL_MEMTAB)
+     {
+         (*pps_est_sad) = &s_est_sad;
+     }
+
+     /* The estimator state lives in the first (and only) memtab entry */
+     fill_memtab(&ps_memtab[0], sizeof(est_sad_t), ALIGN_128_BYTE, PERSISTENT,
+                 DDR);
+     use_or_fill_base(&ps_memtab[0], (void**)pps_est_sad, e_func_type);
+
+     return (i4_num_memtabs);
+ }
+
+ /*
+  * Puts the SAD estimator into its initial state: stores the estimation mode
+  * flag and clears all SAD history and counters.
+  */
+ void irc_init_est_sad(est_sad_t *ps_est_sad, WORD32 i4_use_est_intra_sad)
+ {
+     WORD32 i4_pic_type;
+
+     /* Counters of the intra frame interval (ifi) bookkeeping */
+     ps_est_sad->i4_num_ifi_encoded = 0;
+     ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+
+     /* P frame SAD history of the current and the two previous ifi */
+     ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+     ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = 0;
+     ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = 0;
+
+     /* Last seen SAD of every picture type */
+     for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+     {
+         ps_est_sad->au4_prev_frm_sad[i4_pic_type] = 0;
+     }
+
+     ps_est_sad->i4_use_est_intra_sad = i4_use_est_intra_sad;
+ }
+
+ /* Re-initialises the estimator while keeping its current estimation mode */
+ void irc_reset_est_sad(est_sad_t *ps_est_sad)
+ {
+     WORD32 i4_use_est_intra_sad = ps_est_sad->i4_use_est_intra_sad;
+
+     irc_init_est_sad(ps_est_sad, i4_use_est_intra_sad);
+ }
+
+/*
+ * Get estimated SAD can be called at any point. The various use cases are:
+ * 1) When a I frame is getting encoded,
+ * - get the estimated of P => No issues since we use the last coded P frame
+ * value
+ * - get estimated of I => This call for two cases:
+ * => a) if num_ifi_encoded is less than 2
+ * then return the previous encoded I frame sad
+ * => b) if num_ifi_encoded is more than 2, then we scale
+ * the prev I sad by the ratio of (n-1) ifi P to n-2 ifi P
+ * 2) When P frame is getting encoded,
+ * - get the estimated of P => No issues since we use the last coded P frame value
+ * - get the estimated of I => Simillar to I we have two cases.
+ * To handle the b) case extra logic had to introduced using
+ * u1_is_n_1_p_frm_ifi_avg_sad_usable flag
+ */
+UWORD32 irc_get_est_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type)
+{
+ if(ps_est_sad->i4_use_est_intra_sad)
+ {
+ UWORD32 u4_estimated_sad;
+ if(e_pic_type == P_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[P_PIC];
+ }
+ else if(e_pic_type == B_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[B_PIC];
+ }
+ else
+ {
+ if(ps_est_sad->i4_num_ifi_encoded < 2)
+ {
+ /*
+ * Only one IFI has been encoded and so use the previous I
+ * frames SAD
+ */
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ else
+ {
+ /*
+ * Since the n-1 'P' frame IFI would have just accumulated the
+ * frame sads we average it out here
+ */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad;
+ number_t vq_n_1_p_frm_ifi_avg_sad, vq_n_2_p_frm_ifi_avg_sad;
+ number_t vq_prev_frm_sad_i;
+
+ /*
+ * If there are frames in the current IFI start using it to
+ * estimate the I frame SAD
+ */
+ if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+ {
+ u4_n_1_p_frm_ifi_avg_sad =
+ (ps_est_sad->u4_n_p_frm_ifi_avg_sad
+ / ps_est_sad->i4_num_p_frm_in_cur_ifi);
+ u4_n_2_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ }
+ else
+ {
+ u4_n_1_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ u4_n_2_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_2_p_frm_ifi_avg_sad;
+ }
+
+ /*
+ * If any of the previous p frame SADs are zeros we just return
+ * the previous I frame SAD
+ */
+ if(u4_n_1_p_frm_ifi_avg_sad && u4_n_2_p_frm_ifi_avg_sad)
+ {
+ SET_VAR_Q(vq_prev_frm_sad_i,
+ ps_est_sad->au4_prev_frm_sad[I_PIC], 0);
+ SET_VAR_Q(vq_n_1_p_frm_ifi_avg_sad,
+ u4_n_1_p_frm_ifi_avg_sad, 0);
+ SET_VAR_Q(vq_n_2_p_frm_ifi_avg_sad,
+ u4_n_2_p_frm_ifi_avg_sad, 0);
+ /*
+ * Estimated SAD =
+ *(n-1)th intra frame interval(ifi) P frame Avg SAD *
+ *(prev I frame SAD /
+ *(prev (n-2)nd intra frame interval(ifi) P frame Avg SAD)
+ */
+ mult32_var_q(vq_prev_frm_sad_i, vq_n_1_p_frm_ifi_avg_sad,
+ &vq_prev_frm_sad_i);
+ div32_var_q(vq_prev_frm_sad_i, vq_n_2_p_frm_ifi_avg_sad,
+ &vq_prev_frm_sad_i);
+ number_t_to_word32(vq_prev_frm_sad_i,
+ (WORD32*)&u4_estimated_sad);
+ }
+ else
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ }
+ }
+ return u4_estimated_sad;
+ }
+ else
+ {
+ return ps_est_sad->au4_prev_frm_sad[e_pic_type];
+ }
+}
+
+ /*
+  * Records the actual SAD of a coded frame.
+  *
+  * The SAD is always stored as the "previous frame SAD" of its picture type.
+  * When I frame SAD estimation is enabled, non-I frames are additionally
+  * accumulated into the current IFI, and an I frame closes the current IFI:
+  * its average is pushed into the (n-1)/(n-2) history and the accumulation
+  * starts over.
+  */
+ void irc_update_actual_sad(est_sad_t *ps_est_sad,
+                            UWORD32 u4_actual_sad,
+                            picture_type_e e_pic_type)
+ {
+     UWORD32 u4_cur_ifi_avg_sad = 0;
+
+     ps_est_sad->au4_prev_frm_sad[e_pic_type] = u4_actual_sad;
+
+     if(!ps_est_sad->i4_use_est_intra_sad)
+     {
+         return;
+     }
+
+     if(e_pic_type != I_PIC)
+     {
+         /* Accumulate; the average is formed when it is needed */
+         ps_est_sad->u4_n_p_frm_ifi_avg_sad += u4_actual_sad;
+         ps_est_sad->i4_num_p_frm_in_cur_ifi++;
+         return;
+     }
+
+     /* The requirement is to have two IFI before estimating I frame SAD */
+     if(ps_est_sad->i4_num_ifi_encoded < 2)
+     {
+         ps_est_sad->i4_num_ifi_encoded++;
+     }
+
+     /* Average SAD of the IFI that just ended (zero if it had no frames) */
+     if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+     {
+         u4_cur_ifi_avg_sad = ps_est_sad->u4_n_p_frm_ifi_avg_sad
+                         / ps_est_sad->i4_num_p_frm_in_cur_ifi;
+     }
+
+     /* Shift the history: (n-1) becomes (n-2), n becomes (n-1) */
+     ps_est_sad->u4_n_2_p_frm_ifi_avg_sad =
+                     ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+     ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = u4_cur_ifi_avg_sad;
+
+     /* Start a fresh accumulation for the new IFI */
+     ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+     ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+ }
+
+ /*
+  * Stores the intra frame cost as the I frame SAD. Does nothing when I frame
+  * SAD estimation is enabled.
+  */
+ void irc_update_actual_sad_for_intra(est_sad_t *ps_est_sad,
+                                      WORD32 i4_intra_frm_cost)
+ {
+     if(ps_est_sad->i4_use_est_intra_sad)
+     {
+         return;
+     }
+
+     irc_update_actual_sad(ps_est_sad, i4_intra_frm_cost, I_PIC);
+ }
diff --git a/encoder/irc_est_sad.h b/encoder/irc_est_sad.h
new file mode 100755
index 0000000..c8238c9
--- /dev/null
+++ b/encoder/irc_est_sad.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _EST_SAD_H_
+#define _EST_SAD_H_
+
+/*
+ * "est_sad_t->i4_use_est_intra_sad" Flag to control how the I frame SAD is estimated.
+ * If set to zero
+ * - it uses the Intra sad calculated by the previous P frame as
+ * the estimated sad for the current I frame
+ * else
+ * - it uses the ratio of P frame sads of the previous two GOPS and
+ * scales the I Frame sad with this ratio to estimate the current
+ * I frame SAD
+ */
+
+/* Estimating the Average SAD for the current picture type is done by:
+ * 1) if picture_type is I
+ * - Estimated SAD = (n-1)th intra frame interval(ifi) P frame Avg SAD *
+ * ( prev I frame SAD / (n-2)nd intra frame interval(ifi) P frame Avg SAD)
+ * - if only one IFI is encoded use the previous I frame SAD
+ * 2) if picture type is P
+ * - Estimate SAD is previous P frame SAD
+ * 3) The first P frame in a IFI could use a little better logic to decide the
+ * estimated SAD but currently we assume the last coded P frames SAD
+ */
+
+ /* Handle to the SAD estimator state; the structure is not defined here */
+ typedef struct est_sad_t *est_sad_handle;
+
+ /* Memtab helper; returns the number of memtab entries the estimator uses */
+ WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_handle *est_sad,
+                                             itt_memtab_t *ps_memtab,
+                                             ITT_FUNC_TYPE_E e_func_type);
+
+ /* Clears all SAD history and stores the estimation mode flag */
+ void irc_init_est_sad(est_sad_handle est_sad, WORD32 i4_use_est_intra_sad);
+
+ /* Returns the estimated SAD for a picture of the given type */
+ UWORD32 irc_get_est_sad(est_sad_handle est_sad, picture_type_e e_pic_type);
+
+ /* Records the actual SAD of a coded frame of the given type */
+ void irc_update_actual_sad(est_sad_handle est_sad,
+                            UWORD32 u4_actual_sad,
+                            picture_type_e e_pic_type);
+
+ /* Stores the intra frame cost as I frame SAD; no-op when estimation is on */
+ void irc_update_actual_sad_for_intra(est_sad_handle est_sad,
+                                      WORD32 i4_intra_frm_cost);
+
+ /* Re-initialises the estimator, keeping its estimation mode */
+ void irc_reset_est_sad(est_sad_handle ps_est_sad);
+#endif
diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c
new file mode 100755
index 0000000..42dcfc5
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.c
@@ -0,0 +1,185 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+
+ typedef struct error_bits_t /* State used to recover bits lost to the per-frame bit budget division */
+ {
+ /* Max tgt frm rate so that dynamic change in frm rate can be handled.
+ * Also the wrap-around limit of i4_cur_tgt_frm_rate */
+ WORD32 i4_max_tgt_frm_rate;
+
+ /* Cur frm rate: accumulator advanced by i4_tgt_frm_rate_incr on every
+ * update; reaching i4_max_tgt_frm_rate marks the end of a period */
+ WORD32 i4_cur_tgt_frm_rate;
+
+ /* tgt frame rate, used as divisor for the bits per frame */
+ WORD32 i4_tgt_frm_rate;
+
+ /* tgt frm rate increment: 1000 after init,
+ * (i4_max_tgt_frm_rate * 1000) / tgt frame rate after a frame rate change */
+ WORD32 i4_tgt_frm_rate_incr;
+
+ /* flag to indicate 1 second is up */
+ UWORD8 u1_compute_error_bits;
+
+ /* Bitrate/frame rate value added over a period; restarted on the update
+ * that follows a completed period */
+ WORD32 i4_accum_bitrate;
+
+ /* bitrate */
+ WORD32 i4_bitrate;
+
+ } error_bits_t;
+
+ /*
+  * Memory-table helper for the error-bits state.
+  *
+  * For GET_NUM_MEMTAB and FILL_MEMTAB no state memory exists yet, so
+  * *pps_error_bits is pointed at a static stand-in to keep later dereferences
+  * valid. For every request other than GET_NUM_MEMTAB the single memtab entry
+  * is described via fill_memtab() and handed to use_or_fill_base().
+  *
+  * Returns the number of memtab entries used by this module (always 1).
+  */
+ WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_t **pps_error_bits,
+                                                itt_memtab_t *ps_memtab,
+                                                ITT_FUNC_TYPE_E e_func_type)
+ {
+     static error_bits_t s_error_bits_temp;
+     const WORD32 i4_num_memtabs = 1;
+
+     if(e_func_type == GET_NUM_MEMTAB)
+     {
+         (*pps_error_bits) = &s_error_bits_temp;
+         return (i4_num_memtabs);
+     }
+
+     if(e_func_type == FILL_MEMTAB)
+     {
+         (*pps_error_bits) = &s_error_bits_temp;
+     }
+
+     /* The error-bits state lives in the first (and only) memtab entry */
+     fill_memtab(&ps_memtab[0], sizeof(error_bits_t), ALIGN_128_BYTE,
+                 PERSISTENT, DDR);
+     use_or_fill_base(&ps_memtab[0], (void**)pps_error_bits, e_func_type);
+
+     return (i4_num_memtabs);
+ }
+
+/*******************************************************************************
+ * @brief Calculates the error bits due to fixed point divisions
+ ******************************************************************************/
+void irc_init_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_max_tgt_frm_rate,
+ WORD32 i4_bitrate)
+{
+ /* Initializing the parameters*/
+ ps_error_bits->i4_cur_tgt_frm_rate = 0;
+ ps_error_bits->i4_max_tgt_frm_rate = i4_max_tgt_frm_rate;
+
+ /* Value by which i4_cur_tgt_frm_rate is incremented every VOP*/
+ ps_error_bits->i4_tgt_frm_rate_incr = 1000;
+
+ /*Compute error bits is set to 1 at the end of 1 second*/
+ ps_error_bits->u1_compute_error_bits = 0;
+ ps_error_bits->i4_tgt_frm_rate = i4_max_tgt_frm_rate;
+ ps_error_bits->i4_accum_bitrate = 0;
+ ps_error_bits->i4_bitrate = i4_bitrate;
+}
+
+/*******************************************************************************
+ * @brief Updates the error state
+ ******************************************************************************/
+void irc_update_error_bits(error_bits_t *ps_error_bits)
+{
+ WORD32 i4_bits_per_frame;
+
+ X_PROD_Y_DIV_Z(ps_error_bits->i4_bitrate, 1000,
+ ps_error_bits->i4_tgt_frm_rate, i4_bits_per_frame);
+
+ /*
+ * This value is incremented every at the end of every VOP by
+ * i4_tgt_frm_rate_incr
+ */
+ ps_error_bits->i4_cur_tgt_frm_rate += ps_error_bits->i4_tgt_frm_rate_incr;
+ if(ps_error_bits->u1_compute_error_bits == 1)
+ {
+ ps_error_bits->i4_accum_bitrate = 0;
+ }
+ ps_error_bits->i4_accum_bitrate += i4_bits_per_frame;
+
+ /*
+ * When current tgt frm rate is equal or greater than max tgt frame rate
+ * 1 second is up , compute the error bits
+ */
+ if(ps_error_bits->i4_cur_tgt_frm_rate >= ps_error_bits->i4_max_tgt_frm_rate)
+ {
+ ps_error_bits->i4_cur_tgt_frm_rate -=
+ ps_error_bits->i4_max_tgt_frm_rate;
+ ps_error_bits->u1_compute_error_bits = 1;
+ }
+ else
+ {
+ ps_error_bits->u1_compute_error_bits = 0;
+ }
+}
+
+/*******************************************************************************
+ * @brief Returns the error bits for the current frame if there are any
+ *
+ ******************************************************************************/
+WORD32 irc_get_error_bits(error_bits_t *ps_error_bits)
+{
+ WORD32 i4_error_bits = 0;
+
+ /*If 1s is up calculate error for the last 1s worth of frames*/
+ if(ps_error_bits->u1_compute_error_bits == 1)
+ {
+ /*Error = Actual bitrate - bits_per_frame * num of frames*/
+ i4_error_bits = ps_error_bits->i4_bitrate
+ - ps_error_bits->i4_accum_bitrate;
+ }
+
+ return (i4_error_bits);
+}
+
+/* *****************************************************************************
+ *
+ * @brief Change the frame rate parameter for the error bits state
+ *
+ ******************************************************************************/
+void irc_change_frm_rate_in_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_tgt_frm_rate)
+{
+ /* Value by which i4_cur_tgt_frm_rate is incremented every VOP*/
+ ps_error_bits->i4_tgt_frm_rate_incr = (ps_error_bits->i4_max_tgt_frm_rate
+ * 1000) / i4_tgt_frm_rate;
+ ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the bitrate value for error bits module
+ ******************************************************************************/
+void irc_change_bitrate_in_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_bitrate)
+{
+ ps_error_bits->i4_bitrate = i4_bitrate;
+}
+
diff --git a/encoder/irc_fixed_point_error_bits.h b/encoder/irc_fixed_point_error_bits.h
new file mode 100755
index 0000000..4ddf1eb
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+ /* File Name : irc_fixed_point_error_bits.h */
+ /* */
+ /* Description : This file contains all the necessary declarations */
+ /* for the fixed point error bits functions */
+/* */
+/* */
+/* List of Functions : <List the functions defined in this file> */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2008 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef FIXED_POINT_ERROR_BITS_H
+#define FIXED_POINT_ERROR_BITS_H
+
+ typedef struct error_bits_t *error_bits_handle; /* Handle to the error bits state; the structure is not defined here */
+
+ WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_handle *pps_error_bits,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type); /* Memtab helper; returns the number of memtab entries used */
+
+ void irc_init_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_max_tgt_frm_rate,
+ WORD32 i4_bitrate); /* Stores the rates and clears the accumulators */
+
+ void irc_update_error_bits(error_bits_handle ps_error_bits); /* Accumulates the bits per frame; flags the end of each one second period */
+
+ WORD32 irc_get_error_bits(error_bits_handle ps_error_bits); /* Bitrate minus accumulated bits when a period has just ended, else 0 */
+
+ void irc_change_frm_rate_in_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_tgt_frm_rate); /* Divides by i4_tgt_frm_rate, so the caller must pass a non-zero value */
+
+ void irc_change_bitrate_in_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_bitrate); /* Replaces the stored bitrate */
+
+#endif
+
diff --git a/encoder/irc_frame_info_collector.c b/encoder/irc_frame_info_collector.c
new file mode 100755
index 0000000..65f24c4
--- /dev/null
+++ b/encoder/irc_frame_info_collector.c
@@ -0,0 +1,177 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/******************************************************************************/
+/* File Includes */
+/******************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+
+/******************************************************************************
+ Description : Resets every counter and accumulator of frame_info to zero
+ ******************************************************************************/
+void irc_init_frame_info(frame_info_t *frame_info)
+{
+    WORD32 mb_type;
+
+    /* Accumulators kept separately for every MB type */
+    for(mb_type = 0; mb_type < MAX_MB_TYPE; ++mb_type)
+    {
+        frame_info->num_mbs[mb_type] = 0;
+        frame_info->qp_sum[mb_type] = 0;
+        frame_info->tot_mb_sad[mb_type] = 0;
+        frame_info->mb_header_bits[mb_type] = 0;
+        frame_info->mb_texture_bits[mb_type] = 0;
+    }
+
+    /* Frame-wide accumulators */
+    frame_info->intra_mb_cost_sum = 0;
+    frame_info->activity_sum = 0;
+    frame_info->other_header_bits = 0;
+}
+
+/******************************************************************************
+ * GET Functions: Sending back collected information to the rate control module
+ ******************************************************************************/
+/* Returns the MB header bits of all MB types plus the non-MB header bits */
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 bits = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        bits += frame_info->mb_header_bits[mb_type];
+        mb_type++;
+    }
+
+    /* Header bits that are not attributed to any MB type */
+    bits += frame_info->other_header_bits;
+
+    return bits;
+}
+
+/* Returns the texture bits summed over all MB types */
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info)
+{
+    WORD32 mb_type;
+    WORD32 bits = 0;
+
+    for(mb_type = 0; mb_type < MAX_MB_TYPE; ++mb_type)
+    {
+        bits = bits + frame_info->mb_texture_bits[mb_type];
+    }
+
+    return bits;
+}
+
+/* Returns the SAD summed over all MB types */
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 frame_sad = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        frame_sad += frame_info->tot_mb_sad[mb_type];
+        ++mb_type;
+    }
+
+    return frame_sad;
+}
+
+/*
+ * Returns the QP averaged over all MBs of the frame (integer division of the
+ * summed QPs by the summed MB counts), or 0 when no MB has been counted
+ */
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info)
+{
+    WORD32 mb_type;
+    WORD32 qp_acc = 0;
+    WORD32 mb_cnt = 0;
+
+    for(mb_type = 0; mb_type < MAX_MB_TYPE; ++mb_type)
+    {
+        qp_acc += frame_info->qp_sum[mb_type];
+        mb_cnt += frame_info->num_mbs[mb_type];
+    }
+
+    /* Nothing counted: report 0 rather than divide by zero */
+    if(0 == mb_cnt)
+    {
+        return 0;
+    }
+
+    return (qp_acc / mb_cnt);
+}
+
+/*
+ * Returns the header bits per MB for the given MB type (integer division),
+ * or 0 when no MB of that type has been counted
+ */
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    /* No MB of this type: report 0 rather than divide by zero */
+    if(0 == frame_info->num_mbs[mb_type])
+    {
+        return 0;
+    }
+
+    return (frame_info->mb_header_bits[mb_type]
+                    / frame_info->num_mbs[mb_type]);
+}
+
+/* Returns the texture bits accumulated for the given MB type */
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+                                        UWORD8 mb_type)
+{
+    const WORD32 texture_bits = frame_info->mb_texture_bits[mb_type];
+
+    return texture_bits;
+}
+
+/* Returns the SAD accumulated for the given MB type */
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    const WORD32 mb_sad = frame_info->tot_mb_sad[mb_type];
+
+    return mb_sad;
+}
+
+/*
+ * Returns the QP sum accumulated for the given MB type, or 0 when no MB of
+ * that type has been counted
+ */
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    return frame_info->num_mbs[mb_type] ? frame_info->qp_sum[mb_type] : 0;
+}
+
+/* Returns the number of MBs counted for the given MB type */
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    const WORD32 mb_cnt = frame_info->num_mbs[mb_type];
+
+    return mb_cnt;
+}
+
+/* Returns the number of MBs counted under MB_TYPE_INTRA */
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info)
+{
+    const WORD32 intra_mb_cnt = frame_info->num_mbs[MB_TYPE_INTRA];
+
+    return intra_mb_cnt;
+}
+
+/*
+ * Returns the activity sum divided by the number of MBs of all types (integer
+ * division), or 0 when no MB has been counted
+ */
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 mb_cnt = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        mb_cnt += frame_info->num_mbs[mb_type];
+        mb_type++;
+    }
+
+    /* Nothing counted: report 0 rather than divide by zero */
+    if(0 == mb_cnt)
+    {
+        return 0;
+    }
+
+    return (frame_info->activity_sum / mb_cnt);
+}
+
+/* Returns the accumulated intra MB cost of the frame */
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info)
+{
+    const WORD32 intra_cost = frame_info->intra_mb_cost_sum;
+
+    return intra_cost;
+}
diff --git a/encoder/irc_frame_info_collector.h b/encoder/irc_frame_info_collector.h
new file mode 100755
index 0000000..58dc467
--- /dev/null
+++ b/encoder/irc_frame_info_collector.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _FRAME_INFO_COLLECTOR_H_
+#define _FRAME_INFO_COLLECTOR_H_
+/* Statistics accumulated for one frame; every array holds one entry per MB type (index < MAX_MB_TYPE) */
+typedef struct
+{
+ /* Number of MBs in each type (incremented by FI_UPDATE_MB_QP) */
+ WORD32 num_mbs[MAX_MB_TYPE];
+
+ /* Sum of all MB SADs of each MB type */
+ WORD32 tot_mb_sad[MAX_MB_TYPE];
+
+ /* Sum of QPs for each mb type */
+ WORD32 qp_sum[MAX_MB_TYPE];
+
+ /* Header bits consumed other than MB headers */
+ WORD32 other_header_bits;
+
+ /* Header bits consumed for each type of MBs */
+ WORD32 mb_header_bits[MAX_MB_TYPE];
+
+ /* Texture bits consumed for each type of MBs */
+ WORD32 mb_texture_bits[MAX_MB_TYPE];
+
+ /* Sum of all MB activity (not split by MB type) */
+ WORD32 activity_sum;
+
+ /* Sum of all the Intra MB cost values for the entire frame */
+ WORD32 intra_mb_cost_sum;
+
+} frame_info_t;
+/* Sets every accumulator of frame_info to zero (see irc_frame_info_collector.c) */
+void irc_init_frame_info(frame_info_t *frame_info);
+
+/*
+ * Update functions: Collecting information from encoder
+ *
+ * Each macro adds its value argument to the matching accumulator of
+ * frame_info, which must be a pointer to a frame_info_t.
+ *
+ * The macros expand to a brace block, not to do { } while(0). A use such as
+ * "if(c) FI_UPDATE_X(...); else ..." therefore does not compile; put the
+ * macro call inside braces in that situation.
+ */
+#define FI_UPDATE_OTHER_HEADER_BITS(frame_info,header_bits)\
+ {(frame_info)->other_header_bits += (header_bits);}
+
+#define FI_UPDATE_MB_HEADER(frame_info,header_bits,mb_type)\
+ {(frame_info)->mb_header_bits[(mb_type)] += (header_bits);}
+
+#define FI_UPDATE_MB_TEXTURE(frame_info,texture_bits,mb_type)\
+ {(frame_info)->mb_texture_bits[(mb_type)] += (texture_bits);}
+
+#define FI_UPDATE_MB_SAD(frame_info,mb_sad,mb_type)\
+ {(frame_info)->tot_mb_sad[(mb_type)] += (mb_sad);}
+/* Also counts the MB: num_mbs[mb_type] is incremented only here. frame_info and mb_type are expanded twice, so pass expressions without side effects */
+#define FI_UPDATE_MB_QP(frame_info,qp,mb_type)\
+ {(frame_info)->qp_sum[(mb_type)] += (qp);(frame_info)->num_mbs[(mb_type)]++;}
+
+#define FI_UPDATE_ACTIVITY(frame_info,mb_activity)\
+ {(frame_info)->activity_sum += (mb_activity);}
+
+#define FI_UPDATE_INTRA_MB_COST(frame_info,intra_mb_cost)\
+ {(frame_info)->intra_mb_cost_sum += (intra_mb_cost);}
+
+/*
+ * GET Functions: Sending back collected information to the rate control module
+ *
+ * None of the getters taking mb_type range-checks it; it is used directly as
+ * an index into arrays of MAX_MB_TYPE entries.
+ */
+
+/* Frame Level Model Information */
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info);
+/* Texture bits summed over all MB types */
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info);
+/* Summed QP divided by the total MB count; 0 when no MB was counted */
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info);
+/* SAD summed over all MB types */
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info);
+/* activity_sum divided by the total MB count; 0 when no MB was counted */
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info);
+
+/* Number of Intra MBs for Scene Change Detection */
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info);
+
+/* MB Level Model Information */
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+ UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type);
+/* QP sum of the given MB type; 0 when no MB of that type was counted */
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info);
+#endif
diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c
new file mode 100755
index 0000000..880ee19
--- /dev/null
+++ b/encoder/irc_mb_model_based.c
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_mb_model_based.h"
+
+typedef struct mb_rate_control_t
+{
+ /* Frame Qp: written by irc_mb_init_frame_level(), left untouched by irc_init_mb_level_rc() */
+ UWORD8 u1_frm_qp;
+
+ /*
+ * Estimated average activity for the current frame (updated with the
+ * previous frame activity since it is independent of picture type whether
+ * it is I or P)
+ *
+ * While this is 0, irc_get_mb_qp() skips the activity based modulation and
+ * returns the frame Qp unchanged.
+ */
+ WORD32 i4_avg_activity;
+
+} mb_rate_control_t;
+
+/******************************************************************************
+ Description : Memtab handling for the mb rate control state.
+               GET_NUM_MEMTAB : ps_memtab is not touched
+               other values   : memtab 0 is filled with the size/alignment of
+                                mb_rate_control_t and handed, together with the
+                                handle, to use_or_fill_base()
+               Returns the number of memtabs handled (always 1)
+ ******************************************************************************/
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_t **pps_mb_rate_control,
+                                         itt_memtab_t *ps_memtab,
+                                         ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Dummy state the handle points at while no state memory exists */
+    static mb_rate_control_t s_mb_rate_control_temp;
+    WORD32 i4_num_memtabs = 0;
+
+    /*
+     * Hack for the GET_NUM_MEMTAB / FILL_MEMTAB calls, during which we don't
+     * have any state memory. Dereferencing can cause issues, so the handle is
+     * pointed at the static dummy above.
+     */
+    switch(e_func_type)
+    {
+        case GET_NUM_MEMTAB:
+        case FILL_MEMTAB:
+            *pps_mb_rate_control = &s_mb_rate_control_temp;
+            break;
+
+        default:
+            break;
+    }
+
+    /* For the mb rate control state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_cur_memtab = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_cur_memtab, sizeof(mb_rate_control_t), ALIGN_128_BYTE,
+                    PERSISTENT, DDR);
+        use_or_fill_base(ps_cur_memtab, (void **)pps_mb_rate_control,
+                         e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+/*******************************************************************************
+ MB LEVEL API FUNCTIONS
+ ******************************************************************************/
+
+/******************************************************************************
+ Description : Puts the mb level rate control state into its default state:
+               the average activity is cleared. The frame qp is not touched
+               here.
+ ******************************************************************************/
+void irc_init_mb_level_rc(mb_rate_control_t *ps_mb_rate_control)
+{
+    /* 0 means "no activity estimate available yet" */
+    const WORD32 i4_default_avg_activity = 0;
+
+    ps_mb_rate_control->i4_avg_activity = i4_default_avg_activity;
+}
+
+/******************************************************************************
+ Description : Stores the qp decided at frame level in the mb state
+ ******************************************************************************/
+void irc_mb_init_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                             UWORD8 u1_frame_qp)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    /* Frame level QP used as the starting point of every MB of the frame */
+    ps_state->u1_frm_qp = u1_frame_qp;
+}
+
+/******************************************************************************
+ Description : Clears the stored average mb activity - done whenever there is
+               an SCD. With the average at 0, irc_get_mb_qp() does not
+               modulate the qp.
+ ******************************************************************************/
+void irc_reset_mb_activity(mb_rate_control_t *ps_mb_rate_control)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    ps_state->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description : Calculates the mb level qp
+               pi4_mb_qp[0] receives the frame level qp
+               pi4_mb_qp[1] receives the qp modulated by the mb activity
+ ******************************************************************************/
+void irc_get_mb_qp(mb_rate_control_t *ps_mb_rate_control,
+                   WORD32 i4_cur_mb_activity,
+                   WORD32 *pi4_mb_qp)
+{
+    /* The frame level qp is the starting point for the mb level qp */
+    const WORD32 i4_frm_qp = ps_mb_rate_control->u1_frm_qp;
+    WORD32 i4_mod_qp = i4_frm_qp;
+
+    /* Model based QP - this one is used for updating the rate control model */
+    pi4_mb_qp[0] = i4_frm_qp;
+
+    /* Activity based modulation needs a non-zero average activity */
+    if((0 != ps_mb_rate_control->i4_avg_activity) && (i4_frm_qp < 100))
+    {
+        const WORD32 i4_avg_act = ps_mb_rate_control->i4_avg_activity;
+        const WORD32 i4_divisor = i4_cur_mb_activity + 2 * i4_avg_act;
+        const WORD32 i4_qp_limit = (3 * ps_mb_rate_control->u1_frm_qp) >> 1;
+
+        /*
+         * qp * (2 * cur + avg) / (cur + 2 * avg); half the divisor is added
+         * before the division
+         */
+        i4_mod_qp = (((2 * i4_cur_mb_activity) + i4_avg_act) * i4_frm_qp
+                        + (i4_divisor >> 1)) / i4_divisor;
+
+        /* Never go beyond 1.5 times the frame qp */
+        if(i4_mod_qp > i4_qp_limit)
+        {
+            i4_mod_qp = i4_qp_limit;
+        }
+    }
+
+    /* QP modulated by mb activity - this one is used for encoding the MB */
+    pi4_mb_qp[1] = i4_mod_qp;
+}
+
+/*******************************************************************************
+ Description : Hands back the frame level QP last stored by
+               irc_mb_init_frame_level()
+ ******************************************************************************/
+UWORD8 irc_get_frm_level_qp(mb_rate_control_t *ps_mb_rate_control)
+{
+    const UWORD8 u1_qp = ps_mb_rate_control->u1_frm_qp;
+
+    return u1_qp;
+}
+
+/*******************************************************************************
+ Description : Stores the average activity collected at frame level; it is the
+               reference that irc_get_mb_qp() compares each mb activity against
+ ******************************************************************************/
+void irc_mb_update_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                               WORD32 i4_avg_activity)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    ps_state->i4_avg_activity = i4_avg_activity;
+}
diff --git a/encoder/irc_mb_model_based.h b/encoder/irc_mb_model_based.h
new file mode 100755
index 0000000..aad520a
--- /dev/null
+++ b/encoder/irc_mb_model_based.h
@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _MB_MODEL_BASED_H_
+#define _MB_MODEL_BASED_H_
+/* Opaque handle; struct mb_rate_control_t is defined in irc_mb_model_based.c */
+typedef struct mb_rate_control_t *mb_rate_control_handle;
+/* Memtab handling for the state; ps_memtab is not touched for GET_NUM_MEMTAB. Returns the number of memtabs handled (1) */
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_handle *pps_mb_rate_control,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/* Initializing the state structure (clears the average activity only) */
+void irc_init_mb_level_rc(mb_rate_control_handle ps_mb_rate_control);
+
+/* MB parameters that are to be initialized at a frame level (stores the frame QP) */
+void irc_mb_init_frame_level(mb_rate_control_handle ps_mb_rate_control,
+ UWORD8 u1_frame_qp);
+
+/*
+ * MB Level call to get the mb_level QP
+ * pi4_mb_qp must point to two WORD32s: [0] receives the frame level QP,
+ * [1] the QP modulated by i4_cur_mb_activity
+ */
+void irc_get_mb_qp(mb_rate_control_handle ps_mb_rate_control,
+ WORD32 i4_cur_mb_activity,
+ WORD32 *pi4_mb_qp);
+
+/* MB Parameters that are to be updated at a frame level (stores the average activity) */
+void irc_mb_update_frame_level(mb_rate_control_handle ps_mb_rate_control,
+ WORD32 i4_avg_activity);
+
+/****************************************************************************
+ CONTROL FUNCTIONS FROM FRAME LEVEL
+ ****************************************************************************/
+
+/* Returns the stored frame level QP */
+UWORD8 irc_get_frm_level_qp(mb_rate_control_handle ps_mb_rate_control);
+
+/* Disables activity based qp modulation (by clearing the average activity) */
+void irc_reset_mb_activity(mb_rate_control_handle ps_mb_rate_control);
+
+#endif
+
diff --git a/encoder/irc_mem_req_and_acq.h b/encoder/irc_mem_req_and_acq.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/irc_mem_req_and_acq.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* irc_mem_req_and_acq.h
+*
+* @brief
+* This file contains function declaration and structures for rate control
+* memtabs
+*
+* @author
+* ittiam
+*
+* @remarks
+* The rate control library is a global library across various codecs. It
+* anticipates certain structures definitions. Those definitions are to be
+* imported from global workspace. Instead of that, the structures needed for
+* rc library are copied in to this file and exported to rc library. If the
+* structures / enums / ... in the global workspace change, this file also needs
+* to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+/*
+ * Writes u4_size, u4_mem_size, u4_mem_alignment and e_mem_type of entry m_j
+ * of m_pv_mem_rec. u4_size is set to sizeof(iv_mem_rec_t), so the entries are
+ * presumably iv_mem_rec_t records (not declared in this header) - the fields
+ * written here do not match itt_memtab_t below.
+ *
+ * The macro shares its name with the ITT_FUNC_TYPE_E enumerator FILL_MEMTAB.
+ * That only works because a function-like macro is expanded solely when its
+ * name is followed by '('.
+ *
+ * The arguments are not parenthesised and m_pv_mem_rec / m_j are expanded
+ * four times each: pass plain identifiers or constants.
+ */
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type) \
+{ \
+ m_pv_mem_rec[m_j].u4_size = sizeof(iv_mem_rec_t); \
+ m_pv_mem_rec[m_j].u4_mem_size = m_mem_size; \
+ m_pv_mem_rec[m_j].u4_mem_alignment = m_align; \
+ m_pv_mem_rec[m_j].e_mem_type = m_type; \
+}
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum /* Memory alignment; each value is the alignment in bytes */
+{
+ ALIGN_BYTE = 1,
+ ALIGN_WORD16 = 2,
+ ALIGN_WORD32 = 4,
+ ALIGN_WORD64 = 8,
+ ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+/* Usage class of a memory record (see itt_memtab_t::e_usage) */
+typedef enum
+{
+ SCRATCH = 0,
+ PERSISTENT = 1,
+ WRITEONCE = 2
+}ITT_MEM_USAGE_TYPE_E;
+/* Memory region of a memory record (see itt_memtab_t::e_mem_region) */
+typedef enum
+{
+ L1D = 0,
+ SL2 = 1,
+ DDR = 3 /* note: value 2 is not used */
+}ITT_MEM_REGION_E;
+/* Operation selector passed to the *_num_fill_use_free_memtab functions and to use_or_fill_base() */
+typedef enum
+{
+ GET_NUM_MEMTAB = 0, /* only the number of memtabs is reported; the memtab array is not accessed */
+ FILL_MEMTAB = 1, /* same spelling as the FILL_MEMTAB(...) macro above, distinguished by the missing '(' */
+ USE_BASE = 2,
+ FILL_BASE =3
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/*NOTE : This should be an exact replica of IALG_MemRec, any change in IALG_MemRec
+ must be replicated here.
+ Member order is size, alignment, region, usage, base; fill_memtab() takes
+ usage before region, so do not rely on positional similarity. */
+typedef struct
+{
+ /* Size in bytes */
+ UWORD32 u4_size;
+
+ /* Alignment in bytes */
+ WORD32 i4_alignment;
+
+ /* Decides in which memory region the buffer is to be placed */
+ ITT_MEM_REGION_E e_mem_region;
+
+ /* Memory usage class: scratch, persistent or write-once */
+ ITT_MEM_USAGE_TYPE_E e_usage;
+
+ /* Base pointer for allocated memory */
+ void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] u4_size
+* size of the record (declared as WORD32 despite the u4_ prefix)
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+ ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief This function uses or fills the base pointer of a memory record
+*
+* @par Description
+* Depending on e_func_type, the memory record and the caller's pointer are
+* linked with each other. NOTE(review): presumably USE_BASE copies the
+* record's base pointer into *ptr_to_be_filled and FILL_BASE does the
+* reverse - confirm against the definition, which is not part of this header.
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in,out] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return WORD32; its meaning is not documented here, and the
+* *_num_fill_use_free_memtab callers in irc_mb_model_based.c and
+* irc_picture_type.c ignore it
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+ ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
new file mode 100755
index 0000000..186188c
--- /dev/null
+++ b/encoder/irc_picture_type.c
@@ -0,0 +1,1585 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+#include "string.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_picture_type.h"
+#include "irc_trace_support.h"
+/* Sizes the pic stack of pic_handling_t, which holds MAX_INTER_FRM_INT + 2 entries */
+#define MAX_INTER_FRM_INT 10
+
+/******************************Pic_details ************************************/
+typedef struct
+{
+ /* The id sent by the codec */
+ WORD32 i4_pic_id;
+
+ /* The pics come in, in this order (display order number) */
+ WORD32 i4_pic_disp_order_no;
+
+ /* I,P,B */
+ picture_type_e e_pic_type;
+
+} pic_details_t;
+
+/**************************Pic_handling structure *****************************/
+typedef struct pic_handling_t
+{
+ /***************************************************************************
+ * Inputs from the codec
+ **************************************************************************/
+
+ /* Number of frames after which an I frame will repeat in display order */
+ WORD32 i4_intra_frm_int;
+
+ /* (num_b_pics_in_subgop + 1); starts out equal to i4_max_inter_frm_int */
+ WORD32 i4_inter_frm_int;
+
+ /* After these many buffered frames, the pics are encoded */
+ WORD32 i4_max_inter_frm_int;
+
+ /* OPEN or CLOSED */
+ WORD32 i4_is_gop_closed;
+
+ /* The pic stack */
+ /* Stack used to store the input pics in encode order */
+ pic_details_t as_pic_stack[MAX_INTER_FRM_INT + 2];
+
+ /***************************************************************************
+ * Counters
+ **************************************************************************/
+
+ /* Decides whether a B or ref pic */
+ WORD32 i4_buf_pic_no;
+
+ /* Current pic's number in display order; gets reset after an I-frm */
+ WORD32 i4_pic_disp_order_no;
+
+ /* Number of P frms that have come, in the current gop, so far */
+ WORD32 i4_p_count_in_gop;
+
+ /* Number of B frms that have come, in the current gop, so far */
+ WORD32 i4_b_count_in_gop;
+
+ /* Number of B frms that have come, in the current subgop, so far */
+ WORD32 i4_b_count_in_subgop;
+
+ /***************************************************************************
+ * Indices to the pic stack (Since we store the pics in the encode order,
+ * these vars are modified to meet that)
+ **************************************************************************/
+
+ /* B_PIC index (initialised to 2) */
+ WORD32 i4_b_pic_idx;
+
+ /* I,P PIC index (initialised to 0) */
+ WORD32 i4_ref_pic_idx;
+
+ /***************************************************************************
+ * Variables operating on the input pics
+ **************************************************************************/
+
+ /* Flag denoting whether it's the first gop or not */
+ WORD32 i4_is_first_gop;
+
+ /* Number of B_PICs in an incomplete subgop */
+ WORD32 i4_b_in_incomp_subgop;
+
+ /* In CLOSED_GOPs, even if inter_frm_int > 1, there can be 2 continuous
+ * P_PICs at the GOP end. This takes values of 0 or 1 */
+ WORD32 i4_extra_p;
+
+ /***************************************************************************
+ * Arrays storing the number of frms in the gop
+ **************************************************************************/
+
+ /* In the steady state, what's the pic distribution in display order */
+ WORD32 i4_frms_in_gop[MAX_PIC_TYPE];
+
+ /*
+ * In case of a change in inter frm int call, the pic distribution in
+ * that gop in display order
+ */
+ WORD32 i4_frms_in_cur_gop[MAX_PIC_TYPE];
+
+ /*
+ * This is used to denote the number of frms remaining to be encoded in the
+ * current gop
+ */
+ WORD32 i4_rem_frms_in_gop[MAX_PIC_TYPE];
+
+ /***************************************************************************
+ * Variables operating on the output pics
+ **************************************************************************/
+
+ /* Counts the frms encoded in a gop (initialised to -1) */
+ WORD32 i4_coded_pic_no;
+
+ /* Counts from the start of stack to the end repeatedly (initialised to -1) */
+ WORD32 i4_stack_count;
+
+ /***************************************************************************
+ * Tracking a change in the inputs from the codec
+ **************************************************************************/
+
+ /* A flag that is set when the codec calls for a change in inter_frm_int */
+ WORD32 i4_change_in_inter_frm_int;
+
+ /*
+ * When a change_in_inter_frm_int is called, this stores the new
+ * inter_frm_int
+ */
+ WORD32 i4_new_inter_frm_int;
+
+ /*
+ * When a change_in_inter_frm_int is called in the middle of a gop, this
+ * stores the B_PICs in the incomplete subgop of the mixed gop
+ */
+ WORD32 i4_b_in_incomp_subgop_mix_gop;
+
+ /*
+ * For a CLOSED GOP, when a change_in_inter_frm_int is called in the middle
+ * of a gop, this is a flag denoting if there is an extra P_PIC in the mixed
+ * gop
+ */
+ WORD32 i4_extra_p_mix_gop;
+
+ /* A flag that is set when the codec calls for a change in intra_frm_int */
+ WORD32 i4_change_in_intra_frm_int;
+
+ /*
+ * When a change_in_intra_frm_int is called, this stores the new
+ * intra_frm_int
+ */
+ WORD32 i4_new_intra_frm_int;
+
+ /***************************************************************************
+ * Previous pic_stack_indices & details
+ **************************************************************************/
+ pic_details_t s_prev_pic_details;
+ /* Initialised to the same value as i4_b_pic_idx (2) by irc_init_pic_handling() */
+ WORD32 i4_prev_b_pic_idx;
+
+ WORD32 i4_last_frm_in_gop;
+
+ WORD32 i4_first_gop_encoded;
+
+ /* NITT TBR */
+ picture_type_e e_previous_pic_type;
+
+ WORD32 i4_force_I_frame;
+
+ WORD32 i4_forced_I_frame_cur_frame;
+ /* Sum of i4_rem_frms_in_gop[] over all pic types, captured by start_new_gop() */
+ WORD32 i4_sum_remaining_frm_in_gop;
+
+ WORD32 i4_mod_temp_ref_cnt;
+ /* B count + P count + 1 of the gop just ended, stored by start_new_gop() */
+ WORD32 i4_frames_in_fif_gop;
+ /* Set to the initial intra frame interval by irc_init_pic_handling() */
+ WORD32 i4_prev_intra_frame_interval;
+
+} pic_handling_t;
+/* Forward declarations of file-local helpers; their definitions are not part of this excerpt */
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_gop_boundary);
+/* Called by irc_init_pic_handling() to obtain i4_frms_in_gop, the B count of the incomplete subgop and the extra-P flag */
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 *pi4_b_in_incomp_subgop,
+ WORD32 *pi4_extra_p);
+
+/******************************************************************************
+ Description : Memtab handling for the pic handling state.
+               GET_NUM_MEMTAB : ps_memtab is not touched
+               other values   : memtab 0 is filled with the size/alignment of
+                                pic_handling_t and handed, together with the
+                                handle, to use_or_fill_base()
+               Returns the number of memtabs handled (always 1)
+ *****************************************************************************/
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Dummy state the handle points at while no state memory exists */
+    static pic_handling_t s_pic_handling_temp;
+    WORD32 i4_num_memtabs = 0;
+
+    /*
+     * Hack for the GET_NUM_MEMTAB / FILL_MEMTAB calls, during which we don't
+     * have any state memory. Dereferencing can cause issues, so the handle is
+     * pointed at the static dummy above.
+     */
+    switch(e_func_type)
+    {
+        case GET_NUM_MEMTAB:
+        case FILL_MEMTAB:
+            *pps_pic_handling = &s_pic_handling_temp;
+            break;
+
+        default:
+            break;
+    }
+
+    /* For the pic handling state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_cur_memtab = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_cur_memtab, sizeof(pic_handling_t), ALIGN_128_BYTE,
+                    PERSISTENT, DDR);
+        use_or_fill_base(ps_cur_memtab, (void **)pps_pic_handling,
+                         e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+/******************************************************************************
+ Description : Initializes the pic handling state struct.
+               The inter frame interval starts out at i4_max_inter_frm_int and
+               the gop pic distribution is derived from the given intervals.
+               None of the arguments is range checked here.
+ *****************************************************************************/
+void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed)
+{
+    /* Parameters handed over by the codec */
+    ps_pic_handling->i4_is_gop_closed = i4_is_gop_closed;
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+    ps_pic_handling->i4_max_inter_frm_int = i4_max_inter_frm_int;
+    ps_pic_handling->i4_inter_frm_int = i4_max_inter_frm_int;
+
+    /* No change of the codec parameters is pending */
+    ps_pic_handling->i4_change_in_intra_frm_int = 0;
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_change_in_inter_frm_int = 0;
+    ps_pic_handling->i4_new_inter_frm_int = i4_max_inter_frm_int;
+
+    /* Empty pic stack, no previous pic */
+    memset(&ps_pic_handling->s_prev_pic_details, 0,
+           sizeof(ps_pic_handling->s_prev_pic_details));
+    memset(ps_pic_handling->as_pic_stack, 0,
+           sizeof(ps_pic_handling->as_pic_stack));
+
+    /* Indices into the pic stack */
+    ps_pic_handling->i4_ref_pic_idx = 0;
+    ps_pic_handling->i4_b_pic_idx = 2;
+    ps_pic_handling->i4_prev_b_pic_idx = 2;
+
+    /* Counters working on the input frames */
+    ps_pic_handling->i4_is_first_gop = 1;
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_p_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Counters working on the output frames */
+    ps_pic_handling->i4_stack_count = -1;
+    ps_pic_handling->i4_coded_pic_no = -1;
+
+    /*
+     * Pic distribution in the gop - the bit allocation depends on it. The
+     * current gop and the remaining frames start as copies of the steady
+     * state distribution.
+     */
+    find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_max_inter_frm_int, i4_is_gop_closed,
+                           &ps_pic_handling->i4_b_in_incomp_subgop,
+                           &ps_pic_handling->i4_extra_p);
+
+    memcpy(ps_pic_handling->i4_frms_in_cur_gop,
+           ps_pic_handling->i4_frms_in_gop,
+           sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+    memcpy(ps_pic_handling->i4_rem_frms_in_gop,
+           ps_pic_handling->i4_frms_in_gop,
+           sizeof(ps_pic_handling->i4_rem_frms_in_gop));
+
+    /* The mixed gop values start as copies of the steady state ones */
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                    ps_pic_handling->i4_b_in_incomp_subgop;
+
+    /* Remaining bookkeeping */
+    ps_pic_handling->e_previous_pic_type = I_PIC;
+    ps_pic_handling->i4_force_I_frame = 0;
+    ps_pic_handling->i4_forced_I_frame_cur_frame = 0;
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = 0;
+    ps_pic_handling->i4_mod_temp_ref_cnt = 0;
+    ps_pic_handling->i4_last_frm_in_gop = 0;
+    ps_pic_handling->i4_first_gop_encoded = 0;
+    ps_pic_handling->i4_frames_in_fif_gop = 0;
+}
+
+/*******************************************************************************
+ * @brief Latches a request to change the intra frame interval. Only the
+ *        pending-change flag and the requested value are written here; the
+ *        interval currently in use is left untouched.
+ *
+ * @param ps_pic_handling  picture handling state to update
+ * @param i4_intra_frm_int requested intra frame interval
+ ******************************************************************************/
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_t *ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int)
+{
+    /* Record the requested value, then raise the pending-change flag */
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_change_in_intra_frm_int = 1;
+}
+
+/*******************************************************************************
+ * @brief Latches a request to change the inter frame interval. Only the
+ *        pending-change flag and the requested value are written here; the
+ *        interval currently in use is left untouched.
+ *
+ * @param ps_pic_handling  picture handling state to update
+ * @param i4_inter_frm_int requested inter frame interval
+ ******************************************************************************/
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_t *ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int)
+{
+    /* Record the requested value, then raise the pending-change flag */
+    ps_pic_handling->i4_new_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_change_in_inter_frm_int = 1;
+}
+
+/*******************************************************************************
+ * @brief Resets the input-side GOP bookkeeping so that the next picture added
+ *        starts a fresh GOP.
+ *
+ *        Besides zeroing the buffering counters it records
+ *        - i4_frames_in_fif_gop: B count + P count + 1 of the gop being ended,
+ *        - i4_sum_remaining_frm_in_gop: the total of i4_rem_frms_in_gop[]
+ *          taken before that array is reloaded,
+ *        and reloads i4_frms_in_cur_gop[] and i4_rem_frms_in_gop[] from
+ *        i4_frms_in_gop[].
+ *
+ * @param ps_pic_handling picture handling state to update
+ ******************************************************************************/
+static void start_new_gop(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_type;
+    WORD32 i4_frms_left = 0;
+
+    /*
+     * Number of frames of the current gop seen so far, captured just before
+     * the forced I frame takes effect
+     */
+    ps_pic_handling->i4_frames_in_fif_gop = ps_pic_handling->i4_b_count_in_gop
+                    + ps_pic_handling->i4_p_count_in_gop + 1;
+
+    /* End of gop updates on the buffering side */
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_is_first_gop = 0;
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+    if(ps_pic_handling->i4_is_gop_closed)
+    {
+        ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                        ps_pic_handling->i4_b_in_incomp_subgop;
+    }
+
+    /*
+     * Single pass: accumulate what was still pending for each picture type
+     * before overwriting it with the nominal per-gop distribution
+     */
+    i4_type = 0;
+    while(i4_type < MAX_PIC_TYPE)
+    {
+        i4_frms_left += ps_pic_handling->i4_rem_frms_in_gop[i4_type];
+
+        ps_pic_handling->i4_frms_in_cur_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_gop[i4_type];
+        ps_pic_handling->i4_rem_frms_in_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i4_type];
+
+        i4_type++;
+    }
+
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = i4_frms_left;
+}
+
+/*******************************************************************************
+ * @brief Fills the pic_stack with the incoming pics in encode order
+ *
+ * The incoming picture (given in display order) is classified as B, I or P
+ * from the buffering counters and written into as_pic_stack[]: B pictures go
+ * to the slot i4_b_pic_idx, I/P pictures go to the slot i4_ref_pic_idx. Both
+ * indices are then advanced and wrapped modulo (i4_max_inter_frm_int + 1).
+ *
+ * Before the picture is placed, any pending requests are applied in this
+ * order:
+ *  1) force I frame (may call start_new_gop),
+ *  2) change in intra frame interval (irc_update_pic_distbn),
+ *  3) change in inter frame interval (irc_update_pic_distbn).
+ *
+ * The function works on local copies of most state variables and writes them
+ * back at the very end; a few checks deliberately read the state struct
+ * directly and therefore see the values from before this call's increments.
+ *
+ * @param ps_pic_handling picture handling state (read and updated)
+ * @param i4_enc_pic_id   id stored with the picture in the stack and handed
+ *                        back by irc_get_pic_from_stack
+ ******************************************************************************/
+void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
+{
+ /* Declarations */
+ WORD32 i4_inter_frm_int, i4_max_inter_frm_int,
+ i4_intra_frm_int, i4_new_inter_frm_int;
+ WORD32 i4_is_gop_closed;
+ WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
+ WORD32 i4_b_pic_idx, i4_ref_pic_idx;
+ WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop,
+ i4_b_count_in_gop, i4_b_count_in_subgop;
+ WORD32 i, i4_p_frms_in_prd, i4_b_frms_in_prd,
+ i4_num_b_in_subgop, i4_extra_p;
+ WORD32 i4_condn_for_change_in_inter_frm_int;
+ picture_type_e e_previous_pic_type, e_cur_pic_type;
+ WORD32 i4_force_I_frame;
+
+ /*
+ * Initialize the local vars with the state struct values needed by the
+ * change calls
+ */
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+ i4_new_inter_frm_int = ps_pic_handling->i4_new_inter_frm_int;
+ e_previous_pic_type = ps_pic_handling->e_previous_pic_type;
+ i4_force_I_frame = ps_pic_handling->i4_force_I_frame;
+
+ /* Force I frame :
+ * Two different cases
+ * 1)OPEN_GOP: New GOP is started after number of B pictures in the last
+ * sub gop of a gop to mimic the GOP structure.
+ * 2)Closed GOP:Wait till P frame at input and The frame after a P frame
+ * a new GOP is started to mimic the GOP structure.
+ *
+ * NOTE: i4_inter_frm_int is used as a divisor below, so it must be
+ * non-zero when this function is called.
+ */
+ if(i4_force_I_frame)
+ {
+ WORD32 i4_temp_is_gop_closed;
+ WORD32 i4_codn = 0;
+ /* A special case of Open GOP where it behaves like a Closed GOP */
+ if((i4_intra_frm_int % i4_inter_frm_int) == 1)
+ {
+ i4_temp_is_gop_closed = 1;
+ }
+ else
+ {
+ i4_temp_is_gop_closed = i4_is_gop_closed;
+ }
+ /*
+ * Get the current picture type to aid decision to force an I frame
+ * (same classification rule as used further below to fill the stack)
+ */
+ if((i4_buf_pic_no % i4_inter_frm_int)
+ && !(i4_is_gop_closed&& (i4_b_count_in_gop == i4_b_frms_in_prd)))
+ {
+ e_cur_pic_type = B_PIC;
+ }
+ else
+ {
+ if(i4_pic_disp_order_no == 0)
+ {
+ e_cur_pic_type = I_PIC;
+ }
+ else
+ {
+ e_cur_pic_type = P_PIC;
+ }
+ }
+ if((i4_intra_frm_int % i4_inter_frm_int) == 0)
+ {
+ i4_codn = (e_cur_pic_type == P_PIC);
+ }
+ else
+ {
+ i4_codn = (ps_pic_handling->i4_b_count_in_subgop
+ == ps_pic_handling->i4_b_in_incomp_subgop);
+ }
+ if(e_cur_pic_type == I_PIC)
+ {
+ /*
+ * Don't do anything. Resetting the force I frame flag
+ * since the current picture type is already a I frame
+ */
+ i4_force_I_frame = 0;
+ }
+ else if(i4_inter_frm_int == 1)
+ {
+ /*IPP case , Force I frame immediately*/
+ start_new_gop(ps_pic_handling);
+ }
+ else if((!i4_temp_is_gop_closed) && i4_codn)
+ {
+ start_new_gop(ps_pic_handling);
+ if(ps_pic_handling->i4_b_count_in_subgop)
+ {
+ ps_pic_handling->i4_b_pic_idx += 1;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ }
+ else if(i4_temp_is_gop_closed && (e_previous_pic_type == P_PIC)
+ && (e_cur_pic_type != P_PIC))
+ {
+ start_new_gop(ps_pic_handling);
+ ps_pic_handling->i4_b_pic_idx++;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+ }
+
+
+ /***********************CHANGE_INTRA_FRM_INTERVAL**************************
+ *
+ * Call the irc_update_pic_distbn if
+ * 1)Change in intra frm interval flag is set
+ * 2)It's the first B_PIC of a gop (local display order number == 1; the
+ * local copy was taken before any start_new_gop call above)
+ */
+ if((ps_pic_handling->i4_change_in_intra_frm_int == 1)
+ && ((i4_pic_disp_order_no == 1)))
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int, 1);
+
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+
+ if(ps_pic_handling->i4_new_intra_frm_int == 1)
+ {
+ ps_pic_handling->i4_pic_disp_order_no = 0;
+ }
+ }
+ /*********************CHANGE_INTER_FRM_INTERVAL****************************/
+ /* Call irc_update_pic_distbn if
+ * 1)Change in inter frm interval flag is set
+ * 2)It's the first B_PIC after gop/subgop start, and
+ * 3)The new inter-frm-interval won't cross the intra_frm_interval
+ */
+ if((ps_pic_handling->i4_change_in_inter_frm_int == 1)
+ && ((i4_buf_pic_no % i4_inter_frm_int == 1)
+ || (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1)))
+ {
+ /*
+ * Condition which checks if the new inter_frm_int will cross the
+ * intra_frm_int
+ */
+ i4_condn_for_change_in_inter_frm_int = ((i4_pic_disp_order_no
+ + i4_new_inter_frm_int - 1) < i4_intra_frm_int);
+
+ if(i4_condn_for_change_in_inter_frm_int)
+ {
+ /*If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
+ if(i4_inter_frm_int == 1)
+ {
+ ps_pic_handling->i4_b_pic_idx = (1
+ + ps_pic_handling->i4_ref_pic_idx)
+ % (i4_max_inter_frm_int + 1);
+ }
+
+ /*
+ * Depending on the gop/subgop boundary, call the change_inter_frm_int
+ *
+ * TO DO: make a single call, change the name of the fxn to
+ * update_state,
+ * where state = frms_in_gop + b_incomp_subgop + extra_p
+ */
+
+ /* GOP boundary */
+ if(i4_pic_disp_order_no == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 1);
+ }
+ /* Subgop boundary */
+ else
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 0);
+ }
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ ps_pic_handling->i4_new_inter_frm_int =
+ ps_pic_handling->i4_inter_frm_int;
+ }
+
+ }
+
+ /*
+ * Initialize the local vars with the state struct values. This is a
+ * re-read: the force-I-frame and interval-change handling above may have
+ * modified the state struct.
+ */
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+ i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ i4_p_count_in_gop = ps_pic_handling->i4_p_count_in_gop;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_p_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[P_PIC];
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_extra_p = ps_pic_handling->i4_extra_p_mix_gop;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+
+ /* Initializing the prev_state vars */
+ ps_pic_handling->i4_prev_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+
+ i4_num_b_in_subgop = (i4_inter_frm_int - 1);
+
+ /*********************** Fill the stack ***********************************/
+ /* The next part of the code is organized as
+ *
+ * if(B_PIC conditions satisfied)
+ * {
+ * Fill the pic_stack using the b_pic_index
+ * Update the b_pic_index and the other b_pic related vars for the
+ * next B_PIC
+ * }
+ * else
+ * {
+ * if(I_PIC conditions are satisfied)
+ * {
+ * Fill the pic_stack using the ref_pic_index
+ * Update the ref_pic_index and the other ref_pic related vars for the next
+ * I_PIC/P_PIC
+ * }
+ * else
+ * {
+ * Fill the pic_stack using the ref_pic_index
+ * Update the ref_pic_index and the other ref_pic related vars for the next
+ * I_PIC/P_PIC
+ * }
+ * }
+ */
+ /*
+ * Condition for a B_PIC -
+ * 1) Other than the first I_PIC and the periodically appearing P_PICs, after
+ * every inter_frm_int, rest all pics are B_PICs
+ * 2) In case of CLOSED_GOP, the last frame of the gop has to be a P_PIC
+ */
+
+ if((i4_buf_pic_no % i4_inter_frm_int)&& !(i4_is_gop_closed
+ && (i4_b_count_in_gop == i4_b_frms_in_prd))) /**** B_PIC ****/
+ {
+ /* Fill the pic_stack */
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = B_PIC;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_disp_order_no =
+ i4_pic_disp_order_no;
+
+ /* Store Pic type*/
+ e_previous_pic_type = B_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_b_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_b_count_in_gop++;
+ i4_b_count_in_subgop++;
+
+ /* Update the i4_b_pic_idx */
+ if(!i4_is_gop_closed)
+ {
+ /* If this B_PIC features in one of the complete subgops */
+ if((i4_b_count_in_subgop < i4_num_b_in_subgop)
+ && !(i4_b_count_in_gop == i4_b_frms_in_prd))
+ {
+ i4_b_pic_idx++;
+ }
+ else /* Else if this B_PIC is the last one in a subgop or gop */
+ {
+ /*
+ * If this is the last B_PIC of a GOP, depending on the number
+ * of incomp B_pics in the subgop, there can be either only I
+ * or I,P pics between this and the next B_PIC
+ */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx += (2 + (!i4_b_in_incomp_subgop)); /*Prev*/
+ i4_b_count_in_gop = 0;
+ }
+ /*
+ * For the last B_PIC of a subgop, there's always a P b/w
+ * this & the next B_PIC
+ */
+ else
+ {
+ i4_b_pic_idx += 2;
+ }
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ else
+ {
+ /* For the last B_PIC of a gop
+ * Normally,there will be 3 pics (P,I,P) between this and the next
+ * B_PIC for a CLOSED gop, except when
+ * 1)Number of P_pics in the gop = 1
+ * 2)There is an extra P at the end of the gop
+ */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx += (3 + ((i4_b_in_incomp_subgop == 0)
+ && (i4_p_frms_in_prd> 1)
+ && (i4_pic_disp_order_no
+ != (i4_p_frms_in_prd+ i4_b_frms_in_prd- 1))));
+
+ i4_b_count_in_subgop = 0;
+ }
+ /* For a B_PIC which is not the last one in a subgop */
+ else if(i4_b_count_in_subgop < i4_num_b_in_subgop)
+ {
+ i4_b_pic_idx++;
+ }
+ else /* For the last B_PIC of a subgop */
+ {
+ i4_b_pic_idx += 2;
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ /*********** I or P pic *********/
+ else
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no =
+ i4_pic_disp_order_no;
+ /* Store Pic type (provisional: overwritten with P_PIC in the P branch) */
+ e_previous_pic_type = I_PIC;
+
+ /**** I_PIC ****/
+ if(i4_pic_disp_order_no == 0)
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = I_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+ /*
+ * In case of an I-frame depending on OPEN or CLOSED gop,
+ * the ref_pic_idx changes
+ */
+ if((!i4_is_gop_closed) && (i4_is_first_gop == 0))
+ {
+ if((i4_p_frms_in_prd <= 1) && (i4_b_in_incomp_subgop == 0))
+ {
+ i4_ref_pic_idx++;
+ }
+ /*
+ * From the 2nd gop onwards, the I and first P frame are
+ * separated by the num_b_in_incomp_subgop
+ */
+ else
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+ ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+ else
+ {
+ i4_ref_pic_idx++;
+ }
+
+ i4_b_count_in_gop = 0;
+ i4_p_count_in_gop = 0;
+ i4_b_count_in_subgop = 0;
+
+ }
+ /**** P_PIC ****/
+ else
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC;
+ /* Store Pic type*/
+ e_previous_pic_type = P_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_p_count_in_gop++;
+ ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+
+ /*
+ * In case of an P-frame depending on OPEN or CLOSED gop, the
+ * ref_pic_idx changes
+ */
+ if(i4_is_gop_closed && (i4_p_count_in_gop == i4_p_frms_in_prd))
+ {
+ /*
+ * For the last P_PIC in a gop, if extra_p or incomp_b are
+ * present, the number of such pics between this and the next
+ * ref_pic is (i4_b_in_incomp_subgop + 1)
+ */
+ if((i4_p_count_in_gop > 1)
+ && (i4_b_in_incomp_subgop || i4_extra_p))
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+
+ i4_ref_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+
+ /* Update those variables working on the input frames */
+ i4_pic_disp_order_no++;
+ i4_buf_pic_no++;
+
+ /*
+ * For any gop.
+ * Note: this compares the state-struct display order number, which has
+ * not yet received the increment applied to the local copy just above.
+ */
+ if(ps_pic_handling->i4_pic_disp_order_no
+ == (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed)
+ * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+ {
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_pic_handling->i4_rem_frms_in_gop[i] =
+ ps_pic_handling->i4_frms_in_cur_gop[i];
+ }
+
+ if((!i4_is_gop_closed) && (i4_is_first_gop)
+ && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC]
+ > ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))
+ {
+ ps_pic_handling->i4_rem_frms_in_gop[B_PIC] =
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+ - ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ }
+ }
+
+ /* End of GOP updates (uses the incremented local display order number) */
+ if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
+ {
+ /* Now, the end of gop updates */
+ i4_pic_disp_order_no = 0;
+ i4_buf_pic_no = 0;
+ i4_is_first_gop = 0;
+ ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+ if(i4_is_gop_closed)
+ {
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+ ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_pic_handling->i4_frms_in_cur_gop[i] =
+ ps_pic_handling->i4_frms_in_gop[i];
+ }
+ }
+
+ /* Updating the vars which work on the encoded pics */
+ /*
+ * For the first gop (again checked against the state-struct values, which
+ * are only overwritten by the write-back below), or when every frame is
+ * an I frame: start handing out pictures from slot 0
+ */
+ if(((ps_pic_handling->i4_is_first_gop)
+ && (ps_pic_handling->i4_pic_disp_order_no
+ == (i4_max_inter_frm_int - 1)))
+ || (i4_intra_frm_int == 1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /* Update the state struct with the modifiable local vars */
+ ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx;
+ ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx;
+ ps_pic_handling->i4_is_first_gop = i4_is_first_gop;
+ ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop;
+ ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+ ps_pic_handling->e_previous_pic_type = e_previous_pic_type;
+ ps_pic_handling->i4_force_I_frame = i4_force_I_frame;
+}
+
+/*******************************************************************************
+ * @brief Returns the picture type, id and display order number for the frame
+ *        to be encoded
+ *
+ * While i4_stack_count is negative nothing is available yet and the outputs
+ * are set to id = -1, display order = -1, type = BUF_PIC. Otherwise the entry
+ * at as_pic_stack[i4_stack_count] is returned; for an open gop, once the first
+ * gop has been encoded, the returned display order number is shifted by
+ * i4_b_in_incomp_subgop and wrapped.
+ *
+ * Side effects: may consume the force-I-frame request and decrement
+ * i4_mod_temp_ref_cnt. i4_stack_count itself is not changed here.
+ *
+ * @param ps_pic_handling        picture handling state
+ * @param pi4_pic_id             [out] id of the picture
+ * @param pi4_pic_disp_order_no  [out] display order number of the picture
+ * @param pe_pic_type            [out] type of the picture
+ ******************************************************************************/
+void irc_get_pic_from_stack(pic_handling_t *ps_pic_handling,
+                            WORD32 *pi4_pic_id,
+                            WORD32 *pi4_pic_disp_order_no,
+                            picture_type_e *pe_pic_type)
+{
+    pic_details_t s_cur_pic;
+    WORD32 i4_slot = ps_pic_handling->i4_stack_count;
+
+    /* Nothing to hand out yet: report a buffering picture */
+    if(i4_slot < 0)
+    {
+        *pi4_pic_id = -1;
+        *pi4_pic_disp_order_no = -1;
+        *pe_pic_type = BUF_PIC;
+        return;
+    }
+
+    memcpy(&s_cur_pic, &ps_pic_handling->as_pic_stack[i4_slot],
+           sizeof(pic_details_t));
+
+    /* Force I frame updations */
+    if((s_cur_pic.e_pic_type == I_PIC)
+                    && (ps_pic_handling->i4_force_I_frame == 1))
+    {
+        /* Flag to signal change in remaining bits */
+        ps_pic_handling->i4_forced_I_frame_cur_frame = 1;
+        ps_pic_handling->i4_force_I_frame = 0;
+
+        /*
+         * Count of pictures in the new GOP whose temporal reference has to
+         * be modified
+         */
+        ps_pic_handling->i4_mod_temp_ref_cnt =
+                        ps_pic_handling->i4_b_in_incomp_subgop + 1;
+        ps_pic_handling->i4_first_gop_encoded = 1;
+    }
+
+    /*
+     * In MPEG2, the temporal reference of the first displayed frame in a gop
+     * is 0. In case of an OPEN_GOP, the B_PICs of the last subgop in a gop
+     * may be coded as a part of the next gop. Hence, in such conditions the
+     * pic_disp_order is modified so that it gives an indication of the
+     * temporal reference
+     */
+    if(ps_pic_handling->i4_first_gop_encoded
+                    && (!ps_pic_handling->i4_is_gop_closed))
+    {
+        WORD32 i4_shifted_order = s_cur_pic.i4_pic_disp_order_no
+                        + ps_pic_handling->i4_b_in_incomp_subgop;
+
+        if(ps_pic_handling->i4_mod_temp_ref_cnt)
+        {
+            /*
+             * Due to the forced I frame the cut-short gop only had
+             * i4_frames_in_fif_gop frames
+             */
+            s_cur_pic.i4_pic_disp_order_no = i4_shifted_order
+                            % (ps_pic_handling->i4_frames_in_fif_gop);
+            ps_pic_handling->i4_mod_temp_ref_cnt--;
+        }
+        else
+        {
+            s_cur_pic.i4_pic_disp_order_no = i4_shifted_order
+                            % (ps_pic_handling->i4_prev_intra_frame_interval);
+        }
+    }
+
+    /* Giving this to the Codec */
+    *pi4_pic_id = s_cur_pic.i4_pic_id;
+    *pi4_pic_disp_order_no = s_cur_pic.i4_pic_disp_order_no;
+    *pe_pic_type = s_cur_pic.e_pic_type;
+}
+
+/*******************************************************************************
+ * @brief Updates the picture handling state whenever there is a change in the
+ *        intra or inter frame interval
+ *
+ * Recomputes the nominal per-gop picture distribution (i4_frms_in_gop[]), the
+ * distribution of the gop currently being buffered (i4_frms_in_cur_gop[]),
+ * adjusts i4_rem_frms_in_gop[] by the resulting difference when applicable,
+ * resets the sub-gop buffering counters and finally stores the new intervals.
+ *
+ * @param ps_pic_handling  picture handling state (read and updated)
+ * @param i4_intra_frm_int new intra frame interval; a value <= 0 is rejected
+ *                         and the interval currently in the state is kept
+ * @param i4_inter_frm_int new inter frame interval; a value <= 0 or greater
+ *                         than i4_max_inter_frm_int is rejected and the
+ *                         interval currently in the state is kept
+ * @param i4_gop_boundary  non-zero: the change takes effect at a gop boundary,
+ *                         so the whole gop uses the new intervals;
+ *                         zero: the change happens inside a gop, which becomes
+ *                         a "mixed" gop
+ ******************************************************************************/
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+                                  WORD32 i4_intra_frm_int,
+                                  WORD32 i4_inter_frm_int,
+                                  WORD32 i4_gop_boundary)
+{
+    /* Declarations */
+    WORD32 i4_is_gop_closed;
+    WORD32 i, i4_prev_inter_frm_int, i4_max_inter_frm_int, i4_pic_disp_order_no;
+    WORD32 i4_b_in_incomp_subgop, i4_extra_p,
+                    i4_b_in_incomp_subgop_mix_gop, i4_extra_p_mix_gop;
+    WORD32 i4_pb_frms_till_prev_p;
+    WORD32 ai4_diff_in_frms[MAX_PIC_TYPE];
+
+    /* Initialize the local vars from the state struct */
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    i4_prev_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+    i4_extra_p = ps_pic_handling->i4_extra_p;
+    i4_b_in_incomp_subgop_mix_gop =
+                    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+    i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p_mix_gop;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    /* P and B frames buffered in this gop up to the previous P frame */
+    i4_pb_frms_till_prev_p = (ps_pic_handling->i4_p_count_in_gop
+                    * i4_prev_inter_frm_int);
+
+    /* Check for the validity of the intra_frm_int */
+    if(i4_intra_frm_int <= 0)
+    {
+        i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    }
+    /*
+     * Check for the validity of the inter_frm_int.
+     * Zero must be rejected as well as negative values: the interval is used
+     * as a divisor by find_pic_distbn_in_gop() below and as a modulus by
+     * irc_add_pic_to_stack(), so accepting 0 would lead to a division by zero.
+     */
+    if((i4_inter_frm_int > i4_max_inter_frm_int) || (i4_inter_frm_int <= 0))
+    {
+        i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    }
+
+    /* Keep a copy of the older frms_in_gop */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ai4_diff_in_frms[i] = ps_pic_handling->i4_frms_in_cur_gop[i];
+    }
+
+    /* Update all the variables which are calculated from the inter_frm_int */
+
+    /* Get the new pic distribution in the gop */
+    find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_inter_frm_int, i4_is_gop_closed,
+                           &i4_b_in_incomp_subgop, &i4_extra_p);
+
+    /* Find the other related variables */
+    if(i4_gop_boundary == 0)
+    {
+        /*
+         * Since the inter frame interval has changed inside a gop, the
+         * current gop will be a mixed gop. Compute the distribution of the
+         * part of the gop that is still to come ...
+         */
+        find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_cur_gop,
+                               (i4_intra_frm_int - i4_pb_frms_till_prev_p),
+                               i4_inter_frm_int, i4_is_gop_closed,
+                               &i4_b_in_incomp_subgop_mix_gop,
+                               &i4_extra_p_mix_gop);
+
+        /* ... and add the P and B frames already buffered in this gop */
+        ps_pic_handling->i4_frms_in_cur_gop[P_PIC] +=
+                        ps_pic_handling->i4_p_count_in_gop;
+        ps_pic_handling->i4_frms_in_cur_gop[B_PIC] +=
+                        ps_pic_handling->i4_b_count_in_gop;
+    }
+    else
+    {
+        /*
+         * Since the inter_frm_interval has changed at a gop boundary, the
+         * new gop will have all the subgops with the new inter_frm_interval
+         */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_frms_in_cur_gop[i] =
+                            ps_pic_handling->i4_frms_in_gop[i];
+        }
+
+        i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop;
+        i4_extra_p_mix_gop = i4_extra_p;
+    }
+
+    /* For bit-allocation the rem_frms_in_gop need to be updated */
+    /*
+     * Checks needed:
+     * 1) If the encoding is happening on the same gop as that of the buffering
+     *
+     * Note: the state struct still holds the old i4_b_in_incomp_subgop_mix_gop
+     * at this point; the new value is written back at the end of the function.
+     */
+    if(ps_pic_handling->i4_pic_disp_order_no
+                    >= (i4_max_inter_frm_int - 1 - ((!i4_is_gop_closed)
+                    * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+    {
+        /* Apply the change in the gop distribution to the remaining frames */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[i] +=
+                            (ps_pic_handling->i4_frms_in_cur_gop[i]
+                            - ai4_diff_in_frms[i]);
+        }
+    }
+
+    /* Update the vars which will affect the proper filling of the pic_stack */
+    if(i4_pic_disp_order_no == 0) /*Check if redundant*/
+    {
+        ps_pic_handling->i4_buf_pic_no = 0;
+    }
+    else
+    {
+        ps_pic_handling->i4_buf_pic_no = 1;
+    }
+
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Update the state struct with the new intervals and derived values */
+    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_b_in_incomp_subgop = i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p = i4_extra_p;
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                    i4_b_in_incomp_subgop_mix_gop;
+    ps_pic_handling->i4_extra_p_mix_gop = i4_extra_p_mix_gop;
+
+}
+
+/* *****************************************************************************
+ * @brief Splits one gop into I, P and B frame counts from the intra and inter
+ *        frame intervals, and reports the number of B frames in the incomplete
+ *        sub-gop and whether an extra P frame is present.
+ *
+ * @param i4_frms_in_gop         [out] frame count per picture type
+ * @param i4_intra_frm_int       intra frame interval (gop length)
+ * @param i4_inter_frm_int       inter frame interval; used as a divisor
+ *                               whenever i4_intra_frm_int != 1
+ * @param i4_is_gop_closed       non-zero for a closed gop
+ * @param pi4_b_in_incomp_subgop [out] B frames in the incomplete sub-gop
+ * @param pi4_extra_p            [out] 1 if the closed gop ends with an extra
+ *                               P frame, else 0
+ ******************************************************************************/
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+                                   WORD32 i4_intra_frm_int,
+                                   WORD32 i4_inter_frm_int,
+                                   WORD32 i4_is_gop_closed,
+                                   WORD32 *pi4_b_in_incomp_subgop,
+                                   WORD32 *pi4_extra_p)
+{
+    WORD32 i4_num_p;
+    WORD32 i4_num_b;
+
+    /* There is always exactly one I frame per gop */
+    i4_frms_in_gop[I_PIC] = 1;
+
+    /* All I frames: nothing else in the gop */
+    if(i4_intra_frm_int == 1)
+    {
+        i4_frms_in_gop[P_PIC] = 0;
+        i4_frms_in_gop[B_PIC] = 0;
+        *pi4_b_in_incomp_subgop = 0;
+        *pi4_extra_p = 0;
+        return;
+    }
+
+    if(!i4_is_gop_closed)
+    {
+        i4_num_p = (i4_intra_frm_int - 1) / i4_inter_frm_int;
+        *pi4_extra_p = 0;
+    }
+    else
+    {
+        /* The closed gop always ends on a P frame */
+        i4_num_p = ((i4_intra_frm_int - 2) / i4_inter_frm_int) + 1;
+
+        /* Extra P when the frames after the I and final P divide evenly */
+        *pi4_extra_p = (((i4_intra_frm_int - 2) % i4_inter_frm_int) == 0) ? 1 : 0;
+    }
+    i4_frms_in_gop[P_PIC] = i4_num_p;
+
+    /* Whatever is neither I nor P is a B frame */
+    i4_num_b = i4_intra_frm_int - 1 - i4_num_p;
+    i4_frms_in_gop[B_PIC] = i4_num_b;
+
+    /* B frames left over after removing those of the complete sub-gops */
+    *pi4_b_in_incomp_subgop = i4_num_b
+                    - (i4_inter_frm_int - 1)
+                    * ((i4_intra_frm_int - 1) / i4_inter_frm_int);
+}
+
+/* Returns the intra frame interval currently stored in the state */
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_interval = ps_pic_handling->i4_intra_frm_int;
+
+    return i4_interval;
+}
+
+/* Returns the inter frame interval currently stored in the state */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_interval = ps_pic_handling->i4_inter_frm_int;
+
+    return i4_interval;
+}
+
+/*
+ * Copies the per-picture-type count of frames remaining in the gop
+ * (i4_rem_frms_in_gop[]) into the caller supplied array
+ */
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                      WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE])
+{
+    /* The whole state array is copied, whatever its declared size */
+    memcpy(ai4_rem_frms_in_gop,
+           ps_pic_handling->i4_rem_frms_in_gop,
+           sizeof(ps_pic_handling->i4_rem_frms_in_gop));
+}
+
+/*
+ * Returns i4_frames_in_fif_gop, the frame count that start_new_gop() records
+ * for the gop being ended
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_frames = ps_pic_handling->i4_frames_in_fif_gop;
+
+    return i4_frames;
+}
+
+/*
+ * Copies the per-picture-type frame distribution of the current gop
+ * (i4_frms_in_cur_gop[]) into the caller supplied array
+ */
+void irc_pic_type_get_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                  WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    /* The whole state array is copied, whatever its declared size */
+    memcpy(ai4_frms_in_gop,
+           ps_pic_handling->i4_frms_in_cur_gop,
+           sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+}
+
+/* Returns the display order number counter held in the state */
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    return i4_disp_order_no;
+}
+
+/*
+ * Raises the force-I-frame request; it is acted upon by irc_add_pic_to_stack()
+ * and cleared by irc_add_pic_to_stack() or irc_get_pic_from_stack()
+ */
+void irc_set_force_I_frame_flag(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_request_pending = 1;
+
+    ps_pic_handling->i4_force_I_frame = i4_request_pending;
+}
+/*
+ * Returns the flag that irc_get_pic_from_stack() sets when it hands out an
+ * I picture while a force-I-frame request is pending
+ */
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_flag = ps_pic_handling->i4_forced_I_frame_cur_frame;
+
+    return i4_flag;
+}
+/* Clears the "forced I frame on the current frame" flag */
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_cleared = 0;
+
+    ps_pic_handling->i4_forced_I_frame_cur_frame = i4_cleared;
+}
+
+/******************************************************************************/
+/* Functions that work on the encoded frames */
+/******************************************************************************/
+
+/******************************************************************************
+ Function Name : irc_update_pic_handling
+ Description   : Will be called only for the frames to be encoded.
+                 Advances the stack read position (wrapping after
+                 i4_max_inter_frm_int + 1 entries), decrements the remaining
+                 frame count of the given picture type and, once no I, P or B
+                 frames remain, reloads the remaining counts from the current
+                 gop distribution and flags the last frame of the gop.
+ *****************************************************************************/
+void irc_update_pic_handling(pic_handling_t *ps_pic_handling,
+                             picture_type_e e_pic_type)
+{
+    WORD32 i4_type;
+    WORD32 i4_wrap_at = ps_pic_handling->i4_max_inter_frm_int + 1;
+
+    /* Move on to the next stack entry, wrapping around at the end */
+    ps_pic_handling->i4_stack_count++;
+    if(ps_pic_handling->i4_stack_count == i4_wrap_at)
+    {
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    /* One frame less of this type remains in the gop */
+    ps_pic_handling->i4_rem_frms_in_gop[e_pic_type]--;
+
+    /*
+     * Assumption : Rem_frms_in_gop needs to be taken care of, for every
+     * change in frms
+     */
+    ps_pic_handling->i4_last_frm_in_gop = 0;
+
+    /* Nothing more to do while any picture type still has frames pending */
+    if((ps_pic_handling->i4_rem_frms_in_gop[I_PIC] > 0)
+                    || (ps_pic_handling->i4_rem_frms_in_gop[P_PIC] > 0)
+                    || (ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0))
+    {
+        return;
+    }
+
+    /* Gop exhausted: reload the remaining counts for the next gop */
+    i4_type = 0;
+    while(i4_type < MAX_PIC_TYPE)
+    {
+        ps_pic_handling->i4_rem_frms_in_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i4_type];
+        i4_type++;
+    }
+
+    ps_pic_handling->i4_last_frm_in_gop = 1;
+    ps_pic_handling->i4_first_gop_encoded = 1;
+}
+
+/*
+ * Returns the flag maintained by irc_update_pic_handling(): 1 if the frame it
+ * last processed exhausted the gop, 0 otherwise
+ */
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_is_last = ps_pic_handling->i4_last_frm_in_gop;
+
+    return i4_is_last;
+}
+
+/******************************************************************************
+ Function Name : irc_skip_encoded_frame
+ Description   : Needs to go to the current pic in the pic_stack.
+                 If it's a B_PIC don't do anything.
+                 If it's a reference picture, push all but the last B_PICs
+                 in the current subgop one place down (i.e. just copy their
+                 pic_details) and move the last B_PIC in that subgop to the
+                 slot following the skipped picture, converting its pic_type
+                 to P_PIC.
+                 Nothing is done either when the slot following the skipped
+                 picture already is the next reference picture slot.
+ *****************************************************************************/
+void irc_skip_encoded_frame(pic_handling_t *ps_pic_handling,
+                            picture_type_e e_pic_type)
+{
+    /* State variables used to initialize the local vars (not changed here) */
+    WORD32 i4_cur_slot = ps_pic_handling->i4_stack_count;
+    WORD32 i4_ref_slot = ps_pic_handling->i4_ref_pic_idx;
+    WORD32 i4_last_slot = ps_pic_handling->i4_max_inter_frm_int;
+
+    /* Slot right after the skipped picture: the first B_PIC of the subgop */
+    WORD32 i4_first_b_slot = (i4_cur_slot + 1) % (i4_last_slot + 1);
+    WORD32 i4_last_b_slot, i4_walk;
+    WORD32 i4_saved_pic_id, i4_saved_disp_order_no;
+
+    /* Only skipped reference pictures need any work */
+    if((e_pic_type != P_PIC) && (e_pic_type != I_PIC))
+    {
+        return;
+    }
+
+    /*
+     * Changing a B_PIC to a ref_pic is not reqd if there are no B_PICs
+     * referring from the skipped ref_pic
+     */
+    if(i4_first_b_slot == i4_ref_slot)
+    {
+        return;
+    }
+
+    /* The last B_PIC sits just before the next reference slot (with wrap) */
+    i4_last_b_slot = (i4_ref_slot == 0) ? i4_last_slot : (i4_ref_slot - 1);
+
+    /* Remember the details of the last B_PIC before anything is shifted */
+    i4_saved_pic_id =
+                    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_id;
+    i4_saved_disp_order_no =
+                    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_disp_order_no;
+
+    /*
+     * All the B_PICs other than the last one need to be shifted one place
+     * in the stack. Walk backwards from the last B_PIC down to the skipped
+     * picture; nothing to shift when there is a single B_PIC.
+     */
+    if(i4_first_b_slot != i4_last_b_slot)
+    {
+        i4_walk = i4_last_b_slot;
+        while(i4_walk != i4_cur_slot)
+        {
+            i4_walk = (i4_walk == 0) ? i4_last_slot : (i4_walk - 1);
+
+            memcpy(&ps_pic_handling->as_pic_stack[(i4_walk + 1)
+                                   % (i4_last_slot + 1)],
+                   &ps_pic_handling->as_pic_stack[i4_walk],
+                   sizeof(pic_details_t));
+        }
+    }
+
+    /*
+     * The last B_PIC takes the first B_PIC place and becomes the reference
+     * picture
+     */
+    ps_pic_handling->as_pic_stack[i4_first_b_slot].e_pic_type = P_PIC;
+    ps_pic_handling->as_pic_stack[i4_first_b_slot].i4_pic_disp_order_no =
+                    i4_saved_disp_order_no;
+    ps_pic_handling->as_pic_stack[i4_first_b_slot].i4_pic_id =
+                    i4_saved_pic_id;
+
+    /* Change the rem_frms_in_prd so that the update works properly */
+    if(ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0)
+    {
+        ps_pic_handling->i4_rem_frms_in_gop[B_PIC]--;
+        ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+    }
+}
+
+/******************************************************************************
+ Function Name : irc_flush_frame_from_pic_stack
+ Description   : When a flush is requested there will be no valid frames
+                 after it, so the last frame cannot stay a B_PIC: it would
+                 have no reference frame (input is in display order).
+
+                 The function looks at the last added picture
+                 (s_prev_pic_details).
+                 If it's a B_PIC, its id and display order number are placed
+                 in the next reference picture slot with pic_type P_PIC, and
+                 the B slot it came from is marked as end-of-stream.
+                 Otherwise the next reference slot (and, unless the inter
+                 frame interval is 1, the next B slot) is marked as
+                 end-of-stream.
+
+                 End-of-stream marker: e_pic_type = MAX_PIC_TYPE,
+                 i4_pic_id = -1, i4_pic_disp_order_no = -1.
+ *****************************************************************************/
+void irc_flush_frame_from_pic_stack(pic_handling_t *ps_pic_handling)
+{
+    /* Stack positions of interest (not to be modified here) */
+    WORD32 i4_next_ref_slot = ps_pic_handling->i4_ref_pic_idx;
+    WORD32 i4_next_b_slot = ps_pic_handling->i4_b_pic_idx;
+    WORD32 i4_last_b_slot = ps_pic_handling->i4_prev_b_pic_idx;
+
+    if(ps_pic_handling->s_prev_pic_details.e_pic_type != B_PIC)
+    {
+        /*
+         * Last picture was a reference picture: mark the next entries in
+         * the stack, so that the codec gets to know when all the buffered
+         * frames are flushed
+         */
+        ps_pic_handling->as_pic_stack[i4_next_ref_slot].e_pic_type =
+                        MAX_PIC_TYPE;
+        ps_pic_handling->as_pic_stack[i4_next_ref_slot].i4_pic_id = -1;
+        ps_pic_handling->as_pic_stack[i4_next_ref_slot].i4_pic_disp_order_no =
+                        -1;
+
+        if(ps_pic_handling->i4_inter_frm_int != 1)
+        {
+            ps_pic_handling->as_pic_stack[i4_next_b_slot].e_pic_type =
+                            MAX_PIC_TYPE;
+            ps_pic_handling->as_pic_stack[i4_next_b_slot].i4_pic_id = -1;
+            ps_pic_handling->as_pic_stack[i4_next_b_slot].i4_pic_disp_order_no =
+                            -1;
+        }
+        return;
+    }
+
+    /* Promote the last B_PIC: copy it to the next reference slot as a P_PIC */
+    ps_pic_handling->as_pic_stack[i4_next_ref_slot].i4_pic_disp_order_no =
+                    ps_pic_handling->s_prev_pic_details.i4_pic_disp_order_no;
+    ps_pic_handling->as_pic_stack[i4_next_ref_slot].i4_pic_id =
+                    ps_pic_handling->s_prev_pic_details.i4_pic_id;
+    ps_pic_handling->as_pic_stack[i4_next_ref_slot].e_pic_type = P_PIC;
+
+    /*
+     * Mark the slot the B_PIC was stored in, so that the codec gets to know
+     * when all the buffered frames are flushed
+     */
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].e_pic_type = MAX_PIC_TYPE;
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_id = -1;
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_disp_order_no = -1;
+}
+
+/******************************************************************************
+ Function Name : irc_add_pic_to_stack_re_enc
+ Description : In case of a re-enc, we can assume the pictures to be coming
+ in the encode order.
+ In case of a re-encoder there are basically 3 problematic cases.
+ 1)Inter_frm_int is not known to start with
+ 2)Inter_frm_int can keep changing
+ 3)Intra_frm_int set by the application and that actually in the
+ decoded bitstream may be different
+ Inputs : ps_pic_handling - pic handling state
+ i4_enc_pic_id - id stored with the picture in the pic stack
+ e_pic_type - picture type of the incoming picture
+ Returns : 0 on success
+ -1 when the number of B_PICs counted in the current subgop
+ exceeds i4_max_inter_frm_int; in that case the function returns
+ before the pic stack is written and before the local counters
+ are written back to the state
+ *****************************************************************************/
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_t *ps_pic_handling,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_b_count_in_subgop;
+ WORD32 i4_max_inter_frm_int, i4_inter_frm_int, i4_intra_frm_int;
+ WORD32 i4_pic_disp_order_no;
+ WORD32 i4_is_gop_closed;
+ picture_type_e e_out_pic_type;
+ WORD32 i4_b_in_incomp_subgop;
+
+ /* Check if a change in intra_frm_int call has been made */
+ if(ps_pic_handling->i4_change_in_intra_frm_int == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int, 1);
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+ }
+
+ /* Check if a change in inter_frm_int call has been made */
+ if(ps_pic_handling->i4_change_in_inter_frm_int == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 1);
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ }
+
+ /*
+ * Initialize the local vars with the state vars. This is done after the
+ * pending interval changes above have been applied.
+ */
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+
+ e_out_pic_type = e_pic_type;
+
+ /* Initially the rate_control assumes an IPP sequence */
+ if(e_pic_type == B_PIC)
+ {
+ /* Update the number of B_PICs in a subgop */
+ i4_b_count_in_subgop++;
+
+ /* More B_PICs than the configured maximum cannot be handled */
+ if(i4_b_count_in_subgop > i4_max_inter_frm_int)
+ {
+ return (-1);
+ }
+
+ /* If the number of B_PICs exceed the set inter_frm_int then
+ change the inter_frm_int */
+ if(i4_b_count_in_subgop > (i4_inter_frm_int - 1))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+ i4_inter_frm_int, 0);
+ }
+ }
+ else if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+ {
+ /* If the B_PICs in the prev subgop were fewer than the current
+ * (inter_frm_int-1) and none of these conditions occur, it'll mean the
+ * decrease in the inter_frm_int
+ * 1)End of a GOP
+ * 2)Beginning of an OPEN_GOP
+ */
+ if((i4_b_count_in_subgop < (i4_inter_frm_int - 1))
+ && !((!i4_is_gop_closed)
+ && (i4_b_count_in_subgop
+ >= i4_b_in_incomp_subgop))
+ && !((i4_pic_disp_order_no
+ + (i4_inter_frm_int - 1
+ - i4_b_count_in_subgop))
+ > i4_intra_frm_int))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+ i4_inter_frm_int, 0);
+ }
+
+ /* Reset the number of B_PICs in a subgop */
+ i4_b_count_in_subgop = 0;
+ }
+
+ /* Updation of the frame level vars */
+ i4_pic_disp_order_no++;
+
+ /* End of gop condition
+ *Two cases can arise :
+ *1) The intra_frm_int set by the application is greater than the actual
+ * bitstream intra_frm_int (i.e. we will get an I frame before
+ * pic_disp_order_no goes to intra_frm_int)
+ *2) The intra_frm_int set by the application is smaller than the actual bitstream intra_frm_int
+ * (i.e. we won't get an I_PIC even if pic_disp_order_no goes to
+ * intra_frm_int) Constraints :
+ * 1) I_PIC cannot be changed to B_PIC
+ * 2) B_PIC cannot be changed to I_PIC
+ */
+ if(i4_pic_disp_order_no >= i4_intra_frm_int)
+ {
+ /* GOP length reached: any non-B picture is output as I_PIC */
+ if(e_pic_type != B_PIC)
+ {
+ e_out_pic_type = I_PIC;
+ }
+ else
+ {
+ /* A B_PIC stays a B_PIC; the GOP counters grow by one B_PIC */
+ e_out_pic_type = B_PIC;
+ ps_pic_handling->i4_rem_frms_in_gop[B_PIC]++;
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[B_PIC]++;
+ }
+ }
+ else
+ {
+ /*
+ * An I_PIC arriving before the GOP length is reached is output as
+ * P_PIC (except in the first GOP) and counted as an extra P_PIC
+ */
+ if((e_pic_type == I_PIC) && (!ps_pic_handling->i4_is_first_gop))
+ {
+ e_out_pic_type = P_PIC;
+ ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+ ps_pic_handling->i4_frms_in_cur_gop[P_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[P_PIC]++;
+ }
+ else
+ {
+ e_out_pic_type = e_pic_type;
+ }
+ }
+
+ /* Update the frm_vars at the end of the gop */
+ if(i4_pic_disp_order_no
+ == (ps_pic_handling->i4_frms_in_cur_gop[P_PIC]
+ + ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+ + 1))
+ {
+ i4_pic_disp_order_no = 0;
+ ps_pic_handling->i4_is_first_gop = 0;
+ }
+
+ /* Update the vars working on the encoded pics */
+ if((ps_pic_handling->i4_is_first_gop)
+ && (ps_pic_handling->i4_stack_count == -1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /*
+ * Add the pic_details to the pic_stack.
+ * The display order number stored here is the one still held in the
+ * state structure; the local copy that was incremented (and possibly
+ * reset) above is written back to the state only further below.
+ * NOTE(review): i4_stack_count is used as the index without a range
+ * check here - confirm it cannot still be -1 at this point.
+ */
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].e_pic_type =
+ e_out_pic_type;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no =
+ ps_pic_handling->i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_id =
+ i4_enc_pic_id;
+
+ /* Writing back those values which need to be updated */
+ ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+
+ return (0);
+}
diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h
new file mode 100755
index 0000000..1af5424
--- /dev/null
+++ b/encoder/irc_picture_type.h
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _PIC_HANDLING_H_
+#define _PIC_HANDLING_H_
+
+/*
+ * Public interface of the picture handling module.
+ *
+ * This header includes nothing itself although it uses WORD32,
+ * itt_memtab_t, ITT_FUNC_TYPE_E, picture_type_e and MAX_PIC_TYPE, so it has
+ * to be included after the headers that declare those names.
+ *
+ * Basic Understanding:
+ * irc_add_pic_to_stack(_re_enc):
+ * These functions convert the input (or display) order to encoding order
+ */
+/* Opaque handle: the structure layout is not visible through this header */
+typedef struct pic_handling_t *pic_handling_handle;
+
+/*
+ * Memtab helper of this module. The return value is added by the caller to
+ * its running memtab index.
+ */
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_handling,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_pic_handling(pic_handling_handle ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed);
+
+void irc_add_pic_to_stack(pic_handling_handle ps_pic_handling,
+ WORD32 i4_enc_pic_id);
+
+/*
+ * Re-encoder variant: the picture type is supplied by the caller.
+ * Returns 0 on success and -1 when the number of B pictures counted in the
+ * current subgop exceeds the maximum inter frame interval.
+ */
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_handle ps_pic_handling,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type);
+
+void irc_get_pic_from_stack(pic_handling_handle ps_pic_handling,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type);
+
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling);
+
+/*
+ * To be called when no further input frames will arrive: a trailing B
+ * picture is turned into a P picture and the unused stack slots are marked
+ * with MAX_PIC_TYPE / -1 so the codec can detect the end of the flush.
+ */
+void irc_flush_frame_from_pic_stack(pic_handling_handle ps_pic_handling);
+
+/* NITT TBR The below two functions should be made a single function */
+void irc_skip_encoded_frame(pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+void irc_update_pic_handling(pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+/*
+ * Function returns the number of frames that have been encoded in the GOP in
+ * which the force I frame takes impact
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_handle ps_pic_handling);
+
+void irc_set_force_I_frame_flag(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+/* Normal get functions */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_handle ps_pic_handling);
+
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_handle ps_pic_handling,
+ WORD32 i4_intra_frm_int);
+
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_handle ps_pic_handling,
+ WORD32 i4_inter_frm_int);
+
+/* The output arrays of the two getters below are indexed by picture type */
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_handle ps_pic_handling,
+ WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]);
+
+void irc_pic_type_get_frms_in_gop(pic_handling_handle ps_pic_handling,
+ WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+#endif /* _PIC_HANDLING_H_ */
+
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
new file mode 100755
index 0000000..6c6586e
--- /dev/null
+++ b/encoder/irc_rate_control_api.c
@@ -0,0 +1,1600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "irc_trace_support.h"
+
+/*
+ * QP deviation limits relative to a previous QP, in fixed point with DEV_Q
+ * fractional bits. GET_HI_DEV_QP / GET_LO_DEV_QP scale the previous QP by
+ * HI_DEV_FCTR / LO_DEV_FCTR with rounding; CLIP_QP clamps Qc to
+ * [lo_d, hi_d]. Every macro argument is parenthesized so that expression
+ * arguments are cast and evaluated as a whole. CLIP_QP evaluates its
+ * arguments more than once, so do not pass expressions with side effects.
+ */
+#define DEV_Q 4 /* Q format (shift) for the deviation range factor */
+#define HI_DEV_FCTR 22 /* ~1.4 * 16 (22 / 16 = 1.375) */
+#define LO_DEV_FCTR 12 /* 0.75 * 16 */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32)(Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define GET_LO_DEV_QP(Qprev) ((((WORD32)(Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? (lo_d) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
+
+/*****************************************************************************/
+/* Restricts the quantization parameter variation within delta */
+/*****************************************************************************/
+/* static WORD32 restrict_swing(WORD32 cur_qp, WORD32 prev_qp, WORD32 delta_qp)
+ {
+ if((cur_qp) - (prev_qp) > (delta_qp)) (cur_qp) = (prev_qp) + (delta_qp) ;
+ if((prev_qp) - (cur_qp) > (delta_qp)) (cur_qp) = (prev_qp) - (delta_qp) ;
+ return cur_qp;
+ }*/
+
+/*****************************************************************************
+ Function Name : irc_rate_control_num_fill_use_free_memtab
+ Description   : Memtab entry point of the rate control api. Handles the
+                 memtab of the api state structure itself and then forwards
+                 the request to every sub-module (bit allocation, cbr buffer,
+                 sad estimation, mb level rc, vbr storage vbv, one rd model
+                 per picture type, pic handling).
+ Inputs        : pps_rate_control_api - pointer to the RC api handle
+                 ps_memtab            - memtab array
+                 e_func_type          - requested memtab operation
+ Returns       : 1 (for the api state) plus the sum of the values returned
+                 by the sub-module memtab functions
+ *****************************************************************************/
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    /*
+     * Stand-in state for the GET_NUM_MEMTAB / FILL_MEMTAB requests: no state
+     * memory exists yet at that time, and the sub-module handles are
+     * addressed through the api structure below
+     */
+    static rate_control_api_t s_temp_rc_api;
+    rate_control_handle ps_rc_api;
+    WORD32 i4_num_memtabs = 0;
+    WORD32 i4_pic_type;
+
+    if((e_func_type == GET_NUM_MEMTAB) || (e_func_type == FILL_MEMTAB))
+    {
+        *pps_rate_control_api = &s_temp_rc_api;
+    }
+
+    /* Memtab of the rate control state structure itself */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_num_memtabs], sizeof(rate_control_api_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_rate_control_api,
+                         e_func_type);
+    }
+    i4_num_memtabs++;
+
+    /* From here on the handle is only read, so fetch it once */
+    ps_rc_api = *pps_rate_control_api;
+
+    /* Memory requirement of the lower modules */
+    i4_num_memtabs += irc_ba_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_bit_allocation,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_cbr_buffer_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_cbr_buffer,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_est_sad_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_est_sad,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_mbrc_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_mb_rate_control,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_vbr_vbv_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_vbr_storage_vbv,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    /* One rd model per picture type */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        i4_num_memtabs += irc_rd_model_num_fill_use_free_memtab(
+                        &ps_rc_api->aps_rd_model[i4_pic_type],
+                        &ps_memtab[i4_num_memtabs], e_func_type);
+    }
+
+    i4_num_memtabs += irc_pic_handling_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_pic_handling,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    return (i4_num_memtabs);
+}
+
+/*****************************************************************************
+ Function Name : irc_initialise_rate_control
+ Description   : Initialise the rate control structure and its sub-modules
+ Inputs        : ps_rate_control_api     - api struct
+                 e_rate_control_type     - VBR, CBR (NLDRC/LDRC), VBR_STREAMING
+                 u1_is_mb_level_rc_on    - enabling mb level RC
+                 u4_avg_bit_rate         - bit rate to be achieved across the
+                                           entire file size
+                 pu4_peak_bit_rate       - max possible drain rates; read as
+                                           an array of MAX_NUM_DRAIN_RATES
+                                           entries
+                 u4_min_bit_rate         - forwarded to the bit allocation
+                                           module
+                 u4_frame_rate           - number of frames in 1000 seconds
+                 u4_max_delay            - maximum delay (presumably in
+                                           milliseconds, given the division
+                                           by 1000000 below - TODO confirm)
+                 u4_intra_frame_interval - num frames between two I frames
+                 pu1_init_qp             - init_qp for I,P,B (MAX_PIC_TYPE
+                                           entries)
+                 u4_max_vbv_buff_size    - forwarded to the buffer modules
+                 i4_max_inter_frm_int    - forwarded to pic handling
+                 i4_is_gop_closed        - forwarded to pic handling
+                 pu1_min_max_qp          - QP limits, two entries per picture
+                                           type (2 * MAX_PIC_TYPE entries)
+                 i4_use_est_intra_sad    - forwarded to the SAD estimation
+                                           module
+                 u4_src_ticks            - forwarded to the VBR streaming
+                 u4_tgt_ticks              parameters (VBR_STREAMING only)
+ Notes         : The products u4_frame_rate * u4_max_delay and
+                 u4_avg_bit_rate * 1000 are formed in 64 bits. In 32 bits the
+                 latter wraps for bit rates above roughly 4.29 Mbps, which
+                 used to corrupt the initial i4_prev_frm_est_bits.
+                 u4_frame_rate must be non-zero.
+ *****************************************************************************/
+void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
+                                 rc_type_e e_rate_control_type,
+                                 UWORD8 u1_is_mb_level_rc_on,
+                                 UWORD32 u4_avg_bit_rate,
+                                 UWORD32 *pu4_peak_bit_rate,
+                                 UWORD32 u4_min_bit_rate,
+                                 UWORD32 u4_frame_rate,
+                                 UWORD32 u4_max_delay,
+                                 UWORD32 u4_intra_frame_interval,
+                                 UWORD8 *pu1_init_qp,
+                                 UWORD32 u4_max_vbv_buff_size,
+                                 WORD32 i4_max_inter_frm_int,
+                                 WORD32 i4_is_gop_closed,
+                                 UWORD8 *pu1_min_max_qp,
+                                 WORD32 i4_use_est_intra_sad,
+                                 UWORD32 u4_src_ticks,
+                                 UWORD32 u4_tgt_ticks)
+{
+    WORD32 i;
+    /* 64-bit intermediate: the 32-bit product can wrap for long delays */
+    UWORD32 u4_frms_in_delay_prd =
+                    (UWORD32)(((unsigned long long)u4_frame_rate
+                                    * u4_max_delay) / 1000000);
+
+    ps_rate_control_api->e_rc_type = e_rate_control_type;
+    ps_rate_control_api->u1_is_mb_level_rc_on = u1_is_mb_level_rc_on;
+
+    trace_printf((const WORD8*)"RC type = %d\n", e_rate_control_type);
+
+    /* Set the avg_bitrate_changed flag for each pic_type to 0 */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[i] = 0;
+    }
+
+    /* Initialize the pic_handling module */
+    irc_init_pic_handling(ps_rate_control_api->ps_pic_handling,
+                          (WORD32)u4_intra_frame_interval,
+                          i4_max_inter_frm_int, i4_is_gop_closed);
+
+    /*** Initialize the rate control modules ***/
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        /* Initialize the model parameter structures */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            irc_init_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i],
+                                     MAX_FRAMES_MODELLED);
+        }
+
+        /* Initialize the buffer mechanism */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            /* Assuming both the peak bit rates are same for a VBR_STORAGE and
+             VBR_STORAGE_DVD_COMP */
+            if(pu4_peak_bit_rate[0] != pu4_peak_bit_rate[1])
+            {
+                trace_printf((const WORD8*)"For VBR_STORAGE and VBR_STORAGE_DVD_COMP the peak bit rates should be same\n");
+            }
+            irc_init_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                             (WORD32)pu4_peak_bit_rate[0],
+                             (WORD32)u4_frame_rate,
+                             (WORD32)u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            /* For CBR every drain rate equals the average bit rate */
+            UWORD32 u4_avg_bit_rate_copy[MAX_NUM_DRAIN_RATES];
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                u4_avg_bit_rate_copy[i] = u4_avg_bit_rate;
+            }
+            /* In case of CBR the num pics in delay is ignored */
+            for(i = 0; i < MAX_PIC_TYPE; i++)
+            {
+                au4_num_pics_in_delay_prd[i] = 0;
+            }
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)u4_avg_bit_rate_copy,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            irc_init_vbv_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                  u4_intra_frame_interval, u4_src_ticks,
+                                  u4_tgt_ticks, u4_frms_in_delay_prd);
+
+            /* Get the number of pics of each type in delay period */
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)pu4_peak_bit_rate,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+
+        /* Initialize the SAD estimation module */
+        irc_init_est_sad(ps_rate_control_api->ps_est_sad,
+                         i4_use_est_intra_sad);
+
+        /* Initialize the bit allocation module according to VBR or CBR */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type == VBR_STREAMING)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       VBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       CBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+
+        /*
+         * u1_scd_detected will be initialized to 1 when a Scene change is
+         * detected
+         */
+        ps_rate_control_api->u1_scd_detected = 0;
+    }
+
+    /* Initialize the init_qp, the previous frame qp and the qp limits */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_init_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_prev_frm_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_min_max_qp[(i << 1)] =
+                        pu1_min_max_qp[(i << 1)];
+        ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] =
+                        pu1_min_max_qp[(i << 1) + 1];
+    }
+
+    /* Initialize the is_first_frm_encoded */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+    }
+    ps_rate_control_api->u1_is_first_frm = 1;
+
+    /*
+     * Control flag for delayed impact after a change in peak bitrate has been
+     * made
+     */
+    ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = 0;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_rate_control_api->au4_new_peak_bit_rate[i] = pu4_peak_bit_rate[i];
+    }
+
+    /* Initialize the mb level rate control module */
+    irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+    /*
+     * Initial estimate: average bits per frame. u4_frame_rate is in frames
+     * per 1000 seconds, hence the factor 1000. The product is formed in 64
+     * bits so that bit rates above ~4.29 Mbps do not wrap.
+     */
+    ps_rate_control_api->i4_prev_frm_est_bits =
+                    (WORD32)(((unsigned long long)u4_avg_bit_rate * 1000)
+                                    / u4_frame_rate);
+
+    ps_rate_control_api->prev_ref_pic_type = I_PIC;
+}
+
+/******************************************************************************
+ *Description : Hands a picture id over to the pic handling module by calling
+ *              irc_add_pic_to_stack
+ ******************************************************************************/
+void irc_add_picture_to_stack(rate_control_api_t *rate_control_api,
+                              WORD32 i4_enc_pic_id)
+{
+    pic_handling_handle ps_pic_handling = rate_control_api->ps_pic_handling;
+
+    /* The pic handling module owns the picture stack */
+    irc_add_pic_to_stack(ps_pic_handling, i4_enc_pic_id);
+}
+
+/******************************************************************************
+ *Description : Re-encoder variant of irc_add_picture_to_stack; calls
+ *              irc_add_pic_to_stack_re_enc with the picture type supplied by
+ *              the caller.
+ *              This wrapper returns void, so a failure of the callee cannot
+ *              be propagated; it is reported through trace_printf instead of
+ *              being dropped silently.
+ ******************************************************************************/
+void irc_add_picture_to_stack_re_enc(rate_control_api_t *rate_control_api,
+                                     WORD32 i4_enc_pic_id,
+                                     picture_type_e e_pic_type)
+{
+    /*
+     * In case of a re-encoder, the pics will come in the encode order itself.
+     * So, there is no need to buffer the pics up
+     */
+    if(irc_add_pic_to_stack_re_enc(rate_control_api->ps_pic_handling,
+                                   i4_enc_pic_id, e_pic_type) != 0)
+    {
+        /* Non-zero return: the picture was not added to the stack */
+        trace_printf((const WORD8*)"irc_add_pic_to_stack_re_enc failed for pic id %d\n",
+                     i4_enc_pic_id);
+    }
+}
+
+/*******************************************************************************
+ Description : Fetches pic id, display order number and picture type through
+               irc_get_pic_from_stack of the pic handling module; the three
+               output pointers are passed on unchanged
+ ******************************************************************************/
+void irc_get_picture_details(rate_control_handle rate_control_api,
+                             WORD32 *pi4_pic_id,
+                             WORD32 *pi4_pic_disp_order_no,
+                             picture_type_e *pe_pic_type)
+{
+    pic_handling_handle ps_pic_handling = rate_control_api->ps_pic_handling;
+
+    irc_get_pic_from_stack(ps_pic_handling, pi4_pic_id, pi4_pic_disp_order_no,
+                           pe_pic_type);
+}
+
+/*******************************************************************************
+ * Description : Gets the frame level qp for the given picture type
+ * Inputs : ps_rate_control_api - api struct
+ * e_pic_type - picture type of the frame to be coded
+ * i4_ud_max_bits - user-defined maximum number of bits; the
+ * estimated texture bits of non-I pictures are
+ * clipped to (i4_ud_max_bits - header bits)
+ * Returns : the frame qp.
+ * - 0 when the rc type is not one of the supported types
+ * - au1_init_qp[e_pic_type] for CONST_QP
+ * - otherwise the model based qp once enough frames have been
+ * coded, else the qp computed after a scene change or the
+ * previous frame qp of this picture type
+ ******************************************************************************/
+UWORD8 irc_get_frame_level_qp(rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 i4_ud_max_bits)
+{
+ UWORD8 u1_frame_qp, i;
+
+ if((ps_rate_control_api->e_rc_type != VBR_STORAGE)
+ && (ps_rate_control_api->e_rc_type != VBR_STORAGE_DVD_COMP)
+ && (ps_rate_control_api->e_rc_type != CBR_NLDRC)
+ && (ps_rate_control_api->e_rc_type != CONST_QP)
+ && (ps_rate_control_api->e_rc_type != VBR_STREAMING))
+ {
+ trace_printf((const WORD8*)(const WORD8*)" Only VBR,NLDRC and CONST QP supported for now \n");
+ return (0);
+ }
+
+ if(ps_rate_control_api->e_rc_type != CONST_QP)
+ {
+ UWORD8 u1_is_first_frm_coded = 1;
+
+ /*
+ * The model based qp is used only when an I and a P frame have already
+ * been coded (an I frame alone is enough when the intra frame interval
+ * is 1). For a B_PIC, a frame of every picture type must have been
+ * coded.
+ */
+ /* Check whether at least one frame of a each picture type gets encoded*/
+ /* Check whether it is an IPP or IPB kind of encoding */
+ if((ps_rate_control_api->au1_is_first_frm_coded[I_PIC]
+ && ps_rate_control_api->au1_is_first_frm_coded[P_PIC])
+ || ((irc_pic_type_get_intra_frame_interval(
+ ps_rate_control_api->ps_pic_handling)
+ == 1)
+ && (ps_rate_control_api->au1_is_first_frm_coded[I_PIC])))
+ {
+ if(e_pic_type != B_PIC)
+ u1_is_first_frm_coded = 1;
+ else
+ {
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ u1_is_first_frm_coded &=
+ ps_rate_control_api->au1_is_first_frm_coded[i];
+ }
+ }
+ }
+ else
+ {
+ u1_is_first_frm_coded = 0;
+ }
+
+ if(u1_is_first_frm_coded)
+ {
+ WORD32 i4_cur_est_texture_bits, i4_cur_est_header_bits;
+ WORD32 i4_cur_est_bits;
+ UWORD32 u4_estimated_sad;
+
+ /* Force I frame updation of rem_bits_in_frame*/
+ if(irc_get_forced_I_frame_cur_frm_flag(
+ ps_rate_control_api->ps_pic_handling) == 1)
+ {
+ irc_ba_change_rem_bits_in_prd_at_force_I_frame(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->ps_pic_handling);
+ irc_reset_forced_I_frame_cur_frm_flag(
+ ps_rate_control_api->ps_pic_handling);
+ }
+
+ /* Get the estimated texture bits allocated for the current frame*/
+ i4_cur_est_texture_bits = irc_ba_get_cur_frm_est_texture_bits(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->aps_rd_model,
+ ps_rate_control_api->ps_est_sad,
+ ps_rate_control_api->ps_pic_handling, e_pic_type);
+
+ /* Get the estimated header bits*/
+ i4_cur_est_header_bits = irc_ba_get_cur_frm_est_header_bits(
+ ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+ /* Total estimated bits */
+ i4_cur_est_bits = i4_cur_est_header_bits + i4_cur_est_texture_bits;
+
+ trace_printf((const WORD8*)"ft %d, etb = %d, eb %d, ", e_pic_type,
+ i4_cur_est_texture_bits, i4_cur_est_bits);
+
+ /* Threshold the estimated bits based on the buffer fullness*/
+ if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+ {
+ WORD32 i4_cur_frm_max_bit_possible;
+ i4_cur_frm_max_bit_possible = irc_get_max_target_bits(
+ ps_rate_control_api->ps_vbr_storage_vbv);
+
+ if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+ {
+ /* Assuming header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+ - i4_cur_est_header_bits;
+ }
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+ {
+ /* NOTE: this 'i' (WORD32) shadows the UWORD8 'i' of the function scope */
+ WORD32 i4_rem_bits_in_gop, i4_rem_frms_in_gop, i;
+ WORD32 i4_cur_frm_max_bit_possible,
+ ai4_rem_frms_in_gop[MAX_PIC_TYPE];
+ irc_pic_type_get_rem_frms_in_gop(
+ ps_rate_control_api->ps_pic_handling,
+ ai4_rem_frms_in_gop);
+ i4_rem_bits_in_gop = irc_get_rem_bits_in_period(
+ ps_rate_control_api);
+ /* Total remaining frames = sum over all picture types */
+ i4_rem_frms_in_gop = 0;
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ i4_rem_frms_in_gop += ai4_rem_frms_in_gop[i];
+
+ /* Threshold the bits based on estimated buffer fullness */
+ i4_cur_frm_max_bit_possible = irc_get_max_tgt_bits_dvd_comp(
+ ps_rate_control_api->ps_vbr_storage_vbv,
+ i4_rem_bits_in_gop, i4_rem_frms_in_gop,
+ e_pic_type);
+
+ if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+ {
+ /* Assuming header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+ - i4_cur_est_header_bits;
+
+ }
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ WORD32 i4_cur_frm_bits_acc_buffer =
+ irc_cbr_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer,
+ i4_cur_est_bits, e_pic_type);
+
+ /* Assuming the header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+ - i4_cur_est_header_bits;
+
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+ {
+ WORD32 i4_cur_frm_bits_acc_buffer =
+ irc_vbr_stream_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer,
+ i4_cur_est_bits, e_pic_type);
+
+ /* Assuming the header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+ - i4_cur_est_header_bits;
+ }
+
+ trace_printf((const WORD8*)"emtb = %d, ", i4_cur_est_texture_bits);
+
+ /*
+ * If the estimated texture bits go to values less than zero
+ * due to buffer underflow, make the estimated target bits to go
+ * to zero
+ */
+ if(i4_cur_est_texture_bits < 0)
+ i4_cur_est_texture_bits = 0;
+
+ /*
+ * The estimate is recorded here, i.e. before the user-defined
+ * maximum below is applied
+ */
+ ps_rate_control_api->i4_prev_frm_est_bits = (i4_cur_est_texture_bits
+ + i4_cur_est_header_bits);
+
+ /* Clip est_texture_bits according to the user-defined max value */
+ if((i4_cur_est_texture_bits
+ > (i4_ud_max_bits - i4_cur_est_header_bits))
+ && (e_pic_type != I_PIC))
+ {
+ i4_cur_est_texture_bits = (i4_ud_max_bits
+ - i4_cur_est_header_bits);
+ trace_printf((const WORD8*)"udcb = %d, ",
+ i4_ud_max_bits - i4_cur_est_header_bits);
+ }
+
+ /* Calculate the estimated SAD for corresponding frame*/
+ u4_estimated_sad = irc_get_est_sad(ps_rate_control_api->ps_est_sad,
+ e_pic_type);
+
+ /* Query the model for the Qp for the corresponding frame*/
+
+ /*
+ * The check is because the model gives a negative QP when the
+ * i4_cur_est_texture_bits is less than or equal to 0
+ * [This is a bug in the model]. As a temporary fix, the frame QP
+ * is being set to the max QP allowed
+ */
+ if(i4_cur_est_texture_bits > 0)
+ {
+ u1_frame_qp = irc_find_qp_for_target_bits(
+ ps_rate_control_api->aps_rd_model[e_pic_type],
+ i4_cur_est_texture_bits,
+ u4_estimated_sad,
+ ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1)],
+ ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1) + 1]);
+ }
+ else
+ {
+ u1_frame_qp = ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1) + 1];
+ }
+
+ trace_printf((const WORD8*)"ehb %d, etb %d, fqp %d, es %d, eb %d, ",
+ i4_cur_est_header_bits, i4_cur_est_texture_bits,
+ u1_frame_qp, u4_estimated_sad, i4_cur_est_bits);
+
+ /*
+ * Restrict the QP swing, unless the average bit rate has just been
+ * changed for this picture type: in that case the restriction is
+ * skipped once and the flag is cleared (else branch below)
+ */
+ if(ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] == 0)
+ {
+ WORD32 prev_qp;
+ WORD32 hi_dev_qp, lo_dev_qp;
+ /* Restricting the qp swing */
+ /* Start from the qp of the previous reference picture type */
+ prev_qp = ps_rate_control_api->au1_prev_frm_qp[ps_rate_control_api->prev_ref_pic_type];
+
+ /*
+ * When the picture types differ, prev_qp is first scaled by the
+ * corresponding ratio (fixed point with K_Q fractional bits,
+ * rounded)
+ */
+ if(ps_rate_control_api->prev_ref_pic_type != e_pic_type)
+ {
+ if(e_pic_type == I_PIC)
+ {
+ /*
+ * Constrain I-frame QP to be within specified limit of
+ * prev_ref_qp/Kp
+ */
+ prev_qp = (P_TO_I_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else if(e_pic_type == P_PIC)
+ {
+ /*
+ * Constrain P-frame QP to be within specified limit of
+ * Kp*prev_ref_qp
+ */
+ prev_qp = (I_TO_P_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else if(ps_rate_control_api->prev_ref_pic_type == P_PIC)
+ {
+ /* current frame is B-pic */
+ /* Constrain B-frame QP to be within specified limit of
+ * prev_ref_qp/Kb
+ */
+ prev_qp = (P_TO_B_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else /* if(ps_rate_control_api->prev_ref_pic_type == I_PIC*/
+ {
+ /* current frame is B-pic */
+ /*
+ * Constrain B-frame QP to be within specified limit of
+ * prev_ref_qp/Kb
+ */
+ /* Two ratios are chained here, hence the shift by 2 * K_Q */
+ prev_qp = (P_TO_B_RATIO * I_TO_P_RATIO * prev_qp
+ + (1 << (K_Q + K_Q - 1)))
+ >> (K_Q + K_Q);
+ }
+ }
+
+ hi_dev_qp = GET_HI_DEV_QP(prev_qp);
+ /*
+ * For lower QPs due to scale factor and fixed point arithmetic,
+ * the hi_dev_qp can be same as that of the prev qp and in which
+ * case it gets stuck in the lower most qp and thus not allowing
+ * QPs not to change. To avoid this,for lower qps the hi_dev_qp
+ * should be made slightly more than prev_qp
+ */
+ if(prev_qp == hi_dev_qp)
+ {
+ hi_dev_qp += 1;
+ }
+ lo_dev_qp = GET_LO_DEV_QP(prev_qp);
+ /* Clamp the model qp to [lo_dev_qp, hi_dev_qp] */
+ u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp, lo_dev_qp);
+ }
+ else
+ {
+ ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] = 0;
+ }
+ }
+ else
+ {
+ /*
+ * The u1_is_first_frm_coded gets reset
+ * a) at start of sequence
+ * b) whenever there is a scene change.
+ * In both cases since we do not have any estimate about the
+ * current frame, we just send in the previous frame qp value.IN
+ * Scene change case the previous QP is incremented by 4 , This is
+ * done because the Scene changed VOP will have over consumed and
+ * chances of future frames skipping is very high. For the init
+ * case, the previous frame QP is initialized with the init qp
+ */
+ if((ps_rate_control_api->u1_scd_detected)
+ && (ps_rate_control_api->e_rc_type != CONST_QP))
+ {
+ /*
+ * If scene change is detected, I frame Qp would have been
+ * updated
+ */
+ /* Use a QP calculated in the prev update fxn */
+ u1_frame_qp = ps_rate_control_api->u1_frm_qp_after_scd;
+ }
+ else
+ {
+ u1_frame_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+ }
+ }
+ }
+ else
+ {
+ /* CONST_QP: always the configured init qp of this picture type */
+ u1_frame_qp = ps_rate_control_api->au1_init_qp[e_pic_type];
+ }
+
+ trace_printf((const WORD8*)"fqp %d\n", u1_frame_qp);
+
+ return (u1_frame_qp);
+}
+
+/*******************************************************************************
+ * Function Name : irc_get_buffer_status
+ * Description   : Gets the state of the VBV buffer for the bits consumed by
+ *                 the current frame, dispatching on the configured rate
+ *                 control type:
+ *                   VBR_STORAGE_DVD_COMP     - irc_get_vbv_buffer_status()
+ *                   VBR_STORAGE              - always VBV_NORMAL; the output
+ *                                              is set to the max VBV size
+ *                   CBR_NLDRC, VBR_STREAMING - irc_get_cbr_buffer_status()
+ *                 For any other type the status stays VBV_NORMAL and the
+ *                 output pointer is not written.
+ * Outputs       : 0 = normal, 1 = underflow, 2= overflow
+ *                 pi4_num_bits_to_prevent_vbv_underflow[0] is filled by the
+ *                 selected buffer model
+ * Returns       : vbv_buf_status_e
+ ******************************************************************************/
+vbv_buf_status_e irc_get_buffer_status(rate_control_api_t *ps_rate_control_api,
+                                       WORD32 i4_total_frame_bits,
+                                       picture_type_e e_pic_type,
+                                       WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    vbv_buf_status_e e_status = VBV_NORMAL;
+
+    switch(ps_rate_control_api->e_rc_type)
+    {
+        case VBR_STORAGE_DVD_COMP:
+            e_status = irc_get_vbv_buffer_status(
+                            ps_rate_control_api->ps_vbr_storage_vbv,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow);
+
+            trace_printf((const WORD8*)"e_buf_status = %d\n", e_status);
+            break;
+
+        case VBR_STORAGE:
+            /* No underflow is reported in this mode: hand back the max size */
+            *pi4_num_bits_to_prevent_vbv_underflow = irc_get_max_vbv_buf_size(
+                            ps_rate_control_api->ps_vbr_storage_vbv);
+            break;
+
+        case CBR_NLDRC:
+        case VBR_STREAMING:
+            /* Both modes query the same CBR buffer model */
+            e_status = irc_get_cbr_buffer_status(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow, e_pic_type);
+            break;
+
+        default:
+            break;
+    }
+
+    return e_status;
+}
+
+/*******************************************************************************
+ Function Name : irc_update_pic_handling_state
+ Description : Updates the picture handling module by forwarding e_pic_type
+ to irc_update_pic_handling() on ps_pic_handling. Nothing else in the rate
+ control state is touched.
+ NOTE(review): the original description ("If the forward path and the
+ backward path of rate control") was cut off mid-sentence. Presumably this
+ entry point is for callers that update picture handling on their own and
+ then pass i4_is_pic_handling_done to irc_update_frame_level_info() - confirm.
+ Inputs : ps_rate_control_api - rate control state
+ e_pic_type - picture type, forwarded unchanged
+ ******************************************************************************/
+void irc_update_pic_handling_state(rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type)
+{
+ irc_update_pic_handling(ps_rate_control_api->ps_pic_handling, e_pic_type);
+}
+
+/******************************************************************************
+ Function Name : irc_update_frame_level_info
+ Description   : Updates the frame level information into the rate control
+                 structure once a frame has been encoded (or skipped).
+                 In order it:
+                   1) updates picture handling (unless skipped or already done)
+                   2) updates bit allocation with the bits consumed
+                   3) updates the VBV / CBR buffer model for the rc type
+                   4) handles a detected scene change (model/SAD/flags reset)
+                   5) feeds the RD model, MB level model and SAD estimator
+                 Steps 2-5 are bypassed for CONST_QP; steps 2, 4 and 5 are
+                 bypassed for skipped frames.
+ Inputs        : ps_rate_control_api        - rate control state
+                 e_pic_type                 - type of the coded picture; it is
+                                              treated as I_PIC locally when a
+                                              scene change is handled
+                 pi4_mb_type_sad            - SAD per MB type   [MAX_MB_TYPE]
+                 i4_total_frame_bits        - total bits consumed by the frame
+                 i4_model_updation_hdr_bits - header bits, passed on to the
+                                              bit allocation module
+                 pi4_mb_type_tex_bits       - texture bits per MB type
+                                              [MAX_MB_TYPE]
+                 pi4_tot_mb_type_qp         - summed QP per MB type
+                                              [MAX_MB_TYPE]
+                 pi4_tot_mb_in_type         - MB count per MB type
+                                              [MAX_MB_TYPE]
+                 i4_avg_activity            - average activity; scales the SAD
+                                              values when MB level RC is on
+                 u1_is_scd                  - scene change flag; ignored when
+                                              the inter frame interval is > 1
+                 i4_is_it_a_skip            - non zero if the frame was skipped
+                 i4_intra_frm_cost          - intra cost of the frame
+                 i4_is_pic_handling_done    - non zero if picture handling has
+                                              already been updated
+ ******************************************************************************/
+void irc_update_frame_level_info(rate_control_api_t *ps_rate_control_api,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_mb_type_sad,
+                                 WORD32 i4_total_frame_bits,
+                                 WORD32 i4_model_updation_hdr_bits,
+                                 WORD32 *pi4_mb_type_tex_bits,
+                                 WORD32 *pi4_tot_mb_type_qp,
+                                 WORD32 *pi4_tot_mb_in_type,
+                                 WORD32 i4_avg_activity,
+                                 UWORD8 u1_is_scd,
+                                 WORD32 i4_is_it_a_skip,
+                                 WORD32 i4_intra_frm_cost,
+                                 WORD32 i4_is_pic_handling_done)
+{
+    UWORD8 u1_num_skips = 0;
+    WORD32 i;
+    UWORD32 u4_frame_sad = 0;
+    WORD32 i4_tot_texture_bits = 0;
+    WORD32 i4_tot_mbs = 0;
+    WORD32 i4_avg_qp = 0;
+
+    /* SCD not supported in case of IPB encoder */
+    if(u1_is_scd && (irc_pic_type_get_inter_frame_interval(
+                    ps_rate_control_api->ps_pic_handling) > 1))
+    {
+        u1_is_scd = 0;
+    }
+    trace_printf((const WORD8*)"i4_total_frame_bits %d\n", i4_total_frame_bits);
+
+    if(!i4_is_it_a_skip && !i4_is_pic_handling_done)
+    {
+        /* Update the pic_handling struct */
+        irc_update_pic_handling(ps_rate_control_api->ps_pic_handling,
+                                e_pic_type);
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        if(!i4_is_it_a_skip)
+        {
+            WORD32 i4_new_period_flag;
+
+            /******************************************************************
+             Calculate the total values from the individual values
+             ******************************************************************/
+            for(i = 0; i < MAX_MB_TYPE; i++)
+            {
+                u4_frame_sad += pi4_mb_type_sad[i];
+                i4_tot_texture_bits += pi4_mb_type_tex_bits[i];
+                i4_avg_qp += pi4_tot_mb_type_qp[i];
+                i4_tot_mbs += pi4_tot_mb_in_type[i];
+            }
+
+            /*
+             * Calculate the average QP. If no MBs were reported the division
+             * would be by zero; in that case carry over the QP of the previous
+             * frame of this type, since the frame adds no new information.
+             */
+            if(i4_tot_mbs != 0)
+            {
+                i4_avg_qp /= i4_tot_mbs;
+            }
+            else
+            {
+                i4_avg_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+            }
+
+            if(ps_rate_control_api->u1_is_mb_level_rc_on)
+            {
+                /*
+                 * The model needs to take into consideration the average
+                 * activity of the entire frame while estimating the QP. Thus
+                 * the frame sad values are scaled by the average activity
+                 * before updating it into the model.
+                 */
+                if(!i4_avg_activity)
+                    i4_avg_activity = 1;
+                i4_intra_frm_cost *= i4_avg_activity;
+                u4_frame_sad *= i4_avg_activity;
+            }
+
+            /******************************************************************
+             Update the bit allocation module
+             NOTE: For bit allocation module, the pic_type should not be
+             modified to that of 'I', in case of a SCD.
+             ******************************************************************/
+            i4_new_period_flag = irc_is_last_frame_in_gop(
+                            ps_rate_control_api->ps_pic_handling);
+            irc_ba_update_cur_frm_consumed_bits(
+                            ps_rate_control_api->ps_bit_allocation,
+                            ps_rate_control_api->ps_pic_handling,
+                            i4_total_frame_bits, i4_model_updation_hdr_bits,
+                            e_pic_type, u1_is_scd, i4_new_period_flag);
+
+            if(1 == i4_new_period_flag
+                            && ((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                                            || (ps_rate_control_api->e_rc_type
+                                                            == VBR_STORAGE_DVD_COMP)))
+            {
+                irc_ba_check_and_update_bit_allocation(
+                                ps_rate_control_api->ps_bit_allocation,
+                                ps_rate_control_api->ps_pic_handling,
+                                irc_get_cur_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_bits_per_tgt_frm(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                i4_total_frame_bits);
+            }
+        }
+
+        /**********************************************************************
+         Update the buffer status
+         *********************************************************************/
+        /*
+         * This update is done after overflow and underflow handling to
+         * account for the actual bits dumped
+         */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_update_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                               i4_total_frame_bits);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+
+            irc_update_vbr_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                    e_pic_type);
+
+            irc_change_cbr_vbv_num_pics_in_delay_period(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            au4_num_pics_in_delay_prd);
+
+            /*
+             * If the change_in_peak_bitrate flag is set, after the delay period
+             * update the peak_bitrate and the buffer parameters
+             */
+            if(!ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+            {
+                irc_ba_change_ba_peak_bit_rate(
+                                ps_rate_control_api->ps_bit_allocation,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+                irc_change_cbr_vbv_bit_rate(
+                                ps_rate_control_api->ps_cbr_buffer,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+            }
+            if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+                ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change--;
+        }
+
+        if(!i4_is_it_a_skip)
+        {
+            /*******************************************************************
+             Handle the SCENE CHANGE DETECTED
+             1) Make the picture type as I, so that updation happens as if it is
+             an I frame
+             2) Reset model, SAD and flag to restart the estimation process
+             ******************************************************************/
+            if(u1_is_scd)
+            {
+                WORD32 i4_frm_qp_after_scd;
+                UWORD32 u4_prev_I_frm_sad;
+
+                e_pic_type = I_PIC;
+
+                /* Scale scd qp based on SCD Frm sad and previous I Frm sad */
+                /* frm_qp_after_scd = (avg_qp * cur_frm_sad)/prev_I_frm_sad */
+
+                /*
+                 * QP for the next frame should take care of
+                 * 1) due to scene change, the current picture has consumed more
+                 * bits
+                 * 2) relative complexity of the previous scene and the current
+                 * scene
+                 */
+
+                /* Get the intra SAD for the previous scene */
+                u4_prev_I_frm_sad = irc_get_est_sad(
+                                ps_rate_control_api->ps_est_sad, I_PIC);
+
+                /*
+                 * Scale the QP based on the SAD ratio of the current pic and
+                 * previous scene intra SAD
+                 */
+                X_PROD_Y_DIV_Z(i4_avg_qp, u4_frame_sad, u4_prev_I_frm_sad,
+                               i4_frm_qp_after_scd);
+
+                /* Limit the next frame qp by 50% across both the sides */
+                if(i4_frm_qp_after_scd > ((i4_avg_qp * 3) >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp * 3) >> 1;
+                }
+                else if(i4_frm_qp_after_scd < (i4_avg_qp >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp >> 1);
+                }
+
+                /*
+                 * Ensure that the next frame QP is within the min_max limit of
+                 * QP allowed
+                 */
+                if(i4_frm_qp_after_scd
+                                > ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1) + 1])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1) + 1];
+                }
+                else if(i4_frm_qp_after_scd
+                                < ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1)])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1)];
+                }
+
+                /* Update the state var */
+                ps_rate_control_api->u1_frm_qp_after_scd =
+                                (UWORD8)i4_frm_qp_after_scd;
+
+                /* re-set model */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    irc_reset_frm_rc_rd_model(
+                                    ps_rate_control_api->aps_rd_model[i]);
+                }
+
+                /* Reset the SAD estimation module */
+                irc_reset_est_sad(ps_rate_control_api->ps_est_sad);
+
+                /* Reset flag */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+                }
+
+                /* Reset the MB Rate control */
+                irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+                /* Set u1_scd_detected flag */
+                ps_rate_control_api->u1_scd_detected = 1;
+
+                /*
+                 * Adjust the average QP for the frame based on bits
+                 * consumption
+                 */
+                /*
+                 * Initialize the QP for each picture type according to the
+                 * average QP of the SCD pic
+                 */
+                ps_rate_control_api->au1_prev_frm_qp[I_PIC] = (UWORD8)i4_avg_qp;
+
+                trace_printf((const WORD8*)"SCD DETECTED\n");
+            }
+            else
+            {
+                ps_rate_control_api->u1_scd_detected = 0;
+                /**************************************************************
+                 Update the Qp used by the current frame
+                 **************************************************************/
+                ps_rate_control_api->au1_prev_frm_qp[e_pic_type] =
+                                (UWORD8)i4_avg_qp;
+            }
+
+            /********************************************************************
+             Update the model of the corresponding picture type
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            /*
+             * For very simple sequences no bits are consumed by texture. These
+             * frames do not add any information to the model and so not added
+             */
+            if(i4_tot_texture_bits && u4_frame_sad)
+            {
+                irc_add_frame_to_rd_model(
+                                ps_rate_control_api->aps_rd_model[e_pic_type],
+                                i4_tot_texture_bits, (UWORD8)i4_avg_qp,
+                                u4_frame_sad, u1_num_skips);
+
+                /*
+                 * At least one proper frame is added into the model. Until that
+                 * keep using the initial QP
+                 */
+                ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 1;
+            }
+
+            if(i4_avg_activity)
+            {
+                /* Update the mb_level model */
+                irc_mb_update_frame_level(
+                                ps_rate_control_api->ps_mb_rate_control,
+                                i4_avg_activity);
+            }
+
+            /******************************************************************
+             Update the sad estimation module
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            if(u4_frame_sad)
+            {
+                irc_update_actual_sad(ps_rate_control_api->ps_est_sad,
+                                      u4_frame_sad, e_pic_type);
+
+                irc_update_actual_sad_for_intra(ps_rate_control_api->ps_est_sad,
+                                                i4_intra_frm_cost);
+            }
+
+            /*
+             * Update the variable which denotes that a frame has been
+             * encountered
+             */
+            ps_rate_control_api->u1_is_first_frm = 0;
+
+        }
+    }
+
+    /* Store the prev encoded picture type for restricting Qp swing */
+    if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+    {
+        ps_rate_control_api->prev_ref_pic_type = e_pic_type;
+    }
+
+    trace_printf((const WORD8*)"ft %d,hb %d,tb %d,qp %d,fs %d\n", e_pic_type,
+                 i4_model_updation_hdr_bits, i4_tot_texture_bits, i4_avg_qp,
+                 u4_frame_sad);
+
+    return;
+}
+
+/*******************************************************************************
+ MB Level API functions
+ ******************************************************************************/
+
+/******************************************************************************
+ Function Name : irc_init_mb_rc_frame_level
+ Description : Initialise the frame level details required for a mb level
+ rate control (NOTE(review): the original sentence stopped after "mb level";
+ the completion is assumed). Implemented as a single call to
+ irc_mb_init_frame_level() on ps_mb_rate_control with u1_frame_qp.
+ Inputs : ps_rate_control_api - rate control state
+ u1_frame_qp - frame QP, forwarded unchanged
+ ******************************************************************************/
+
+void irc_init_mb_rc_frame_level(rate_control_api_t *ps_rate_control_api,
+ UWORD8 u1_frame_qp)
+{
+ irc_mb_init_frame_level(ps_rate_control_api->ps_mb_rate_control,
+ u1_frame_qp);
+}
+
+/******************************************************************************
+ Function Name : irc_get_mb_level_qp
+ Description   : Get the mb level qp.
+                 pi4_mb_qp[0] is the QP fed back to rate control and
+                 pi4_mb_qp[1] is the QP used for quantising the MB.
+                 With MB level RC off both entries are the frame level QP.
+                 With MB level RC on, irc_get_mb_qp() fills the pair and
+                 entry [1] is then limited to the min/max QP configured for
+                 e_pic_type (au1_min_max_qp[2 * type] / [2 * type + 1]).
+ *****************************************************************************/
+void irc_get_mb_level_qp(rate_control_api_t *ps_rate_control_api,
+                         WORD32 i4_cur_mb_activity,
+                         WORD32 *pi4_mb_qp,
+                         picture_type_e e_pic_type)
+{
+    WORD32 i4_lo_qp;
+    WORD32 i4_hi_qp;
+
+    if(!ps_rate_control_api->u1_is_mb_level_rc_on)
+    {
+        WORD32 i4_frm_qp = irc_get_frm_level_qp(
+                        ps_rate_control_api->ps_mb_rate_control);
+
+        pi4_mb_qp[0] = i4_frm_qp; /* Used as feedback for the rate control */
+        pi4_mb_qp[1] = i4_frm_qp; /* Used for quantising the MB */
+        return;
+    }
+
+    irc_get_mb_qp(ps_rate_control_api->ps_mb_rate_control, i4_cur_mb_activity,
+                  pi4_mb_qp);
+
+    /* Truncating the QP to the Max and Min Qp values possible */
+    i4_lo_qp = ps_rate_control_api->au1_min_max_qp[e_pic_type << 1];
+    i4_hi_qp = ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1) + 1];
+
+    if(pi4_mb_qp[1] < i4_lo_qp)
+    {
+        pi4_mb_qp[1] = i4_lo_qp;
+    }
+    if(pi4_mb_qp[1] > i4_hi_qp)
+    {
+        pi4_mb_qp[1] = i4_hi_qp;
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_get_bits_to_stuff
+ Description   : Gets the bits to stuff to prevent Underflow of Encoder Buffer.
+                 The value comes straight from the CBR buffer model
+                 (irc_get_cbr_bits_to_stuff).
+ *****************************************************************************/
+WORD32 irc_get_bits_to_stuff(rate_control_api_t *ps_rate_control_api,
+                             WORD32 i4_tot_consumed_bits,
+                             picture_type_e e_pic_type)
+{
+    return irc_get_cbr_bits_to_stuff(ps_rate_control_api->ps_cbr_buffer,
+                                     i4_tot_consumed_bits, e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_get_prev_frm_est_bits
+ Description   : Returns previous frame estimated bits, as stored in
+                 i4_prev_frm_est_bits of the rate control state
+ *****************************************************************************/
+WORD32 irc_get_prev_frm_est_bits(rate_control_api_t *ps_rate_control_api)
+{
+    return ps_rate_control_api->i4_prev_frm_est_bits;
+}
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+ accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+void irc_change_inter_frm_int_call(rate_control_api_t *ps_rate_control_api,
+ WORD32 i4_inter_frm_int)
+{ /*
+ * Control call: hands the requested inter frame interval to the picture
+ * handling module through irc_pic_handling_register_new_inter_frm_interval().
+ * No other rate control state is touched here.
+ */
+ irc_pic_handling_register_new_inter_frm_interval(
+ ps_rate_control_api->ps_pic_handling, i4_inter_frm_int);
+}
+
+/****************************************************************************
+ Function Name : irc_change_intra_frm_int_call
+ Description   : Control call for a new intra frame interval. The value is
+                 registered with the picture handling module and, for
+                 VBR_STREAMING only, also passed to the VBR streaming
+                 parameters (irc_change_vsp_ifi).
+ *****************************************************************************/
+void irc_change_intra_frm_int_call(rate_control_api_t *ps_rate_control_api,
+                                   WORD32 i4_intra_frm_int)
+{
+    irc_pic_handling_register_new_int_frm_interval(
+                    ps_rate_control_api->ps_pic_handling, i4_intra_frm_int);
+
+    if(VBR_STREAMING != ps_rate_control_api->e_rc_type)
+    {
+        return;
+    }
+
+    irc_change_vsp_ifi(&ps_rate_control_api->s_vbr_str_prms, i4_intra_frm_int);
+}
+
+/****************************************************************************
+ Function Name : irc_change_avg_bit_rate
+ Description   : Whenever the average bit rate changes, the excess/deficit
+                 bits between the new bit rate and the old one are
+                 re-distributed in the bit allocation module. For CBR_NLDRC
+                 the CBR buffer is also given the new rate for every drain
+                 rate. If at least one frame has already been seen, the
+                 per picture type "average bitrate changed" flags are raised.
+ *****************************************************************************/
+void irc_change_avg_bit_rate(rate_control_api_t *ps_rate_control_api,
+                             UWORD32 u4_average_bit_rate)
+{
+    int idx;
+
+    if(CONST_QP != ps_rate_control_api->e_rc_type)
+    {
+        /*
+         * Bit Allocation Module: distribute the excess/deficit bits between
+         * the old and the new bit rate to all the remaining frames
+         */
+        irc_ba_change_remaining_bits_in_period(
+                        ps_rate_control_api->ps_bit_allocation,
+                        ps_rate_control_api->ps_pic_handling,
+                        u4_average_bit_rate,
+                        irc_ba_get_frame_rate(
+                                        ps_rate_control_api->ps_bit_allocation),
+                        (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+    }
+
+    if(CBR_NLDRC == ps_rate_control_api->e_rc_type)
+    {
+        /* Every drain rate of the CBR buffer gets the same new average rate */
+        UWORD32 au4_drain_rates[MAX_NUM_DRAIN_RATES];
+
+        idx = 0;
+        while(idx < MAX_NUM_DRAIN_RATES)
+        {
+            au4_drain_rates[idx] = u4_average_bit_rate;
+            idx++;
+        }
+        irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                    (WORD32 *)(au4_drain_rates));
+    }
+
+    /*
+     * The flags below are raised only when the bit rate changes at run time,
+     * i.e. after the model has had a chance to stabilize. RC init happens at
+     * create time and this call may also be made just before the first
+     * process call; in that case the QP swing restriction must stay active,
+     * otherwise the QP could move far away from the init QP before the model
+     * has settled. u1_is_first_frm is what tells the two situations apart, so
+     * this check is unreliable if the bit rate is changed within the first
+     * couple of frames.
+     */
+    if(ps_rate_control_api->u1_is_first_frm != 0)
+    {
+        return;
+    }
+
+    for(idx = 0; idx < MAX_PIC_TYPE; idx++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[idx] = 1;
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_frame_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 frame rate. Nothing is done for CONST_QP. Otherwise the
+                 buffer model of the active rc type is told the new frame
+                 rate (and, for VBR_STREAMING, the new tick values and the
+                 number of frames in the delay period), after which the bit
+                 allocation module re-distributes the remaining bits.
+ *****************************************************************************/
+void irc_change_frame_rate(rate_control_api_t *ps_rate_control_api,
+                           UWORD32 u4_frame_rate,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_delay_prd_frms;
+
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        return;
+    }
+
+    /* Frames that fit in the CBR buffer delay at the new frame rate */
+    u4_delay_prd_frms = (u4_frame_rate
+                    * irc_get_cbr_buffer_delay(
+                                    ps_rate_control_api->ps_cbr_buffer))
+                    / 1000000;
+
+    switch(ps_rate_control_api->e_rc_type)
+    {
+        case VBR_STORAGE:
+        case VBR_STORAGE_DVD_COMP:
+            irc_change_vbr_vbv_frame_rate(
+                            ps_rate_control_api->ps_vbr_storage_vbv,
+                            u4_frame_rate);
+            break;
+
+        case CBR_NLDRC:
+            irc_change_cbr_vbv_tgt_frame_rate(
+                            ps_rate_control_api->ps_cbr_buffer, u4_frame_rate);
+            break;
+
+        case VBR_STREAMING:
+        {
+            UWORD32 au4_pics_in_delay[MAX_PIC_TYPE];
+
+            irc_change_vsp_tgt_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                     u4_tgt_ticks);
+            irc_change_vsp_src_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                     u4_src_ticks);
+            irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+                                u4_delay_prd_frms);
+
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_pics_in_delay);
+            irc_change_cbr_vbv_tgt_frame_rate(
+                            ps_rate_control_api->ps_cbr_buffer, u4_frame_rate);
+            irc_change_cbr_vbv_num_pics_in_delay_period(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            au4_pics_in_delay);
+            break;
+        }
+
+        default:
+            break;
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(
+                                    ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+}
+
+/****************************************************************************
+ Function Name : irc_change_frm_rate_for_bit_alloc
+ Description   : Does the necessary changes only in the bit_allocation module
+                 when there is a change in frame rate. For the VBR storage
+                 modes the max bits per target frame of the VBV model is
+                 updated as well. Nothing is done for CONST_QP.
+ *****************************************************************************/
+void irc_change_frm_rate_for_bit_alloc(rate_control_api_t *ps_rate_control_api,
+                                       UWORD32 u4_frame_rate)
+{
+    if(CONST_QP == ps_rate_control_api->e_rc_type)
+    {
+        return;
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(
+                                    ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+
+    if((VBR_STORAGE == ps_rate_control_api->e_rc_type)
+                    || (VBR_STORAGE_DVD_COMP == ps_rate_control_api->e_rc_type))
+    {
+        irc_change_vbr_max_bits_per_tgt_frm(
+                        ps_rate_control_api->ps_vbr_storage_vbv,
+                        u4_frame_rate);
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_init_qp
+ Description   : Replaces the initial QP of every picture type with the values
+                 in pu1_init_qp (MAX_PIC_TYPE entries) and seeds the
+                 "previous frame QP" of each type with the same value.
+ *****************************************************************************/
+void irc_change_init_qp(rate_control_api_t *ps_rate_control_api,
+                        UWORD8 *pu1_init_qp)
+{
+    WORD32 i4_pic = 0;
+
+    while(i4_pic < MAX_PIC_TYPE)
+    {
+        const UWORD8 u1_qp = pu1_init_qp[i4_pic];
+
+        ps_rate_control_api->au1_init_qp[i4_pic] = u1_qp;
+        ps_rate_control_api->au1_prev_frm_qp[i4_pic] = u1_qp;
+        i4_pic++;
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_min_max_qp
+ Description   : Copies the min/max QP limits of every picture type.
+                 Both arrays hold 2 * MAX_PIC_TYPE entries laid out as
+                 [2 * type] = min and [2 * type + 1] = max (the layout used by
+                 irc_get_mb_level_qp).
+ *****************************************************************************/
+void irc_change_min_max_qp(rate_control_api_t *ps_rate_control_api,
+                           UWORD8 *pu1_min_max_qp)
+{
+    WORD32 i4_entry;
+
+    /* One pass over the interleaved min/max pairs */
+    for(i4_entry = 0; i4_entry < (MAX_PIC_TYPE << 1); i4_entry++)
+    {
+        ps_rate_control_api->au1_min_max_qp[i4_entry] =
+                        pu1_min_max_qp[i4_entry];
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_peak_bit_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 peak bit rate.
+                   VBR_STORAGE / VBR_STORAGE_DVD_COMP : applied immediately to
+                       the VBV model and to bit allocation.
+                   VBR_STREAMING : applied immediately if no frame has been
+                       encoded yet; otherwise the new rates are stored and a
+                       frame countdown (pictures in the delay period) is
+                       started, after which irc_update_frame_level_info
+                       applies them.
+                   Other rc types : no action.
+ Inputs        : pu4_peak_bit_rate - MAX_NUM_DRAIN_RATES new peak bit rates
+ Returns       : RC_OK, or RC_BENIGN_ERR when (VBR_STREAMING) a previous
+                 change was still waiting to take effect. The new request is
+                 still recorded in that case.
+ *****************************************************************************/
+WORD32 irc_change_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                                UWORD32 *pu4_peak_bit_rate)
+{
+    WORD32 i4_ret_val = RC_OK;
+    int i;
+
+    /*
+     * Buffer Mechanism Module: Re-initialize the number of bits consumed per
+     * frame
+     */
+    if(ps_rate_control_api->e_rc_type == VBR_STORAGE
+                    || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+    {
+        /* Send the new peak bit rate and the old frame rate */
+        irc_change_vbr_vbv_bit_rate(ps_rate_control_api->ps_vbr_storage_vbv,
+                                    pu4_peak_bit_rate[0]);
+        irc_ba_change_ba_peak_bit_rate(ps_rate_control_api->ps_bit_allocation,
+                                       (WORD32 *)pu4_peak_bit_rate);
+
+        for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        {
+            ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                            pu4_peak_bit_rate[i];
+        }
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+        {
+            /*
+             * Means that change in peak bit rate has been made twice before the
+             * previous change could take effect
+             */
+            i4_ret_val = RC_BENIGN_ERR;
+        }
+        /*
+         * If the change happens before encoding the first frame make the
+         * effect immediately else delay the effect
+         */
+        if(ps_rate_control_api->u1_is_first_frm)
+        {
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+            irc_ba_change_ba_peak_bit_rate(
+                            ps_rate_control_api->ps_bit_allocation,
+                            (WORD32 *)pu4_peak_bit_rate);
+            irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                        (WORD32 *)pu4_peak_bit_rate);
+        }
+        else
+        {
+            /*
+             * Scratch output for irc_get_vsp_num_pics_in_dly_prd(); only its
+             * return value is used here. It is sized MAX_PIC_TYPE, as at
+             * every other call site in this file, because the callee reports
+             * a count per picture type; sizing it by MAX_NUM_DRAIN_RATES
+             * risks a stack overrun.
+             */
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+            /*
+             * Else store the number of frames after which the effect should
+             * happen and then update the peak bitrate
+             */
+            ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change =
+                            irc_get_vsp_num_pics_in_dly_prd(
+                                            &ps_rate_control_api->s_vbr_str_prms,
+                                            au4_num_pics_in_delay_prd);
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+        }
+    }
+
+    return (i4_ret_val);
+}
+
+void irc_change_buffer_delay(rate_control_api_t *ps_rate_control_api,
+ UWORD32 u4_buffer_delay)
+{ /*
+ * Control call for a new buffer delay. The delay is first converted into a
+ * frame count (frame rate from bit allocation * delay / 1000000) and then
+ * applied according to the active rate control type; other types are left
+ * untouched. Presumably the frame rate is in frames per 1000 seconds and
+ * the delay in milliseconds - TODO confirm the units.
+ */
+ UWORD32 u4_frms_in_delay_prd = ((irc_ba_get_frame_rate(
+ ps_rate_control_api->ps_bit_allocation) * u4_buffer_delay)
+ / 1000000);
+
+ /* Initialize the rate control modules
+ *
+ * CBR_NLDRC: the CBR buffer takes the new delay directly.
+ *
+ * NOTE(review): the second branch tests VBR_STORAGE / VBR_STORAGE_DVD_COMP
+ * but drives s_vbr_str_prms and ps_cbr_buffer, which
+ * irc_update_frame_level_info() and irc_change_frame_rate() only use for
+ * VBR_STREAMING. The condition may have been meant to be VBR_STREAMING -
+ * confirm before relying on this path.
+ */
+ if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ irc_change_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer,
+ u4_buffer_delay);
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STORAGE
+ || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+ {
+ UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+ irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+ u4_frms_in_delay_prd);
+
+ /* Get the number of pics of each type in delay period, then pass the
+ * per type counts on to the CBR buffer */
+ irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms,
+ au4_num_pics_in_delay_prd);
+
+ irc_change_cbr_vbv_num_pics_in_delay_period(
+ ps_rate_control_api->ps_cbr_buffer,
+ au4_num_pics_in_delay_prd);
+ }
+}
+
+/* Getter functions to get the current rate control parameters */
+/****************************************************************************
+ Function Name : irc_get_frame_rate
+ Description   : Returns the frame rate held by the bit allocation module
+ *****************************************************************************/
+UWORD32 irc_get_frame_rate(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation);
+}
+
+/****************************************************************************
+ Function Name : irc_get_bit_rate
+ Description   : Returns the bit rate held by the bit allocation module
+ *****************************************************************************/
+UWORD32 irc_get_bit_rate(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation);
+}
+
+/****************************************************************************
+ Function Name : irc_get_peak_bit_rate
+ Description   : Returns entry i4_index of au4_new_peak_bit_rate, i.e. the
+                 most recently requested peak bit rate. In VBR_STREAMING a
+                 requested rate may still be waiting for its delay period
+                 to elapse (see irc_change_peak_bit_rate).
+                 i4_index is not range checked here; the array is written
+                 with MAX_NUM_DRAIN_RATES entries elsewhere in this file.
+ *****************************************************************************/
+UWORD32 irc_get_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                              WORD32 i4_index)
+{
+    return ps_rate_control_api->au4_new_peak_bit_rate[i4_index];
+}
+
+/****************************************************************************
+ Function Name : irc_get_intra_frame_interval
+ Description   : Returns the intra frame interval reported by the picture
+                 handling module
+ *****************************************************************************/
+UWORD32 irc_get_intra_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_pic_type_get_intra_frame_interval(
+                    ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_get_inter_frame_interval
+ Description   : Returns the inter frame interval reported by the picture
+                 handling module
+ *****************************************************************************/
+UWORD32 irc_get_inter_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_pic_type_get_inter_frame_interval(
+                    ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_get_rc_type
+ Description   : Returns the rate control type stored in the rc state
+ *****************************************************************************/
+rc_type_e irc_get_rc_type(rate_control_api_t *ps_rate_control_api)
+{
+    return ps_rate_control_api->e_rc_type;
+}
+
+/****************************************************************************
+ Function Name : irc_get_bits_per_frame
+ Description   : Returns the result of X_PROD_Y_DIV_Z applied to the bit rate,
+                 1000 and the frame rate held by bit allocation, i.e.
+                 (bit rate * 1000) / frame rate going by the macro name.
+                 NOTE(review): the factor 1000 suggests the frame rate is
+                 stored in frames per 1000 seconds - confirm.
+ *****************************************************************************/
+WORD32 irc_get_bits_per_frame(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_frame_bits;
+
+    X_PROD_Y_DIV_Z(irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                   (UWORD32)1000,
+                   irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation),
+                   i4_frame_bits);
+
+    return i4_frame_bits;
+}
+
+/****************************************************************************
+ Function Name : irc_get_max_delay
+ Description   : Returns the buffer delay held by the CBR buffer model
+ *****************************************************************************/
+UWORD32 irc_get_max_delay(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_get_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer);
+}
+
+/****************************************************************************
+ Function Name : irc_get_seq_no
+ Description   : Returns the display order number reported by the picture
+                 handling module
+ *****************************************************************************/
+UWORD32 irc_get_seq_no(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_pic_type_get_disp_order_no(ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_get_rem_frames_in_gop
+ Description   : Returns the total number of frames remaining in the GOP,
+                 obtained by summing the per picture type counts reported by
+                 the picture handling module
+ *****************************************************************************/
+UWORD32 irc_get_rem_frames_in_gop(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 ai4_rem_per_type[MAX_PIC_TYPE];
+    UWORD32 u4_total = 0;
+    WORD32 i4_type = 0;
+
+    /* Fetch the remaining frame count of every picture type */
+    irc_pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling,
+                                     ai4_rem_per_type);
+
+    /* Add them up across picture types */
+    while(i4_type < MAX_PIC_TYPE)
+    {
+        u4_total += ai4_rem_per_type[i4_type];
+        i4_type++;
+    }
+
+    return u4_total;
+}
+
+/****************************************************************************
+ Function Name : irc_flush_buf_frames
+ Description : API call to flush the buffered up frames. Implemented as a
+ single call to irc_flush_frame_from_pic_stack() on the picture handling
+ module; no other rate control state is touched here.
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_api_t *ps_rate_control_api)
+{
+ irc_flush_frame_from_pic_stack(ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_post_encode_frame_skip
+ Description : API call that forwards e_pic_type to irc_skip_encoded_frame()
+ on the picture handling module. Going by the names, it is meant for a frame
+ that is skipped after it has been encoded - TODO confirm with callers.
+ (This header previously repeated the irc_flush_buf_frames text.)
+ *****************************************************************************/
+
+void irc_post_encode_frame_skip(rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type)
+{
+ irc_skip_encoded_frame(ps_rate_control_api->ps_pic_handling, e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_force_I_frame
+ Description : API call to force an I frame. It only calls
+ irc_set_force_I_frame_flag() on the picture handling module; how and when
+ the flag is consumed is decided there.
+ *****************************************************************************/
+void irc_force_I_frame(rate_control_api_t *ps_rate_control_api)
+{
+ irc_set_force_I_frame_flag(ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_rem_bits_in_period
+ * Description   : API call to get the bits remaining in the current period,
+ *                 as computed by the bit allocation module from its own
+ *                 state and the picture handling state
+ *****************************************************************************/
+WORD32 irc_get_rem_bits_in_period(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_ba_get_rem_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_vbv_buf_fullness
+ * Description   : API call to get VBV buffer fullness. The value is the
+ *                 current buffer size of the VBR storage VBV model,
+ *                 whatever rate control type is active.
+ ******************************************************************************/
+WORD32 irc_get_vbv_buf_fullness(rate_control_api_t *ps_rate_control_api)
+{
+    return irc_get_cur_vbv_buf_size(ps_rate_control_api->ps_vbr_storage_vbv);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_vbv_buf_size
+ * Description   : Returns the buffer size of the model that backs the active
+ *                 rate control type: the CBR buffer for CBR_NLDRC and
+ *                 VBR_STREAMING, the VBR storage VBV (max size) otherwise.
+ ******************************************************************************/
+WORD32 irc_get_vbv_buf_size(rate_control_api_t *ps_rate_control_api)
+{
+    if((CBR_NLDRC == ps_rate_control_api->e_rc_type)
+                    || (VBR_STREAMING == ps_rate_control_api->e_rc_type))
+    {
+        return irc_get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+    }
+
+    return irc_get_max_vbv_buf_size(ps_rate_control_api->ps_vbr_storage_vbv);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_vbv_fulness_with_cur_bits
+ * Description   : Returns what irc_vbv_get_vbv_buf_fullness() reports for the
+ *                 VBR storage VBV model when given u4_bits
+ ******************************************************************************/
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_api_t *ps_rate_control_api,
+                                         UWORD32 u4_bits)
+{
+    return irc_vbv_get_vbv_buf_fullness(
+                    ps_rate_control_api->ps_vbr_storage_vbv, u4_bits);
+}
+
+/****************************************************************************
+ * Function Name : irc_set_avg_mb_act
+ * Description   : Passes the average MB activity to the MB level rate
+ *                 control module (irc_mb_update_frame_level)
+ ******************************************************************************/
+void irc_set_avg_mb_act(rate_control_api_t *ps_rate_control_api,
+                        WORD32 i4_avg_activity)
+{
+    irc_mb_update_frame_level(ps_rate_control_api->ps_mb_rate_control,
+                              i4_avg_activity);
+}
diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h
new file mode 100755
index 0000000..0173037
--- /dev/null
+++ b/encoder/irc_rate_control_api.h
@@ -0,0 +1,188 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_H_
+#define _RATE_CONTROL_API_H_
+
+/*
+ * Status codes of the rate control API. Negative values are parenthesised so
+ * that the macros expand safely inside any surrounding expression.
+ */
+#define RC_OK 0
+#define RC_FAIL (-1)
+#define RC_BENIGN_ERR (-2)
+
+/* This file should only contain RC API function declarations */
+
+typedef struct rate_control_api_t *rate_control_handle;
+
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_initialise_rate_control(rate_control_handle ps_rate_control_api,
+ rc_type_e e_rate_control_type,
+ UWORD8 u1_is_mb_level_rc_on,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 *pu4_peak_bit_rate,
+ UWORD32 u4_min_bit_rate,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD8 *pu1_init_qp,
+ UWORD32 u4_max_vbv_buff_size,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ UWORD8 *pu1_min_max_qp,
+ WORD32 i4_use_est_intra_sad,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks);
+
+/*****************************************************************************
+ Process level API functions (FRAME LEVEL)
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_handle ps_rate_control_api);
+
+void irc_post_encode_frame_skip(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type);
+
+void irc_add_picture_to_stack(rate_control_handle rate_control_api,
+ WORD32 i4_enc_pic_id);
+
+void irc_add_picture_to_stack_re_enc(rate_control_handle rate_control_api,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type);
+
+void irc_get_picture_details(rate_control_handle rate_control_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type);
+
+/* Gets the frame level Qp */
+UWORD8 irc_get_frame_level_qp(rate_control_handle rate_control_api,
+ picture_type_e pic_type,
+ WORD32 i4_max_frm_bits);
+
+vbv_buf_status_e irc_get_buffer_status(rate_control_handle rate_control_api,
+ WORD32 i4_total_frame_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+WORD32 irc_get_prev_frm_est_bits(rate_control_handle ps_rate_control_api);
+
+void irc_update_pic_handling_state(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type);
+
+void irc_update_frame_level_info(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_mb_type_sad,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ WORD32 *pi4_mb_type_tex_bits,
+ WORD32 *pi4_tot_mb_type_qp,
+ WORD32 *pi4_tot_mb_in_type,
+ WORD32 i4_avg_activity,
+ UWORD8 u1_is_scd,
+ WORD32 i4_is_it_a_skip,
+ WORD32 i4_intra_frm_cost,
+ WORD32 i4_is_pic_handling_done);
+
+/*****************************************************************************
+ MB LEVEL API (just wrapper functions)
+ *****************************************************************************/
+
+void irc_init_mb_rc_frame_level(rate_control_handle ps_rate_control_api,
+ UWORD8 u1_frame_qp);/* Current frame qp*/
+
+void irc_get_mb_level_qp(rate_control_handle ps_rate_control_api,
+ WORD32 i4_cur_mb_activity,
+ WORD32 *pi4_mb_qp,
+ picture_type_e e_pic_type);
+
+WORD32 irc_get_bits_to_stuff(rate_control_handle ps_rate_control_api,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+ accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+void irc_change_inter_frm_int_call(rate_control_handle ps_rate_control_api,
+ WORD32 i4_inter_frm_int);
+
+void irc_change_intra_frm_int_call(rate_control_handle ps_rate_control_api,
+ WORD32 i4_intra_frm_int);
+
+void irc_change_avg_bit_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_average_bit_rate);
+
+void irc_change_frame_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_target_ticks);
+
+void irc_change_frm_rate_for_bit_alloc(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_frame_rate);
+
+void irc_change_init_qp(rate_control_handle ps_rate_control_api,
+ UWORD8 *init_qp);
+
+WORD32 irc_change_peak_bit_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 *u4_peak_bit_rate);
+
+void irc_change_buffer_delay(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_buffer_delay);
+
+void irc_force_I_frame(rate_control_handle ps_rate_control_api);
+
+void irc_change_min_max_qp(rate_control_handle ps_rate_control_api,
+ UWORD8 *u1_min_max_qp);
+
+/********************************************************************************
+ Getter functions
+ For getting the current state of the rate control structures
+ ********************************************************************************/
+
+UWORD32 irc_get_frame_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_bit_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_intra_frame_interval(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_inter_frame_interval(rate_control_handle ps_rate_control_api);
+
+rc_type_e irc_get_rc_type(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_bits_per_frame(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_peak_bit_rate(rate_control_handle ps_rate_control_api,
+ WORD32 i4_index);
+
+UWORD32 irc_get_max_delay(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_seq_no(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_rem_bits_in_period(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_fullness(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_size(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_bits);
+#endif
diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h
new file mode 100755
index 0000000..ba39e7f
--- /dev/null
+++ b/encoder/irc_rate_control_api_structs.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_STRUCTS_H_
+#define _RATE_CONTROL_API_STRUCTS_H_
+
+/*
+ * The following definitions were present in irc_cntrl_param.h, moved to this
+ * file as it is used by irc_rate_control_api.c
+ */
+
+/* num_frm_in_period = BIT_ALLOC_PERIOD*intra_frame_interval */
+#define VBR_BIT_ALLOC_PERIOD 3
+#define CBR_BIT_ALLOC_PERIOD 1
+
+/* Rate control state structure */
+/*
+ * Top level rate control context. It mostly aggregates handles to the
+ * sub-modules (picture handling, RD models, VBV/CBR buffers, bit allocation,
+ * MB level RC) that the irc_* API wrappers forward to.
+ */
+typedef struct rate_control_api_t
+{
+ /* RC Algorithm */
+ /* irc_get_vbv_buf_size() reports the CBR buffer size for CBR_NLDRC and */
+ /* VBR_STREAMING, and the VBR storage VBV size for all other types */
+ rc_type_e e_rc_type;
+
+ /* Whether MB level rc is enabled or not */
+ UWORD8 u1_is_mb_level_rc_on;
+
+ /* Picture handling struct */
+ pic_handling_handle ps_pic_handling;
+
+ /* Model struct for I and P frms */
+ rc_rd_model_handle aps_rd_model[MAX_PIC_TYPE];
+
+ /* VBR storage VBV structure */
+ vbr_storage_vbv_handle ps_vbr_storage_vbv;
+
+ /* Calculate the estimated SAD */
+ est_sad_handle ps_est_sad;
+
+ /* Allocation of bits for each frame */
+ bit_allocation_handle ps_bit_allocation;
+
+ /* Init Qp(also used for Const Qp scenarios) */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* MB Level rate control state structure */
+ mb_rate_control_handle ps_mb_rate_control;
+
+ /* NOTE(review): presumably one "first frame coded" flag per picture type - confirm */
+ UWORD8 au1_is_first_frm_coded[MAX_PIC_TYPE];
+
+ /* NOTE(review): presumably the Qp of the previous frame of each picture type - confirm */
+ UWORD8 au1_prev_frm_qp[MAX_PIC_TYPE];
+
+ /* CBR buffer state; queried by irc_get_vbv_buf_size() for CBR_NLDRC and VBR_STREAMING */
+ cbr_buffer_handle ps_cbr_buffer;
+
+ /* NOTE(review): "scd" presumably stands for scene change detection - confirm */
+ UWORD8 u1_scd_detected;
+
+ UWORD8 u1_frm_qp_after_scd;
+
+ UWORD8 au1_avg_bitrate_changed[MAX_PIC_TYPE];
+
+ UWORD8 u1_is_first_frm;
+
+ /* Two entries per picture type; NOTE(review): presumably {min, max} pairs - confirm */
+ UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
+
+ WORD32 i4_prev_frm_est_bits;
+
+ vbr_str_prms_t s_vbr_str_prms;
+
+ /* Store the values which are to be impacted after a delay */
+ UWORD32 u4_frms_in_delay_prd_for_peak_bit_rate_change;
+
+ UWORD32 au4_new_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+
+ picture_type_e prev_ref_pic_type;
+
+} rate_control_api_t;
+
+#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
+
diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c
new file mode 100755
index 0000000..f5c0737
--- /dev/null
+++ b/encoder/irc_rd_model.c
@@ -0,0 +1,565 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/****************************************************************************/
+/* File Name : irc_rd_model.c */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : irc_update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 Sarat Initial Version */
+/****************************************************************************/
+
+/* System include files */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "math.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_rd_model_struct.h"
+
+
+/*
+ * Memtab helper for the RD model state.
+ *
+ * For GET_NUM_MEMTAB and FILL_MEMTAB the caller has no state memory yet, so
+ * the handle is pointed at a function-static dummy model to keep later
+ * dereferences valid. For every call type other than GET_NUM_MEMTAB one
+ * memtab entry describing an rc_rd_model_t is filled and passed to
+ * use_or_fill_base() together with the handle.
+ *
+ * Returns the number of memtab entries owned by this module (always 1).
+ */
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_t **pps_rc_rd_model,
+                                             itt_memtab_t *ps_memtab,
+                                             ITT_FUNC_TYPE_E e_func_type)
+{
+    static rc_rd_model_t s_rc_rd_model_temp;
+    WORD32 i4_num_memtabs = 0;
+
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_rc_rd_model = &s_rc_rd_model_temp;
+    }
+
+    /* Memtab entry for the RD model state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_rd_model_memtab = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_rd_model_memtab, sizeof(rc_rd_model_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_rd_model_memtab, (void**)pps_rc_rd_model,
+                         e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return (i4_num_memtabs);
+}
+
+/*
+ * Initialises a frame level RD model: records the maximum number of frames
+ * to model, empties the frame history and clears the model coefficients.
+ */
+void irc_init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model,
+                              UWORD8 u1_max_frames_modelled)
+{
+    /* Capacity requested by the caller */
+    ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled;
+
+    /* No frame stored yet; the next frame goes into slot 0 */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* Coefficients of the linear model without intercept */
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+}
+
+/*
+ * Resets a frame level RD model: empties the frame history and clears the
+ * model coefficients. The configured u1_max_frms_to_model is left untouched.
+ */
+void irc_reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    /* Coefficients of the linear model without intercept */
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+
+    /* Forget all stored frames */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+}
+
+/*
+ * find_model_coeffs
+ *
+ * Computes the coefficients of the "linear without intercept" model
+ *     bits = a * (SAD / Qp)
+ * from the most recently stored frame of ps_rd_model only:
+ *     a = (bits * avg_qp) / SAD      (0 when the stored SAD is 0)
+ *
+ * Outputs:
+ *   pmc_model_coeff_lin_wo_int[0] = b (always 0)
+ *   pmc_model_coeff_lin_wo_int[1] = a
+ *   pmc_model_coeff_lin_wo_int[2] = c (always 0)
+ *
+ * Returns u1_model_used unchanged.
+ *
+ * The per-frame arrays, the frame index list and the other coefficient
+ * outputs are kept in the signature for the caller but are not consulted.
+ * Earlier code accumulated regression sums from them that were never read,
+ * and normalising those sums divided by the number of frames used (possibly
+ * 0) and by stored Qp values (possibly 0); that dead computation is removed.
+ */
+static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits,
+                                UWORD32 *pi4_sad_h264,
+                                UWORD8 *pu1_num_skips,
+                                UWORD8 *pui_avg_mpeg2_qp,
+                                UWORD8 u1_num_frms,
+                                UWORD8 u1_model_used,
+                                WORD8 *pi1_frame_index,
+                                model_coeff *pmc_model_coeff,
+                                model_coeff *pmc_model_coeff_lin,
+                                model_coeff *pmc_model_coeff_lin_wo_int,
+                                rc_rd_model_t *ps_rd_model)
+{
+    UWORD8 u1_prev_frm_index;
+    UWORD8 u1_avgqp_prvfrm;
+    UWORD32 u4_prevfrm_bits, u4_prevfrm_sad;
+    float model_coeff_a = 0;
+
+    UNUSED(pi4_res_bits);
+    UNUSED(pi4_sad_h264);
+    UNUSED(pu1_num_skips);
+    UNUSED(pui_avg_mpeg2_qp);
+    UNUSED(u1_num_frms);
+    UNUSED(pi1_frame_index);
+    UNUSED(pmc_model_coeff);
+    UNUSED(pmc_model_coeff_lin);
+
+    /* u1_curr_frm_counter is the next write slot; step back one slot, */
+    /* wrapping around the circular history, to reach the latest frame */
+    u1_prev_frm_index = ps_rd_model->u1_curr_frm_counter;
+    if(0 == u1_prev_frm_index)
+        u1_prev_frm_index = (MAX_FRAMES_MODELLED - 1);
+    else
+        u1_prev_frm_index--;
+
+    u1_avgqp_prvfrm = ps_rd_model->pu1_avg_qp[u1_prev_frm_index];
+    u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_prev_frm_index];
+    u4_prevfrm_sad = ps_rd_model->pi4_sad[u1_prev_frm_index];
+
+    if(0 != u4_prevfrm_sad)
+    {
+        /* Form the product in 64 bits so that bits * qp cannot wrap; the */
+        /* result is identical whenever the product fits in 32 bits */
+        unsigned long long u8_bits_x_qp =
+                        (unsigned long long)u4_prevfrm_bits * u1_avgqp_prvfrm;
+
+        model_coeff_a = (float)u8_bits_x_qp / u4_prevfrm_sad;
+    }
+
+    pmc_model_coeff_lin_wo_int[0] = 0;
+    pmc_model_coeff_lin_wo_int[1] = model_coeff_a;
+    pmc_model_coeff_lin_wo_int[2] = 0;
+
+    return u1_model_used;
+}
+
+/*
+ * irc_update_frame_rd_model
+ *
+ * Selects the set of stored frames to feed to find_model_coeffs() and stores
+ * the returned linear-without-intercept coefficients (a, b, c) and the model
+ * id in ps_rd_model.
+ *
+ * Selection:
+ *   1. Walk backwards from the latest frame over at most MAX_ACTIVE_FRAMES
+ *      frames, keeping at most two frames per Qp and rejecting frames with
+ *      skips (the latest frame is always kept).
+ *   2. Walk over the remaining older frames and pick the frames of lowest
+ *      and highest Qp outside the range seen so far as pivot points.
+ *
+ * Fixes relative to the earlier version:
+ *   - the pivot loop now always steps to the next older frame; previously a
+ *     rejected frame left the index unchanged, so the same frame was
+ *     re-examined for the rest of the loop;
+ *   - pivot points are only appended while the index list has room;
+ *   - an unused copy of the index list was dropped.
+ *
+ * NOTE(review): pu1_num_frames is indexed by the stored average Qp, which
+ * assumes stored values never exceed MAX_MPEG2_QP - confirm against callers.
+ */
+static void irc_update_frame_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    WORD8 pi1_frame_index[MAX_FRAMES_MODELLED];
+
+    UWORD8 u1_num_skips_temp;
+    UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp;
+    UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame;
+    UWORD32 u4_num_skips;
+
+    UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp;
+    UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx;
+    UWORD8 pu1_num_frames[MPEG2_QP_ELEM];
+    model_coeff model_coeff_array[3], model_coeff_array_lin[3],
+                    model_coeff_array_lin_wo_int[3];
+    UWORD32 i;
+    UWORD8 u1_curr_frame_index;
+
+    u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+    ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+    /* Step back from the next write slot to the latest stored frame */
+    if(0 == u1_curr_frame_index)
+        u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+    else
+        u1_curr_frame_index--;
+
+    /************************************************************************/
+    /* Rearrange data to be fed into a Linear Regression Module             */
+    /* Module finds a,b,c such that                                         */
+    /*      y = ax + bx^2 + c                                               */
+    /************************************************************************/
+    u4_num_skips = 0;
+    u1_num_frms_input = 0;
+    memset(pu1_num_frames, 0, sizeof(pu1_num_frames));
+    memset(pi1_frame_index, -1, sizeof(pi1_frame_index));
+    u1_min_mpeg2_qp = MAX_MPEG2_QP;
+    u1_max_mpeg2_qp = 0;
+
+    u1_num_active_frames = ps_rd_model->u1_num_frms_in_model;
+    if(u1_num_active_frames > MAX_ACTIVE_FRAMES)
+    {
+        u1_num_active_frames = MAX_ACTIVE_FRAMES;
+    }
+
+    /************************************************************************/
+    /* Choose the set of Points to be used for MSE fit of Quadratic model   */
+    /* Points chosen are spread across the Qp range. Max of 2 points are    */
+    /* chosen for a Qp.                                                     */
+    /************************************************************************/
+    for(i = 0; i < u1_num_active_frames; i++)
+    {
+        u1_reject_frame = 0;
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+
+        if((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+            u1_reject_frame = 1;
+        if((1 == u4_num_skips) && (u1_num_skips_temp > 1))
+            u1_reject_frame = 1;
+        if(pu1_num_frames[u1_avg_mpeg2_qp_temp] >= 2)
+            u1_reject_frame = 1;
+
+        /* The latest frame is always part of the data set */
+        if(0 == i)
+            u1_reject_frame = 0;
+
+        if((0 == u1_reject_frame)
+                        && (u1_num_frms_input < MAX_FRAMES_MODELLED))
+        {
+            pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index;
+            pu1_num_frames[u1_avg_mpeg2_qp_temp] += 1;
+
+            if(u1_min_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+                u1_min_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+            if(u1_max_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+                u1_max_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+
+            u1_num_frms_input++;
+        }
+
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /************************************************************************/
+    /* Add Pivot Points to the Data set to be used for finding Quadratic    */
+    /* Model Coeffs. These will help in constraining the shape of Quadratic */
+    /* so that it does not adapt too much to the Local deviations.          */
+    /************************************************************************/
+    u1_min2_mpeg2_qp = u1_min_mpeg2_qp;
+    u1_max2_mpeg2_qp = u1_max_mpeg2_qp;
+    u1_min_qp_frame_indx = INVALID_FRAME_INDEX;
+    u1_max_qp_frame_indx = INVALID_FRAME_INDEX;
+
+    /* Loop running over the remaining stored frame level data
+     to find frames of MinQp and MaxQp */
+    for(; i < ps_rd_model->u1_num_frms_in_model; i++)
+    {
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+
+        /* Frames with skips are not eligible as pivots */
+        if(!(((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+                        || ((1 == u4_num_skips) && (u1_num_skips_temp > 1))))
+        {
+            if(u1_min2_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+            {
+                u1_min2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_min_qp_frame_indx = u1_curr_frame_index;
+            }
+            if(u1_max2_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+            {
+                u1_max2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_max_qp_frame_indx = u1_curr_frame_index;
+            }
+        }
+
+        /* Always move on to the next older frame, eligible or not */
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /* Add the Chosen Points to the regression data set, space permitting */
+    if((INVALID_FRAME_INDEX != u1_min_qp_frame_indx)
+                    && (u1_num_frms_input < MAX_FRAMES_MODELLED))
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+    if((INVALID_FRAME_INDEX != u1_max_qp_frame_indx)
+                    && (u1_num_frms_input < MAX_FRAMES_MODELLED))
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+
+    /***** Call the Module to Return the Coeffs for the Fed Data *****/
+    ps_rd_model->u1_model_used = find_model_coeffs(ps_rd_model->pi4_res_bits,
+                                                   ps_rd_model->pi4_sad,
+                                                   ps_rd_model->pu1_num_skips,
+                                                   ps_rd_model->pu1_avg_qp,
+                                                   u1_num_frms_input,
+                                                   ps_rd_model->u1_model_used,
+                                                   pi1_frame_index,
+                                                   model_coeff_array,
+                                                   model_coeff_array_lin,
+                                                   model_coeff_array_lin_wo_int,
+                                                   ps_rd_model);
+
+    ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0];
+    ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1];
+    ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2];
+}
+
+/*
+ * Estimates the bits a frame will consume at the given average Qp using the
+ * linear model without intercept:  bits = a * (SAD / Qp).
+ *
+ * ps_rd_model      - RD model supplying coefficient a
+ * u4_estimated_sad - estimated SAD of the frame
+ * u1_avg_qp        - average Qp; 0 yields an estimate of 0 instead of an
+ *                    integer division by zero
+ *
+ * SAD / Qp is deliberately left as an integer division (truncating), so
+ * estimates for non-zero Qp are unchanged.
+ */
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_t *ps_rd_model,
+                                 UWORD32 u4_estimated_sad,
+                                 UWORD8 u1_avg_qp)
+{
+    float fl_num_bits;
+
+    if(0 == u1_avg_qp)
+    {
+        return 0;
+    }
+
+    fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int
+                    * ((float)(u4_estimated_sad / u1_avg_qp));
+
+    return ((UWORD32)fl_num_bits);
+}
+
+/*
+ * Inverts the linear model without intercept, bits = a * (SAD / Qp), to find
+ * the Qp that is expected to produce u4_target_res_bits, and clips it to
+ * [u1_min_qp, u1_max_qp]. Also marks PREV_FRAME_MODEL as the model in use.
+ *
+ * Degenerate inputs are handled explicitly instead of through floating point
+ * division by zero:
+ *   - target of 0 bits            -> largest Qp (255 before clipping)
+ *   - coefficient a not positive  -> Qp 0 before clipping (an untrained
+ *     model, as left by init/reset, predicts no bits at any Qp)
+ * This matches the IEEE-754 results of the earlier code where those were
+ * defined, and removes the 0/0 case whose NaN was converted to UWORD8.
+ */
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_t *ps_rd_model,
+                                   UWORD32 u4_target_res_bits,
+                                   UWORD32 u4_estimated_sad,
+                                   UWORD8 u1_min_qp,
+                                   UWORD8 u1_max_qp)
+{
+    UWORD8 u1_qp;
+    float f_qp;
+    model_coeff model_coeff_a = ps_rd_model->model_coeff_a_lin_wo_int;
+
+    ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+    if(0 == u4_target_res_bits)
+    {
+        f_qp = 255;
+    }
+    else if(!(model_coeff_a > 0))
+    {
+        f_qp = 0;
+    }
+    else
+    {
+        /* x = SAD / Qp that yields the target bits */
+        float x_value = (float)u4_target_res_bits / model_coeff_a;
+
+        if(0 != x_value)
+            f_qp = u4_estimated_sad / x_value;
+        else
+            f_qp = 255;
+    }
+
+    if(f_qp > 255)
+        f_qp = 255;
+
+    /* Truncating the QP to the Max and Min Qp values possible */
+    if(f_qp < u1_min_qp)
+        f_qp = u1_min_qp;
+    if(f_qp > u1_max_qp)
+        f_qp = u1_max_qp;
+
+    /* Round to nearest */
+    u1_qp = (UWORD8)(f_qp + 0.5);
+
+    return u1_qp;
+}
+
+/*
+ * Stores the statistics of an encoded frame in the circular history of the
+ * RD model, advances the write position (wrapping at MAX_FRAMES_MODELLED),
+ * bumps the frame count up to u1_max_frms_to_model and refreshes the model
+ * coefficients.
+ */
+void irc_add_frame_to_rd_model(rc_rd_model_t *ps_rd_model,
+                               UWORD32 i4_res_bits,
+                               UWORD8 u1_avg_mp2qp,
+                               UWORD32 i4_sad_h264,
+                               UWORD8 u1_num_skips)
+{
+    const UWORD8 u1_slot = ps_rd_model->u1_curr_frm_counter;
+    UWORD8 u1_next_slot = (UWORD8)(u1_slot + 1);
+
+    /* Record the frame in the current slot */
+    ps_rd_model->pu1_avg_qp[u1_slot] = u1_avg_mp2qp;
+    ps_rd_model->pu1_num_skips[u1_slot] = u1_num_skips;
+    ps_rd_model->pi4_sad[u1_slot] = i4_sad_h264;
+    ps_rd_model->pi4_res_bits[u1_slot] = i4_res_bits;
+
+    /* Move to the next slot of the circular buffer */
+    if(MAX_FRAMES_MODELLED == u1_next_slot)
+    {
+        u1_next_slot = 0;
+    }
+    ps_rd_model->u1_curr_frm_counter = u1_next_slot;
+
+    /* The frame count saturates at the configured maximum */
+    if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model)
+    {
+        ps_rd_model->u1_num_frms_in_model += 1;
+    }
+
+    irc_update_frame_rd_model(ps_rd_model);
+}
+
+/*****************************************************************************
+ *Function Name : irc_calc_per_frm_bits
+ *Description   : Refreshes the per picture type RD models and solves the
+ *                combined linear (no intercept) equation to obtain the bits
+ *                and Qp for the current picture type.
+ *Inputs        : ps_rd_model
+ *                  - array of RD models, indexed by picture type
+ *                pu2_num_pics_of_a_pic_type
+ *                  - N1, N2,...Nk
+ *                pu1_update_pic_type_model
+ *                  - flag which tells whether or not to update model
+ *                    coefficients of a particular pic-type
+ *                u1_num_pic_types
+ *                  - value of k
+ *                pu4_num_skip_of_a_pic_type
+ *                  - the number of skips of that pic-type. Unused.
+ *                u1_base_pic_type
+ *                  - base pic type index wrt which alpha & beta are
+ *                    calculated. Unused.
+ *                pfl_gamma
+ *                  - gamma_i = beta_i / alpha_i
+ *                pfl_eta
+ *                  - per picture type weight used with pfl_gamma
+ *                u1_curr_pic_type
+ *                  - the current pic-type for which the targetted bits need
+ *                    to be computed
+ *                u4_bits_for_sub_gop
+ *                  - the number of bits to be consumed for the remaining
+ *                    part of sub-gop
+ *                u4_curr_estimated_sad
+ *                  - estimated SAD of the current frame
+ *                pu1_curr_pic_type_qp
+ *                  - output: Qp for the current frame (255 when no positive
+ *                    SAD/Qp budget could be derived)
+ *Returns       : bits for the current frame, rounded to nearest
+ *Remarks       : When the combined coefficient eff_A is zero (for example
+ *                all models untrained) the equation cannot be solved. The
+ *                SAD/Qp budget is then taken as 0, which gives 0 bits and
+ *                the existing Qp fallback of 255, instead of dividing by
+ *                zero and converting inf/NaN to integers.
+ *****************************************************************************/
+
+WORD32 irc_calc_per_frm_bits(rc_rd_model_t *ps_rd_model,
+                             UWORD16 *pu2_num_pics_of_a_pic_type,
+                             UWORD8 *pu1_update_pic_type_model,
+                             UWORD8 u1_num_pic_types,
+                             UWORD32 *pu4_num_skip_of_a_pic_type,
+                             UWORD8 u1_base_pic_type,
+                             float *pfl_gamma,
+                             float *pfl_eta,
+                             UWORD8 u1_curr_pic_type,
+                             UWORD32 u4_bits_for_sub_gop,
+                             UWORD32 u4_curr_estimated_sad,
+                             UWORD8 *pu1_curr_pic_type_qp)
+{
+    WORD32 i4_per_frm_bits_Ti;
+    UWORD8 u1_i;
+    rc_rd_model_t *ps_rd_model_of_pic_type;
+    model_coeff eff_A = 0.0;
+    float fl_sad_by_qp_base;
+    float fl_sad_by_qp_curr_frm = 0;
+    float fl_qp_curr_frm;
+    float fl_bits_for_curr_frm;
+
+    UNUSED(pu4_num_skip_of_a_pic_type);
+    UNUSED(u1_base_pic_type);
+
+    /* First part: update the model coefficients of every pic-type that */
+    /* has pictures and is flagged for update                            */
+    for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+    {
+        if((0 != pu2_num_pics_of_a_pic_type[u1_i])
+                        && (1 == pu1_update_pic_type_model[u1_i]))
+        {
+            irc_update_frame_rd_model(&ps_rd_model[u1_i]);
+        }
+    }
+
+    /* Second part: solve the equation using all the pic-type models */
+    /* (linear model without an intercept)                           */
+    for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+    {
+        ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+        eff_A += ((pfl_eta[u1_i] + pu2_num_pics_of_a_pic_type[u1_i] - 1)
+                        * ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int
+                        * pfl_gamma[u1_i]);
+    }
+
+    /* Only divide when eff_A is a non-zero number (rejects 0 and NaN) */
+    if((eff_A > 0) || (eff_A < 0))
+    {
+        fl_sad_by_qp_base = u4_bits_for_sub_gop / eff_A;
+
+        fl_sad_by_qp_curr_frm = fl_sad_by_qp_base
+                        * pfl_gamma[u1_curr_pic_type]
+                        * pfl_eta[u1_curr_pic_type];
+    }
+
+    ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+
+    fl_bits_for_curr_frm = ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int
+                    * fl_sad_by_qp_curr_frm;
+
+    /*
+     * Store the model that was finally used to calculate Qp.
+     * This is so that the same model is used in further calculations
+     * for this picture.
+     */
+    ps_rd_model_of_pic_type->u1_model_used = PREV_FRAME_MODEL;
+
+    i4_per_frm_bits_Ti = (WORD32)(fl_bits_for_curr_frm + 0.5);
+
+    if(fl_sad_by_qp_curr_frm > 0)
+        fl_qp_curr_frm = (float)u4_curr_estimated_sad / fl_sad_by_qp_curr_frm;
+    else
+        fl_qp_curr_frm = 255;
+
+    if(fl_qp_curr_frm > 255)
+        fl_qp_curr_frm = 255;
+
+    *pu1_curr_pic_type_qp = (UWORD8)(fl_qp_curr_frm + 0.5);
+
+    return (i4_per_frm_bits_Ti);
+}
+
+/* Returns coefficient a of the linear model without intercept */
+model_coeff irc_get_linear_coefficient(rc_rd_model_t *ps_rd_model)
+{
+    model_coeff model_coeff_a = ps_rd_model->model_coeff_a_lin_wo_int;
+
+    return model_coeff_a;
+}
+
+
diff --git a/encoder/irc_rd_model.h b/encoder/irc_rd_model.h
new file mode 100755
index 0000000..8be31c1
--- /dev/null
+++ b/encoder/irc_rd_model.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Name : irc_rd_model.h */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : irc_update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* update_mb_rd_model */
+/* find_model_coeffs */
+/* refine_set_of_points */
+/* init_mb_rd_model */
+/* irc_add_frame_to_rd_model */
+/* irc_find_qp_for_target_bits */
+/* */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 Sarat Initial Version */
+/*****************************************************************************/
+
+#ifndef RC_RD_MODEL
+#define RC_RD_MODEL
+
+#define MAX_FRAMES_MODELLED 16
+
+typedef float model_coeff;
+typedef struct rc_rd_model_t *rc_rd_model_handle;
+
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_handle *pps_rc_rd_model,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+/* Interface Functions */
+/* Initialise the rate distortion model */
+void irc_init_frm_rc_rd_model(rc_rd_model_handle ps_rd_model,
+ UWORD8 u1_max_frames_modelled);
+
+/* Reset the rate distortion model */
+void irc_reset_frm_rc_rd_model(rc_rd_model_handle ps_rd_model);
+
+/* Returns the Qp to be used for the given bits and SAD, clipped to */
+/* [u1_min_qp, u1_max_qp]; parameter order matches the definition in */
+/* irc_rd_model.c (minimum first, then maximum) */
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_handle ps_rd_model,
+                                   UWORD32 u4_target_res_bits,
+                                   UWORD32 u4_estimated_sad,
+                                   UWORD8 u1_min_qp,
+                                   UWORD8 u1_max_qp);
+
+/* Updates the frame level statistics after encoding a frame */
+void irc_add_frame_to_rd_model(rc_rd_model_handle ps_rd_model,
+ UWORD32 i4_res_bits,
+ UWORD8 u1_avg_mp2qp,
+ UWORD32 i4_sad_h264,
+ UWORD8 u1_num_skips);
+
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_handle ps_rd_model,
+ UWORD32 u4_estimated_sad,
+ UWORD8 u1_avg_qp);
+
+/* Get the Linear model coefficient */
+model_coeff irc_get_linear_coefficient(rc_rd_model_handle ps_rd_model);
+
+WORD32 irc_calc_per_frm_bits(rc_rd_model_handle ps_rd_model,
+ UWORD16 *pu2_num_pics_of_a_pic_type,
+ UWORD8 *pu1_update_pic_type_model,
+ UWORD8 u1_num_pic_types,
+ UWORD32 *pu4_num_skip_of_a_pic_type,
+ UWORD8 u1_base_pic_type,
+ float *pfl_gamma,
+ float *pfl_eta,
+ UWORD8 u1_curr_pic_type,
+ UWORD32 u4_bits_for_sub_gop,
+ UWORD32 u4_curr_estimated_sad,
+ UWORD8 *pu1_curr_pic_type_qp);
+#endif
+
diff --git a/encoder/irc_rd_model_struct.h b/encoder/irc_rd_model_struct.h
new file mode 100755
index 0000000..dc4c0ea
--- /dev/null
+++ b/encoder/irc_rd_model_struct.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef RC_RD_MODEL_STRUCT
+#define RC_RD_MODEL_STRUCT
+
+/* Enable or disable QUAD model */
+#define ENABLE_QUAD_RC_MODEL 0
+#define ENABLE_LIN_MODEL_WITH_INTERCEPT 0
+
+/* Number of elements for QP */
+#define MPEG2_QP_ELEM (MAX_MPEG2_QP + 1)
+
+
+#if ENABLE_QUAD_RC_MODEL
+#define QUAD 1
+#define MIN_FRAMES_FOR_QUAD_MODEL 5
+#endif
+
+#define MAX_ACTIVE_FRAMES 16
+#define MIN_FRAMES_FOR_LIN_MODEL 3
+#define INVALID_FRAME_INDEX 255
+
+/* Each threshold is given as a pair: value = <name>_SM / pow(2, <name>_E) */
+#define UP_THR_SM 1 /* (1 / pow(2,4)) = 0.0625 */
+#define UP_THR_E 4
+
+#define LO_THR_SM 368 /* (368.64 / pow(2,14)) = 0.0225 */
+#define LO_THR_E 14
+
+#define LIN_DEV_THR_SM 1 /* (1 / pow(2,2)) = 0.25 */
+#define LIN_DEV_THR_E 2
+
+#define PREV_FRAME_MODEL 2
+
+/* Q Factors used for fixed point calculation */
+#define Q_FORMAT_GAMMA 8
+#define Q_FORMAT_ETA 8
+
+/*
+ * Frame level rate-distortion model state: a circular history of the last
+ * MAX_FRAMES_MODELLED frames plus the coefficients derived from it.
+ */
+typedef struct rc_rd_model_t
+{
+ /* Slot the next frame will be written to; wraps to 0 at MAX_FRAMES_MODELLED */
+ UWORD8 u1_curr_frm_counter;
+ /* Number of frames stored so far; saturates at u1_max_frms_to_model */
+ UWORD8 u1_num_frms_in_model;
+ /* Upper bound on u1_num_frms_in_model, set by irc_init_frm_rc_rd_model() */
+ UWORD8 u1_max_frms_to_model;
+ /* Model id; irc_rd_model.c only ever stores PREV_FRAME_MODEL here */
+ UWORD8 u1_model_used;
+
+ /* Per frame history, filled by irc_add_frame_to_rd_model(): bits and SAD */
+ UWORD32 pi4_res_bits[MAX_FRAMES_MODELLED];
+ UWORD32 pi4_sad[MAX_FRAMES_MODELLED];
+
+ /* Per frame history: number of skips and average Qp */
+ UWORD8 pu1_num_skips[MAX_FRAMES_MODELLED];
+ UWORD8 pu1_avg_qp[MAX_FRAMES_MODELLED];
+ /* Not referenced by irc_rd_model.c */
+ UWORD8 au1_num_frames[MPEG2_QP_ELEM];
+
+ /* Linear model without intercept: a = bits * Qp / SAD of the latest frame; */
+ /* b and c are always written as 0 by irc_rd_model.c */
+ model_coeff model_coeff_a_lin_wo_int;
+ model_coeff model_coeff_b_lin_wo_int;
+ model_coeff model_coeff_c_lin_wo_int;
+} rc_rd_model_t;
+
+#endif /* RC_RD_MODEL_STRUCT */
diff --git a/encoder/irc_trace_support.h b/encoder/irc_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/irc_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* irc_trace_support.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+typedef struct
+{ /* Describes a trace buffer: a base pointer, an offset and a maximum size */
+ WORD8 * pu1_buf; /* NOTE(review): signed WORD8 pointer despite the unsigned-style 'pu1_' prefix */
+ WORD32 i4_offset; /* presumably the current position within pu1_buf - confirm */
+ WORD32 i4_max_size; /* presumably the capacity of pu1_buf - confirm */
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size); /* presumably registers pu1_buf of i4_size as the trace buffer; definition not in this header - confirm */
+
+int trace_printf(const WORD8 *format, ...); /* variadic, printf-like by name; note that format is WORD8, not char - confirm semantics */
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c
new file mode 100755
index 0000000..23e9959
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.c
@@ -0,0 +1,368 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_trace_support.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+typedef struct vbr_storage_vbv_t
+{ /* State of the VBR storage VBV buffer model */
+ WORD32 i4_max_buf_size; /* buffer size given to irc_init_vbr_vbv; upper clamp for the level */
+ WORD32 i4_cur_buf_size; /* current level: starts at i4_max_buf_size, reduced by the bits of each decoded frame */
+ WORD32 i4_max_bits_inflow_per_frm_period; /* bits added per frame interval, derived from max bit rate and frame rate */
+ WORD32 i4_max_bits_per_tgt_frm; /* same derivation using the target frame rate (irc_change_vbr_max_bits_per_tgt_frm) */
+ /* Storing input variables */
+ WORD32 i4_max_bit_rate;
+ WORD32 i4_max_frame_rate;
+ /* Error bits calculation module */
+ error_bits_handle ps_error_bits;
+
+} vbr_storage_vbv_t;
+
+static void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input)
+{
+    /* Saturating add: pi4_accumulator[0] += i4_input, clamped to the 32-bit
+     * signed range. The limits are written as in-range signed expressions
+     * because the literal 0x80000000 is unsigned and converting it to a
+     * signed type is implementation-defined. */
+    const WORD32 i4_max = 0x7fffffff;
+    const WORD32 i4_min = -0x7fffffff - 1;
+    const WORD32 i4_acc = pi4_accumulator[0];
+
+    if((i4_acc > 0) && ((i4_max - i4_acc) < i4_input)) /* headroom via subtraction: cannot overflow */
+        pi4_accumulator[0] = i4_max;
+    else if((i4_acc < 0) && ((i4_min - i4_acc) > i4_input))
+        pi4_accumulator[0] = i4_min;
+    else
+        pi4_accumulator[0] = i4_acc + i4_input;
+}
+
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_t **pps_vbr_storage_vbv,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{ /* Memtab handling for the VBV state and its error-bits sub-module; returns the number of memtab entries covered */
+ WORD32 i4_mem_tab_idx = 0;
+ static vbr_storage_vbv_t s_vbr_storage_vbv_temp; /* dummy state that the handle can point at before real memory exists */
+
+ /*
+ * Hack for the GET_NUM_MEMTAB / FILL_MEMTAB passes, during which we don't
+ * have any state memory: point the handle at a static dummy so the dereference below is valid
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_vbr_storage_vbv) = &s_vbr_storage_vbv_temp;
+
+ /*for src rate control state structure*/
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(vbr_storage_vbv_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_vbr_storage_vbv,
+ e_func_type); /* NOTE(review): indexes [0] rather than [i4_mem_tab_idx]; same entry here because the index is still 0 */
+ }
+ i4_mem_tab_idx++; /* the entry for this structure is counted in every pass, including GET_NUM_MEMTAB */
+
+ i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+ &pps_vbr_storage_vbv[0]->ps_error_bits,
+ &ps_memtab[i4_mem_tab_idx], e_func_type); /* sub-module continues from the next memtab entry and reports how many it covered */
+ return (i4_mem_tab_idx);
+}
+
+void irc_init_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+ WORD32 i4_max_bit_rate,
+ WORD32 i4_frm_rate,
+ WORD32 i4_max_vbv_buff_size)
+{ /* Initialises the VBV model from the max bit rate, frame rate and buffer size */
+ ps_vbr_storage_vbv->i4_max_buf_size = i4_max_vbv_buff_size;
+ ps_vbr_storage_vbv->i4_cur_buf_size = i4_max_vbv_buff_size; /* the level starts at the full buffer size */
+
+ /*
+ * Calculate the max number of bits that flow into the decoder
+ * in the interval of two frames
+ */
+ X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, i4_frm_rate,
+ ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period); /* presumably result = x * y / z; macro defined elsewhere - confirm */
+
+ /* init error bits */
+ irc_init_error_bits(ps_vbr_storage_vbv->ps_error_bits, i4_frm_rate,
+ i4_max_bit_rate);
+
+ /* Storing the input values */
+ ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm =
+ ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period; /* per-target-frame value starts equal to the per-frame inflow */
+ ps_vbr_storage_vbv->i4_max_bit_rate = i4_max_bit_rate;
+ ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate;
+}
+
+void irc_update_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                        WORD32 i4_total_bits_decoded)
+{
+    const WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /*
+     * Fill: over one frame interval the buffer gains the per-frame inflow
+     * plus the error-bits correction (the accumulation saturates).
+     */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+
+    /* Clamp: the level is never allowed above the buffer size */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* Drain: remove the bits of the frame that was just decoded */
+    ps_vbr_storage_vbv->i4_cur_buf_size = i4_level - i4_total_bits_decoded;
+    /* Update the error bits state */
+    irc_update_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+}
+
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    const WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+
+    /*
+     * Project the buffer level to the moment the next frame is decoded by
+     * adding one frame interval of inflow plus the error-bits correction.
+     */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+
+    /*
+     * The level is capped at the buffer size. The capped value is the most
+     * the decoder can consume for the next frame without underflow.
+     */
+    return (i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+                    ? ps_vbr_storage_vbv->i4_max_buf_size : i4_level;
+}
+
+/****************************************************************************
+ Function Name : irc_get_vbv_buffer_status
+ Description : Classifies the VBV buffer level that results from decoding a
+ frame of i4_total_frame_bits bits
+ Outputs : pi4_num_bits_to_prevent_vbv_underflow[0] = level before the decode
+ Returns : VBV_UNDERFLOW, VBV_OVERFLOW, VBR_CAUTION or VBV_NORMAL
+ *****************************************************************************/
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                           WORD32 i4_total_frame_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    const WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /*
+     * Level just before the frame is decoded: one frame interval of inflow
+     * (plus the error bits due to fixed point computation of the rate) is
+     * added, and the result is capped at the buffer size.
+     */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* A frame larger than this pre-decode level would underflow the buffer */
+    *pi4_num_bits_to_prevent_vbv_underflow = i4_level;
+
+    /* Level just after the frame is decoded */
+    i4_level -= i4_total_frame_bits;
+
+    if(i4_level < 0)
+    {
+        return VBV_UNDERFLOW;
+    }
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        return VBV_OVERFLOW;
+    }
+    if(i4_level < (ps_vbr_storage_vbv->i4_max_buf_size >> 2))
+    {
+        /* Less than a quarter of the buffer size remains */
+        return VBR_CAUTION;
+    }
+    return VBV_NORMAL;
+}
+
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    /* Returns 1 (restrict) unless the current level has dropped below half
+     * of the buffer size, in which case it returns 0. */
+    const WORD32 i4_half_buf_size = ps_vbr_storage_vbv->i4_max_buf_size >> 1;
+
+    if(ps_vbr_storage_vbv->i4_cur_buf_size < i4_half_buf_size)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_buf_size; /* buffer size stored by irc_init_vbr_vbv */
+}
+
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_cur_buf_size; /* level as left by the last update */
+}
+
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period; /* bits added per frame interval */
+}
+
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm; /* value computed for the target frame rate */
+}
+
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                    UWORD32 u4_bits)
+{
+    const WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /* Level after one more frame interval of inflow, capped at the buffer size */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* u4_bits is unsigned, so this subtraction is evaluated in unsigned arithmetic */
+    i4_level -= u4_bits;
+    return i4_level;
+}
+
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                     WORD32 i4_rem_bits_in_gop,
+                                     WORD32 i4_rem_frms_in_gop,
+                                     picture_type_e e_pic_type)
+{
+    WORD32 i4_dbf_max, i4_dbf_min, i4_dbf_prev, i4_vbv_size, i4_dbf_desired;
+    WORD32 i4_max_tgt_bits;
+    /* Returns how far the level after one frame of inflow lies above a desired
+     * level (never negative). The 95% / 10% limits scale quotient and remainder
+     * separately: same result as 95 * size / 100, but the product cannot overflow. */
+    i4_vbv_size = ps_vbr_storage_vbv->i4_max_buf_size;
+    i4_dbf_max = (i4_vbv_size / 100) * 95 + ((i4_vbv_size % 100) * 95) / 100;
+    i4_dbf_min = (i4_vbv_size / 100) * 10 + ((i4_vbv_size % 100) * 10) / 100;
+    i4_dbf_prev = ps_vbr_storage_vbv->i4_cur_buf_size;
+    if(i4_rem_bits_in_gop < 0)
+        i4_rem_bits_in_gop = 0;
+    if(i4_rem_frms_in_gop <= 0)
+        i4_rem_frms_in_gop = 1; /* also keeps the divisions below away from zero */
+    if(e_pic_type == I_PIC)
+    {
+        i4_dbf_desired = i4_dbf_min; /* I pictures may take the level down to the 10% mark */
+    }
+    else
+    {
+        i4_dbf_desired = (i4_dbf_max - i4_rem_bits_in_gop / i4_rem_frms_in_gop
+                        - i4_dbf_prev) / i4_rem_frms_in_gop;
+        i4_dbf_desired += i4_dbf_prev;
+    }
+
+    i4_dbf_prev += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+    if(i4_dbf_prev > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_dbf_prev = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    i4_max_tgt_bits = MAX(0, (i4_dbf_prev - i4_dbf_desired));
+    return (i4_max_tgt_bits);
+}
+
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+ WORD32 i4_frm_rate)
+{ /* Applies a new frame rate: recomputes the per-frame inflow from the stored max bit rate */
+ /*
+ * Calculate the max number of bits that flow into the decoder
+ * in the interval of two frames
+ */
+ X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_frm_rate,
+ ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+
+ /* Update the lower modules */
+ irc_change_frm_rate_in_error_bits(ps_vbr_storage_vbv->ps_error_bits,
+ i4_frm_rate);
+ /* Storing the input values */
+ ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate; /* i4_max_bits_per_tgt_frm is not modified by this function */
+}
+
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+ WORD32 i4_max_bit_rate)
+{ /* Applies a new max bit rate: recomputes the per-frame inflow using the stored frame rate */
+ /*
+ * Calculate the max number of bits that flow into the decoder
+ * in the interval of two frames
+ */
+ X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, ps_vbr_storage_vbv->i4_max_frame_rate,
+ ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+
+ /* update the lower modules */
+ irc_change_bitrate_in_error_bits(ps_vbr_storage_vbv->ps_error_bits,
+ i4_max_bit_rate);
+
+ /* Storing the input values */
+ ps_vbr_storage_vbv->i4_max_bit_rate = i4_max_bit_rate; /* i4_max_bits_per_tgt_frm is not modified by this function */
+}
+
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+ WORD32 i4_tgt_frm_rate)
+{ /* Recomputes i4_max_bits_per_tgt_frm for a new target frame rate; no other field is written */
+ /*
+ * Calculate the max number of bits per target frame from the stored
+ * max bit rate and the given target frame rate
+ */
+ X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_tgt_frm_rate,
+ ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm);
+
+}
diff --git a/encoder/irc_vbr_storage_vbv.h b/encoder/irc_vbr_storage_vbv.h
new file mode 100755
index 0000000..c53c66d
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STORAGE_VBV_H_
+#define _VBR_STORAGE_VBV_H_
+/******************************************************************************
+VBR STORAGE (VBV):
+Max. buffer filling rate: Rmax
+Max. buffer size: Bmax (as specified by level and profile)
+Current Buffer Level: Bcur
+Frame Rate: F
+
+For a storage scenario, the initial buffer size is assumed to be max. For every
+frame the Maximum bits filled in to the buffer is given by Rmaxfrm = Rmax/F. If
+the buffer overflows then the buffer is thresholded to the max buffer size.
+
+ (overflow)
+ B(0) /|
+---|--------------/-|------------------------------ Bmax
+ | / |
+ | /|/ |
+ | /| / |
+ | / | /|/ |
+ |/ | / | /|
+ |/ |/ |
+ |
+ |
+-----------------------|---------------------------
+ |<->| |
+(1/F)=>1/frame_rate (underflow)
+
+
+ B"(i) - Bits in buffer just before decoding a frame.
+ B'(i) - Bits in buffer just after decoding a frame.
+
+
+ B(0) (initBuffer size) = Bmax.
+ B'(i) = B"(i) - bits_decoded
+ B"(i) = Min( Bmax, B'(i-1) + Rmaxfrm)
+
+Overflow Scenario: In the VBR case only a maximum filling rate (input bit rate) is
+specified, so buffer overflow is not an issue: the filling rate can be reduced to any
+value below this maximum.
+
+Underflow Scenario: B'(i) must never become negative; if it does, the buffer underflows.
+To prevent this, the number of bits decoded for a frame must not exceed B"(i), which
+is equal to Min( Bmax, B'(i-1) + Rmaxfrm)
+****************************************************************************************/
+
+typedef struct vbr_storage_vbv_t* vbr_storage_vbv_handle; /* opaque handle; the struct is defined in irc_vbr_storage_vbv.c */
+
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_handle *pps_vbr_storage_vbv,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type); /* returns the number of memtab entries covered */
+
+/* Initialises the VBV buffer state; the buffer level starts at i4_max_vbv_buff_size */
+void irc_init_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 max_bit_rate, /* In bits/sec*/
+ WORD32 max_frm_rate, /* In frames/1000 sec*/
+ WORD32 i4_max_vbv_buff_size); /* in bits*/
+
+/* Updates the buffer after decoding a frame */
+void irc_update_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_total_bits_decoded);
+
+/* Gets the maximum number of bits that can be decoded out of the VBV without underflow */
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* bits added to the buffer per frame interval */
+
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* value computed for the target frame rate */
+
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* current buffer level */
+
+/* Queries the VBV buffer for the buffer status */
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_total_frame_bits,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow); /* out: level before the frame is decoded */
+
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* 0 when the level is below half the buffer size, else 1 */
+
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* buffer size given at init */
+
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ UWORD32 u4_bits); /* level after one frame interval of inflow minus u4_bits; state is not modified */
+
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_rem_bits_in_gop,
+ WORD32 i4_rem_frms_in_gop,
+ picture_type_e e_pic_type); /* result is never negative */
+
+/* Changing input values at run time */
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_max_bit_rate);
+
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_frm_rate);
+
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_tgt_frm_rate); /* only i4_max_bits_per_tgt_frm is recomputed */
+#endif
+
diff --git a/encoder/irc_vbr_str_prms.c b/encoder/irc_vbr_str_prms.c
new file mode 100755
index 0000000..29055c2
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.c
@@ -0,0 +1,199 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_vbr_str_prms.h"
+
+/******************************************************************************
+ Function Name : irc_init_vbv_str_prms
+ Description : Initializes and calculates the number of I frame and P frames
+ in the delay period
+ Return Values : void
+ *****************************************************************************/
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                           UWORD32 u4_intra_frm_interval,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks,
+                           UWORD32 u4_frms_in_delay_period)
+{
+    /* Length of the delay period and of one intra period, in tick units */
+    const UWORD32 u4_delay_len = u4_frms_in_delay_period * u4_src_ticks;
+    const UWORD32 u4_intra_len = u4_intra_frm_interval * u4_tgt_ticks;
+    UWORD32 u4_num_i_pics;
+    UWORD32 u4_num_p_pics;
+
+    /* Remember the configuration exactly as supplied */
+    p_vbr_str_prms->u4_frms_in_delay_prd = u4_frms_in_delay_period;
+    p_vbr_str_prms->u4_src_ticks = u4_src_ticks;
+    p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks;
+    p_vbr_str_prms->u4_intra_frame_int = u4_intra_frm_interval;
+
+    /*
+     * Number of I pictures in the delay period: ceil(delay_len / intra_len),
+     * computed as a truncating division followed by a round-up whenever
+     * the division left a remainder.
+     */
+    u4_num_i_pics = u4_delay_len / u4_intra_len;
+    if((u4_num_i_pics * u4_intra_len) < u4_delay_len)
+    {
+        u4_num_i_pics++;
+    }
+
+    /* Every remaining picture of the delay period is counted as a P picture */
+    u4_num_p_pics = u4_frms_in_delay_period - u4_num_i_pics;
+
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC] = u4_num_i_pics;
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC] = u4_num_p_pics;
+
+    /* Intra position: the I picture count scaled by the intra period length */
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_num_i_pics * u4_intra_len;
+
+    /* Restart the picture counter and the running source-tick position */
+    p_vbr_str_prms->u4_pic_num = 0;
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+}
+
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+                                       UWORD32 *pu4_num_pics_in_delay_prd)
+{
+    /* Copies out the I and P picture counts; returns the delay period length in frames */
+    const UWORD32 *pu4_counts = p_vbr_str_prms->u4_num_pics_in_delay_prd;
+    pu4_num_pics_in_delay_prd[I_PIC] = pu4_counts[I_PIC];
+    pu4_num_pics_in_delay_prd[P_PIC] = pu4_counts[P_PIC];
+    return (WORD32)p_vbr_str_prms->u4_frms_in_delay_prd;
+}
+
+/******************************************************************************
+ Function Name : irc_update_vbr_str_prms
+ Description : update the number of I frames and P/B frames in the delay period
+ for buffer size calculations
+ *****************************************************************************/
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                             picture_type_e e_pic_type)
+{
+    UWORD32 *pu4_pics_in_prd = p_vbr_str_prms->u4_num_pics_in_delay_prd;
+    /* Current source-tick position advanced by one full delay period */
+    const UWORD32 u4_prd_end_pos = p_vbr_str_prms->u4_cur_pos_in_src_ticks
+                    + (p_vbr_str_prms->u4_frms_in_delay_prd
+                                    * p_vbr_str_prms->u4_src_ticks);
+
+    /*
+     * The picture that was just encoded drops out of the delay period, so
+     * the count of its type goes down by one (any non-I picture is counted
+     * as P). These counts, along with the drain rates for the corresponding
+     * picture types, are used to calculate the CBR buffer size every frame.
+     */
+    pu4_pics_in_prd[(e_pic_type == I_PIC) ? I_PIC : P_PIC]--;
+
+    /*
+     * A new picture enters the delay period in its place: a P picture while
+     * the next intra position is still beyond the period, otherwise an I
+     * picture, in which case the picture counter and the tick position are
+     * restarted.
+     */
+    if(u4_prd_end_pos < p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks)
+    {
+        pu4_pics_in_prd[P_PIC]++;
+    }
+    else
+    {
+        /* Re-base the intra position on the restarted origin, one intra period further on */
+        p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks +=
+                        (p_vbr_str_prms->u4_intra_frame_int
+                                        * p_vbr_str_prms->u4_tgt_ticks)
+                        - p_vbr_str_prms->u4_cur_pos_in_src_ticks;
+        pu4_pics_in_prd[I_PIC]++;
+        p_vbr_str_prms->u4_pic_num = 0;
+        p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+    }
+
+    /* Step to the next picture */
+    p_vbr_str_prms->u4_pic_num++;
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks += p_vbr_str_prms->u4_src_ticks;
+}
+
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                               UWORD32 *pu4_src_ticks,
+                               UWORD32 *pu4_tgt_ticks)
+{
+    *pu4_src_ticks = p_vbr_str_prms->u4_src_ticks; /* stored source tick value */
+    *pu4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks; /* stored target tick value */
+}
+
+/*******************************************************************************
+ Function Name : irc_change_vsp_ifi
+ Description : Applies a new intra frame interval by re-initialising with the
+ stored source ticks, target ticks and delay period length
+ ******************************************************************************/
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+                        UWORD32 u4_intra_frame_int)
+{
+    const UWORD32 u4_src = p_vbr_str_prms->u4_src_ticks;
+    const UWORD32 u4_tgt = p_vbr_str_prms->u4_tgt_ticks;
+    const UWORD32 u4_fidp = p_vbr_str_prms->u4_frms_in_delay_prd;
+    irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int, u4_src, u4_tgt, u4_fidp);
+}
+
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_rem_intra_per_scaled;
+    UWORD32 u4_prev_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+
+    /*
+     * If the target frame rate is changed, recalculate the position of the next
+     * I frame based on the new target frame rate: the distance still to go is
+     * converted from old to new target ticks (the division truncates first).
+     * LIMITATIONS :
+     * Currently no support is available for dynamic change in source frame rate
+     */
+    u4_rem_intra_per_scaled = ((p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks
+                    - p_vbr_str_prms->u4_cur_pos_in_src_ticks)
+                    / u4_prev_tgt_ticks) * u4_tgt_ticks;
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_rem_intra_per_scaled
+                    + p_vbr_str_prms->u4_cur_pos_in_src_ticks;
+    /* Store the new value so later updates and re-initialisations stop using the old one */
+    p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks;
+}
+
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_src_ticks)
+{
+    vbr_str_prms_t * const ps_cfg = p_vbr_str_prms; /* re-initialise: only the source ticks change */
+    irc_init_vbv_str_prms(ps_cfg, ps_cfg->u4_intra_frame_int, u4_src_ticks,
+                          ps_cfg->u4_tgt_ticks, ps_cfg->u4_frms_in_delay_prd);
+}
+
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+                         UWORD32 u4_frms_in_delay_period)
+{
+    /* Re-initialise with a new delay period length; all other inputs are the stored values */
+    vbr_str_prms_t * const ps_cfg = p_vbr_str_prms;
+    irc_init_vbv_str_prms(ps_cfg, ps_cfg->u4_intra_frame_int, ps_cfg->u4_src_ticks,
+                          ps_cfg->u4_tgt_ticks, u4_frms_in_delay_period);
+}
diff --git a/encoder/irc_vbr_str_prms.h b/encoder/irc_vbr_str_prms.h
new file mode 100755
index 0000000..34301d8
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STR_PRMS_H_
+#define _VBR_STR_PRMS_H_
+
+typedef struct
+{ /* Bookkeeping of I/P picture counts within the delay period and of the next intra position */
+ UWORD32 u4_num_pics_in_delay_prd[MAX_PIC_TYPE]; /* indexed by picture type; irc_vbr_str_prms.c maintains the I_PIC and P_PIC entries */
+ UWORD32 u4_pic_num; /* zeroed at init and at each intra restart, incremented on every update */
+ UWORD32 u4_intra_prd_pos_in_tgt_ticks; /* position compared against the source-tick position in irc_update_vbr_str_prms */
+ UWORD32 u4_cur_pos_in_src_ticks; /* advances by u4_src_ticks on every update */
+ UWORD32 u4_intra_frame_int; /* intra frame interval as passed to init */
+ UWORD32 u4_src_ticks;
+ UWORD32 u4_tgt_ticks;
+ UWORD32 u4_frms_in_delay_prd; /* number of frames in the delay period */
+} vbr_str_prms_t;
+
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_intra_frm_interval,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks,
+ UWORD32 u4_frms_in_delay_period); /* stores the inputs and derives the I/P picture counts of the delay period */
+
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 *pu4_num_pics_in_delay_prd); /* out array is written at I_PIC and P_PIC; returns frames in the delay period */
+
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 *pu4_src_ticks,
+ UWORD32 *pu4_tgt_ticks); /* both outputs receive the stored tick values */
+
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+ picture_type_e e_pic_type); /* called per encoded picture; updates the picture counts and positions */
+
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_intra_frame_int); /* re-initialises with the new intra frame interval */
+
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_tgt_ticks); /* rescales the next intra position for the new target ticks */
+
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_src_ticks); /* re-initialises with the new source ticks */
+
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_frms_in_delay_period); /* re-initialises with the new delay period length */
+
+#endif
+
diff --git a/encoder/ithread.h b/encoder/ithread.h
new file mode 100755
index 0000000..82170a5
--- /dev/null
+++ b/encoder/ithread.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ithread.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the */
+/* Thread Abstraction Layer */
+/* */
+/* List of Functions : ithread_get_handle_size() */
+/* ithread_get_mutex_lock_size() */
+/* ithread_create() */
+/* ithread_exit() */
+/* ithread_join() */
+/* ithread_get_mutex_struct_size() */
+/* ithread_mutex_init() */
+/* ithread_mutex_destroy() */
+/* ithread_mutex_lock() */
+/* ithread_mutex_unlock() */
+/* ithread_yield() */
+/* ithread_sleep() */
+/* ithread_msleep() */
+/* ithread_usleep() */
+/* ithread_get_sem_struct_size() */
+/* ithread_sem_init() */
+/* ithread_sem_post() */
+/* ithread_sem_wait() */
+/* ithread_sem_destroy() */
+/* ithread_set_affinity() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 06 09 2012 Harish Initial Version */
+/* */
+/*****************************************************************************/
+
+#ifndef _ITHREAD_H_
+#define _ITHREAD_H_
+
+UWORD32 ithread_get_handle_size(void); /* presumably the byte size the caller must allocate for a thread handle - confirm */
+
+UWORD32 ithread_get_mutex_lock_size(void); /* NOTE(review): returns UWORD32 while ithread_get_mutex_struct_size returns WORD32 */
+
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument); /* NOTE(review): strt is an untyped pointer, presumably the thread entry point - confirm */
+
+void ithread_exit(void *val_ptr);
+
+WORD32 ithread_join(void *thread_id, void ** val_ptr);
+
+WORD32 ithread_get_mutex_struct_size(void);
+
+WORD32 ithread_mutex_init(void *mutex); /* mutex is an untyped pointer; presumably caller-allocated storage - confirm */
+
+WORD32 ithread_mutex_destroy(void *mutex);
+
+WORD32 ithread_mutex_lock(void *mutex);
+
+WORD32 ithread_mutex_unlock(void *mutex);
+
+void ithread_yield(void);
+
+void ithread_sleep(UWORD32 u4_time); /* unit of u4_time is not stated in this header - confirm */
+
+void ithread_msleep(UWORD32 u4_time_ms); /* milliseconds, going by the parameter name */
+
+void ithread_usleep(UWORD32 u4_time_us); /* microseconds, going by the parameter name */
+
+UWORD32 ithread_get_sem_struct_size(void);
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value); /* parameter names mirror POSIX sem_init - confirm semantics */
+
+WORD32 ithread_sem_post(void *sem);
+
+WORD32 ithread_sem_wait(void *sem);
+
+WORD32 ithread_sem_destroy(void *sem);
+
+WORD32 ithread_set_affinity(WORD32 core_id);
+#endif /* _ITHREAD_H_ */
diff --git a/encoder/iv2.h b/encoder/iv2.h
new file mode 100755
index 0000000..538bb1e
--- /dev/null
+++ b/encoder/iv2.h
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* iv2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface (API) of the
+* Ittiam Video codecs. This is version 2 of the Ittiam Video API.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IV2_H_
+#define _IV2_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IV_MAX_RAW_COMPONENTS 4
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+
+/** Function status */
+typedef enum{
+ IV_STATUS_NA = 0x7FFFFFFF,
+ IV_SUCCESS = 0x0,
+ IV_FAIL = 0x1,
+}IV_STATUS_T;
+
+
+/** Defines the types of memory */
+typedef enum {
+ IV_NA_MEM_TYPE = 0x7FFFFFFF,
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x0,
+ IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x1,
+ IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x2,
+ IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x3,
+ IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x10,
+ IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x11,
+ IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x12,
+ IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x13,
+}IV_MEM_TYPE_T;
+
+/* The color formats used in video/image codecs */
+
+typedef enum {
+ IV_CHROMA_NA = 0x7FFFFFFF,
+ IV_YUV_420P = 0x0,
+ IV_YUV_420SP_UV = 0x1,
+ IV_YUV_420SP_VU = 0x2,
+
+ IV_YUV_422P = 0x10,
+ IV_YUV_422IBE = 0x11,
+ IV_YUV_422ILE = 0x12,
+
+ IV_YUV_444P = 0x20,
+ IV_YUV_411P = 0x21,
+
+ IV_GRAY = 0x30,
+
+ IV_RGB_565 = 0x31,
+ IV_RGB_24 = 0x32,
+ IV_RGBA_8888 = 0x33
+}IV_COLOR_FORMAT_T;
+
+/** Frame/Field coding types */
+typedef enum {
+ IV_NA_FRAME = 0x7FFFFFFF,
+ IV_I_FRAME = 0x0,
+ IV_P_FRAME = 0x1,
+ IV_B_FRAME = 0x2,
+ IV_IDR_FRAME = 0x3,
+ IV_II_FRAME = 0x4,
+ IV_IP_FRAME = 0x5,
+ IV_IB_FRAME = 0x6,
+ IV_PI_FRAME = 0x7,
+ IV_PP_FRAME = 0x8,
+ IV_PB_FRAME = 0x9,
+ IV_BI_FRAME = 0xa,
+ IV_BP_FRAME = 0xb,
+ IV_BB_FRAME = 0xc,
+ IV_MBAFF_I_FRAME = 0xd,
+ IV_MBAFF_P_FRAME = 0xe,
+ IV_MBAFF_B_FRAME = 0xf,
+ IV_MBAFF_IDR_FRAME = 0x10,
+ IV_NOT_CODED_FRAME = 0x11,
+ IV_FRAMETYPE_DEFAULT = IV_I_FRAME
+}IV_PICTURE_CODING_TYPE_T;
+
+/** Field type */
+typedef enum {
+ IV_NA_FLD = 0x7FFFFFFF,
+ IV_TOP_FLD = 0x0,
+ IV_BOT_FLD = 0x1,
+ IV_FLD_TYPE_DEFAULT = IV_TOP_FLD
+}IV_FLD_TYPE_T;
+
+/** Video content type progressive/interlaced etc */
+typedef enum {
+ IV_CONTENTTYPE_NA = 0x7FFFFFFF,
+ IV_PROGRESSIVE = 0x0,
+ IV_INTERLACED = 0x1,
+ IV_PROGRESSIVE_FRAME = 0x2,
+ IV_INTERLACED_FRAME = 0x3,
+ IV_INTERLACED_TOPFIELD = 0x4,
+ IV_INTERLACED_BOTTOMFIELD = 0x5,
+ IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE,
+}IV_CONTENT_TYPE_T;
+
+/** Profile */
+typedef enum
+{
+ IV_PROFILE_NA = 0x7FFFFFFF,
+ IV_PROFILE_BASE = 0x0,
+ IV_PROFILE_MAIN = 0x1,
+ IV_PROFILE_HIGH = 0x2,
+
+
+ IV_PROFILE_SIMPLE = 0x100,
+ IV_PROFILE_ADVSIMPLE = 0x101,
+ IV_PROFILE_DEFAULT = IV_PROFILE_BASE,
+}IV_PROFILE_T;
+
+
+/** Architecture Enumeration */
+typedef enum
+{
+ ARCH_NA = 0x7FFFFFFF,
+ ARCH_ARM_NONEON = 0x0,
+ ARCH_ARM_A9Q,
+ ARCH_ARM_A9A,
+ ARCH_ARM_A9,
+ ARCH_ARM_A7,
+ ARCH_ARM_A5,
+ ARCH_ARM_A15,
+ ARCH_ARM_NEONINTR,
+ ARCH_X86_GENERIC,
+ ARCH_X86_SSSE3,
+ ARCH_X86_SSE42,
+ ARCH_ARM_A53,
+ ARCH_ARM_A57,
+ ARCH_ARM_V8_NEON
+}IV_ARCH_T;
+
+/** SOC Enumeration */
+typedef enum
+{
+ SOC_NA = 0x7FFFFFFF,
+ SOC_GENERIC = 0x0,
+ SOC_HISI_37X
+}IV_SOC_T;
+
+
+/** API command type */
+typedef enum {
+ IV_CMD_NA = 0x7FFFFFFF,
+ IV_CMD_GET_NUM_MEM_REC = 0x0,
+ IV_CMD_FILL_NUM_MEM_REC = 0x1,
+ IV_CMD_RETRIEVE_MEMREC = 0x2,
+ IV_CMD_INIT = 0x3,
+ /* Do not add anything after the following entry */
+ IV_CMD_EXTENSIONS = 0x100
+}IV_API_COMMAND_TYPE_T;
+
+/*****************************************************************************/
+/* Structure Definitions */
+/*****************************************************************************/
+
+/** This structure defines the handle for the codec instance */
+
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Pointer to the API function pointer table of the codec */
+ void *pv_fxns;
+ /** Pointer to the handle of the codec */
+ void *pv_codec_handle;
+}iv_obj_t;
+
+/** This structure defines the memory record holder which will
+ * be used by the codec to communicate its memory requirements to the
+ * application through appropriate API functions */
+
+typedef struct {
+ /** Size of this structure */
+ UWORD32 u4_size;
+ /** Pointer to the memory allocated by the application */
+ void *pv_base;
+ /** Size of the memory to be allocated (presumably in bytes - confirm) */
+ UWORD32 u4_mem_size;
+ /** Alignment of the memory pointer */
+ UWORD32 u4_mem_alignment;
+ /** Type of the memory to be allocated (one of IV_MEM_TYPE_T) */
+ IV_MEM_TYPE_T e_mem_type;
+}iv_mem_rec_t;
+
+/** This structure defines attributes for the raw buffer */
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Color format */
+ IV_COLOR_FORMAT_T e_color_fmt;
+
+ /** Pointer to each component */
+ void *apv_bufs[IV_MAX_RAW_COMPONENTS];
+
+ /** Width of each component */
+ UWORD32 au4_wd[IV_MAX_RAW_COMPONENTS];
+
+ /** Height of each component */
+ UWORD32 au4_ht[IV_MAX_RAW_COMPONENTS];
+
+ /** Stride of each component */
+ UWORD32 au4_strd[IV_MAX_RAW_COMPONENTS];
+
+}iv_raw_buf_t;
+
+/** This structure defines attributes for the bitstream buffer */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Pointer to buffer */
+ void *pv_buf;
+
+ /** Number of valid bytes in the buffer */
+ UWORD32 u4_bytes;
+
+ /** Allocated size of the buffer */
+ UWORD32 u4_bufsize;
+
+}iv_bits_buf_t;
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Get number of memory records */
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+}iv_num_mem_rec_ip_t;
+
+/** Output structure : Get number of memory records */
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Number of memory records that will be used by the codec */
+ UWORD32 u4_num_mem_rec;
+}iv_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Fill memory records */
+
+typedef struct {
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** Number of memory records */
+ UWORD32 u4_num_mem_rec;
+
+ /** Pointer to an array of memory record structures that the codec
+ fills with details of its memory resource requirements */
+ iv_mem_rec_t *ps_mem_rec;
+
+ /** Maximum width for which the codec should report memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** Maximum height for which the codec should report memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Color format that codec supports for input/output */
+ IV_COLOR_FORMAT_T e_color_format;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+}iv_fill_mem_rec_ip_t;
+
+
+/** Output structure : Fill memory records */
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** no of memory record structures which are filled by codec */
+ UWORD32 u4_num_mem_rec;
+}iv_fill_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Retrieve memory records */
+
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** array of structures where codec should fill with all memory requested earlier */
+ iv_mem_rec_t *ps_mem_rec;
+}iv_retrieve_mem_rec_ip_t;
+
+/** Output structure : Retrieve memory records */
+typedef struct{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Number of memory record structures which are filled by codec */
+ UWORD32 u4_num_mem_rec_filled;
+}iv_retrieve_mem_rec_op_t;
+
+#endif /* _IV2_H_ */
+
diff --git a/encoder/ive2.h b/encoder/ive2.h
new file mode 100755
index 0000000..8cb0fd1
--- /dev/null
+++ b/encoder/ive2.h
@@ -0,0 +1,1445 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ive2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface (API) of the
+* Ittiam Video Encoders. This is version 2.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IVE2_H_
+#define _IVE2_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/** Maximum number of components in I/O Buffers */
+#define IVE_MAX_IO_BUFFER_COMPONENTS 4
+
+/** Maximum number of reference pictures */
+#define IVE_MAX_REF 16
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/** Slice modes */
+typedef enum
+{
+ IVE_SLICE_MODE_NA = 0x7FFFFFFF,
+ IVE_SLICE_MODE_NONE = 0x0,
+
+ IVE_SLICE_MODE_BYTES = 0x1,
+ IVE_SLICE_MODE_BLOCKS = 0x2,
+}IVE_SLICE_MODE_T;
+
+/** Adaptive Intra refresh modes */
+typedef enum
+{
+ IVE_AIR_MODE_NA = 0x7FFFFFFF,
+ IVE_AIR_MODE_NONE = 0x0,
+ IVE_AIR_MODE_CYCLIC = 0x1,
+ IVE_AIR_MODE_RANDOM = 0x2,
+ IVE_AIR_MODE_DISTORTION = 0x3,
+}IVE_AIR_MODE_T;
+
+/** Rate control modes */
+typedef enum
+{
+ IVE_RC_NA = 0x7FFFFFFF,
+ IVE_RC_NONE = 0x0,
+ IVE_RC_STORAGE = 0x1,
+ IVE_RC_CBR_NON_LOW_DELAY = 0x2,
+ IVE_RC_CBR_LOW_DELAY = 0x3,
+ IVE_RC_TWOPASS = 0x4,
+ IVE_RC_RATECONTROLPRESET_DEFAULT = IVE_RC_STORAGE
+}IVE_RC_MODE_T;
+
+/** Encoder mode */
+typedef enum
+{
+ IVE_ENC_MODE_NA = 0x7FFFFFFF,
+ IVE_ENC_MODE_HEADER = 0x1,
+ IVE_ENC_MODE_PICTURE = 0x0,
+ IVE_ENC_MODE_DEFAULT = IVE_ENC_MODE_PICTURE,
+}IVE_ENC_MODE_T;
+
+/** Speed Config */
+typedef enum IVE_SPEED_CONFIG
+{
+ IVE_QUALITY_DUMMY = 0x7FFFFFFF,
+ IVE_CONFIG = 0,
+ IVE_SLOWEST = 1,
+ IVE_NORMAL = 2,
+ IVE_FAST = 3,
+ IVE_HIGH_SPEED = 4,
+ IVE_FASTEST = 5,
+}IVE_SPEED_CONFIG;
+
+/** API command type */
+typedef enum
+{
+ IVE_CMD_VIDEO_NA = 0x7FFFFFFF,
+ IVE_CMD_VIDEO_CTL = IV_CMD_EXTENSIONS + 1,
+ IVE_CMD_VIDEO_ENCODE,
+ IVE_CMD_QUEUE_INPUT,
+ IVE_CMD_DEQUEUE_INPUT,
+ IVE_CMD_QUEUE_OUTPUT,
+ IVE_CMD_DEQUEUE_OUTPUT,
+ IVE_CMD_GET_RECON,
+}IVE_API_COMMAND_TYPE_T;
+
+/** Video Control API command type */
+typedef enum
+{
+ IVE_CMD_CT_NA = 0x7FFFFFFF,
+ IVE_CMD_CTL_SETDEFAULT = 0x0,
+ IVE_CMD_CTL_SET_DIMENSIONS = 0x1,
+ IVE_CMD_CTL_SET_FRAMERATE = 0x2,
+ IVE_CMD_CTL_SET_BITRATE = 0x3,
+ IVE_CMD_CTL_SET_FRAMETYPE = 0x4,
+ IVE_CMD_CTL_SET_QP = 0x5,
+ IVE_CMD_CTL_SET_ENC_MODE = 0x6,
+ IVE_CMD_CTL_SET_VBV_PARAMS = 0x7,
+ IVE_CMD_CTL_SET_AIR_PARAMS = 0x8,
+ IVE_CMD_CTL_SET_ME_PARAMS = 0X9,
+ IVE_CMD_CTL_SET_GOP_PARAMS = 0XA,
+ IVE_CMD_CTL_SET_PROFILE_PARAMS = 0XB,
+ IVE_CMD_CTL_SET_DEBLOCK_PARAMS = 0XC,
+ IVE_CMD_CTL_SET_IPE_PARAMS = 0XD,
+ IVE_CMD_CTL_SET_NUM_CORES = 0x30,
+ IVE_CMD_CTL_RESET = 0xA0,
+ IVE_CMD_CTL_FLUSH = 0xB0,
+ IVE_CMD_CTL_GETBUFINFO = 0xC0,
+ IVE_CMD_CTL_GETVERSION = 0xC1,
+ IVE_CMD_CTL_CODEC_SUBCMD_START = 0x100,
+}IVE_CONTROL_API_COMMAND_TYPE_T;
+
+/* IVE_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/
+/* code to the application. The first 8 bits starting from LSB have been */
+/* reserved for the codec to report internal error details. The rest of the */
+/* bits will be generic for all video encoders and each bit has an associated*/
+/* meaning as mentioned below. The unused bit fields are reserved for future */
+/* extensions and will be zero in the current implementation */
+typedef enum {
+ /* NOTE: the enumerator values below are bit positions (8, 9), not masks. */
+ /* Bit 8 - Unsupported input parameter or configuration. */
+ IVE_UNSUPPORTEDPARAM = 0x8,
+
+ /* Bit 9 - Fatal error (stop the codec). If there is an */
+ /* error and this bit is not set, the error is a recoverable one. */
+ IVE_FATALERROR = 0x9,
+ /* Dummy element; same 0x7FFFFFFF sentinel value as the NA entries of the other enums here */
+ IVE_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF
+}IVE_ERROR_BITS_T;
+
+/* IVE_ERROR_CODES_T: The list of error codes depicting the possible error */
+/* scenarios that can be encountered while encoding */
+typedef enum
+{
+
+ IVE_ERR_NA = 0x7FFFFFFF,
+ IVE_ERR_NONE = 0x00,
+ IVE_ERR_INVALID_API_CMD = 0x01,
+ IVE_ERR_INVALID_API_SUB_CMD = 0x02,
+ IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x03,
+ IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x04,
+ IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x05,
+ IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x06,
+ IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT = 0x07,
+ IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT = 0x08,
+ IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x09,
+ IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x0A,
+ IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0B,
+ IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0C,
+ IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0D,
+ IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0E,
+ IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x0F,
+ IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x10,
+ IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x11,
+ IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x12,
+ IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x13,
+ IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x14,
+ IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x15,
+ IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x16,
+ IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x17,
+ IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x18,
+ IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x19,
+ IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x1A,
+ IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1B,
+ IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1C,
+ IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1D,
+ IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1E,
+ IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x1F,
+ IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x20,
+ IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x21,
+ IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x22,
+ IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x23,
+ IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x24,
+ IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x25,
+ IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x26,
+ IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x27,
+ IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x28,
+ IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x29,
+ IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x2A,
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL = 0x2B,
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT = 0x2C,
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT = 0x2D,
+ IVE_ERR_MEM_REC_BASE_POINTER_NULL = 0x2E,
+ IVE_ERR_MEM_REC_OVERLAP_ERR = 0x2F,
+ IVE_ERR_MEM_REC_INSUFFICIENT_SIZE = 0x30,
+ IVE_ERR_MEM_REC_ALIGNMENT_ERR = 0x31,
+ IVE_ERR_MEM_REC_INCORRECT_TYPE = 0x32,
+ IVE_ERR_HANDLE_NULL = 0x33,
+ IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT = 0x34,
+ IVE_ERR_API_FUNCTION_PTR_NULL = 0x35,
+ IVE_ERR_INVALID_CODEC_HANDLE = 0x36,
+ IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL = 0x37,
+ IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x38,
+ IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x39,
+ IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3A,
+ IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3B,
+ IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3C,
+ IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3D,
+ IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3E,
+ IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3F,
+
+}IVE_ERROR_CODES_T;
+
+
+/*****************************************************************************/
+/* Initialize encoder */
+/*****************************************************************************/
+
+/** Input structure : Initialize the encoder */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Command type (presumably IV_CMD_INIT - confirm against the API entry point) */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** Number of memory records */
+ UWORD32 u4_num_mem_rec;
+
+ /** Pointer to the array of memory record structures. NOTE(review): presumably the
+ records filled by the codec earlier and since allocated by the application - confirm */
+ iv_mem_rec_t *ps_mem_rec;
+
+ /** Maximum width for which codec should request memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** Maximum height for which codec should request memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+ /** Flag to enable/disable recon - To be used only for debugging/testing */
+ UWORD32 u4_enable_recon;
+
+ /** Recon color format */
+ IV_COLOR_FORMAT_T e_recon_color_fmt;
+
+ /** Rate control mode */
+ IVE_RC_MODE_T e_rc_mode;
+
+ /** Maximum frame rate to be supported */
+ UWORD32 u4_max_framerate;
+
+ /** Maximum bitrate to be supported */
+ UWORD32 u4_max_bitrate;
+
+ /** Maximum number of consecutive B frames */
+ UWORD32 u4_max_num_bframes;
+
+ /** Content type Interlaced/Progressive */
+ IV_CONTENT_TYPE_T e_content_type;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+ /** Slice mode (one of IVE_SLICE_MODE_T) */
+ IVE_SLICE_MODE_T e_slice_mode;
+
+ /** Slice parameter (NOTE(review): presumably interpreted according to e_slice_mode - confirm) */
+ UWORD32 u4_slice_param;
+
+ /** Processor architecture */
+ IV_ARCH_T e_arch;
+
+ /** SOC details */
+ IV_SOC_T e_soc;
+
+
+}ive_init_ip_t;
+
+/** Output structure : Initialize the encoder */
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_init_op_t;
+
+
+/*****************************************************************************/
+/* Video Encode - Deprecated */
+/*****************************************************************************/
+/** Input structure : Video encode (this section is marked deprecated) */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+ /** Command type */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for input raw buffer */
+ iv_raw_buf_t s_inp_buf;
+
+ /** NOTE(review): original comment duplicated the pv_pic_info text; purpose of pv_bufs is not evident here - confirm */
+ void *pv_bufs;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero (original text said mb_info_type; presumably a copy-paste slip) */
+ void *pv_pic_info;
+
+ /** Lower 32bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+}ive_video_encode_ip_t;
+
+/** Output structure : Video encode (this section is marked deprecated) */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Output present */
+ WORD32 output_present;
+
+ /** Dump recon */
+ WORD32 dump_recon;
+
+ /** Encoded frame type */
+ UWORD32 u4_encoded_frame_type;
+
+ /** Descriptor for input raw buffer freed from codec */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+}ive_video_encode_op_t;
+
+/*****************************************************************************/
+/* Queue Input raw buffer - Send the YUV buffer to be encoded */
+/*****************************************************************************/
+/** Input structure : Queue input buffer to the encoder */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_QUEUE_INPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for input raw buffer */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero (original text said mb_info_type; presumably a copy-paste slip) */
+ void *pv_pic_info;
+
+ /** Lower 32bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+}ive_queue_inp_ip_t;
+
+/** Output structure : Queue input buffer to the encoder */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_queue_inp_op_t;
+
+/*****************************************************************************/
+/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */
+/*****************************************************************************/
+/** Input structure : Dequeue input buffer from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command: IVE_CMD_DEQUEUE_INPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+}ive_dequeue_inp_ip_t;
+
+/** Output structure : Dequeue input buffer from the encoder */
+typedef struct
+{
+ /** Size of this structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Buffer descriptor of the buffer returned from encoder */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero (original text said mb_info_type; presumably a copy-paste slip) */
+ void *pv_pic_info;
+
+ /** Lower 32bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+
+}ive_dequeue_inp_op_t;
+
+/*****************************************************************************/
+/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */
+/*****************************************************************************/
+/** Input structure : Queue output buffer to the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_QUEUE_OUTPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Flag to indicate if this is the last output in the stream */
+ UWORD32 u4_is_last;
+
+}ive_queue_out_ip_t;
+
+/** Output structure : Queue output buffer to the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+}ive_queue_out_op_t;
+
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the filled bitstream buffer */
+/*****************************************************************************/
+/** Input structure : Dequeue output buffer from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_DEQUEUE_OUTPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+}ive_dequeue_out_ip_t;
+
+/** Output structure : Dequeue output buffer from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Lower 32bits of timestamp corresponding to this buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of timestamp corresponding to this buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last output in the stream */
+ UWORD32 u4_is_last;
+
+}ive_dequeue_out_op_t;
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder */
+/*****************************************************************************/
+/** Input structure : Get recon data from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_GET_RECON */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+ /** Flag to indicate if this is the last recon in the stream */
+ UWORD32 u4_is_last;
+
+}ive_get_recon_ip_t;
+
+/** Output structure : Get recon data from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Lower 32bits of time stamp corresponding to this buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to this buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last recon in the stream */
+ UWORD32 u4_is_last;
+
+}ive_get_recon_op_t;
+
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+
+/** Input structure : Flush all the buffers from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_FLUSH */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ive_ctl_flush_ip_t;
+
+/** Output structure : Flush all the buffers from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+/** Input structure : Reset the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_RESET */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ive_ctl_reset_ip_t;
+
+/** Output structure : Reset the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_reset_op_t;
+
+/*****************************************************************************/
+/* Video control:Get Buf Info */
+/*****************************************************************************/
+
+/** Input structure : Get encoder buffer requirements */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_GETBUFINFO */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** maximum width for which codec should request memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** maximum height for which codec should request memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+}ive_ctl_getbufinfo_ip_t;
+
+/** Output structure : Get encoder buffer requirements */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Minimum number of input buffers required for codec */
+ UWORD32 u4_min_inp_bufs;
+
+ /** Minimum number of output buffers required for codec */
+ UWORD32 u4_min_out_bufs;
+
+ /** Number of components in input buffers required for codec */
+ UWORD32 u4_inp_comp_cnt;
+
+ /** Number of components in output buffers required for codec */
+ UWORD32 u4_out_comp_cnt;
+
+ /** Minimum sizes of each component in input buffer required */
+ UWORD32 au4_min_in_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+ /** Minimum sizes of each component in output buffer required */
+ UWORD32 au4_min_out_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+}ive_ctl_getbufinfo_op_t;
+
+
+
+
+/*****************************************************************************/
+/* Video control:Get Version Info */
+/*****************************************************************************/
+
+/** Input structure : Get encoder version information */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_GETVERSION */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Buffer where version info will be returned */
+ UWORD8 *pu1_version;
+
+ /** Size of the buffer allocated for version info */
+ UWORD32 u4_version_bufsize;
+}ive_ctl_getversioninfo_ip_t;
+
+/** Output structure : Get encoder version information */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_getversioninfo_op_t;
+
+
+/*****************************************************************************/
+/* Video control:set default params */
+/*****************************************************************************/
+/** Input structure : Set default encoder parameters */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SETDEFAULT */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_setdefault_ip_t;
+
+/** Output structure : Set default encoder parameters */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame dimensions */
+/*****************************************************************************/
+
+/** Input structure : Set frame dimensions */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_DIMENSIONS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Input width */
+ UWORD32 u4_wd;
+
+ /** Input height */
+ UWORD32 u4_ht;
+
+ /** Input stride */
+ UWORD32 u4_strd;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_dimensions_ip_t;
+
+/** Output structure : Set frame dimensions */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_dimensions_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Frame rates */
+/*****************************************************************************/
+
+/** Input structure : Set frame rate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_FRAMERATE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Source frame rate */
+ UWORD32 u4_src_frame_rate;
+
+ /** Target frame rate */
+ UWORD32 u4_tgt_frame_rate;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_frame_rate_ip_t;
+
+/** Output structure : Set frame rate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_frame_rate_op_t;
+
+/*****************************************************************************/
+/* Video control Set Bitrate */
+/*****************************************************************************/
+
+/** Input structure : Set bitrate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_BITRATE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Target bitrate in kilobits per second */
+ UWORD32 u4_target_bitrate;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_bitrate_ip_t;
+
+/** Output structure : Set bitrate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_bitrate_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame type */
+/*****************************************************************************/
+
+/** Input structure : Set frametype */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_FRAMETYPE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Force current frame type */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_frame_type_ip_t;
+
+/** Output structure : Set frametype */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/* Video control Set Encode mode */
+/*****************************************************************************/
+
+/** Input structure : Set encode mode */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_ENC_MODE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Encoder mode */
+ IVE_ENC_MODE_T e_enc_mode;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_enc_mode_ip_t;
+
+/** Output structure : Set encode mode */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+}ive_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/* Video control Set QP */
+/*****************************************************************************/
+
+/** Input structure : Set QP */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_QP */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Set initial Qp for I pictures */
+ UWORD32 u4_i_qp;
+
+ /** Set initial Qp for P pictures */
+ UWORD32 u4_p_qp;
+
+ /** Set initial Qp for B pictures */
+ UWORD32 u4_b_qp;
+
+ /** Set minimum Qp for I pictures */
+ UWORD32 u4_i_qp_min;
+
+ /** Set maximum Qp for I pictures */
+ UWORD32 u4_i_qp_max;
+
+ /** Set minimum Qp for P pictures */
+ UWORD32 u4_p_qp_min;
+
+ /** Set maximum Qp for P pictures */
+ UWORD32 u4_p_qp_max;
+
+ /** Set minimum Qp for B pictures */
+ UWORD32 u4_b_qp_min;
+
+ /** Set maximum Qp for B pictures */
+ UWORD32 u4_b_qp_max;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_qp_ip_t;
+
+/** Output structure : Set QP */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/* Video control Set AIR params */
+/*****************************************************************************/
+
+/** Input structure : Set AIR params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_AIR_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Adaptive intra refresh mode */
+ IVE_AIR_MODE_T e_air_mode;
+
+ /** Adaptive intra refresh period in frames */
+ UWORD32 u4_air_refresh_period;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_air_params_ip_t;
+
+/** Output structure : Set AIR params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set VBV params */
+/*****************************************************************************/
+
+/** Input structure : Set VBV params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_VBV_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** VBV buffer delay */
+ UWORD32 u4_vbv_buffer_delay;
+
+ /** VBV buffer size */
+ UWORD32 u4_vbv_buf_size;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_vbv_params_ip_t;
+
+/** Output structure : Set VBV params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_vbv_params_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Processor Details */
+/*****************************************************************************/
+
+/** Input structure : Set processor details */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_NUM_CORES */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Total number of cores to be used */
+ UWORD32 u4_num_cores;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_num_cores_ip_t;
+
+/** Output structure : Set processor details */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/* Video control Set Intra Prediction estimation params */
+/*****************************************************************************/
+
+/** Input structure : Set IPE params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_IPE_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Flag to enable/disable intra 4x4 analysis */
+ UWORD32 u4_enable_intra_4x4;
+
+ /** Flag to enable/disable pre-enc stage of Intra Pred estimation */
+ UWORD32 u4_pre_enc_ipe;
+
+ /** Speed preset - Value between 0 (slowest) and 100 (fastest); NOTE(review): field is typed IVE_SPEED_CONFIG, confirm this range against that type */
+ IVE_SPEED_CONFIG u4_enc_speed_preset;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_ipe_params_ip_t;
+
+/** Output structure : Set IPE Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Motion estimation params */
+/*****************************************************************************/
+
+/** Input structure : Set ME Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_ME_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Flag to enable/disable pre-enc stage of Motion estimation */
+ UWORD32 u4_pre_enc_me;
+
+ /** Speed preset - Value between 0 (slowest) and 100 (fastest) */
+ UWORD32 u4_me_speed_preset;
+
+ /** Flag to enable/disable half pel motion estimation */
+ UWORD32 u4_enable_hpel;
+
+ /** Flag to enable/disable quarter pel motion estimation */
+ UWORD32 u4_enable_qpel;
+
+ /** Flag to enable/disable fast SAD approximation */
+ UWORD32 u4_enable_fast_sad;
+
+ /** Flag to enable/disable alternate reference frames */
+ UWORD32 u4_enable_alt_ref;
+
+ /** Maximum search range in X direction for farthest reference */
+ UWORD32 u4_srch_rng_x;
+
+ /** Maximum search range in Y direction for farthest reference */
+ UWORD32 u4_srch_rng_y;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_me_params_ip_t;
+
+/** Output structure : Set ME Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set GOP params */
+/*****************************************************************************/
+
+/** Input structure : Set GOP Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_GOP_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** I frame interval */
+ UWORD32 u4_i_frm_interval;
+
+ /** IDR frame interval */
+ UWORD32 u4_idr_frm_interval;
+
+ /** consecutive B frames */
+ UWORD32 u4_num_b_frames;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_gop_params_ip_t;
+
+/** Output structure : Set GOP params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Deblock params */
+/*****************************************************************************/
+
+/** Input structure : Set Deblock Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_DEBLOCK_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+ UWORD32 u4_disable_deblock_level;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_deblock_params_ip_t;
+
+/** Output structure : Set Deblock Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Profile params */
+/*****************************************************************************/
+
+/** Input structure : Set Profile Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_PROFILE_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Profile */
+ IV_PROFILE_T e_profile;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_profile_params_ip_t;
+
+/** Output structure : Set Profile Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_profile_params_op_t;
+
+
+#endif /* _IVE2_H_ */
+
diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c
new file mode 100755
index 0000000..58ec4d0
--- /dev/null
+++ b/encoder/mips/ih264e_function_selector.c
@@ -0,0 +1,110 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Set up the function pointers of the codec context by delegating
+* to the generic initializer
+*
+* @par Description: casts the opaque handle to codec_t and hands it to
+* ih264e_init_function_ptr_generic(); no other initializer is called here
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void *
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    /* the opaque handle is used as a codec_t; only the generic initializer runs */
+    ih264e_init_function_ptr_generic((codec_t *)pv_codec);
+}
+
+IV_ARCH_T ih264e_default_arch(void) /* default architecture reported by this file */
+{
+ return ARCH_NA; /* fixed constant; nothing is probed at run time */
+}
+
diff --git a/encoder/mips/ih264e_platform_macros.h b/encoder/mips/ih264e_platform_macros.h
new file mode 100755
index 0000000..ed1edd4
--- /dev/null
+++ b/encoder/mips/ih264e_platform_macros.h
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_platform_macros.h
+ *
+ * @brief
+ * Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+#define DATA_SYNC()
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks declaration only; the definition is not part of this header
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void *
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: These functions do nothing on x86 side (DATA_SYNC() is also defined empty in this header). But on arm platforms,
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+* 1. All explicit memory accesses before this instruction complete.
+* 2. All Cache, Branch predictor and TLB maintenance operations before
+* this instruction complete.
+*
+* @param[in] void
+*
+* @returns void
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/mips/ime_platform_macros.h b/encoder/mips/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/mips/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+
+#define USADA8(src,est,sad) /* accumulates ABS(src[i]-est[i]), i = 0..3, into sad */ \
+    ((sad) += ABS((src)[0]-(est)[0]) + \
+              ABS((src)[1]-(est)[1]) + \
+              ABS((src)[2]-(est)[2]) + \
+              ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c
new file mode 100755
index 0000000..429cdab
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector.c
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Set up the intra/inter/transform/deblk function pointers of the
+* codec context for the configured x86 architecture
+*
+* @par Description: the generic initializer always runs first; the value of
+* s_cfg.e_arch then selects which further initializers are called
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void * and used as codec_t *
+*
+* @returns none
+*
+* @remarks any e_arch value without its own branch is handled like SSE42
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+    ih264e_init_function_ptr_generic(ps_codec);
+    if(ARCH_X86_GENERIC == ps_codec->s_cfg.e_arch)
+    {
+        ih264e_init_function_ptr_generic(ps_codec);
+    }
+    else if(ARCH_X86_SSSE3 == ps_codec->s_cfg.e_arch)
+    {
+        ih264e_init_function_ptr_ssse3(ps_codec);
+    }
+    else
+    {
+        /* ARCH_X86_SSE42 and all remaining values: ssse3 first, then sse42 */
+        ih264e_init_function_ptr_ssse3(ps_codec);
+        ih264e_init_function_ptr_sse42(ps_codec);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Report the default architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture assumed for the
+* environment in which the current encoder is being run
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* always ARCH_X86_SSE42
+*
+* @remarks the value is a fixed constant; no run time detection is done here
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+ return ARCH_X86_SSE42;
+}
+
+
diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c
new file mode 100755
index 0000000..6fa6308
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_sse42.c
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_sse42.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_sse42
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSE4.2 variants of the transform and SAD function
+* pointers in the codec context
+*
+* @par Description: Assigns the _sse42 implementations to the forward and
+* inverse transform pointers and the SAD pointers of the codec context, and to
+* the motion estimation SAD pointers of each of the MAX_PROCESS_CTXT process
+* contexts. Function pointers that are not assigned here keep the value they
+* had on entry.
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks This routine only assigns function pointers; it produces no
+* console output, so it is safe to call from applications that use stdout for
+* their own data.
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec)
+{
+    WORD32 i;
+
+    /* Init luma forward transform fn ptr */
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_sse42;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_sse42;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_sse42;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_sse42;
+
+    /* Init inverse transform fn ptr */
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_sse42;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;
+
+    /* sad me level functions - codec context */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+
+    /* sad me level functions - me context of every process context */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        me_ctxt_t *ps_me_ctxt = &ps_codec->as_process[i].s_me_ctxt;
+
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_sse42;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_sse42;
+    }
+}
diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c
new file mode 100755
index 0000000..7401e53
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_ssse3.c
@@ -0,0 +1,190 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_ssse3.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSSE3 variants of the intra prediction, inverse
+* transform, deblocking, padding, inter prediction, memory and encoder level
+* function pointers in the codec context
+*
+* @par Description: Assigns the _ssse3 implementations to the corresponding
+* function pointers of the codec context. Function pointers that are not
+* assigned here keep the value they had on entry; in particular
+* apf_intra_pred_8_l[1] and apf_intra_pred_c[0] are not assigned by this
+* routine.
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks This routine only assigns function pointers; it produces no
+* console output, so it is safe to call from applications that use stdout for
+* their own data.
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec)
+{
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 (index 1 is not assigned here) */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 (index 0 is not assigned here) */
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
+
+    /* Init inverse transform fn ptr */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_ssse3;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3;
+
+    /* Inter pred leaf level functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_ssse3;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_ssse3;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_ssse3;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3;
+
+    /* memory handling operations */
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_ssse3;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_ssse3;
+
+    /* intra mode eval - encoder level function */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_ssse3;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_ssse3;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_ssse3;
+
+    /* Half pel generation function - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_ssse3;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_ssse3;
+}
diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c
new file mode 100755
index 0000000..42580fa
--- /dev/null
+++ b/encoder/x86/ih264e_half_pel_ssse3.c
@@ -0,0 +1,487 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_half_pel_ssse3.c
+ *
+ * @brief
+ * Contains the x86 intrinsic function definitions for the 6-tap horizontal
+ * filter and the cascaded 2D filter used in motion estimation in H264 encoder.
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * ih264e_sixtapfilter_horz_ssse3
+ * ih264e_sixtap_filter_2dvh_vert_ssse3
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+*******************************************************************************
+*
+* @brief
+* Interprediction luma filter for horizontal input (filter run for width = 17
+* and height = 16)
+*
+* @par Description:
+* Applies the 6 tap horizontal filter (1, -5, 20, 20, -5, 1), adds the rounding
+* offset 16, shifts right by 5 and clips the output to 8 bits. See sec.
+* 8.4.2.2.1 titled "Luma sample interpolation process".
+* For each of the 16 rows, outputs 0..15 are computed with SSSE3 intrinsics
+* and output 16 is computed in scalar code. Every row reads the source bytes
+* pu1_src[-2] to pu1_src[21], so those bytes must be readable.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination (17 bytes are written per row)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @returns
+* None
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz_ssse3(UWORD8 *pu1_src,
+                                    UWORD8 *pu1_dst,
+                                    WORD32 src_strd,
+                                    WORD32 dst_strd)
+{
+    WORD32 ht;
+    WORD32 tmp;
+
+    __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+    __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+    __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+    __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+    __m128i const_val16_8x16b;
+
+    ht = 16;
+    pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+    const_val16_8x16b = _mm_set1_epi16(16);
+
+    //Each row is processed as two overlapping 16 byte loads:
+    //first load  : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+    //second load : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+    //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+    do
+    {
+        src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+        src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+        res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+                                                                              //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+        res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+                                                                              //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+        src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+        src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+        res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+                                                                              //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+        res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+                                                                              //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+        src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+        src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+        res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+                                                                              //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+        res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+                                                                              //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+        res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+        res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+        res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
+        res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
+        res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+        res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+        /* 17th output of the row, computed in scalar code. The taps are
+         * applied with multiplications instead of shifts because the partial
+         * sum can be negative and left shifting a negative value is
+         * undefined behaviour in C */
+        tmp = pu1_src[16] + pu1_src[21]
+                        + 20 * (pu1_src[18] + pu1_src[19])
+                        - 5 * (pu1_src[17] + pu1_src[20]);
+
+        res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+        res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
+        tmp = (tmp + 16) >> 5;
+
+        src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); //saturate to 8 bits, outputs 0..15
+        pu1_dst[16] = CLIP_U8(tmp);
+
+        _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b);
+
+        ht--;
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+    while(ht > 0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It
+* applies the six tap filter in the vertical direction on the
+* predictor values, followed by applying the same filter in the
+* horizontal direction on the output of the first stage. The six tap
+* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+* interpolation process" (Filter run for width = 17 and height = 17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction
+* and then in the horizontal direction to output the (1/2,1/2). The output
+* of the first stage of the filter (22 values per row, 17 rows) is stored in
+* 16 bit precision in the buffer pointed to by pi4_pred1, which is accessed
+* through a WORD16 pointer. The first stage reads 22 source rows of 22 bytes
+* each, starting two rows above and two columns to the left of pu1_src.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+* UWORD8 pointer to the destination (vertical filtered output, 17 bytes per
+* row)
+*
+* @param[out] pu1_dst2
+* UWORD8 pointer to the destination (output after applying horizontal filter
+* to the intermediate vertical output, 17 bytes per row)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride, used for both pu1_dst1 and pu1_dst2
+*
+* @param[in] pi4_pred1
+* Pointer to the intermediate buffer that receives the 16 bit first stage
+* output
+*
+* @param[in] pred1_strd
+* integer stride of pi4_pred1. It is doubled internally before being applied
+* to the WORD16 view of the buffer (presumably because the caller expresses it
+* in WORD32 units - confirm against the caller)
+*
+* @returns
+* None
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert_ssse3(UWORD8 *pu1_src,
+                                          UWORD8 *pu1_dst1,
+                                          UWORD8 *pu1_dst2,
+                                          WORD32 src_strd,
+                                          WORD32 dst_strd,
+                                          WORD32 *pi4_pred1,
+                                          WORD32 pred1_strd)
+{
+    WORD32 ht;
+    WORD16 *pi2_pred1;
+
+    ht = 17;
+    pi2_pred1 = (WORD16 *)pi4_pred1;
+    pred1_strd = pred1_strd << 1; // stride converted for the WORD16 view of the buffer
+
+    // Vertical 6-tap filter
+    {
+        __m128i src1_r0_16x8b, src1_r1_16x8b, src1_r2_16x8b;
+        __m128i src1_r3_16x8b, src1_r4_16x8b, src1_r5_16x8b;
+        __m128i src2_r0_16x8b, src2_r1_16x8b, src2_r2_16x8b;
+        __m128i src2_r3_16x8b, src2_r4_16x8b, src2_r5_16x8b;
+
+        __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+        __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+        __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+        coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+        coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+        coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+                                                     //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+
+        pu1_src -= 2;
+        pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
+
+        // Loading first five rows to start first row processing.
+        // 22 values loaded in each row: 16 bytes at offset 0 and 8 bytes at
+        // offset 14 (the two loads overlap at columns 14 and 15).
+        src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        do
+        {
+            src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+            src2_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+
+            // Columns 0..7
+            src_r0r1_16x8b = _mm_unpacklo_epi8(src1_r0_16x8b, src1_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpacklo_epi8(src1_r2_16x8b, src1_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpacklo_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)pi2_pred1, res_t1_8x16b);
+
+            // Columns 8..15
+            src_r0r1_16x8b = _mm_unpackhi_epi8(src1_r0_16x8b, src1_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpackhi_epi8(src1_r2_16x8b, src1_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpackhi_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)(pi2_pred1 + 8), res_t1_8x16b);
+
+            // Columns 14..21 (columns 14 and 15 are recomputed and rewritten)
+            src_r0r1_16x8b = _mm_unpacklo_epi8(src2_r0_16x8b, src2_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpacklo_epi8(src2_r2_16x8b, src2_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpacklo_epi8(src2_r4_16x8b, src2_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)(pi2_pred1 + 14), res_t1_8x16b);
+
+            // Slide the six row window down by one row
+            src1_r0_16x8b = src1_r1_16x8b;
+            src1_r1_16x8b = src1_r2_16x8b;
+            src1_r2_16x8b = src1_r3_16x8b;
+            src1_r3_16x8b = src1_r4_16x8b;
+            src1_r4_16x8b = src1_r5_16x8b;
+
+            src2_r0_16x8b = src2_r1_16x8b;
+            src2_r1_16x8b = src2_r2_16x8b;
+            src2_r2_16x8b = src2_r3_16x8b;
+            src2_r3_16x8b = src2_r4_16x8b;
+            src2_r4_16x8b = src2_r5_16x8b;
+
+            ht--;
+            pu1_src += src_strd;
+            pi2_pred1 += pred1_strd;
+        }
+        while(ht > 0);
+    }
+
+    ht = 17;
+    pi2_pred1 = (WORD16 *)pi4_pred1;
+
+    // Horizontal 6-tap filter
+    {
+        WORD32 temp;
+
+        __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+        __m128i src_r4_8x16b, src_r5_8x16b;
+        __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+        __m128i res_vert1_8x16b, res_vert2_8x16b, res_16x8b;
+
+        __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+        __m128i res_c0_8x16b, res_c1_8x16b;
+
+        __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+        __m128i const_val512_4x32b, const_val16_8x16b;
+
+        coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); //c0 c1 c0 c1 c0 c1 c0 c1
+        coeff2_3_8x16b = _mm_set1_epi32(0x00140014); //c2 c3 c2 c3 c2 c3 c2 c3
+        coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); //c4 c5 c4 c5 c4 c5 c4 c5
+                                                     //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+        const_val512_4x32b = _mm_set1_epi32(512);
+        const_val16_8x16b = _mm_set1_epi16(16);
+
+        do
+        {
+            // Outputs 0..7
+            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1));
+            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 1));
+            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 2));
+            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 3));
+            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 4));
+            src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 5));
+
+            // Vertical-only output: first stage value at column offset 2
+            res_vert1_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b);
+            res_vert1_8x16b = _mm_srai_epi16(res_vert1_8x16b, 5); //shifting right by 5 bits.
+
+            src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+
+            // Outputs 8..15
+            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8));
+            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 1));
+            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 2));
+            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 3));
+            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 4));
+            src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 5));
+
+            res_vert2_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b);
+            res_vert2_8x16b = _mm_srai_epi16(res_vert2_8x16b, 5); //shifting right by 5 bits.
+
+            src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b ,10);
+
+            src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+
+            // Vertical-only output: 16 values from the vector path, 17th in scalar
+            res_16x8b = _mm_packus_epi16(res_vert1_8x16b, res_vert2_8x16b);
+            _mm_storeu_si128((__m128i *)pu1_dst1, res_16x8b);
+            pu1_dst1[16] = CLIP_U8((pi2_pred1[18] + 16) >> 5);
+
+            // 2D output: 16 values from the vector path, 17th in scalar
+            res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);
+            _mm_storeu_si128((__m128i *)pu1_dst2, res_16x8b);
+
+            /* The taps are applied with multiplications instead of shifts
+             * because the first stage values can be negative and left
+             * shifting a negative value is undefined behaviour in C */
+            temp = pi2_pred1[16] + pi2_pred1[21]
+                            + 20 * (pi2_pred1[18] + pi2_pred1[19])
+                            - 5 * (pi2_pred1[17] + pi2_pred1[20]);
+            pu1_dst2[16] = CLIP_U8((temp + 512) >> 10);
+
+            ht--;
+            pi2_pred1 += pred1_strd;
+            pu1_dst1 += dst_strd;
+            pu1_dst2 += dst_strd;
+        }
+        while(ht > 0);
+    }
+}
diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
new file mode 100755
index 0000000..657921f
--- /dev/null
+++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
@@ -0,0 +1,1259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval_ssse3.c
+*
+* @brief
+* This file contains definitions of routines that perform rate distortion
+* analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* ih264e_evaluate_intra16x16_modes_ssse3
+* ih264e_evaluate_intra_4x4_modes_ssse3
+* ih264e_evaluate_intra_chroma_modes_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <immintrin.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief
+*  SAD between a 16x16 source block and a prediction whose 16 rows are all
+*  equal to pred_16x8b (shared by the VERT and DC candidates).
+*
+* @remarks
+*  _mm_sad_epu8 produces one partial sum per 8-byte half. Over 16 rows each
+*  half is at most 16 * 8 * 255 = 32640, so both partial sums fit in 16 bits
+*  and can be read back with 16-bit extracts.
+*
+******************************************************************************
+*/
+static WORD32 ih264e_sad_16x16_same_rows_ssse3(UWORD8 *pu1_src,
+                                               WORD32 src_strd,
+                                               __m128i pred_16x8b)
+{
+    __m128i sad_2x64b = _mm_setzero_si128();
+    WORD32 row;
+
+    for(row = 0; row < 16; row++)
+    {
+        __m128i src_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+
+        sad_2x64b = _mm_add_epi32(sad_2x64b,
+                                  _mm_sad_epu8(pred_16x8b, src_16x8b));
+        pu1_src += src_strd;
+    }
+
+    return _mm_extract_epi16(sad_2x64b, 0) + _mm_extract_epi16(sad_2x64b, 4);
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  SAD between a 16x16 source block and the HORZ prediction, where row r is
+*  filled with pu1_left[15 - r] (the left neighbours are read in reverse
+*  index order, exactly as the prediction store does).
+*
+******************************************************************************
+*/
+static WORD32 ih264e_sad_16x16_horz_ssse3(UWORD8 *pu1_src,
+                                          WORD32 src_strd,
+                                          UWORD8 *pu1_left)
+{
+    __m128i sad_2x64b = _mm_setzero_si128();
+    WORD32 row;
+
+    for(row = 0; row < 16; row++)
+    {
+        __m128i pred_16x8b = _mm_set1_epi8((char)pu1_left[15 - row]);
+        __m128i src_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+
+        sad_2x64b = _mm_add_epi32(sad_2x64b,
+                                  _mm_sad_epu8(pred_16x8b, src_16x8b));
+        pu1_src += src_strd;
+    }
+
+    return _mm_extract_epi16(sad_2x64b, 0) + _mm_extract_epi16(sad_2x64b, 4);
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate the VERT, HORZ and DC intra 16x16 modes and, when the best of
+*  them beats the caller's current minimum, write its prediction.
+*
+* @par Description
+*  The SAD of each enabled and available mode is computed against the source
+*  block. If the smallest SAD is strictly less than *pu4_sadmin, then
+*  *pu4_sadmin and *u4_intra_mode are updated and the 16x16 prediction of
+*  that mode is written to pu1_dst. Otherwise nothing is written. Ties are
+*  resolved in the order VERT, HORZ, DC.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source block (16 rows of 16 bytes are read)
+*
+* @param[in] pu1_ngbr_pels_i16
+*  UWORD8 pointer to neighbouring pels. Bytes 0..15 are read as the left
+*  column (byte 15 predicts row 0, byte 0 predicts row 15) and bytes 17..32
+*  as the top row.
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination (written only when a better mode is
+*  found)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] n_avblty
+*  availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK
+*  and TOP_MB_AVAILABLE_MASK)
+*
+* @param[out] u4_intra_mode
+*  pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+*  pointer to the current minimum SAD; lowered when a better mode is found
+*
+* @param[in] u4_valid_intra_modes
+*  bit mask of enabled modes: bit 0 VERT, bit 1 HORZ, bit 2 DC
+*
+* @return
+*  None
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_ssse3(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_ngbr_pels_i16,
+                                            UWORD8 *pu1_dst,
+                                            UWORD32 src_strd,
+                                            UWORD32 dst_strd,
+                                            WORD32 n_avblty,
+                                            UWORD32 *u4_intra_mode,
+                                            WORD32 *pu4_sadmin,
+                                            UWORD32 u4_valid_intra_modes)
+{
+    UWORD8 *pu1_left = pu1_ngbr_pels_i16;
+    UWORD8 *pu1_top = pu1_ngbr_pels_i16 + 17;
+
+    WORD32 left, top, horz_flag, vert_flag, dc_flag;
+    WORD32 sad_vert = INT_MAX, sad_horz = INT_MAX, sad_dc = INT_MAX;
+    WORD32 min_sad, dcval, row;
+
+    __m128i zero_vector = _mm_setzero_si128();
+    __m128i nbr_16x8b, sum_2x64b, pred_16x8b;
+
+    /* The DC shift below relies on both values being 0 or 1;              */
+    /* NOTE(review): assumes the masks are 1 and 4 - confirm in the header */
+    left = (n_avblty & LEFT_MB_AVAILABLE_MASK);
+    top = (n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    /* A mode is tried only if it is enabled and its neighbours exist */
+    horz_flag = left && ((u4_valid_intra_modes & 02) != 0);
+    vert_flag = top && ((u4_valid_intra_modes & 01) != 0);
+    dc_flag = (u4_valid_intra_modes & 04) != 0;
+
+    if(horz_flag)
+    {
+        sad_horz = ih264e_sad_16x16_horz_ssse3(pu1_src, (WORD32)src_strd,
+                                               pu1_left);
+    }
+
+    if(vert_flag)
+    {
+        pred_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+        sad_vert = ih264e_sad_16x16_same_rows_ssse3(pu1_src, (WORD32)src_strd,
+                                                    pred_16x8b);
+    }
+
+    /* DC value: rounded mean of the available neighbours, 128 if none */
+    dcval = 0;
+    if(left)
+    {
+        nbr_16x8b = _mm_loadu_si128((__m128i *)pu1_left);
+        sum_2x64b = _mm_sad_epu8(nbr_16x8b, zero_vector);
+        dcval += 8;
+        dcval += _mm_extract_epi16(sum_2x64b, 0);
+        dcval += _mm_extract_epi16(sum_2x64b, 4);
+    }
+    if(top)
+    {
+        nbr_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+        sum_2x64b = _mm_sad_epu8(nbr_16x8b, zero_vector);
+        dcval += 8;
+        dcval += _mm_extract_epi16(sum_2x64b, 0);
+        dcval += _mm_extract_epi16(sum_2x64b, 4);
+    }
+    dcval = dcval >> (3 + left + top);
+    dcval += ((left == 0) & (top == 0)) << 7;
+
+    if(dc_flag)
+    {
+        pred_16x8b = _mm_set1_epi8(dcval);
+        sad_dc = ih264e_sad_16x16_same_rows_ssse3(pu1_src, (WORD32)src_strd,
+                                                  pred_16x8b);
+    }
+
+    /* Keep the caller's result unless one of the three modes is better */
+    min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+    if(min_sad >= *pu4_sadmin)
+    {
+        return;
+    }
+    *pu4_sadmin = min_sad;
+
+    /* Write the prediction of the winning mode */
+    if(min_sad == sad_vert)
+    {
+        *u4_intra_mode = VERT_I16x16;
+        pred_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+        for(row = 0; row < 16; row++)
+        {
+            _mm_storeu_si128((__m128i *)pu1_dst, pred_16x8b);
+            pu1_dst += dst_strd;
+        }
+    }
+    else if(min_sad == sad_horz)
+    {
+        *u4_intra_mode = HORZ_I16x16;
+        for(row = 0; row < 16; row++)
+        {
+            pred_16x8b = _mm_set1_epi8((char)pu1_left[15 - row]);
+            _mm_storeu_si128((__m128i *)pu1_dst, pred_16x8b);
+            pu1_dst += dst_strd;
+        }
+    }
+    else
+    {
+        *u4_intra_mode = DC_I16x16;
+        pred_16x8b = _mm_set1_epi8(dcval);
+        for(row = 0; row < 16; row++)
+        {
+            _mm_storeu_si128((__m128i *)pu1_dst, pred_16x8b);
+            pu1_dst += dst_strd;
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate the enabled intra 4x4 modes and write the prediction of the
+*  cheapest one.
+*
+* @par Description
+*  For every mode enabled in u4_valid_intra_modes the 4x4 prediction is built
+*  from the neighbouring pels, its SAD against the source block is computed
+*  and a cost is derived as SAD + u4_lambda when the mode equals
+*  u4_predictd_mode, or SAD + 4 * u4_lambda otherwise. The minimum cost and
+*  the corresponding mode are returned and the prediction of that mode is
+*  stored to pu1_dst. Ties are resolved in favour of the lowest mode index.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source (4 rows; each row is fetched with an 8-byte
+*  load of which the low 4 bytes are used)
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels, laid out as l3 l2 l1 l0 tl t0 .. t7.
+*  A 16-byte load is issued from this address.
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination (4 rows of 4 bytes are written)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels (only consulted for the DC mode)
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum cost is returned (always
+*  overwritten)
+*
+* @param[in] u4_valid_intra_modes
+*  Bit mask of enabled modes; bit k enables the mode whose cost is kept in
+*  cost[k] (bit 0 VERT ... bit 8 HORZ_U)
+*
+* @param[in] u4_lambda
+*  Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+*  Predicted mode for cost computation
+*
+* @return none
+*
+* @remarks
+*  NOTE(review): if none of the enabled modes is evaluated, min_cost stays at
+*  INT_MAX, the VERT branch is selected and pred0_16x8b is stored without
+*  having been initialised - confirm callers always enable at least one mode.
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode)
+{
+ WORD32 left, top;
+ WORD32 sad[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+ WORD32 cost[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+ /* modes that are never evaluated keep INT_MAX and so cannot win unless
+  * nothing at all is evaluated */
+ WORD32 min_cost;
+ WORD32 lambda4 = u4_lambda << 2; /* penalty for a mode other than the predicted one */
+ WORD32 dst_strd2, dst_strd3;
+
+ __m128i left_top_16x8b, src_16x8b, pred0_16x8b, sad_8x16b;
+ __m128i pred1_16x8b, pred2_16x8b, pred3_16x8b, pred4_16x8b;
+ __m128i pred5_16x8b, pred6_16x8b, pred7_16x8b, pred8_16x8b;
+ __m128i shuffle_16x8b, zero_vector, mask_low_32b;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ // Load the 4x4 source block (packed row after row into one register) and the neighbouring pixels
+ {
+ __m128i row1_16x8b, row2_16x8b;
+
+ row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ left_top_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels); /* NOTE(review): reads 16 bytes - confirm the buffer is at least that large */
+
+ pu1_src += src_strd << 1;
+ src_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b); /* row0 row1 in the low 8 bytes */
+
+ row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ zero_vector = _mm_setzero_si128();
+
+ row1_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b);
+ src_16x8b = _mm_unpacklo_epi64(src_16x8b, row1_16x8b); /* row0 row1 row2 row3 */
+ }
+
+ /* Compute the SAD and cost of each enabled mode. _mm_sad_epu8 leaves one
+  * partial sum in 16-bit lane 0 and one in lane 4; their sum is the SAD. */
+ if(u4_valid_intra_modes & 1)/* VERT mode enabled */
+ {
+ pred0_16x8b = _mm_srli_si128(left_top_16x8b, 5); /* bring t0..t3 to the low 4 bytes */
+ pred0_16x8b = _mm_shuffle_epi32(pred0_16x8b, 0); /* replicate them into all four rows */
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred0_16x8b);
+
+ sad[VERT_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_I4x4] = sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 2)/* HORZ mode enabled */
+ {
+ shuffle_16x8b = _mm_setr_epi8(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0); /* row r is filled with neighbour byte 3 - r */
+ pred1_16x8b = _mm_shuffle_epi8(left_top_16x8b, shuffle_16x8b);
+
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred1_16x8b);
+
+ sad[HORZ_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_I4x4] = sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 4)/* DC mode enabled */
+ {
+ if(top + left)
+ {
+ WORD32 shft = 1, dcval = 0; /* shft ends at 2 for one neighbour set and 3 for both */
+
+ __m128i val_16x8b, temp_16x8b, temp_8x16b;
+
+ val_16x8b = _mm_setzero_si128();
+
+ if(top)
+ {
+ temp_16x8b = _mm_srli_si128(left_top_16x8b, 5);
+ val_16x8b = _mm_alignr_epi8(temp_16x8b, val_16x8b, 4); /* shift t0..t3 into the top 4 bytes */
+ shft ++;
+ dcval += 2;
+ }
+ if(left)
+ {
+ val_16x8b = _mm_alignr_epi8(left_top_16x8b, val_16x8b, 4); /* shift l3..l0 into the top 4 bytes */
+ shft++;
+ dcval += 2;
+ }
+
+ temp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector); /* SAD against zero sums the bytes */
+ dcval += _mm_extract_epi16(temp_8x16b, 4); /* all gathered neighbours sit in the upper 8 bytes */
+ dcval = dcval >> shft;
+ pred2_16x8b = _mm_set1_epi8(dcval);
+ }
+ else
+ pred2_16x8b = _mm_set1_epi8(128); /* no neighbours available: DC value is 128 */
+
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred2_16x8b);
+
+ sad[DC_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DC_I4x4] = sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes > 7)/* at least one mode other than VERT, HORZ and DC is enabled */
+ {
+ __m128i w11_16x8b, w121_16x8b;
+ __m128i temp1_16x8b, temp2_16x8b;
+
+ /* Apply the (a + 2b + c + 2) >> 2 filter (w121) and the rounded
+  * two-tap average (w11) to the neighbour values once; the directional
+  * modes below only pick bytes out of these two vectors */
+ {
+ __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b;
+ __m128i const_2_8x16b;
+
+ const_2_8x16b = _mm_set1_epi16(2);
+
+ temp1_8x16b = _mm_unpacklo_epi8(left_top_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2
+ temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); // 0 l3 l2 l1 l0 tl t0 t1
+ temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5); //l3 l3 l2 l1 l0 tl t0 t1
+
+ temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //l3+l3 l3+l2 l2+l1... t1+t2
+ temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); //l3+l3 l3+l3 l3+l2... t0+t1
+ temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5);
+ temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //4*l3 l3+2*l3+l2 l3+2*l2+l1... t0+2*t1+t2
+
+ temp1_8x16b = _mm_add_epi16(const_2_8x16b, temp1_8x16b); //4*l3+2 3*l3+l2+2 l3+2*l2+l1+2.. t0+2*t1+t2+2
+ temp1_8x16b = _mm_srli_epi16(temp1_8x16b, 2);
+
+ temp1_16x8b = _mm_srli_si128(left_top_16x8b, 1);
+ w11_16x8b = _mm_avg_epu8(left_top_16x8b, temp1_16x8b); /* byte i = rounded average of neighbours i and i + 1 */
+
+ temp2_16x8b = _mm_srli_si128(left_top_16x8b, 6);
+ temp2_8x16b = _mm_unpacklo_epi8(temp2_16x8b, zero_vector); //t1 t2 t3 t4 t5 t6 t7 0
+ temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2 t3 t4 t5 t6 t7 0 0
+ temp3_8x16b = _mm_shufflehi_epi16(temp3_8x16b, 0xd4); //t2 t3 t4 t5 t6 t7 t7 0
+
+ temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+t2 t2+t3... t6+t7 t7+t7 0
+ temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2+t3 t3+t4... t7+t7 0 0
+ temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+2*t2+t3 t2+2*t3+t4.. t6+2*t7+t7 t7+t7 0
+
+ temp2_8x16b = _mm_add_epi16(const_2_8x16b, temp2_8x16b); //t1+2*t2+t3+2 t2+2*t3+t4+2 t3+2*t4+t5+2... t6+2*t7+t7+2 t7+t7+2 2
+ temp2_8x16b = _mm_srli_epi16(temp2_8x16b, 2);
+
+ w121_16x8b = _mm_packus_epi16(temp1_8x16b, temp2_8x16b); /* bytes 0..7 from temp1 (left/top-left side), bytes 8..15 from temp2 (top side) */
+ }
+
+ if(u4_valid_intra_modes & 8)/* DIAG_DL */
+ {
+ shuffle_16x8b = _mm_setr_epi8( 7, 8, 9, 10,
+ 8, 9, 10, 11,
+ 9, 10, 11, 12,
+ 10, 11, 12, 13);
+ pred3_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred3_16x8b);
+
+ sad[DIAG_DL_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DIAG_DL_I4x4] = sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 16)/* DIAG_DR */
+ {
+ shuffle_16x8b = _mm_setr_epi8(5, 6, 7, 8,
+ 4, 5, 6, 7,
+ 3, 4, 5, 6,
+ 2, 3, 4, 5);
+ pred4_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred4_16x8b);
+
+ sad[DIAG_DR_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DIAG_DR_I4x4] = sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 32)/* VERT_R mode enabled */
+ {
+ temp1_16x8b = _mm_srli_si128(w121_16x8b, 1);
+ temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, w11_16x8b); /* bytes 0..7: w121 bytes 1..8, bytes 8..15: w11 bytes 0..7 */
+ shuffle_16x8b = _mm_setr_epi8(12, 13, 14, 15,
+ 4, 5, 6, 7,
+ 3, 12, 13, 14,
+ 2, 4, 5, 6);
+ pred5_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred5_16x8b);
+
+ sad[VERT_R_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_R_I4x4] = sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 64)/* HORZ_D mode enabled */
+ {
+ temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b); /* bytes 0..7: w121 bytes 0..7, bytes 8..15: w11 bytes 0..7 */
+ shuffle_16x8b = _mm_setr_epi8(11, 5, 6, 7,
+ 10, 4, 11, 5,
+ 9, 3, 10, 4,
+ 8, 2, 9, 3);
+ pred6_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred6_16x8b);
+
+ sad[HORZ_D_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_D_I4x4] = sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 128)/* VERT_L mode enabled */
+ {
+ temp1_16x8b = _mm_srli_si128(w121_16x8b, 5);
+ temp2_16x8b = _mm_srli_si128(w11_16x8b, 5);
+ temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, temp2_16x8b); /* bytes 0..7: w121 bytes 5..12, bytes 8..15: w11 bytes 5..12 */
+ shuffle_16x8b = _mm_setr_epi8(8, 9, 10, 11,
+ 2, 3, 4, 5,
+ 9, 10, 11, 12,
+ 3, 4, 5, 6);
+ pred7_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred7_16x8b);
+
+ sad[VERT_L_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_L_I4x4] = sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 256)/* HORZ_U mode enabled */
+ {
+ temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b); /* bytes 0..7: w121 bytes 0..7, bytes 8..15: w11 bytes 0..7 */
+ shuffle_16x8b = _mm_setr_epi8(10, 3, 9, 2,
+ 9, 2, 8, 1,
+ 8, 1, 0, 0,
+ 0, 0, 0, 0);
+ pred8_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred8_16x8b);
+
+ sad[HORZ_U_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_U_I4x4] = sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda: lambda4);
+ }
+
+ min_cost = MIN3(MIN3(cost[0], cost[1], cost[2]),
+ MIN3(cost[3], cost[4], cost[5]),
+ MIN3(cost[6], cost[7], cost[8]));
+ }
+ else
+ { /* Only the first three modes can have been evaluated */
+ min_cost = MIN3(cost[0], cost[1], cost[2]);
+ }
+
+ *pu4_sadmin = min_cost; /* unconditionally overwrites the caller's value */
+ /* Pick the first mode whose cost equals the minimum and move its
+  * prediction into pred0_16x8b, which is the register stored below */
+ if(min_cost == cost[0])
+ {
+ *u4_intra_mode = VERT_I4x4; /* pred0_16x8b already holds the VERT prediction */
+ }
+ else if(min_cost == cost[1])
+ {
+ *u4_intra_mode = HORZ_I4x4;
+ pred0_16x8b = pred1_16x8b;
+ }
+ else if(min_cost == cost[2])
+ {
+ *u4_intra_mode = DC_I4x4;
+ pred0_16x8b = pred2_16x8b;
+ }
+ else if(min_cost == cost[3])
+ {
+ *u4_intra_mode = DIAG_DL_I4x4;
+ pred0_16x8b = pred3_16x8b;
+ }
+ else if(min_cost == cost[4])
+ {
+ *u4_intra_mode = DIAG_DR_I4x4;
+ pred0_16x8b = pred4_16x8b;
+ }
+ else if(min_cost == cost[5])
+ {
+ *u4_intra_mode = VERT_R_I4x4;
+ pred0_16x8b = pred5_16x8b;
+ }
+ else if(min_cost == cost[6])
+ {
+ *u4_intra_mode = HORZ_D_I4x4;
+ pred0_16x8b = pred6_16x8b;
+ }
+ else if(min_cost == cost[7])
+ {
+ *u4_intra_mode = VERT_L_I4x4;
+ pred0_16x8b = pred7_16x8b;
+ }
+ else if(min_cost == cost[8])
+ {
+ *u4_intra_mode = HORZ_U_I4x4;
+ pred0_16x8b = pred8_16x8b;
+ }
+ /* Byte mask with only the low 4 bytes set, so each masked store below
+  * writes exactly one 4-pixel row */
+ mask_low_32b = _mm_set1_epi8(0xff);
+ mask_low_32b = _mm_srli_si128(mask_low_32b, 12);
+ /* Store row 0, then shift the next row into the low 4 bytes and repeat */
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)pu1_dst);
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+* This function evaluates first three intra chroma modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* pointer to the variable in which best mode is returned
+*
+* @param[in] pu4_sadmin
+* pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+* says what all modes are valid
+*
+* @return
+* none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra_chroma_modes_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes)
+{
+ WORD32 left, top;
+ WORD32 sad_vert = INT_MAX, sad_horz = INT_MAX, sad_dc = INT_MAX, min_sad;
+
+ __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b;
+ __m128i src5_16x8b, src6_16x8b, src7_16x8b, src8_16x8b;
+
+ __m128i top_16x8b, left_16x8b;
+ __m128i pred1_16x8b, pred2_16x8b;
+ __m128i tmp1_8x16b, tmp2_8x16b, sad_8x16b;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ //Loading source
+ {
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src6_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src7_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src8_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ }
+
+ if(left)
+ {
+ left_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels);
+
+ if(u4_valid_intra_modes & 02) //If HORZ mode is valid
+ {
+ __m128i left_tmp_16x8b, left_sh_16x8b;
+ __m128i const_14_15_16x8b;
+
+ const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+ left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 3
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 5
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 7
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_horz = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ if(top)
+ {
+ UWORD8 *pu1_top;
+
+ pu1_top = pu1_ngbr_pels + 2 * BLK8x8SIZE + 2;
+ top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+
+ if(u4_valid_intra_modes & 04) //If VERT mode is valid
+ {
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_vert = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ if(u4_valid_intra_modes & 01) //If DC mode is valid
+ {
+ if(left && top)
+ {
+ WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+ WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+ WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+ __m128i val_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ val_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+ left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+ val_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+ top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+ // First four rows
+ dc_1u = (left_up_u + top_left_u + 4) >> 3;
+ dc_1v = (left_up_v + top_left_v + 4) >> 3;
+ dc_2u = (top_right_u + 2) >> 2;
+ dc_2v = (top_right_v + 2) >> 2;
+
+ pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ // Second four rows
+ dc_1u = (left_down_u + 2) >> 2;
+ dc_1v = (left_down_v + 2) >> 2;
+ dc_2u = (left_down_u + top_right_u + 4) >> 3;
+ dc_2v = (left_down_v + top_right_v + 4) >> 3;
+
+ pred2_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else if(left)
+ {
+ WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+ WORD32 dc_u, dc_v;
+
+ __m128i left_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ left_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+ left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+ // First four rows
+ dc_u = (left_up_u + 2) >> 2;
+ dc_v = (left_up_v + 2) >> 2;
+
+ pred1_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ // Second four rows
+ dc_u = (left_down_u + 2) >> 2;
+ dc_v = (left_down_v + 2) >> 2;
+
+ pred2_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else if(top)
+ {
+ WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+ WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+ __m128i top_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ top_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+ top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+ dc_1u = (top_left_u + 2) >> 2;
+ dc_1v = (top_left_v + 2) >> 2;
+ dc_2u = (top_right_u + 2) >> 2;
+ dc_2v = (top_right_v + 2) >> 2;
+
+ pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else
+ {
+ pred1_16x8b = _mm_set1_epi8(128);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+
+ /* Finding minimum SAD and doing corresponding prediction*/
+ if(min_sad < *pu4_sadmin)
+ {
+ *pu4_sadmin = min_sad;
+
+ if(min_sad == sad_dc)
+ {
+ *u4_intra_mode = DC_CH_I8x8;
+
+ if(!left)
+ pred2_16x8b = pred1_16x8b;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ }
+ else if(min_sad == sad_horz)
+ {
+ __m128i left_sh_16x8b, const_14_15_16x8b;
+
+ *u4_intra_mode = HORZ_CH_I8x8;
+
+ const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+
+ left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 3
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 5
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 7
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ }
+ else
+ {
+ *u4_intra_mode = VERT_CH_I8x8;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ }
+ }
+}
diff --git a/encoder/x86/ih264e_platform_macros.h b/encoder/x86/ih264e_platform_macros.h
new file mode 100755
index 0000000..b4dfadd
--- /dev/null
+++ b/encoder/x86/ih264e_platform_macros.h
@@ -0,0 +1,154 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_platform_macros.h
+ *
+ * @brief
+ * Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (generic variant, going by the function name)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks codec_t is not declared in this header and nothing is included
+* here, so its declaration must already be visible where this header is
+* included
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (SSSE3 and SSE4.2 variants, going by the function names)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use. This comment documents
+* both ih264e_init_function_ptr_ssse3() and ih264e_init_function_ptr_sse42()
+* declared below; they share the same signature
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec);
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as void * (presumably a codec_t * like the
+* variants above - confirm against the definition)
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks IV_ARCH_T is not declared in this header; its declaration must
+* already be visible where this header is included
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: These functions do nothing on x86 side. But on arm platforms,
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+* 1. All explicit memory accesses before this instruction complete.
+* 2. All Cache, Branch predictor and TLB maintenance operations before
+* this instruction complete.
+*
+* @param[in] void
+*
+* @returns void
+*
+* @remarks No barrier function or macro is declared after this comment in
+* this header; the description is informational only here
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c
new file mode 100755
index 0000000..0876788
--- /dev/null
+++ b/encoder/x86/ime_distortion_metrics_sse42.c
@@ -0,0 +1,1940 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics_sse42.c
+*
+* @brief
+* This file contains definitions of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ime_compute_sad_16x16_sse42()
+* - ime_compute_sad_16x16_fast_sse42()
+* - ime_compute_sad_16x16_ea8_sse42()
+* - ime_compute_sad_16x8_sse42()
+* - ime_calculate_sad4_prog_sse42()
+* - ime_sub_pel_compute_sad_16x16_sse42()
+* - ime_compute_satqd_16x16_lumainter_sse42()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+#include <immintrin.h>
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Sum of absolute differences (SAD) between two 16x16 blocks
+*
+* @par Description
+*  Accumulates the byte-wise absolute difference over all 16 rows (16 bytes
+*  per row) of the source block and the estimated block and stores the total
+*  in pi4_mb_distortion. The complete block is always evaluated; i4_max_sad
+*  is accepted but never read by this routine, so there is no early exit.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source block
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion (not used by this routine)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*  Rows are fetched with unaligned loads (_mm_loadu_si128).
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+                                 UWORD8 *pu1_est,
+                                 WORD32 src_strd,
+                                 WORD32 est_strd,
+                                 WORD32 i4_max_sad,
+                                 WORD32 *pi4_mb_distortion)
+{
+    /* _mm_sad_epu8 yields two partial sums per row: bytes 0-7 in the low */
+    /* 64 bits and bytes 8-15 in the high 64 bits; both are kept in acc   */
+    __m128i acc = _mm_setzero_si128();
+    WORD32 row;
+
+    for(row = 0; row < 16; row++)
+    {
+        __m128i src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    /* fold the low and high partial sums into the block SAD */
+    *pi4_mb_distortion = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief Sum of absolute differences (SAD) between two 16x8 blocks
+*
+* @par Description
+*  Accumulates the byte-wise absolute difference over 8 rows (16 bytes per
+*  row) of the source block and the estimated block and stores the total in
+*  pi4_mb_distortion. All 8 rows are always evaluated; i4_max_sad is accepted
+*  but never read by this routine, so there is no early exit.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source block
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion (not used by this routine)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*  Rows are fetched with unaligned loads (_mm_loadu_si128).
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+    /* low 64 bits: SAD of bytes 0-7, high 64 bits: SAD of bytes 8-15 */
+    __m128i acc = _mm_setzero_si128();
+    WORD32 row;
+
+    for(row = 0; row < 8; row++)
+    {
+        __m128i src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    /* fold the low and high partial sums into the block SAD */
+    *pi4_mb_distortion = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief SAD between two 16x16 blocks with an early exit after 8 rows
+*
+* @par Description
+*  The even rows (0, 2, ..., 14) are accumulated first. If that partial SAD
+*  is already greater than i4_max_sad, it is written to pi4_mb_distortion and
+*  the routine returns. Otherwise the odd rows (1, 3, ..., 15) are added and
+*  the SAD of the complete block is written.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source block
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared against the even-row SAD
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (even rows only when the early exit is taken)
+*
+* @remarks
+*  Rows are fetched with unaligned loads (_mm_loadu_si128).
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8_sse42(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_est,
+                                     WORD32 src_strd,
+                                     WORD32 est_strd,
+                                     WORD32 i4_max_sad,
+                                     WORD32 *pi4_mb_distortion)
+{
+    /* low 64 bits: SAD of bytes 0-7, high 64 bits: SAD of bytes 8-15 */
+    __m128i acc = _mm_setzero_si128();
+    WORD32 partial_sad;
+    WORD32 row;
+
+    /* pass 1: even rows 0, 2, ..., 14 */
+    for(row = 0; row < 16; row += 2)
+    {
+        __m128i src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    partial_sad = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+
+    /* half of the block already exceeds the limit: report it and stop */
+    if(i4_max_sad < partial_sad)
+    {
+        *pi4_mb_distortion = partial_sad;
+        return;
+    }
+
+    /* pass 2: odd rows 1, 3, ..., 15, added on top of the even-row sums */
+    for(row = 1; row < 16; row += 2)
+    {
+        __m128i src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    *pi4_mb_distortion = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief Approximate SAD between two 16x16 blocks (fast mode)
+*
+* @par Description
+*  Only the even rows (0, 2, ..., 14) of the two blocks are compared. The
+*  resulting SAD is doubled before it is written to pi4_mb_distortion, i.e.
+*  the SAD of the whole block is assumed to be approximately twice the SAD of
+*  the alternate rows. i4_max_sad is accepted but never read by this routine.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source block
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion (not used by this routine)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (twice the even-row SAD)
+*
+* @remarks
+*  Rows are fetched with unaligned loads (_mm_loadu_si128).
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
+                                      UWORD8 *pu1_est,
+                                      WORD32 src_strd,
+                                      WORD32 est_strd,
+                                      WORD32 i4_max_sad,
+                                      WORD32 *pi4_mb_distortion)
+{
+    /* low 64 bits: SAD of bytes 0-7, high 64 bits: SAD of bytes 8-15 */
+    __m128i acc = _mm_setzero_si128();
+    WORD32 even_row_sad;
+    WORD32 row;
+
+    /* even rows 0, 2, ..., 14 only */
+    for(row = 0; row < 16; row += 2)
+    {
+        __m128i src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    even_row_sad = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+
+    /* scale the half-block SAD up to a full-block estimate */
+    *pi4_mb_distortion = (even_row_sad << 1);
+    return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief four point diamond sad for a 16x16 block
+*
+* @par Description: This function evaluates the sad between the 16x16 source
+* block and the four 16x16 reference blocks displaced by one pixel to the
+* left, right, top and bottom of the reference pointer.
+*
+* @param[in] pu1_ref
+* UWORD8 pointer to the reference (centre of the diamond)
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+* integer reference stride
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[out] pi4_sad
+* pointer to integer array receiving the four evaluated sads
+* pi4_sad[0] - left (pu1_ref - 1)
+* pi4_sad[1] - right (pu1_ref + 1)
+* pi4_sad[2] - top (pu1_ref - ref_strd)
+* pi4_sad[3] - bottom (pu1_ref + ref_strd)
+*
+* @returns none, the results are written to pi4_sad
+*
+* @remarks all loads are unaligned 16 byte loads
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog_sse42(UWORD8 *pu1_ref,
+                                   UWORD8 *pu1_src,
+                                   WORD32 ref_strd,
+                                   WORD32 src_strd,
+                                   WORD32 *pi4_sad)
+{
+    /* candidate row pointers and their accumulators, in output order:
+     * [0] left, [1] right, [2] top, [3] bottom */
+    UWORD8 *apu1_cand[4];
+    __m128i acc[4];
+    __m128i cur_src, cur_ref;
+    WORD32 row, idx;
+
+    apu1_cand[0] = pu1_ref - 1;
+    apu1_cand[1] = pu1_ref + 1;
+    apu1_cand[2] = pu1_ref - ref_strd;
+    apu1_cand[3] = pu1_ref + ref_strd;
+
+    /* Row 0 seeds the four accumulators */
+    cur_src = _mm_loadu_si128((__m128i *) (pu1_src));
+    for(idx = 0; idx < 4; idx++)
+    {
+        cur_ref = _mm_loadu_si128((__m128i *) (apu1_cand[idx]));
+        acc[idx] = _mm_sad_epu8(cur_src, cur_ref);
+    }
+
+    /* Rows 1..15: step every pointer down one row, then accumulate */
+    for(row = 1; row < 16; row++)
+    {
+        pu1_src += src_strd;
+        cur_src = _mm_loadu_si128((__m128i *) (pu1_src));
+
+        for(idx = 0; idx < 4; idx++)
+        {
+            apu1_cand[idx] += ref_strd;
+            cur_ref = _mm_loadu_si128((__m128i *) (apu1_cand[idx]));
+            acc[idx] = _mm_add_epi64(acc[idx], _mm_sad_epu8(cur_src, cur_ref));
+        }
+    }
+
+    /* Each accumulator holds one partial sum per 64-bit lane; add the low
+     * 32 bits of both lanes to obtain the block sad */
+    for(idx = 0; idx < 4; idx++)
+    {
+        pi4_sad[idx] = (_mm_extract_epi32(acc[idx], 0) + _mm_extract_epi32(acc[idx], 2));
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) at all subpel points about the src location
+*
+* @par Description
+* This function computes the SAD at all points at a subpel distance from the
+* current source location.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_ref_half_x
+* UWORD8 pointer to half pel buffer
+*
+* @param[out] pu1_ref_half_y
+* UWORD8 pointer to half pel buffer
+*
+* @param[out] pu1_ref_half_xy
+* UWORD8 pointer to half pel buffer
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] ref_strd
+* integer ref stride
+*
+* @param[out] pi4_sad
+* integer evaluated sad
+* pi4_sad[0] - half x
+* pi4_sad[1] - half x - 1
+* pi4_sad[2] - half y
+* pi4_sad[3] - half y - 1
+* pi4_sad[4] - half xy
+* pi4_sad[5] - half xy - 1
+* pi4_sad[6] - half xy - strd
+* pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_ref_half_x,
+ UWORD8 *pu1_ref_half_y,
+ UWORD8 *pu1_ref_half_xy,
+ WORD32 src_strd,
+ WORD32 ref_strd,
+ WORD32 *pi4_sad)
+{
+ UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
+ UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
+ UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
+ UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
+ UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
+ WORD32 val1, val2;
+
+ __m128i src, ref_half_x, ref_half_y, ref_half_xy;
+ __m128i ref_half_x_left, ref_half_y_top, ref_half_xy_left, ref_half_xy_top, ref_half_xy_top_left;
+ __m128i res_r0, res_r1, res_r2, res_r3, res_r4, res_r5, res_r6, res_r7;
+ __m128i sad_r0, sad_r1, sad_r2, sad_r3, sad_r4, sad_r5, sad_r6, sad_r7;
+ // Row 0 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ sad_r0 = _mm_sad_epu8(src, ref_half_x);
+ sad_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ sad_r2 = _mm_sad_epu8(src, ref_half_y);
+ sad_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ sad_r4 = _mm_sad_epu8(src, ref_half_xy);
+ sad_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ sad_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ sad_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 1 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 2 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 3 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 4 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+
+ // Row 5 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 6 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 7 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 8 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 9 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 10 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 11 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 12 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 13 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 14 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 15 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ val1 = _mm_extract_epi32(sad_r0, 0);
+ val2 = _mm_extract_epi32(sad_r0, 2);
+ pi4_sad[0] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r1, 0);
+ val2 = _mm_extract_epi32(sad_r1, 2);
+ pi4_sad[1] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r2, 0);
+ val2 = _mm_extract_epi32(sad_r2, 2);
+ pi4_sad[2] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r3, 0);
+ val2 = _mm_extract_epi32(sad_r3, 2);
+ pi4_sad[3] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r4, 0);
+ val2 = _mm_extract_epi32(sad_r4, 2);
+ pi4_sad[4] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r5, 0);
+ val2 = _mm_extract_epi32(sad_r5, 2);
+ pi4_sad[5] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r6, 0);
+ val2 = _mm_extract_epi32(sad_r6, 2);
+ pi4_sad[6] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r7, 0);
+ val2 = _mm_extract_epi32(sad_r7, 2);
+ pi4_sad[7] = (val1 + val2);
+
+ return;
+}
+/*
+*
+* @brief This function computes SAD between two 16x16 blocks
+* It also computes if the block will be zero after H264 transform and quant for
+* Intra 16x16 blocks
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimate block that the source is compared against
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimate block
+*
+* @param[in] pu2_thrsh
+* Thresholds for each element of the transformed quantized block (entries 0..8 are read)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @param[out] pu4_is_zero
+* Pointer to store if the block is zero after transform and quantization
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ UWORD16 *pu2_thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD32 *pu4_is_zero)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i temp0, temp1, temp2, temp3, temp4;
+ __m128i zero = _mm_setzero_si128(); // all bits reset to zero
+ __m128i all_one = _mm_set1_epi8(0xFF); // all bits set; used as NOT operand and as test mask
+ __m128i sad_b1, sad_b2, threshold;
+ WORD16 sad_1, sad_2; // SADs of the two 4x4 blocks in the current 8-column half
+ WORD32 i;
+ UWORD32 flag = 0; // becomes 1 at the first failed threshold test; later tests are skipped
+ WORD32 test1, test2;
+ threshold = _mm_loadu_si128((__m128i *) pu2_thrsh); // thresholds 0..7; pu2_thrsh[8] is read as a scalar below
+ (*pi4_mb_distortion) = 0;
+
+ for (i=0; i<4; i++) // one iteration per band of 4 rows; each band is handled as two 8-column halves
+ {
+ src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - Block1 and 2
+ src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - Block1 and 2
+ src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - Block1 and 2
+ src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - Block1 and 2
+
+ src_r0 = _mm_cvtepu8_epi16(src_r0); // widen the 8 loaded bytes to 8 x 16-bit lanes
+ src_r1 = _mm_cvtepu8_epi16(src_r1);
+ src_r2 = _mm_cvtepu8_epi16(src_r2);
+ src_r3 = _mm_cvtepu8_epi16(src_r3);
+
+ est_r0 = _mm_loadl_epi64((__m128i *) pu1_est); // same 4 rows x 8 columns of the estimate
+ est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd));
+ est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd));
+
+ est_r0 = _mm_cvtepu8_epi16(est_r0);
+ est_r1 = _mm_cvtepu8_epi16(est_r1);
+ est_r2 = _mm_cvtepu8_epi16(est_r2);
+ est_r3 = _mm_cvtepu8_epi16(est_r3);
+
+ src_r0 = _mm_sub_epi16(src_r0, est_r0); // per-lane difference src - est
+ src_r1 = _mm_sub_epi16(src_r1, est_r1);
+ src_r2 = _mm_sub_epi16(src_r2, est_r2);
+ src_r3 = _mm_sub_epi16(src_r3, est_r3);
+
+ src_r0 = _mm_abs_epi16(src_r0); // absolute differences
+ src_r1 = _mm_abs_epi16(src_r1);
+ src_r2 = _mm_abs_epi16(src_r2);
+ src_r3 = _mm_abs_epi16(src_r3);
+
+ src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1
+ src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2
+
+ //SAD calculation
+ temp0 = _mm_add_epi16(src_r0, src_r1); //s1+s2 s4+s3 s4+s3 s1+s2 a1+a2 a4+a3 a4+a3 a1+a2
+ temp0 = _mm_hadd_epi16(temp0, zero);
+ temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values
+
+ sad_1 = _mm_extract_epi16(temp0, 0); // sum over lanes 0..3: SAD of the left 4x4 block
+ sad_2 = _mm_extract_epi16(temp0, 1); // sum over lanes 4..7: SAD of the right 4x4 block
+
+ (*pi4_mb_distortion) += sad_1 + sad_2; // accumulated for every block, even after flag is set
+
+ if (flag == 0) { // threshold tests run only until the first failure
+ sad_b1 = _mm_set1_epi16((sad_1 << 1));
+ sad_b2 = _mm_set1_epi16((sad_2 << 1));
+
+ src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1
+ src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4
+
+ src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2
+ src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3
+
+ src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0
+ src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0
+
+ temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0
+ temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0
+
+ temp0 = _mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0
+ temp1 = _mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0
+
+ temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0
+ temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0
+
+ temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0
+
+ temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0
+ temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1)
+ temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1)
+ temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0
+ sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1
+
+ temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff
+
+ temp1 = _mm_cmpgt_epi16(threshold, sad_b2);
+
+ temp0 = _mm_xor_si128(temp0, all_one); //XOR with all-ones => bitwise NOT; a lane is now non-zero where threshold[i] <= ls[i]
+ temp1 = _mm_xor_si128(temp1, all_one);
+
+ test1 = _mm_test_all_zeros(temp0, all_one); // 1 only if every lane of block 0 had threshold[i] > ls[i]
+ test2 = _mm_test_all_zeros(temp1, all_one); // same check for block 1
+
+ if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1
+ || pu2_thrsh[8] <= sad_2)
+ flag = 1; // some threshold[i] <= ls[i], or a block SAD reached pu2_thrsh[8]
+ }
+
+ pu1_src += 8; // move to columns 8..15 of the same 4 rows
+ pu1_est += 8;
+
+ src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - next two 4x4 blocks (columns 8..15)
+ src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - next two 4x4 blocks (columns 8..15)
+ src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - next two 4x4 blocks (columns 8..15)
+ src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - next two 4x4 blocks (columns 8..15)
+
+ src_r0 = _mm_cvtepu8_epi16(src_r0); // widen the 8 loaded bytes to 8 x 16-bit lanes
+ src_r1 = _mm_cvtepu8_epi16(src_r1);
+ src_r2 = _mm_cvtepu8_epi16(src_r2);
+ src_r3 = _mm_cvtepu8_epi16(src_r3);
+
+ est_r0 = _mm_loadl_epi64((__m128i *) pu1_est);
+ est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd));
+ est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd));
+
+ est_r0 = _mm_cvtepu8_epi16(est_r0);
+ est_r1 = _mm_cvtepu8_epi16(est_r1);
+ est_r2 = _mm_cvtepu8_epi16(est_r2);
+ est_r3 = _mm_cvtepu8_epi16(est_r3);
+
+ src_r0 = _mm_sub_epi16(src_r0, est_r0); // per-lane difference src - est
+ src_r1 = _mm_sub_epi16(src_r1, est_r1);
+ src_r2 = _mm_sub_epi16(src_r2, est_r2);
+ src_r3 = _mm_sub_epi16(src_r3, est_r3);
+
+ src_r0 = _mm_abs_epi16(src_r0); // absolute differences
+ src_r1 = _mm_abs_epi16(src_r1);
+ src_r2 = _mm_abs_epi16(src_r2);
+ src_r3 = _mm_abs_epi16(src_r3);
+
+ src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1
+ src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2
+
+ //SAD calculation
+ temp0 = _mm_add_epi16(src_r0, src_r1);
+ temp0 = _mm_hadd_epi16(temp0, zero);
+ temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values
+
+ sad_1 = _mm_extract_epi16(temp0, 0); // sum over lanes 0..3: SAD of the left 4x4 block of this half
+ sad_2 = _mm_extract_epi16(temp0, 1); // sum over lanes 4..7: SAD of the right 4x4 block of this half
+
+ (*pi4_mb_distortion) += sad_1 + sad_2; // accumulated for every block, even after flag is set
+
+ if (flag == 0) { // same threshold tests as for the first half of the band
+ sad_b1 = _mm_set1_epi16((sad_1 << 1));
+ sad_b2 = _mm_set1_epi16((sad_2 << 1));
+
+ src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1
+ src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4
+
+ src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2
+ src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3
+
+ src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0
+ src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0
+
+ temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0
+ temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0
+
+ temp0 = _mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0
+ temp1 = _mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0
+
+ temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0
+ temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0
+
+ temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0
+
+ temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0
+ temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1)
+ temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1)
+ temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0
+ sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1
+
+ temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff
+
+ temp1 = _mm_cmpgt_epi16(threshold, sad_b2);
+
+ temp0 = _mm_xor_si128(temp0, all_one); //XOR with all-ones => bitwise NOT; a lane is now non-zero where threshold[i] <= ls[i]
+ temp1 = _mm_xor_si128(temp1, all_one);
+
+ test1 = _mm_test_all_zeros(temp0, all_one); // 1 only if every lane of block 0 had threshold[i] > ls[i]
+ test2 = _mm_test_all_zeros(temp1, all_one); // same check for block 1
+
+ if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1
+ || pu2_thrsh[8] <= sad_2)
+ flag = 1; // some threshold[i] <= ls[i], or a block SAD reached pu2_thrsh[8]
+ }
+
+ pu1_src += 4*src_strd - 8; // advance to the next band of 4 rows, back at column 0
+ pu1_est += 4*est_strd - 8;
+ }
+
+ *pu4_is_zero = flag; // 1 if any 4x4 block failed a threshold test, otherwise 0
+}
diff --git a/encoder/x86/ime_platform_macros.h b/encoder/x86/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/x86/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+
+#define USADA8(src,est,sad) /* sad += sum of ABS(src[i] - est[i]) for i = 0..3; arguments are parenthesized so expression arguments bind correctly */ \
+ (sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3])
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */