Initial version

Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
author: Hamsalekha S <hamsalekha.s@ittiam.com> 2015-03-13 21:24:58 +0530
committer: Hamsalekha S <hamsalekha.s@ittiam.com> 2015-04-02 15:59:02 +0530
commit: 8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
tree: cc806c96794356996b13ba9970941d0aed74a97e /encoder
parent: 3956d913d37327dcb340f836e604b04bd478b158 (diff)
download: android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip
119 files changed, 59630 insertions, 0 deletions
diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
new file mode 100755
index 0000000..fe0ce17
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
@@ -0,0 +1,313 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+@*                and do the prediction.
+@*
+@* @par Description
+@*   This function evaluates the first three 16x16 modes, computes the corresponding SADs
+@*   and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels_i16
+@*  UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which minimum sad is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@*
+@* @return      none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+@                                      UWORD8 *pu1_ngbr_pels_i16,
+@                                      UWORD8 *pu1_dst,
+@                                      UWORD32 src_strd,
+@                                      UWORD32 dst_strd,
+@                                      WORD32 u4_n_avblty,
+@                                      UWORD32 *u4_intra_mode,
+@                                      WORD32 *pu4_sadmin,
+@                                      UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+    .global ih264e_evaluate_intra16x16_modes_a9q
+
+ih264e_evaluate_intra16x16_modes_a9q:
+@ r0-r3 hold the first four arguments on entry; the remaining ones are read from the stack.
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd            (stack argument, loaded after the SAD reduction)
+@r5 = u4_n_avblty         (stack argument, loaded right after the register save)
+@r6 = u4_intra_mode       (stack argument, pointer; loaded before mode selection)
+@r7 = pu4_sadmin          (stack argument, pointer; loaded before mode selection)
+@u4_valid_intra_modes is the last stack argument; it is read into r0 after the SAD loop.
+
+
+    stmfd         sp!, {r4-r12, r14}    @save r4-r12 and lr (10 words = 40 bytes)
+    ldr           r5, [sp, #44]         @r5 = u4_n_avblty (second stack argument, 40 + 4)
+
+
+    vpush         {d8-d15}              @save d8-d15 (64 bytes); stack arguments move to sp + 104 onwards
+    vld1.32       {q4}, [r1]!           @q4 = neighbour bytes 0..15 (used as the left column); r1 += 16
+    sub           r6, r1, #1            @r6 -> neighbour byte 15, the HORZ predictor of row 0
+    add           r1, r1, #1            @skip neighbour byte 16
+    mov           r10, #0               @r10 = left-available flag, default 0
+    vld1.32       {q5}, [r1]!           @q5 = neighbour bytes 17..32 (used as the top row)
+    mov           r11, #0               @r11 = top-available flag, default 0
+    mov           r4, #0                @r4 = DC bias, stays 0 unless no neighbour is available
+    @/* Left available? (bit 0 of u4_n_avblty) */
+    ands          r7, r5, #01
+    movne         r10, #1
+
+    @/* Top available? (bit 2 of u4_n_avblty) */
+    ands          r8, r5, #04
+    lsl           r9, r10, #3           @r9 = 8 if left available; lsl leaves the flags of the ands intact
+    movne         r11, #1
+    lsl           r12, r11, #3          @r12 = 8 if top available
+    adds          r8, r9, r12           @r8 = rounding term (0, 8 or 16); Z is set when neither is available
+
+
+    @/* None available: bias the DC value by 128 */
+    moveq         r4, #128
+
+
+
+@/* Finding the DC value */
+    @----------------------
+    vaddl.u8      q15, d8, d9           @widen and add the two halves of q4
+
+    vaddl.u8      q14, d10, d11         @widen and add the two halves of q5
+
+    vadd.u16      q15, q14, q15         @q4 and q5 partial sums combined
+    @ VLD1.32  {q2},[r0],r3;row 2
+    vadd.u16      d30, d31, d30         @8 lanes -> 4 lanes
+    vpadd.u16     d30, d30              @4 lanes -> 2 sums
+    @ VLD1.32  {q3},[r0],r3 ;row 3
+    vpadd.u16     d30, d30              @lane 0 = sum of all 32 loaded neighbour bytes
+    @---------------------
+    @ NOTE(review): the sum always covers both q4 and q5 regardless of u4_n_avblty;
+    @ presumably the caller zeroes unavailable neighbours - confirm against the caller.
+    vmov.u16      r7, d30[0]            @r7 = neighbour sum
+    add           r7, r7, r8            @add the rounding term
+    add           r11, r11, #3
+    add           r8, r10, r11          @r8 = shift count = 3 + left flag + top flag
+
+    lsr           r7, r8                @r7 = (sum + rounding) >> shift
+    add           r7, r4, r7            @add the 128 bias when nothing is available
+    vld1.32       {q0}, [r0], r3        @ source row 0
+    vdup.8        q15, r7               @q15 = DC value replicated to 16 bytes
+
+@/* Computing SADs for all three modes: q8/q9 = VERT, q13/q14 = HORZ, q11/q12 = DC */
+    ldrb          r7, [r6]
+    vdup.8        q10, r7               @/HORIZONTAL VALUE ROW=0;
+    @/vertical row 0;
+    vabdl.u8      q8, d0, d10
+    vabdl.u8      q9, d1, d11
+    sub           r6, r6, #1            @step to the HORZ predictor of the next row (one byte lower)
+    @/HORZ row 0;
+    vabdl.u8      q13, d0, d20
+    vabdl.u8      q14, d1, d21
+    mov           r1, #15               @r1 = remaining rows (1..15)
+    @/dc row 0;
+    vabdl.u8      q11, d0, d30
+    vabdl.u8      q12, d1, d31
+
+
+loop:
+    vld1.32       {q1}, [r0], r3        @row i
+    @/dc row i;
+    vabal.u8      q11, d2, d30
+    ldrb          r7, [r6]              @HORZ predictor byte for row i
+    vabal.u8      q12, d3, d31
+
+    @/vertical row i;
+    vabal.u8      q8, d2, d10
+    vdup.8        q10, r7               @/HORIZONTAL VALUE ROW=i;
+    sub           r6, r6, #1
+    vabal.u8      q9, d3, d11
+
+    subs          r1, r1, #1            @flags consumed by the bne below (NEON ops do not alter them)
+    @/HORZ row i;
+    vabal.u8      q13, d2, d20
+    vabal.u8      q14, d3, d21
+    bne           loop
+
+    @------------------------------------------------------------------------------
+    @ Horizontal reduction of the three accumulators, interleaved with scalar work.
+    vadd.i16      q9, q9, q8            @/VERT
+    vadd.i16      d18, d19, d18         @/VERT
+    vpaddl.u16    d18, d18              @/VERT
+    vadd.i16      q14, q13, q14         @/HORZ
+    vadd.i16      d28, d29, d28         @/HORZ
+    vpaddl.u32    d18, d18              @/VERT
+    vpaddl.u16    d28, d28              @/HORZ
+
+    vpaddl.u32    d28, d28              @/HORZ
+    vmov.u32      r8, d18[0]            @ r8 = VERT SAD
+    vadd.i16      q12, q11, q12         @/DC
+    vmov.u32      r9, d28[0]            @ r9 = HORZ SAD
+    mov           r11, #1
+    vadd.i16      d24, d24, d25         @/DC
+    lsl           r11 , #30             @r11 = 1 << 30, the SAD assigned to disabled modes
+
+    @-----------------------
+    ldr           r0, [sp, #120]        @ u4_valid_intra_modes
+    @--------------------------------------------
+    ands          r7, r0, #01           @ vert mode valid? (bit 0)
+    moveq         r8, r11
+    vpaddl.u16    d24, d24              @/DC
+
+    ands          r6, r0, #02           @ horz mode valid? (bit 1)
+    moveq         r9, r11
+    vpaddl.u32    d24, d24              @/DC
+
+    vmov.u32      r10, d24[0]           @ r10 = DC SAD
+@--------------------------------
+    ldr           r4, [sp, #104]        @r4 = dst_strd,
+    ldr           r7, [sp, #116]        @r7 = pu4_sadmin
+@----------------------------------------------
+    ands          r6, r0, #04           @ dc mode valid? (bit 2)
+    moveq         r10, r11
+
+    @---------------------------
+    ldr           r6, [sp, #112]        @ r6 = u4_intra_mode (pointer to the returned mode)
+    @--------------------------
+    @ Selection with signed compares; on ties VERT wins over HORZ and DC, HORZ wins over DC.
+    cmp           r8, r9
+    bgt           not_vert
+    cmp           r8, r10
+    bgt           do_dc
+
+    @/----------------------
+    @DO VERTICAL PREDICTION
+    str           r8 , [r7]             @MIN SAD
+    mov           r8, #0
+    str           r8 , [r6]             @ MODE = 0
+    vmov          q15, q5               @reuse the 16-row store sequence with the top row in q15
+
+    b             do_dc_vert
+    @-----------------------------
+not_vert:
+    cmp           r9, r10
+    bgt           do_dc
+
+    @/----------------------
+    @DO HORIZONTAL: row n is neighbour byte (15 - n) replicated, stores interleaved with the dups
+    vdup.8        q5, d9[7]             @0
+    str           r9 , [r7]             @MIN SAD
+    vdup.8        q6, d9[6]             @1
+    mov           r9, #1
+    vdup.8        q7, d9[5]             @2
+    vst1.32       {d10, d11} , [r2], r4 @0
+    vdup.8        q8, d9[4]             @3
+    str           r9 , [r6]             @ MODE = 1
+    vdup.8        q9, d9[3]             @4
+    vst1.32       {d12, d13} , [r2], r4 @1
+    vdup.8        q10, d9[2]            @5
+    vst1.32       {d14, d15} , [r2], r4 @2
+    vdup.8        q11, d9[1]            @6
+    vst1.32       {d16, d17} , [r2], r4 @3
+    vdup.8        q12, d9[0]            @7
+    vst1.32       {d18, d19} , [r2], r4 @4
+    vdup.8        q13, d8[7]            @8
+    vst1.32       {d20, d21} , [r2], r4 @5
+    vdup.8        q14, d8[6]            @9
+    vst1.32       {d22, d23} , [r2], r4 @6
+    vdup.8        q15, d8[5]            @10
+    vst1.32       {d24, d25} , [r2], r4 @7
+    vdup.8        q1, d8[4]             @11
+    vst1.32       {d26, d27} , [r2], r4 @8
+    vdup.8        q2, d8[3]             @12
+    vst1.32       {d28, d29} , [r2], r4 @9
+    vdup.8        q3, d8[2]             @13
+    vst1.32       {d30, d31}, [r2], r4  @10
+    vdup.8        q5, d8[1]             @14
+    vst1.32       {d2, d3} , [r2], r4   @11
+    vdup.8        q6, d8[0]             @15
+    vst1.32       {d4, d5} , [r2], r4   @12
+
+    vst1.32       {d6, d7} , [r2], r4   @13
+
+    vst1.32       {d10, d11} , [r2], r4 @14
+
+    vst1.32       {d12, d13} , [r2], r4 @15
+    b             end_func
+
+
+    @/-----------------------------
+
+do_dc: @/---------------------------------
+    @DO DC
+    str           r10 , [r7]            @MIN SAD
+    mov           r10, #2
+    str           r10 , [r6]            @ MODE = 2
+do_dc_vert:                             @shared by DC and VERT: write q15 to all 16 destination rows
+    vst1.32       {d30, d31}, [r2], r4  @0
+    vst1.32       {d30, d31}, [r2], r4  @1
+    vst1.32       {d30, d31}, [r2], r4  @2
+    vst1.32       {d30, d31}, [r2], r4  @3
+    vst1.32       {d30, d31}, [r2], r4  @4
+    vst1.32       {d30, d31}, [r2], r4  @5
+    vst1.32       {d30, d31}, [r2], r4  @6
+    vst1.32       {d30, d31}, [r2], r4  @7
+    vst1.32       {d30, d31}, [r2], r4  @8
+    vst1.32       {d30, d31}, [r2], r4  @9
+    vst1.32       {d30, d31}, [r2], r4  @10
+    vst1.32       {d30, d31}, [r2], r4  @11
+    vst1.32       {d30, d31}, [r2], r4  @12
+    vst1.32       {d30, d31}, [r2], r4  @13
+    vst1.32       {d30, d31}, [r2], r4  @14
+    vst1.32       {d30, d31}, [r2], r4  @15
+    @/------------------
+end_func:
+    vpop          {d8-d15}              @restore the NEON registers saved on entry
+    ldmfd         sp!, {r4-r12, pc}     @restore r4-r12 and return (saved lr is popped into pc)
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
new file mode 100755
index 0000000..568e623
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
@@ -0,0 +1,529 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+
+.data
+.p2align 2
+@ Jump table of the final-prediction routines, indexed by the selected 4x4 mode (0..8).
+scratch_intrapred_luma_4x4_prediction:
+    .long ver, hor, d_c, dia_dl             @ modes 0-3
+    .long dia_dr, ver_r, hor_d, ver_l       @ modes 4-7
+    .long hor_u                             @ mode 8
+
+
+.text
+.p2align 2
+@ Offset of the table from the `add r3, r3, pc` at scrintra_4x4; the -8 compensates for pc reading 8 bytes ahead there.
+scratch_intrapred_luma_4x4_prediction_addr1:
+    .long scratch_intrapred_luma_4x4_prediction - scrintra_4x4 - 8
+
+
+
+@/**
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 4x4 mode
+@*                and do the prediction.
+@*
+@* @par Description
+@*   This function evaluates the 4x4 modes, computes the corresponding costs (SAD plus mode cost)
+@*   and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels
+@*  UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which minimum cost is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@* @param[in] u4_lambda
+@* Lambda value for computing cost from SAD
+@*
+@* @param[in] u4_predictd_mode
+@* Predicted mode for cost computation
+@*
+@*
+@*
+@* @return      none
+@*
+@******************************************************************************
+@*/
+@void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+@                                     UWORD8 *pu1_ngbr_pels,
+@                                     UWORD8 *pu1_dst,
+@                                     UWORD32 src_strd,
+@                                    UWORD32 dst_strd,
+@                                     WORD32 u4_n_avblty,
+@                                     UWORD32 *u4_intra_mode,
+@                                     WORD32 *pu4_sadmin,
+@                                     UWORD32 u4_valid_intra_modes,
+@                                     UWORD32  u4_lambda,
+@                                     UWORD32 u4_predictd_mode)
+
+
+
+    .global ih264e_evaluate_intra_4x4_modes_a9q
+
+ih264e_evaluate_intra_4x4_modes_a9q:
+@ r0-r3 hold the first four arguments on entry; the others are loaded from the stack as listed.
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd            (stack argument, loaded at label store)
+@r5 = u4_n_avblty         (stack argument)
+@r6 = u4_intra_mode       (stack argument, pointer; loaded at label pred)
+@r7 = pu4_sadmin          (stack argument, pointer; loaded at label pred)
+@r8 = u4_valid_intra_modes (stack argument)
+@r0 =u4_lambda            (stack argument; replaces pu1_src once the source is loaded)
+@r1 = u4_predictd_mode    (stack argument; replaces pu1_ngbr_pels once the neighbours are loaded)
+
+@ Working registers: r11 = best cost so far, r12 = best mode so far, q10 = the 4x4 source block.
+    stmfd         sp!, {r4-r12, r14}    @save r4-r12 and lr (40 bytes)
+
+@--------------------
+    ldr           r5, [sp, #44]         @r5 = u4_n_avblty,
+@----------------------
+    vpush         {d8-d15}              @save d8-d15 (64 bytes); stack arguments move to sp + 104 onwards
+@Loading neighbours
+    vld1.32       {q0}, [r1]            @q0 = 16 bytes starting at pu1_ngbr_pels
+    add           r4, r1, #12
+    vld1.8        d1[5], [r4]           @q0 byte 13 = neighbour byte 12
+    vld1.8        d1[7], [r1]           @q0 byte 15 = neighbour byte 0
+    @--------------------------------
+    ldr           r8, [sp, #120]        @u4_valid_intra_modes
+@----------------------------------------------
+
+
+
+@ Loading pu1_src: four rows of 4 bytes into q10 (d20 = rows 0-1, d21 = rows 2-3)
+    vld1.32       {d20[0]}, [r0], r3
+    vext.8        q1, q0, q0, #1        @q1 = q0 rotated down by one byte
+    vld1.32       {d20[1]}, [r0], r3
+    mov           r11, #1
+    vld1.32       {d21[0]}, [r0], r3
+    lsl           r11, r11, #30         @r11 = initial best cost = 1 << 30
+    vld1.32       {d21[1]}, [r0], r3
+
+
+
+@--------------------------------
+    ldr           r0, [sp, #124]        @r0 =u4_lambda
+    ldr           r1, [sp, #128]        @r1 = u4_predictd_mode
+@------
+@ Every mode below: cost = SAD + (mode == u4_predictd_mode ? u4_lambda : 4 * u4_lambda);
+@ r11 and r12 are updated when the new cost is strictly smaller (signed compare).
+vert:
+    ands          r10, r8, #01          @VERT enabled? (bit 0)
+    beq           horz
+    vdup.32       q2, d2[1]             @q2 = q0 bytes 5..8 (the VERT predictor row) repeated four times
+    vabdl.u8      q14, d4, d20          @rows 0-1
+    vabal.u8      q14, d4, d21          @rows 2-3
+    vadd.i16      d28, d29, d28
+    subs          r6, r1, #0            @Z set when the predicted mode is 0
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2            @not the predicted mode: 4 * lambda
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    moveq         r6, r0                @ predicted mode: lambda
+    vmov.u32      r9, d28[0]            @ vert
+    add           r9, r6, r9            @r9 = cost of VERT
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #0
+
+horz:
+    ands          r10, r8, #02          @HORZ enabled? (bit 1)
+    beq           dc
+    vdup.32       q3, d0[0]             @replicate neighbour bytes 0..3 (the left column)
+    vmov.32       q4, q3
+    vtrn.8        q3, q4                @this vtrn sequence rearranges the replicated bytes so that
+    vtrn.16       d7, d6                @d6/d7 hold the 4x4 HORZ prediction compared with the
+    vtrn.16       d9, d8                @source below (q3 is also what label hor stores)
+    vtrn.32       d9, d7
+    vtrn.32       d8, d6
+    vabdl.u8      q14, d6, d20
+    subs          r6, r1, #1            @Z set when the predicted mode is 1
+    vabal.u8      q14, d7, d21
+    vadd.i16      d28, d29, d28
+    lslne         r6, r0, #2
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = HORZ SAD
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #1
+
+dc:
+    ands          r10, r8, #04          @DC enabled? (bit 2)
+    beq           diags
+    vext.8        q4, q0, q0, #5        @d8 low word = neighbour bytes 5..8
+    vaddl.u8      q4, d0, d8            @lanes 0..3 = byte i + byte (i + 5)
+    vpaddl.u16    d8, d8                @ pairwise add
+    vpaddl.u32    d8, d8                @ low word = sum of bytes 0..3 and 5..8
+    vmov.u32      r4, d8[0]             @ r4 = neighbour sum
+    mov           r14, #1               @r14 = shift count, starts at 1
+    ands          r10, r5, #1           @left available?
+    addne         r4, r4, #2            @rounding
+    addne         r14, r14, #1
+    ands          r10, r5, #4           @top available?
+    addne         r4, r4, #2            @rounding
+    addne         r14, r14, #1
+    ands          r10, r5, #5           @neither available: DC = 128, no shift
+    moveq         r4, #128
+    moveq         r14, #0
+    subs          r6, r1, #2            @Z set when the predicted mode is 2
+    lsr           r4, r4, r14           @r4 = DC value (kept for label d_c)
+    vdup.8        q4, r4
+    lslne         r6, r0, #2
+    vabdl.u8      q14, d8, d20
+    vabal.u8      q14, d9, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = DC SAD
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #2
+
+diags:
+    ands          r10, r8, #504         @/* are any modes other than VERT, HORZ and DC valid (bits 3..8)? */
+    beq           pred
+    @/* FILT11 -> q5 = (a + b + 1) >> 1 and FILT121 -> q6 = (a + 2b + c + 2) >> 2 over the neighbour bytes */
+    vext.8        q5, q0, q0, #2
+    vaddl.u8      q6, d0, d2            @a + b
+    vaddl.u8      q7, d1, d3
+    vaddl.u8      q8, d10, d2           @b + c
+    vaddl.u8      q9, d11, d3
+    vadd.u16      q12, q10, q11         @NOTE(review): looks dead - q12 is overwritten by the vadd.u16 q12, q7, q9 below before any read
+    vqrshrun.s16  d10, q6, #1           @FILT11, low half
+    vqrshrun.s16  d11, q7, #1           @FILT11, high half
+    vadd.u16      q11, q6, q8           @a + 2b + c
+    vadd.u16      q12, q7, q9
+    vqrshrun.s16  d12, q11, #2          @FILT121, low half
+    vqrshrun.s16  d13, q12, #2          @FILT121, high half
+    mov           r14, #0
+    vdup.32       q13 , r14
+    mov           r14, #-1
+    vmov.i32      d26[0], r14           @d26 = vbit mask selecting the low 32 bits of a d register
+
+diag_dl:
+    ands          r10, r8, #0x08        @DIAG_DL enabled? (bit 3)
+    beq           diag_dr
+
+    vext.8        q15, q6, q6, #5       @prediction assembled in q7 (d14, d15) from shifted FILT121 values
+    vbit.32       d14, d30, d26
+    vext.8        q15, q6, q6, #15
+    vbit.32       d15, d31, d26
+    vext.8        q15, q6, q6, #2
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vbit.32       d14, d30, d28
+    vext.8        q15, q6, q6, #4
+    vbit.32       d15, d30, d28
+    vabdl.u8      q14, d14, d20
+    subs          r6, r1, #3            @Z set when the predicted mode is 3
+    vabal.u8      q14, d15, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = DIAG_DL SAD
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #3
+
+diag_dr:
+    ands          r10, r8, #16          @DIAG_DR enabled? (bit 4)
+    beq           vert_r
+
+    vext.8        q15, q6, q6, #3       @prediction assembled in q8 (d16, d17)
+    vbit.32       d16, d30, d26
+    vext.8        q15, q6, q6, #1
+    vbit.32       d17, d30, d26
+    vext.8        q15, q6, q6, #4
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vbit.32       d17, d31, d28
+    vext.8        q15, q6, q6, #6
+    vbit.32       d16, d31, d28
+    vabdl.u8      q14, d16, d20
+    subs          r6, r1, #4            @Z set when the predicted mode is 4
+    vabal.u8      q14, d17, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = DIAG_DR SAD
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #4
+
+vert_r:
+    ands          r10, r8, #32          @VERT_R enabled? (bit 5)
+    beq           horz_d
+    vext.8        q15, q5, q5, #4       @prediction assembled in q9 (d18, d19) from FILT11 and FILT121 values
+    vbit.32       d18, d30, d26
+    vext.8        q15, q5, q5, #3
+    vbit.32       d19, d30, d26
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vext.8        q15, q6, q6, #15
+    vbit.32       d18, d30, d28
+    vext.8        q15, q6, q6, #14
+    vbit.32       d19, d30, d28
+    mov           r14, #0
+    vdup.32       q14 , r14
+    mov           r14, #0xff
+    vmov.i8       d28[0], r14           @d28 = mask selecting byte 0 only
+    vext.8        q15, q6, q6, #2
+    vbit.32       d19, d30, d28         @patch byte 0 of d19
+    vext.32       q14, q14, q14, #3     @mask now selects byte 4
+    subs          r6, r1, #5            @Z set when the predicted mode is 5
+    vext.8        q15, q6, q6, #13
+    vbit.32       d19, d30, d28         @patch byte 4 of d19
+    lslne         r6, r0, #2
+    vabdl.u8      q14, d18, d20
+    vabal.u8      q14, d19, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = VERT_R SAD
+
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #5
+
+horz_d:
+    vmov.8        q1, q5                @done before the enable test: horz_u below also reads this q1
+    vmov.8        q15, q6
+    vzip.8        q1, q15               @q1 = FILT11 and FILT121 bytes interleaved
+
+    ands          r10, r8, #64          @HORZ_D enabled? (bit 6)
+    beq           vert_l
+    vext.8        q15, q6, q6, #2       @prediction assembled in q4 (d8, d9)
+    vbit.32       d8, d30, d26
+    mov           r14, #0
+    vdup.32       q14 , r14
+    mov           r14, #0xff
+    vmov.i8       d28[0], r14           @d28 = mask selecting byte 0 only
+    vext.8        q15, q5, q5, #3
+    vbit.32       d8, d30, d28
+    vext.8        q15, q1, q1, #2
+    vbit.32       d9, d30, d26
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vbit.32       d8, d2, d28
+    subs          r6, r1, #6            @Z set when the predicted mode is 6
+    vext.8        q15, q1, q1, #12
+    vbit.32       d9, d30, d28
+    vabdl.u8      q14, d8, d20
+    vabal.u8      q14, d9, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = HORZ_D SAD
+
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #6
+vert_l:
+    ands          r10, r8, #128         @VERT_L enabled? (bit 7)
+    beq           horz_u
+    vext.8        q15, q5, q5, #5       @prediction assembled in q12 (d24, d25)
+    vbit.32       d24, d30, d26
+    vext.8        q15, q15, q15, #1
+    vbit.32       d25, d30, d26
+    vext.8        q15, q6, q6, #1
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vbit.32       d24, d30, d28
+    vext.8        q15, q15, q15, #1
+    subs          r6, r1, #7            @Z set when the predicted mode is 7
+    vbit.32       d25, d30, d28
+    vabdl.u8      q14, d24, d20
+    vabal.u8      q14, d25, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = VERT_L SAD
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #7
+
+horz_u:
+    ands          r10, r8, #256         @HORZ_U enabled? (bit 8)
+    beq           pred
+    vrev64.8      q5, q1                @byte-reverse the interleaved FILT11/FILT121 values within each d register
+    vdup.8        q1, d0[0]             @q1 starts as neighbour byte 0 replicated; prediction assembled in q1
+    vext.8        q6, q6, #7
+    mov           r14, #0
+    vdup.32       q14 , r14
+    mov           r14, #0xff
+    vmov.i8       d28[0], r14           @d28 = mask selecting byte 0 only
+    vbit.32       d11, d13, d28
+    movw          r14, #0xffff
+    vmov.i16      d28[0], r14           @d28 = mask selecting the low 16 bits
+    vext.8        q6, q5, q5, #7
+    subs          r6, r1, #8            @Z set when the predicted mode is 8
+    vbit.32       d3, d12, d28
+    vext.8        q6, q5, q5, #3
+    vbit.32       d2, d12, d26
+    vext.32       q14, q13, q13, #3     @d28 = mask selecting the high 32 bits of a d register
+    vext.8        q6, q5, q5, #1
+    vbit.32       d2, d12, d28
+    vabdl.u8      q14, d2, d20
+    vabal.u8      q14, d3, d21
+    vadd.i16      d28, d29, d28
+    vpaddl.u16    d28, d28              @ 4 x u16 -> 2 x u32
+    lslne         r6, r0, #2
+    vpaddl.u32    d28, d28              @ 2 x u32 -> SAD in the low word
+    vmov.u32      r9, d28[0]            @ r9 = HORZ_U SAD
+
+
+    moveq         r6, r0                @ predicted mode: lambda
+    add           r9, r6, r9
+
+    subs          r6, r11, r9
+    movgt         r11, r9
+    movgt         r12, #8
+
+pred: @/* Doing the final prediction */
+@---------------------------
+    ldr           r7, [sp, #116]        @r7 = pu4_sadmin
+    ldr           r6, [sp, #112]        @ r6 = u4_intra_mode (pointer to the returned mode)
+@--------------------------
+    str           r11, [r7]             @/* store the minimum cost */
+    str           r12, [r6]             @/* store the final mode */
+@ NOTE(review): r12 is only written when some mode's cost beat the initial 1 << 30;
+@ presumably the caller always enables at least one mode - confirm.
+    ldr           r3, scratch_intrapred_luma_4x4_prediction_addr1
+scrintra_4x4:
+    add           r3, r3, pc            @r3 = address of the jump table
+    lsl           r12, r12, #2          @mode * 4 = byte offset of the table entry
+    add           r3, r3, r12
+
+    ldr           r5, [r3]              @r5 = address of the routine for the selected mode
+    and           r5, r5, #0xfffffffe   @clear bit 0 of the target address before bx
+
+    bx            r5
+
+@ Each routine below places the selected 4x4 prediction in q15 and falls into / branches to store.
+ver:
+    vext.8        q0, q0, q0, #1
+    vdup.32       q15, d0[1]            @same predictor row as the VERT SAD (neighbour bytes 5..8)
+    b             store
+
+hor:
+    vmov.32       q15, q3               @built in the horz section
+    b             store
+
+d_c:
+    vdup.8        q15, r4               @r4 still holds the DC value from the dc section
+    b             store
+
+dia_dl:
+    vmov.32       q15, q7
+    b             store
+
+dia_dr:
+    vmov.32       q15, q8
+    b             store
+
+ver_r:
+    vmov.32       q15, q9
+    b             store
+
+hor_d:
+    vmov.32       q15, q4
+    b             store
+
+ver_l:
+    vmov.32       q15, q12
+    b             store
+
+hor_u:
+    vmov.32       q15, q1
+
+store: @/* storing to pu1_dst*/
+
+    ldr           r4, [sp, #104]        @r4 = dst_strd,
+
+    vst1.32       {d30[0]}, [r2], r4    @row 0
+    vst1.32       {d30[1]}, [r2], r4    @row 1
+    vst1.32       {d31[0]}, [r2], r4    @row 2
+    vst1.32       {d31[1]}, [r2], r4    @row 3
+
+
+end_func:
+    vpop          {d8-d15}              @restore the NEON registers saved on entry
+    ldmfd         sp!, {r4-r12, pc}     @restore r4-r12 and return (saved lr is popped into pc)
+
+
+
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
new file mode 100755
index 0000000..e4dfca8
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
@@ -0,0 +1,346 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC )
+@*                and do the prediction.
+@*
+@* @par Description
+@*   This function evaluates the first three intra chroma modes, computes the corresponding SADs
+@*   and returns the buffer predicted with the best mode.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels
+@*  UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[in] u4_intra_mode
+@* Pointer to the variable in which best mode is returned
+@*
+@* @param[in] pu4_sadmin
+@* Pointer to the variable in which minimum sad is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Says what all modes are valid
+@*
+@*
+@* @return      none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+@                                      UWORD8 *pu1_ngbr_pels_i16,
+@                                      UWORD8 *pu1_dst,
+@                                      UWORD32 src_strd,
+@                                      UWORD32 dst_strd,
+@                                      WORD32 u4_n_avblty,
+@                                      UWORD32 *u4_intra_mode,
+@                                      WORD32 *pu4_sadmin,
+@                                      UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+    .global ih264e_evaluate_intra_chroma_modes_a9q
+
+ih264e_evaluate_intra_chroma_modes_a9q:
+
+@r0 = pu1_src,
+@r1 = pu1_ngbr_pels_i16,
+@r2 = pu1_dst,
+@r3 = src_strd,
+@r4 = dst_strd,
+@r5 = u4_n_avblty,
+@r6 = u4_intra_mode,
+@r7 = pu4_sadmin
+
+
+
+    stmfd         sp!, {r4-r12, r14}    @store register values to stack
+    @-----------------------
+    ldr           r5, [sp, #44]         @r5 = u4_n_avblty,
+    @-------------------------
+    mov           r12, r1               @
+    vpush         {d8-d15}
+    vld1.32       {q4}, [r1]!
+    add           r1, r1, #2
+    vld1.32       {q5}, [r1]!
+
+    vuzp.u8       q4, q5                @
+
+    vpaddl.u8     d8, d8
+    vpadd.u16     d8, d8
+
+    vpaddl.u8     d9, d9
+    vpadd.u16     d9, d9
+
+    vpaddl.u8     d10, d10
+    vpadd.u16     d10, d10
+
+    vpaddl.u8     d11, d11
+
+    and           r7, r5, #5
+    vpadd.u16     d11, d11
+    subs          r8, r7, #5
+    beq           all_available
+    subs          r8, r7, #4
+    beq           top_available
+    subs          r8, r7, #1
+    beq           left_available
+    mov           r10, #128
+    vdup.8        q14, r10
+    vdup.8        q15, r10
+    b             sad
+
+all_available:
+    vzip.u16      q4, q5
+    vext.16       q6, q4, q4, #2
+    vadd.u16      q7, q5, q6
+    vqrshrn.u16   d14, q7, #3
+    vqrshrn.u16   d15, q4, #2
+    vqrshrn.u16   d16, q5, #2
+    vdup.16       d28, d14[0]
+    vdup.16       d29, d16[1]
+    vdup.16       d30, d15[0]
+    vdup.16       d31, d14[1]
+    b             sad
+top_available:
+    vzip.u16      q4, q5
+    vqrshrn.u16   d16, q5, #2
+    vdup.16       d28, d16[0]
+    vdup.16       d29, d16[1]
+    vdup.16       d30, d16[0]
+    vdup.16       d31, d16[1]
+    b             sad
+left_available:
+    vzip.u16      q4, q5
+    vqrshrn.u16   d16, q4, #2
+    vdup.16       d28, d16[3]
+    vdup.16       d29, d16[3]
+    vdup.16       d30, d16[2]
+    vdup.16       d31, d16[2]
+
+
+sad:
+    vld1.32       {q4}, [r12]!
+    sub           r8, r12, #2
+    add           r12, r12, #2
+    vld1.32       {q5}, [r12]!
+    add           r12, r0, r3, lsl  #2
+    sub           r10, r8, #8
+    vld1.32       {q0}, [r0], r3
+    ldrh          r9, [r8]
+    vdup.16       q10, r9               @ row 0
+
+    @/vertical row 0;
+    vabdl.u8      q8, d0, d10
+    vabdl.u8      q9, d1, d11
+    sub           r8, r8, #2
+    vld1.32       {q1}, [r12], r3
+
+    @/HORZ row 0;
+    vabdl.u8      q13, d0, d20
+    vabdl.u8      q7, d1, d21
+    ldrh          r9, [r10]
+    @/dc row 0;
+    vabdl.u8      q11, d0, d28
+    vabdl.u8      q12, d1, d29
+
+
+    vdup.16       q10, r9               @ row 4
+    @/vertical row 4;
+    vabal.u8      q8, d2, d10
+    vabal.u8      q9, d3, d11
+    sub           r10, r10, #2
+
+    @/HORZ row 4;
+    vabal.u8      q13, d2, d20
+    vabal.u8      q7, d3, d21
+    @/dc row 4;
+    vabal.u8      q11, d2, d30
+    vabal.u8      q12, d3, d31
+
+    mov           r11, #3
+
+loop:
+    vld1.32       {q0}, [r0], r3
+    ldrh          r9, [r8]
+
+
+    @/vertical row i;
+    vabal.u8      q8, d0, d10
+    vabal.u8      q9, d1, d11
+
+    vdup.16       q10, r9               @ row i
+    vld1.32       {q1}, [r12], r3
+    sub           r8, r8, #2
+    @/HORZ row i;
+    vabal.u8      q13, d0, d20
+    vabal.u8      q7, d1, d21
+    ldrh          r9, [r10]
+    @/dc row i;
+    vabal.u8      q11, d0, d28
+    vabal.u8      q12, d1, d29
+    sub           r10, r10, #2
+
+    vdup.16       q10, r9               @ row i+4
+    @/vertical row 4;
+    vabal.u8      q8, d2, d10
+    vabal.u8      q9, d3, d11
+    subs          r11, r11, #1
+
+    @/HORZ row i+4;
+    vabal.u8      q13, d2, d20
+    vabal.u8      q7, d3, d21
+    @/dc row i+4;
+    vabal.u8      q11, d2, d30
+    vabal.u8      q12, d3, d31
+    bne           loop
+
+
+
+@-------------------------------------------
+
+    vadd.i16      q9, q9, q8            @/VERT
+    vadd.i16      q7, q13, q7           @/HORZ
+    vadd.i16      q12, q11, q12         @/DC
+    vadd.i16      d18, d19, d18         @/VERT
+    vadd.i16      d14, d15, d14         @/HORZ
+    vadd.i16      d24, d24, d25         @/DC
+    vpaddl.u16    d18, d18              @/VERT
+    vpaddl.u16    d14, d14              @/HORZ
+    vpaddl.u16    d24, d24              @/DC
+    vpaddl.u32    d18, d18              @/VERT
+    vpaddl.u32    d14, d14              @/HORZ
+    vpaddl.u32    d24, d24              @/DC
+
+
+
+    vmov.u32      r8, d18[0]            @ vert
+    vmov.u32      r9, d14[0]            @horz
+    vmov.u32      r10, d24[0]           @dc
+
+    mov           r11, #1
+@-----------------------
+    ldr           r0, [sp, #120]        @ u4_valid_intra_modes
+@--------------------------------------------
+
+
+    lsl           r11 , #30
+
+    ands          r7, r0, #04           @ vert mode valid????????????
+    moveq         r8, r11
+
+    ands          r6, r0, #02           @ horz mode valid????????????
+    moveq         r9, r11
+
+    ands          r6, r0, #01           @ dc mode valid????????????
+    moveq         r10, r11
+
+
+    @---------------------------
+    ldr           r4, [sp, #104]        @r4 = dst_strd,
+    ldr           r6, [sp, #112]        @ R6 =MODE
+    ldr           r7, [sp, #116]        @r7 = pu4_sadmin
+
+    @--------------------------
+
+    cmp           r10, r9
+    bgt           not_dc
+    cmp           r10, r8
+    bgt           do_vert
+
+    @/----------------------
+    @DO DC PREDICTION
+    str           r10 , [r7]            @MIN SAD
+    mov           r10, #0
+    str           r10 , [r6]            @ MODE
+    b             do_dc_vert
+    @-----------------------------
+
+not_dc:
+    cmp           r9, r8
+    bgt           do_vert
+    @/----------------------
+    @DO HORIZONTAL
+
+    vdup.16       q10, d9[3]            @/HORIZONTAL VALUE ROW=0;
+    str           r9 , [r7]             @MIN SAD
+    mov           r9, #1
+    vdup.16       q11, d9[2]            @/HORIZONTAL VALUE ROW=1;
+    str           r9 , [r6]             @ MODE
+    vdup.16       q12, d9[1]            @/HORIZONTAL VALUE ROW=2;
+    vst1.32       {d20, d21} , [r2], r4 @0
+    vdup.16       q13, d9[0]            @/HORIZONTAL VALUE ROW=3;
+    vst1.32       {d22, d23} , [r2], r4 @1
+    vdup.16       q14, d8[3]            @/HORIZONTAL VALUE ROW=4;
+    vst1.32       {d24, d25} , [r2], r4 @2
+    vdup.16       q15, d8[2]            @/HORIZONTAL VALUE ROW=5;
+    vst1.32       {d26, d27} , [r2], r4 @3
+    vdup.16       q1, d8[1]             @/HORIZONTAL VALUE ROW=6;
+    vst1.32       {d28, d29} , [r2], r4 @4
+    vdup.16       q2, d8[0]             @/HORIZONTAL VALUE ROW=7;
+    vst1.32       {d30, d31} , [r2], r4 @5
+    vst1.32       {d2, d3} , [r2], r4   @6
+    vst1.32       {d4, d5} , [r2], r4   @7
+    b             end_func
+
+do_vert:
+    @DO VERTICAL PREDICTION
+    str           r8 , [r7]             @MIN SAD
+    mov           r8, #2
+    str           r8 , [r6]             @ MODE
+    vmov          q15, q5
+    vmov          q14, q5
+
+do_dc_vert:
+    vst1.32       {d28, d29} , [r2], r4 @0
+    vst1.32       {d28, d29} , [r2], r4 @1
+    vst1.32       {d28, d29} , [r2], r4 @2
+    vst1.32       {d28, d29} , [r2], r4 @3
+    vst1.32       {d30, d31} , [r2], r4 @4
+    vst1.32       {d30, d31} , [r2], r4 @5
+    vst1.32       {d30, d31} , [r2], r4 @6
+    vst1.32       {d30, d31} , [r2], r4 @7
+
+
+end_func:
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
+
+
+
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
new file mode 100755
index 0000000..2bf1479
--- /dev/null
+++ b/encoder/arm/ih264e_fmt_conv.s
@@ -0,0 +1,329 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+.text
+.p2align 2
+@/**
+
+@/*****************************************************************************
+@*                                                                            *
+@*  Function Name    : ih264e_fmt_conv_420p_to_420sp_a9q()                    *
+@*                                                                            *
+@*  Description      : This function converts the image from YUV420P color    *
+@*                     space to 420SP color space (UV interleaved).           *
+@*                                                                            *
+@*  Arguments        : R0           pu1_y                                     *
+@*                     R1           pu1_u                                     *
+@*                     R2           pu1_v                                     *
+@*                     R3           pu1_dest_y                                *
+@*                     [R13 #40]    pu1_dest_uv                               *
+@*                     [R13 #44]    u2_height                                 *
+@*                     [R13 #48]    u2_width                                  *
+@*                     [R13 #52]    u2_stridey                                *
+@*                     [R13 #56]    u2_strideu                                *
+@*                     [R13 #60]    u2_stridev                                *
+@*                     [R13 #64]    u2_dest_stride_y                          *
+@*                     [R13 #68]    u2_dest_stride_uv                         *
+@*                     [R13 #72]    convert_uv_only                           *
+@*                                                                            *
+@*  Values Returned  : None                                                   *
+@*                                                                            *
+@*  Register Usage   : R0 - R14                                               *
+@*                                                                            *
+@*  Stack Usage      : 40 Bytes                                               *
+@*                                                                            *
+@*  Interruptibility : Interruptible                                          *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:     Assumed to be multiple of 16 and       *
+@*                     greater than or equal to 16                *
+@*                     Image Height:    Assumed to be even.                   *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+    .global ih264e_fmt_conv_420p_to_420sp_a9q
+
+ih264e_fmt_conv_420p_to_420sp_a9q:
+    @// Copies the Y plane (skipped when convert_uv_only == 1), then interleaves U and V into pu1_dest_uv.
+    @// push the registers on the stack (10 words, so the stack arguments start at [sp, #40])
+    stmfd         sp!, {r4-r12, lr}
+
+    ldr           r4, [sp, #72]         @// Load convert_uv_only
+
+    cmp           r4, #1
+    beq           yuv420sp_uv_chroma    @// convert_uv_only == 1: skip the luma copy
+    @/* Do the preprocessing before the main loops start */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+    ldr           r7, [sp, #52]         @// Load u2_stridey from stack
+    ldr           r8, [sp, #64]         @// Load u2_dest_stride_y from stack
+    sub           r7, r7, r5            @// Source increment
+    sub           r8, r8, r5            @// Destination increment
+
+    @// sp must not move here: the chroma section reads its arguments at fixed offsets from sp. Only d0/d1 are used, so d8-d15 need no save.
+yuv420sp_uv_row_loop_y:
+    mov           r6, r5                @// r6 = luma bytes left in this row
+
+yuv420sp_uv_col_loop_y:
+    pld           [r0, #128]
+    vld1.8        {d0, d1}, [r0]!       @// Copy 16 luma bytes
+    vst1.8        {d0, d1}, [r3]!
+    sub           r6, r6, #16
+    cmp           r6, #15
+    bgt           yuv420sp_uv_col_loop_y
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_y
+    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
+    @//Ex if width is 162, above loop will process 160 pixels. And
+    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
+    @// and written using VLD1 and VST1 (this assumes u2_width >= 16)
+    rsb           r6, r6, #16
+    sub           r0, r0, r6
+    sub           r3, r3, r6
+
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add           r0, r0, r7            @// Next source row
+    add           r3, r3, r8            @// Next destination row
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+    @// Reached by the branch when convert_uv_only == 1, or by fall-through after the luma copy.
+    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
+
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+
+
+    ldr           r7, [sp, #56]         @// Load u2_strideu from stack
+
+    ldr           r8, [sp, #68]         @// Load u2_dest_stride_uv from stack
+
+    sub           r7, r7, r5, lsr #1    @// Source increment
+
+    sub           r8, r8, r5            @// Destination increment (an interleaved UV row is u2_width bytes)
+
+    mov           r5, r5, lsr #1        @// r5 = U (or V) samples per row
+    mov           r4, r4, lsr #1        @// r4 = number of chroma rows
+    @// u2_stridev ([sp, #60]) is not loaded: pu1_v (r2) is advanced with the U increment in r7.
+    @// d8-d15 are not saved: the loops below only use d0 and d1.
+yuv420sp_uv_row_loop_uv:
+    mov           r6, r5                @// r6 = U (or V) samples left in this row
+
+
+yuv420sp_uv_col_loop_uv:
+    pld           [r1, #128]
+    pld           [r2, #128]
+    vld1.8        d0, [r1]!             @// 8 U samples
+    vld1.8        d1, [r2]!             @// 8 V samples
+    vst2.8        {d0, d1}, [r3]!       @// 16 bytes of interleaved UV
+    sub           r6, r6, #8
+    cmp           r6, #7
+    bgt           yuv420sp_uv_col_loop_uv
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_uv
+    @//If the chroma width is not a multiple of 8, go back by a few samples so that 8 can be read
+    @//Ex if width is 162 there are 81 U/V samples per row; the loop above processes 80.
+    @//pu1_u/pu1_v step back 7 samples and pu1_dest_uv steps back 14 bytes, then 8 U and 8 V
+    @// samples are read with VLD1 and written interleaved with VST2
+    rsb           r6, r6, #8
+    sub           r1, r1, r6
+    sub           r2, r2, r6
+    sub           r3, r3, r6, lsl #1
+
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add           r1, r1, r7
+    add           r2, r2, r7
+    add           r3, r3, r8
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_uv
+    @//POP THE REGISTERS
+    @// (no vpop: nothing was vpushed, so sp again points at the block saved by stmfd)
+    ldmfd         sp!, {r4-r12, pc}
+
+
+
+
+
+@ /**
+@ *******************************************************************************
+@ *
+@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
+@ *     Function used from format conversion or frame copy
+@ *
+@ *
+@ *
+@ *Inputs             : r0 - pu1_y            -   UWORD8 pointer to y plane.
+@ *                     r1 - pu1_u            -   UWORD8 pointer to u plane (the routine writes interleaved UV here).
+@ *                     r2 - pu1_v            -   UWORD8 pointer to v plane (not referenced by the routine).
+@ *                     r3 - pu2_yuv422i      -   UWORD16 pointer to yuv422iimage.
+@ *             stack + 40 - u4_width         -   Width of the Y plane.
+@ *                     44 - u4_height        -   Height of the Y plane.
+@ *                     48 - u4_stride_y      -   Stride in pixels of Y plane.
+@ *                     52 - u4_stride_u      -   Stride in pixels of U plane.
+@ *                     56 - u4_stride_v      -   Stride in pixels of V plane.
+@ *                     60 - u4_stride_yuv422i-   Stride in pixels of yuv422i image.
+@ *
+@ * @par   Description
+@ * Function used from copying or converting a reference frame to display buffer
+@ * in non shared mode
+@ *
+@ * @param[in] pu1_y_dst
+@ *   Output Y pointer
+@ *
+@ * @param[in] pu1_u_dst
+@ *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
+@ *
+@ * @param[in] pu1_v_dst
+@ *   Output V pointer ( used in 420P output case)
+@ *
+@ * @param[in] u4_dst_y_strd
+@ *   Stride of destination Y buffer
+@ *
+@ * @param[in] u4_dst_u_strd
+@ *   Stride of destination  U/V buffer
+@ *
+@ *
+@ * @param[in] blocking
+@ *   To indicate whether format conversion should wait till frame is reconstructed
+@ *   and then return after complete copy is done. To be set to 1 when called at the
+@ *   end of frame processing and set to 0 when called between frame processing modules
+@ *   in order to utilize available MCPS
+@ *
+@ * @returns Error from IH264E_ERROR_T
+@ *
+@ * @remarks
+@ * Assumes that the strides of the U and V buffers are the same.
+@ * This is correct in most cases.
+@ * If a case comes where this is not true we need to modify the fmt conversion functions called inside also.
+@ * Since we read 4 pixels at a time the width should be aligned to 4.
+@ * In assembly width should be aligned to 16 and height to 2.
+@ *
+@ *
+@ * Revision History :
+@ *         DD MM YYYY   Author(s)              Changes (Describe the changes made)
+@ *         07 06 2010   Harinarayanan K K       Adapeted to 422p
+@ *
+@ *******************************************************************************
+@ */
+
+@//`
+@*/
+    .global ih264e_fmt_conv_422i_to_420sp_a9q
+ih264e_fmt_conv_422i_to_420sp_a9q:
+    stmfd         sp!, {r4-r12, lr}     @// Back up the registers which are used (10 words, so stack args start at [sp, #40])
+
+    @// NOTE: the loop labels below say "yuv420_to_yuv422i", but data is read from pu2_yuv422i (r3/r8) and written to pu1_y (r0/r6) and pu1_u (r1).
+
+    @/* Do the preprocessing before the main loops start */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #48]         @// Load u4_stride_y       from stack
+
+    ldr           r5, [sp, #60]         @// Load u4_stride_yuv422i from stack
+    add           r6, r0, r4            @// pu1_y_nxt_row       = pu1_y + u4_stride_y
+
+    ldr           r7, [sp, #40]         @// Load u4_width          from stack
+    add           r8, r3, r5, lsl #1    @// pu2_yuv422i_nxt_row = pu2_yuv422i_y + u4_stride_yuv422i(2 Bytes for each pixel)
+
+    ldr           r9, [sp, #52]         @// Load u4_stride_u       from stack
+    sub           r12, r4, r7           @// u2_offset1          = u4_stride_y - u4_width
+
+@LDR            r10,[sp,#56]                ;// Load u4_stride_v       from stack
+    sub           r14, r5, r7           @// u2_offset_yuv422i   = u4_stride_yuv422i - u4_width (r14/lr is free as scratch: it was pushed above and is popped into pc)
+
+    ldr           r11, [sp, #44]        @// Load u4_height         from stack
+    sub           r9, r9, r7            @// u2_offset2          = u4_stride_u - u4_width (the inner loop advances pu1_u by u4_width bytes per row pair)
+
+@   SUB         r10,r10,r7,ASR #1           ;// u2_offset3          = u4_stride_v - u4_width >> 1
+    mov           r14, r14, lsl #1      @// u2_offset_yuv422i   = u2_offset_yuv422i * 2
+
+    mov           r7, r7, asr #4        @// u4_width = u4_width / 16 (u4_width >> 4)
+    mov           r11, r11, asr #1      @// u4_height = u4_height / 2 (u4_height >> 1): two rows are handled per outer iteration
+
+    add           r4, r12, r4           @// u2_offset1 = u2_offset1 + u4_stride_y (end of one row to the start of the row after next)
+    add           r5, r14, r5, lsl #1   @// u2_offset_yuv422i = u2_offset_yuv422i + 2 * u4_stride_yuv422i (byte offset, same two-row skip)
+
+    vpush         {d8-d15}              @// d8-d15 are saved here and restored at the end; the loops below only reference d0-d7
+
+@// Register Assignment
+@// pu1_y               - r0
+@// pu1_y_nxt_row       - r6
+@// pu1_u               - r1
+@// pu1_v               - r2  (never referenced by the instructions below)
+@// pu2_yuv422i         - r3
+@// pu2_yuv422i_nxt_row - r8
+@// u2_offset1          - r4
+@// u2_offset2          - r9
+@// u2_offset3          - r10 (never loaded: the u4_stride_v code above is commented out)
+@// u2_offset_yuv422i   - r5
+@// u4_width / 16       - r7
+@// u4_height / 2       - r11
+@// inner loop count    - r12
+yuv420_to_yuv422i_hight_loop:
+
+    mov           r12, r7               @// Inner loop count = u4_width / 16
+
+yuv420_to_yuv422i_width_loop:
+    vld4.8        {d0, d1, d2, d3}, [r3]! @// Load 32 bytes (16 pixels) of row 1, de-interleaved by 4: d1/d3 are stored as luma, d0/d2 as chroma
+    vld4.8        {d4, d5, d6, d7}, [r8]! @// Same for row 2: d5/d7 are stored as luma, d4/d6 feed the chroma average
+    subs          r12, r12, #1          @// Flags set here are consumed by the bgt below
+
+    vrhadd.u8     d0, d0, d4            @// Rounded average of row 1 and row 2 (bytes 0, 4, 8, ... of each row)
+    vrhadd.u8     d2, d2, d6            @// Rounded average of row 1 and row 2 (bytes 2, 6, 10, ... of each row)
+
+    vst2.8        {d1, d3}, [r0]!       @// Store the 16 elements of row1 Y
+    vst2.8        {d5, d7}, [r6]!       @// Store the 16 elements of row2 Y
+
+    vst2.8        {d0, d2}, [r1]!       @// Store 16 bytes of interleaved averaged chroma (8 pairs) shared by row1/2
+
+    bgt           yuv420_to_yuv422i_width_loop
+
+    @// Update the buffer pointer so that they will refer to next pair of rows
+    add           r0, r0, r4            @// pu1_y               = pu1_y                 + u2_offset1
+    add           r6, r6, r4            @// pu1_y_nxt_row       = pu1_y_nxt_row         + u2_offset1
+
+    add           r1, r1, r9            @// pu1_u               = pu1_u                 + u2_offset2
+    subs          r11, r11, #1
+
+    add           r3, r3, r5            @// pu2_yuv422i         = pu2_yuv422i           + u2_offset_yuv422i
+
+    add           r8, r8, r5            @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row   + u2_offset_yuv422i
+    bgt           yuv420_to_yuv422i_hight_loop
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r12, pc}     @// Restore the register which are used
+
+
+
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
new file mode 100755
index 0000000..bb181c1
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector.c
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in h264
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+#ifdef ARMV8
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* The generic initialiser is always called first */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    /*
+     * ARCH_ARM_NONEON stops after the generic call. Every other value of
+     * e_arch (ARCH_ARM_A53, ARCH_ARM_A57, ARCH_ARM_V8_NEON, or any value
+     * not listed) additionally runs the ARMv8 NEON initialiser.
+     */
+    if(ARCH_ARM_NONEON != ps_codec->s_cfg.e_arch)
+    {
+        ih264e_init_function_ptr_neon_av8(ps_codec);
+    }
+    return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the enviro-
+* ment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+    const IV_ARCH_T e_default_arch = ARCH_ARM_V8_NEON; /* value reported for ARMV8 builds */
+    return e_default_arch;
+}
+#else
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* The generic initialiser is always called first */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    /*
+     * ARCH_ARM_NONEON stops after the generic call. Every other value of
+     * e_arch (ARCH_ARM_A9Q, ARCH_ARM_A9A, ARCH_ARM_A9, ARCH_ARM_A7,
+     * ARCH_ARM_A5, ARCH_ARM_A15, or any value not listed) additionally
+     * runs the a9q NEON initialiser.
+     */
+    if(ARCH_ARM_NONEON == ps_codec->s_cfg.e_arch)
+    {
+        return;
+    }
+
+    ih264e_init_function_ptr_neon_a9q(ps_codec);
+    return;
+}
+
+IV_ARCH_T ih264e_default_arch(void)
+{
+    const IV_ARCH_T e_default_arch = ARCH_ARM_A9Q; /* value reported for non-ARMV8 builds */
+    return e_default_arch;
+}
+#endif
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
new file mode 100755
index 0000000..8b2879b
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_a9q.c
@@ -0,0 +1,252 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector_a9q.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr_neon_a9q
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec)
+{
+    WORD32 i;
+
+    /* Each statement below stores one routine into the codec context or,
+     * inside the loop near the end, into the motion estimation context of
+     * a process context. Entries that are not assigned are left untouched. */
+
+    /* Luma intra 16x16 prediction leaf functions:
+     * vert, horz, dc, plane */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+    /* Luma intra 4x4 prediction leaf functions:
+     * all nine entries, 0 to 8 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+    /* Luma intra 8x8 prediction leaf functions.
+     * NOTE(review): index 1 is not assigned in this function - confirm intended */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+    /* Chroma intra 8x8 prediction leaf functions:
+     * dc, horz, vert, plane */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+    /* Forward transform and quantisation (the 8x8 entry has no _a9 suffix) */
+    ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9;
+
+    /* Inverse quantisation, inverse transform and reconstruction (8x8 has no _a9 suffix) */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+    ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
+    ps_codec->pf_interleave_copy = ih264_interleave_copy_a9;
+
+    /* Luma core coding; index 2 is not assigned in this function */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* Chroma core coding */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* Luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+    /* Chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+    /* Macroblock syntax layer writers, indexed by slice type */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* Padding (the bottom entry has no _a9q suffix) */
+    ps_codec->pf_pad_top = ih264_pad_top_a9q;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+    /* Inter prediction leaf functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+    /* SAD functions stored in the codec context */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+
+    /* Memory handling operations */
+    ps_codec->pf_mem_cpy = ih264_memcpy_a9q;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
+    ps_codec->pf_mem_set = ih264_memset_a9q;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
+
+    /* SAD functions stored in the ME context of every process context */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        me_ctxt_t *ps_me_ctxt = &ps_codec->as_process[i].s_me_ctxt;
+
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
+    }
+
+    /* Intra mode evaluation - encoder level functions */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
+
+    /* Colour space / format conversion */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q;
+
+    /* Half pel generation functions - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q;
+
+    return;
+}
+
diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c
new file mode 100755
index 0000000..173c2d5
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_av8.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector_av8.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr_neon_av8
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: The current routine initializes the function pointers of
+* the codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec)
+{
+    /*
+     * Every statement below is a plain function pointer store. Routines with
+     * the _av8 suffix are installed wherever this file names one; the other
+     * entries point at routines that carry no architecture suffix.
+     */
+    WORD32 i;
+
+    /* Intra prediction, luma 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+    /* Intra prediction, luma 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+    /* Intra prediction, luma 8x8.
+     * NOTE(review): index 1 is not assigned here, while the 4x4 table above
+     * maps index 1 to its horz mode - confirm the 8x8 entry is never called */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+    /* Intra prediction, chroma 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+    /* Forward transform and quantisation
+     * (the 8x8 entry uses the routine without the _av8 suffix) */
+    ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_av8;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_av8;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_av8;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_av8;
+
+    /* Inverse quantisation, inverse transform and reconstruction */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_av8;
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_av8;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+    ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
+    ps_codec->pf_interleave_copy = ih264_interleave_copy_av8;
+
+    /* Luma core coding.
+     * NOTE(review): index 2 is not set by this function - confirm it is unused */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* Chroma core coding */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* Luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+    /* Chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+    /* MB syntax layer writers, one per slice type index */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* Padding functions
+     * (the bottom entry uses the routine without the _av8 suffix) */
+    ps_codec->pf_pad_top = ih264_pad_top_av8;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+    /* Inter prediction leaf level functions
+     * (the bilinear entry uses the routine without the _av8 suffix) */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+    /* Memory handling operations */
+    ps_codec->pf_mem_cpy = ih264_memcpy_av8;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
+    ps_codec->pf_mem_set = ih264_memset_av8;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
+
+    /* SAD functions stored directly in the codec context */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+
+    /* SAD / ME level functions, stored in the ME context of each of the
+     * MAX_PROCESS_CTXT process contexts */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        me_ctxt_t *ps_me_ctxt = &ps_codec->as_process[i].s_me_ctxt;
+
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
+    }
+
+    /* Intra mode evaluation - encoder level (4x4 uses the routine without the _av8 suffix) */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* Colour space conversion (both entries use routines without the _av8 suffix) */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation functions - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
+}
+
diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s
new file mode 100755
index 0000000..1b9a87a
--- /dev/null
+++ b/encoder/arm/ih264e_half_pel.s
@@ -0,0 +1,951 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ *  ih264e_half_pel.s
+@ *
+@ * @brief
+@ *  Six tap filter routines (ARM NEON) for half pel generation in the encoder
+@ *
+@ * @author
+@ *  Ittiam
+@ *
+@ * @par List of Functions:
+@ *  ih264e_sixtapfilter_horz_a9q
+@ *  ih264e_sixtap_filter_2dvh_vert_a9q
+@
+@ *
+@ * @remarks
+@ *  None
+@ *
+@ *******************************************************************************
+@ */
+
+
+.text
+.p2align 2
+
+@ /**
+@/*******************************************************************************
+@*
+@* @brief
+@*     Inter prediction luma filter for horizontal input (filter run for width = 17 and height = 16)
+@*
+@* @par Description:
+@*    Applies a 6 tap horizontal filter. The output is clipped to 8 bits.
+@*    See sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+@                                UWORD8 *pu1_dst,
+@                                WORD32 src_strd,
+@                                WORD32 dst_strd);
+
+
+.equ HALFPEL_WIDTH ,  17 + 1            @ row count loaded into r14 below: 17 + 1, kept even because two rows are processed per pass
+
+@ Register use, following the C prototype above: r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+    .global ih264e_sixtapfilter_horz_a9q
+ih264e_sixtapfilter_horz_a9q:
+    stmfd         sp!, {lr}             @ keep the return address; r14 is reused as the loop counter
+
+    vmov.i8       d0, #5                @ d0 = 5 in every byte lane (multiplier for a1 and a4)
+    sub           r0, r0, #2            @ start loading 2 bytes before pu1_src, so a0 of the first output is pu1_src[-2]
+
+    vmov.i8       d1, #20               @ d1 = 20 in every byte lane (multiplier for a2 and a3)
+    mov           r14, #HALFPEL_WIDTH   @ r14 = rows still to be filtered
+    vpush         {d8-d15}              @ q4-q7 (d8-d15) are written below; save them and restore before returning
+
+filter_horz_loop:
+@ Each pass filters two rows. A row is 24 source bytes held in three d registers;
+@ "column1".."column3" in the comments below name those three 8 byte groups.
+    vld1.8        {d2, d3, d4}, [r0], r2 @// Load row0
+    vld1.8        {d5, d6, d7}, [r0], r2 @// Load row1
+
+    @// Processing row0 and row1
+
+    vext.8        d31, d2, d3, #5       @//extract a[5]                         (column1,row0)
+    vext.8        d30, d3, d4, #5       @//extract a[5]                         (column2,row0)
+
+    vaddl.u8      q4, d31, d2           @// a0 + a5                             (column1,row0)
+    vext.8        d29, d4, d4, #5       @//extract a[5]                         (column3,row0)
+    vaddl.u8      q5, d30, d3           @// a0 + a5                             (column2,row0)
+    vext.8        d28, d5, d6, #5       @//extract a[5]                         (column1,row1)
+    vaddl.u8      q6, d29, d4           @// a0 + a5                             (column3,row0)
+    vext.8        d27, d6, d7, #5       @//extract a[5]                         (column2,row1)
+    vaddl.u8      q7, d28, d5           @// a0 + a5                             (column1,row1)
+    vext.8        d26, d7, d7, #5       @//extract a[5]                         (column3,row1)
+@ column3 extracts pair d4 (or d7) with itself, so only its first 3 lanes see a true a5; the other lanes wrap around.
+    vaddl.u8      q8, d27, d6           @// a0 + a5                             (column2,row1)
+    vext.8        d31, d2, d3, #2       @//extract a[2]                         (column1,row0)
+    vaddl.u8      q9, d26, d7           @// a0 + a5                             (column3,row1)
+    vext.8        d30, d3, d4, #2       @//extract a[2]                         (column2,row0)
+    vmlal.u8      q4, d31, d1           @// a0 + a5 + 20a2                      (column1,row0)
+    vext.8        d29, d4, d4, #2       @//extract a[2]                         (column3,row0)
+    vmlal.u8      q5, d30, d1           @// a0 + a5 + 20a2                      (column2,row0)
+    vext.8        d28, d5, d6, #2       @//extract a[2]                         (column1,row1)
+    vmlal.u8      q6, d29, d1           @// a0 + a5 + 20a2                      (column3,row0)
+    vext.8        d27, d6, d7, #2       @//extract a[2]                         (column2,row1)
+    vmlal.u8      q7, d28, d1           @// a0 + a5 + 20a2                      (column1,row1)
+    vext.8        d26, d7, d7, #2       @//extract a[2]                         (column3,row1)
+
+    vmlal.u8      q8, d27, d1           @// a0 + a5 + 20a2                      (column2,row1)
+    vext.8        d31, d2, d3, #3       @//extract a[3]                         (column1,row0)
+    vmlal.u8      q9, d26, d1           @// a0 + a5 + 20a2                      (column3,row1)
+    vext.8        d30, d3, d4, #3       @//extract a[3]                         (column2,row0)
+    vmlal.u8      q4, d31, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vext.8        d29, d4, d4, #3       @//extract a[3]                         (column3,row0)
+    vmlal.u8      q5, d30, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vext.8        d28, d5, d6, #3       @//extract a[3]                         (column1,row1)
+    vmlal.u8      q6, d29, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vext.8        d27, d6, d7, #3       @//extract a[3]                         (column2,row1)
+    vmlal.u8      q7, d28, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row1)
+    vext.8        d26, d7, d7, #3       @//extract a[3]                         (column3,row1)
+
+    vmlal.u8      q8, d27, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row1)
+    vext.8        d31, d2, d3, #1       @//extract a[1]                         (column1,row0)
+    vmlal.u8      q9, d26, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row1)
+    vext.8        d30, d3, d4, #1       @//extract a[1]                         (column2,row0)
+    vmlsl.u8      q4, d31, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vext.8        d29, d4, d4, #1       @//extract a[1]                         (column3,row0)
+    vmlsl.u8      q5, d30, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vext.8        d28, d5, d6, #1       @//extract a[1]                         (column1,row1)
+    vmlsl.u8      q6, d29, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vext.8        d27, d6, d7, #1       @//extract a[1]                         (column2,row1)
+    vmlsl.u8      q7, d28, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row1)
+    vext.8        d26, d7, d7, #1       @//extract a[1]                         (column3,row1)
+
+    vmlsl.u8      q8, d27, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row1)
+    vext.8        d31, d2, d3, #4       @//extract a[4]                         (column1,row0)
+    vmlsl.u8      q9, d26, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row1)
+    vext.8        d30, d3, d4, #4       @//extract a[4]                         (column2,row0)
+    vmlsl.u8      q4, d31, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+    vext.8        d29, d4, d4, #4       @//extract a[4]                         (column3,row0)
+    vmlsl.u8      q5, d30, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+    vext.8        d28, d5, d6, #4       @//extract a[4]                         (column1,row1)
+    vmlsl.u8      q6, d29, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+    vext.8        d27, d6, d7, #4       @//extract a[4]                         (column2,row1)
+    vmlsl.u8      q7, d28, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row1)
+    vext.8        d26, d7, d7, #4       @//extract a[4]                         (column3,row1)
+
+    vmlsl.u8      q8, d27, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row1)
+    vmlsl.u8      q9, d26, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row1)
+@ Narrow the six 16 bit sums: rounding shift right by 5 with saturation to unsigned 8 bit.
+    vqrshrun.s16  d20, q4, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vqrshrun.s16  d21, q5, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vqrshrun.s16  d22, q6, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vqrshrun.s16  d23, q7, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row1)
+    vqrshrun.s16  d24, q8, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row1)
+    vqrshrun.s16  d25, q9, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row1)
+
+    vst1.8        {d20, d21, d22}, [r1], r3 @//Store dest row0
+    vst1.8        {d23, d24, d25}, [r1], r3 @//Store dest row1
+
+    subs          r14, r14, #2          @ two rows done; sets the flags tested by the bne below
+
+    bne           filter_horz_loop
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {pc}             @ return by popping the saved lr into pc
+
+
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@*   This function implements a two stage cascaded six tap filter. It
+@*    applies the six tap filter in the vertical direction on the
+@*    predictor values, followed by applying the same filter in the
+@*    horizontal direction on the output of the first stage. The six tap
+@*    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+@*    interpolation process"
+@*    (Filter run for width = 17 and height =17)
+@* @par Description:
+@*    The function interpolates
+@*    the predictors first in the vertical direction and then in the
+@*    horizontal direction to output the (1/2,1/2). The output of the first
+@*    stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+@*    in 16 bit precision.
+@*
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst1
+@*  UWORD8 pointer to the destination(vertical filtered output)
+@*
+@* @param[out] pu1_dst2
+@*  UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride of pu1_dst
+@*
+@* @param[in]pi16_pred1
+@*  Pointer to 16bit intermediate buffer(used only in c)
+@*
+@* @param[in] pi16_pred1_strd
+@*  integer destination stride of pi16_pred1
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+@                                UWORD8 *pu1_dst1,
+@                                UWORD8 *pu1_dst2,
+@                                WORD32 src_strd,
+@                                WORD32 dst_strd,
+@                                WORD32 *pi16_pred1,/* Pointer to 16bit intermediate buffer (used only in c)*/
+@                                WORD32 pi16_pred1_strd)
+
+
+
+
+    .global ih264e_sixtap_filter_2dvh_vert_a9q
+
+ih264e_sixtap_filter_2dvh_vert_a9q:
+    stmfd         sp!, {r10, r11, r12, lr}
+
+@//r0 - pu1_ref
+@//r3 - u4_ref_width
+    vpush         {d8-d15}
+    @// Load six rows for vertical interpolation
+    lsl           r12, r3, #1
+    sub           r0, r0, r12
+    sub           r0, r0, #2
+    vld1.8        {d2, d3, d4}, [r0], r3
+    vld1.8        {d5, d6, d7}, [r0], r3
+    vld1.8        {d8, d9, d10}, [r0], r3
+    mov           r12, #5
+    vld1.8        {d11, d12, d13}, [r0], r3
+    mov           r14, #20
+    vld1.8        {d14, d15, d16}, [r0], r3
+    vmov.16       d0[0], r12
+    vmov.16       d0[1], r14
+    vld1.8        {d17, d18, d19}, [r0], r3
+    vmov.i8       d1, #20
+
+@// r12 - u2_buff1_width
+@// r14 - u2_buff2_width
+    ldr           r12, [sp, #80]
+    add           r11, r1, #6
+
+    mov           r14, r12
+
+    mov           r10, #3               @loop counter
+
+
+filter_2dvh_loop:
+
+    @// ////////////// ROW 1 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+    vaddl.u8      q10, d2, d17          @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d8, d1           @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d11, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d5, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d14, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+
+    vaddl.u8      q11, d3, d18          @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d9, d1           @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d12, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d6, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d15, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d4, d19          @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d10, d1          @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d13, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d7, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d16, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+
+    vqrshrun.s16  d2, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d3, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d4, q12, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d2, d2, d3, #2
+    vst1.8        {d3, d4}, [r11], r12  @// store row1 - 1,1/2 grid
+    vst1.8        {d2}, [r1], r12       @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q1, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q1, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q1, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q1, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q1, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q1, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d2, d3, d4}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
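+    @// NOTE: the four commented-out instructions below are not dropped; they are
+    @// issued further down, interleaved with the next row's vertical filter.
+    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
+    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
+
+    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grid values
+    @// ////////////// ROW 2 ///////////////////////
+
+@// Process the next vertically interpolated row.
+@// Each column is one 8-byte D register; a row is three columns (24 bytes).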
+    vaddl.u8      q10, d5, d2           @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d11, d1          @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d14, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d8, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d17, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vaddl.u8      q11, d6, d3           @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d12, d1          @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d15, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d9, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d18, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d7, d4           @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d13, d1          @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d16, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d10, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d19, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    vqrshrun.s16  d5, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d6, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d7, q12, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d5, d5, d6, #2
+    vst1.8        {d6, d7}, [r11], r12  @// store row1 - 1,1/2 grid
+    vst1.8        {d5}, [r1], r12       @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q3, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q3, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q3, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q3, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q3, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q3, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d5, d6, d7}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
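+    @// NOTE: the four commented-out instructions below are not dropped; they are
+    @// issued further down, interleaved with the next row's vertical filter.
+    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
+    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
+
+    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grid values
+    @// ////////////// ROW 3 ///////////////////////
+
+@// Process the next vertically interpolated row.
+@// Each column is one 8-byte D register; a row is three columns (24 bytes).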
+    vaddl.u8      q10, d8, d5           @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d14, d1          @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d17, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d11, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d2, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vaddl.u8      q11, d9, d6           @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d15, d1          @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d18, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d12, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d3, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d10, d7          @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d16, d1          @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d19, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d13, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d4, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    vqrshrun.s16  d8, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d9, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d10, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d8, d8, d9, #2
+    vst1.8        {d9, d10}, [r11], r12 @// store row1 - 1,1/2 grid
+    vst1.8        {d8}, [r1], r12       @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q4, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q4, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q4, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q4, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q4, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q4, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d8, d9, d10}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
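+    @// NOTE: the four commented-out instructions below are not dropped; they are
+    @// issued further down, interleaved with the next row's vertical filter.
+    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
+    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
+
+    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grid values
+    @// ////////////// ROW 4 ///////////////////////
+
+@// Process the next vertically interpolated row.
+@// Each column is one 8-byte D register; a row is three columns (24 bytes).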
+    vaddl.u8      q10, d11, d8          @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d17, d1          @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d2, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d14, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d5, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vaddl.u8      q11, d12, d9          @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d18, d1          @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d3, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d15, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d6, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d13, d10         @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d19, d1          @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d4, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d16, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d7, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    vqrshrun.s16  d11, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d12, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d13, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d11, d11, d12, #2
+    vst1.8        {d12, d13}, [r11], r12 @// store row1 - 1,1/2 grid
+    vst1.8        {d11}, [r1], r12      @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q6, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q6, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q6, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q6, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q6, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q6, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d11, d12, d13}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
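+    @// NOTE: the four commented-out instructions below are not dropped; they are
+    @// issued further down, interleaved with the next row's vertical filter.
+    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
+    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
+
+    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grid values
+    @// ////////////// ROW 5 ///////////////////////
+
+@// Process the next vertically interpolated row.
+@// Each column is one 8-byte D register; a row is three columns (24 bytes).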
+    vaddl.u8      q10, d14, d11         @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d2, d1           @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d5, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d17, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d8, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vaddl.u8      q11, d15, d12         @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d3, d1           @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d6, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d18, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d9, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d16, d13         @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d4, d1           @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d7, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d19, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d10, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    vqrshrun.s16  d14, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d15, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d16, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d14, d14, d15, #2
+    vst1.8        {d15, d16}, [r11], r12 @// store row1 - 1,1/2 grid
+    vst1.8        {d14}, [r1], r12      @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q7, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q7, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q7, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q7, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q7, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q7, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d14, d15, d16}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
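+    @// NOTE: the four commented-out instructions below are not dropped; on the
+    @// fall-through path they are issued further down, interleaved with the next
+    @// row's vertical filter.
+    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
+    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
+
+    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grid values
+    @// ////////////// ROW 6 ///////////////////////
+
+@// Process the next vertically interpolated row.
+@// Each column is one 8-byte D register; a row is three columns (24 bytes).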
+
+    cmp           r10, #1               @// if it 17 rows are complete skip
+    beq           filter_2dvh_skip_row
+    vaddl.u8      q10, d17, d14         @// a0 + a5                             (column1,row0)
+    vmov.i8       d31, #5
+    vmlal.u8      q10, d5, d1           @// a0 + a5 + 20a2                      (column1,row0)
+    vmlal.u8      q10, d8, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
+    vmlsl.u8      q10, d2, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
+    vmlsl.u8      q10, d11, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vaddl.u8      q11, d18, d15         @// a0 + a5                             (column2,row0)
+    vmlal.u8      q11, d6, d1           @// a0 + a5 + 20a2                      (column2,row0)
+    vmlal.u8      q11, d9, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
+    vmlsl.u8      q11, d3, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
+    vmlsl.u8      q11, d12, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
+
+    vaddl.u8      q12, d19, d16         @// a0 + a5                             (column3,row0)
+    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
+    vmlal.u8      q12, d7, d1           @// a0 + a5 + 20a2                      (column3,row0)
+    vmlal.u8      q12, d10, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
+    vmlsl.u8      q12, d4, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
+    vmlsl.u8      q12, d13, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    vqrshrun.s16  d17, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
+    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
+    vqrshrun.s16  d18, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
+    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
+
+    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
+    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
+    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
+    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
+
+    vqrshrun.s16  d19, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
+    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
+
+    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
+    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
+    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
+    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
+    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
+    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
+    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
+
+    vext.8        d17, d17, d18, #2
+    vst1.8        {d18, d19}, [r11], r12 @// store row1 - 1,1/2 grid
+    vst1.8        {d17}, [r1], r12      @// store row1 - 1,1/2 grid
+
+    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
+    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
+
+    vaddl.s16     q9, d31, d22          @// a0 + a5                             (set3)
+    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
+    vmlal.s16     q9, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
+    vmlal.s16     q9, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
+    vmlsl.s16     q9, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
+    vmlsl.s16     q9, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
+    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
+
+    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
+    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
+    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
+    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
+
+    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
+    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
+    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
+    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
+    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
+    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
+    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
+    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
+
+    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
+    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
+
+    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
+    vshrn.s32     d28, q9, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
+
+    vld1.8        {d17, d18, d19}, [r0], r3 @// Load next Row data
+    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
+    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
+    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
+    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
+    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
+    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
+
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+    subs          r10, r10, #1          @//decrement loop counter
+
+    bne           filter_2dvh_loop
+
+
+@// Process first vertical interpolated row
+@// each column is
+    @// ////////////// ROW 13 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+    vpop          {d8-d15}
+    ldmfd         sp!, {r10, r11, r12, pc}
+
+filter_2dvh_skip_row:
+
+    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
+    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
+
+    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
+
+    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+    vpop          {d8-d15}
+    ldmfd         sp!, {r10, r11, r12, pc}
+
+
+
+
diff --git a/encoder/arm/ih264e_platform_macros.h b/encoder/arm/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/arm/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_platform_macros.h
+*
+* @brief
+*  Contains platform specific routines used for codec context initialization
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use.
+* NOTE(review): the _neon_a9q suffix suggests this variant installs the ARM
+* NEON (a9q) routines - confirm against the definition.
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use.
+* NOTE(review): the _neon_av8 suffix suggests this variant installs the ARMv8
+* NEON routines - confirm against the definition.
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use.
+* NOTE(review): the _generic suffix suggests this variant installs the
+* architecture independent routines - confirm against the definition.
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use.
+* NOTE(review): presumably this is the entry point that dispatches to one of
+* the architecture specific initializers declared above - confirm against
+* the definition.
+*
+* @param[in] pv_codec
+*  Codec context pointer, passed as an untyped (void *) pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the environment
+* in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/arm/ime_distortion_metrics_a9q.s b/encoder/arm/ime_distortion_metrics_a9q.s
new file mode 100755
index 0000000..b58911e
--- /dev/null
+++ b/encoder/arm/ime_distortion_metrics_a9q.s
@@ -0,0 +1,1353 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@*
+@* @brief
+@*  This file contains definitions of routines that compute distortion
+@*  between two macro/sub blocks of identical dimensions
+@*
+@* @author
+@*  Ittiam
+@*
+@* @par List of Functions:
+@*  - ime_compute_sad_16x16_a9q()
+@*  - ime_compute_sad_16x16_fast_a9q()
+@*  - ime_compute_sad_16x8_a9q()
+@*  - ime_compute_sad_16x16_ea8_a9q()
+@*  - ime_calculate_sad2_prog_a9q()
+@*  - ime_calculate_sad3_prog_a9q()
+@*  - ime_calculate_sad4_prog_a9q()
+@*  - ime_sub_pel_compute_sad_16x16_a9q()
+@*  - ime_compute_satqd_16x16_lumainter_a9q()
+@*  -
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+@
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+@*
+@* @par   Description
+@*   This function computes an approximate SAD between two 16x16 blocks: only
+@*   alternate rows (8 of the 16) are compared and the resulting sum is
+@*   doubled. The i4_max_sad argument is not read, so there is no early exit.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion
+@*
+@* @param[in] pi4_mb_distortion
+@*  integer evaluated sad
+@*
+@* @remarks
+@*
+@******************************************************************************
+@*/
+.text
+.p2align 2
+    .global ime_compute_sad_16x16_fast_a9q
+ime_compute_sad_16x16_fast_a9q:
+
+    @ Inputs (argument order as listed in the header comment of this routine):
+    @   r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+    @   stack: i4_max_sad (never read here), pi4_mb_distortion
+    @ Only alternate rows are compared (both strides are doubled) and the
+    @ 8-row sum is doubled before it is stored to *pi4_mb_distortion.
+    @ NEON usage: q0/q1 accumulate, d4-d7 and d16-d19 hold pixel rows. All of
+    @ these are caller-saved under the AAPCS, so the callee-saved d8-d15 are
+    @ never touched and need no save/restore.
+
+    stmfd     sp!, {r12, lr}
+    lsl       r2, r2, #1                @ advance two source rows per load
+    lsl       r3, r3, #1                @ advance two reference rows per load
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR      r12,[r1]
+
+    vld1.8    {d4, d5}, [r0], r2        @ src row A
+    vld1.8    {d6, d7}, [r1], r3        @ ref row A
+    mov       r12, #6                   @ 3 loop passes, 2 rows per pass
+    vld1.8    {d16, d17}, [r0], r2      @ src row B
+    vabdl.u8  q0, d6, d4                @ q0  = |ref - src|, pixels 0..7
+    vabdl.u8  q1, d7, d5                @ q1  = |ref - src|, pixels 8..15
+    vld1.8    {d18, d19}, [r1], r3      @ ref row B
+
+loop_sad_16x16_fast:
+
+    vld1.8    {d4, d5}, [r0], r2        @ next src row A
+    vabal.u8  q0, d18, d16              @ accumulate pending row B
+    vabal.u8  q1, d19, d17
+    vld1.8    {d6, d7}, [r1], r3        @ next ref row A
+    subs      r12, #2
+    vld1.8    {d16, d17}, [r0], r2      @ next src row B
+    vabal.u8  q0, d6, d4                @ accumulate row A
+    vabal.u8  q1, d7, d5
+    vld1.8    {d18, d19}, [r1], r3      @ next ref row B
+
+    bne       loop_sad_16x16_fast
+
+    vabal.u8  q0, d18, d16              @ last pending row B
+    vabal.u8  q1, d19, d17
+
+    vadd.i16  q0, q0, q1                @ fold the 16 column sums ...
+    vadd.i16  d0, d1, d0                @ ... down to 4 lanes
+
+    ldr       r12, [sp, #12]            @ pi4_mb_distortion (2 words were pushed)
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0                   @ d0[0] = SAD of the 8 sampled rows
+    vshl.u32  d0, d0, #1                @ double it: estimate for all 16 rows
+    vst1.32   {d0[0]}, [r12]
+
+    ldmfd     sp!, {r12, pc}
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@*  @brief computes distortion (SAD) between 2 16x8  blocks
+@*
+@*
+@*  @par   Description
+@*   This function computes the SAD between two 16x8 blocks. The u4_max_sad
+@*   argument is not read by this implementation, so all 8 rows are always
+@*   evaluated (there is no early exit).
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] u4_max_sad
+@*  integer maximum allowed distortion
+@*
+@* @param[in] pi4_mb_distortion
+@*  integer evaluated sad
+@*
+@* @remarks
+@*
+@******************************************************************************
+@*/
+@
+    .global ime_compute_sad_16x8_a9q
+ime_compute_sad_16x8_a9q:
+
+    @ Inputs (argument order as listed in the header comment of this routine):
+    @   r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+    @   stack: u4_max_sad (never read here), pi4_mb_distortion
+    @ Eight 16-pixel rows are compared and the sum is stored to
+    @ *pi4_mb_distortion.
+    @ NEON usage: q0/q1 accumulate, d4-d7 and d16-d19 hold pixel rows. All of
+    @ these are caller-saved under the AAPCS, so the callee-saved d8-d15 are
+    @ never touched and need no save/restore.
+
+    stmfd     sp!, {r12, lr}
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @LDR      r12,[r1]
+
+    vld1.8    {d4, d5}, [r0], r2        @ src row A
+    vld1.8    {d6, d7}, [r1], r3        @ ref row A
+    mov       r12, #6                   @ 3 loop passes, 2 rows per pass
+    vld1.8    {d16, d17}, [r0], r2      @ src row B
+    vabdl.u8  q0, d6, d4                @ q0  = |ref - src|, pixels 0..7
+    vabdl.u8  q1, d7, d5                @ q1  = |ref - src|, pixels 8..15
+    vld1.8    {d18, d19}, [r1], r3      @ ref row B
+
+loop_sad_16x8:
+
+    vld1.8    {d4, d5}, [r0], r2        @ next src row A
+    vabal.u8  q0, d18, d16              @ accumulate pending row B
+    vabal.u8  q1, d19, d17
+    vld1.8    {d6, d7}, [r1], r3        @ next ref row A
+    subs      r12, #2
+    vld1.8    {d16, d17}, [r0], r2      @ next src row B
+    vabal.u8  q0, d6, d4                @ accumulate row A
+    vabal.u8  q1, d7, d5
+    vld1.8    {d18, d19}, [r1], r3      @ next ref row B
+
+    bne       loop_sad_16x8
+
+    vabal.u8  q0, d18, d16              @ last pending row B
+    vabal.u8  q1, d19, d17
+
+    vadd.i16  q0, q0, q1                @ fold the 16 column sums ...
+    vadd.i16  d0, d1, d0                @ ... down to 4 lanes
+
+    ldr       r12, [sp, #12]            @ pi4_mb_distortion (2 words were pushed)
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0                   @ d0[0] = SAD of the 8 rows
+    vst1.32   {d0[0]}, [r12]
+
+    ldmfd     sp!, {r12, pc}
+
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+@*
+@* @par   Description
+@*   This functions computes SAD between 2 16x16 blocks. There is a provision
+@*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+@*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion
+@*
+@* @param[in] pi4_mb_distortion
+@*  integer evaluated sad
+@*
+@* @remarks
+@*
+@******************************************************************************
+@*/
+    .global ime_compute_sad_16x16_ea8_a9q
+
+ime_compute_sad_16x16_ea8_a9q:
+
+    @ Inputs (argument order as listed in the header comment of this routine):
+    @   r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+    @   stack: i4_max_sad, pi4_mb_distortion
+    @ Pass 1 sums the 8 even rows and stores that partial SAD; if it is
+    @ greater than i4_max_sad (signed compare) the routine returns with only
+    @ the partial value in *pi4_mb_distortion. Otherwise pass 2 adds the 8 odd
+    @ rows and stores the full 16-row SAD.
+    @ NEON usage: q0/q1 accumulate, d4-d7 and d16-d19 hold pixel rows, q10
+    @ holds the partial sum. All of these are caller-saved under the AAPCS, so
+    @ the callee-saved d8-d15 are never touched and need no save/restore.
+
+    stmfd     sp!, {r5-r7, lr}
+    lsl       r2, r2, #1                @ pass 1 walks every second row
+    lsl       r3, r3, #1
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @LDR         r12,[r1]
+
+    vld1.8    {d4, d5}, [r0], r2        @ src row A
+    vld1.8    {d6, d7}, [r1], r3        @ ref row A
+    mov       r5, #6                    @ 3 loop passes, 2 rows per pass
+    vld1.8    {d16, d17}, [r0], r2      @ src row B
+    vabdl.u8  q0, d6, d4
+    vabdl.u8  q1, d7, d5
+    vld1.8    {d18, d19}, [r1], r3      @ ref row B
+    ldrd      r6, r7, [sp, #16]         @ 4 words were pushed above
+    @r6 = i4_max_sad, r7 = pi4_mb_distortion
+
+loop_sad_16x16_ea8_1:
+
+    vld1.8    {d4, d5}, [r0], r2
+    vabal.u8  q0, d18, d16
+    vabal.u8  q1, d19, d17
+    vld1.8    {d6, d7}, [r1], r3
+    subs      r5, #2
+    vld1.8    {d16, d17}, [r0], r2
+    vabal.u8  q0, d6, d4
+    vabal.u8  q1, d7, d5
+    vld1.8    {d18, d19}, [r1], r3
+
+    bne       loop_sad_16x16_ea8_1
+
+    vabal.u8  q0, d18, d16
+    sub       r0, r0, r2, lsl #3        @ rewind src by the 8 doubled strides
+    vabal.u8  q1, d19, d17
+    sub       r1, r1, r3, lsl #3        @ rewind ref likewise
+
+    vadd.i16  q10, q0, q1               @ partial sum; q0/q1 keep accumulating
+    add       r0, r0, r2, asr #1        @ one original stride down: odd rows
+    vadd.i16  d20, d20, d21
+    add       r1, r1, r3, asr #1
+
+    vpaddl.u16 d20, d20
+    vld1.8    {d4, d5}, [r0], r2        @ first odd src row
+    vld1.8    {d6, d7}, [r1], r3        @ first odd ref row
+    vpaddl.u32 d20, d20                 @ d20[0] = SAD of the 8 even rows
+    vld1.8    {d16, d17}, [r0], r2
+    vabal.u8  q0, d6, d4
+    vabal.u8  q1, d7, d5
+
+    vst1.32   {d20[0]}, [r7]            @ publish the partial SAD ...
+    ldr       r5, [r7]                  @ ... and read it back for the compare
+    cmp       r5, r6
+    bgt       end_func_16x16_ea8        @ early exit: partial SAD > i4_max_sad
+
+    vld1.8    {d18, d19}, [r1], r3
+    mov       r5, #6
+
+loop_sad_16x16_ea8_2:
+
+    vld1.8    {d4, d5}, [r0], r2
+    vabal.u8  q0, d18, d16
+    vabal.u8  q1, d19, d17
+    vld1.8    {d6, d7}, [r1], r3
+    subs      r5, #2
+    vld1.8    {d16, d17}, [r0], r2
+    vabal.u8  q0, d6, d4
+    vabal.u8  q1, d7, d5
+    vld1.8    {d18, d19}, [r1], r3
+
+    bne       loop_sad_16x16_ea8_2
+
+    vabal.u8  q0, d18, d16
+    vabal.u8  q1, d19, d17
+
+    vadd.i16  q0, q0, q1
+    vadd.i16  d0, d1, d0
+
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0                   @ d0[0] = SAD of all 16 rows
+
+    vst1.32   {d0[0]}, [r7]
+
+end_func_16x16_ea8:
+
+    ldmfd     sp!, {r5-r7, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : Calculate_Mad2_prog()
+@//
+@// Detail Description : This function finds the SAD values of the source MB
+@//                        against 2 reference MBs at one shot
+@//
+@// Platform           : CortexA8/NEON            .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad2_prog_a9q
+
+ime_calculate_sad2_prog_a9q:
+
+    @ r0    = ref1     <UWORD8 *>
+    @ r1    = ref2     <UWORD8 *>
+    @ r2    = src     <UWORD8 *>
+    @ r3    = RefBufferWidth <UWORD32>
+    @ stack = CurBufferWidth <UWORD32>, psad <UWORD32 *>
+    @
+    @ Computes the 16x16 SAD of src against ref1 and against ref2 and stores
+    @ them as psad[0] and psad[1].
+    @ NEON usage: d0-d7 and d20-d23 hold pixel rows, q12/q13 accumulate ref1,
+    @ q8/q9 accumulate ref2. All of these are caller-saved under the AAPCS, so
+    @ the callee-saved d8-d15 are never touched and need no save/restore.
+
+    stmfd     sp!, {r4-r5, lr}
+
+    @ Three words were pushed, so the first stack argument sits at [sp, #12];
+    @ [sp, #8] would be the saved lr.
+    ldr       r4, [sp, #12]             @ load src stride to r4
+    mov       r5, #14                   @ 7 loop passes, 2 rows per pass
+
+    @Row 1
+    vld1.8    {d0, d1}, [r2], r4        @ load src Row 1
+    vld1.8    {d2, d3}, [r0], r3        @ load ref1 Row 1
+    vld1.8    {d4, d5}, [r1], r3        @ load ref2 Row 1
+
+    @Row 2
+    vld1.8    {d6, d7}, [r2], r4        @ load src Row 2
+    vabdl.u8  q12, d2, d0
+    vabdl.u8  q13, d3, d1
+    vld1.8    {d20, d21}, [r0], r3      @ load ref1 Row 2
+    vabdl.u8  q8, d4, d0
+    vabdl.u8  q9, d5, d1
+    vld1.8    {d22, d23}, [r1], r3      @ load ref2 Row 2
+
+loop_sad2_prog:
+
+    subs      r5, #2
+    @Row 1
+    vld1.8    {d0, d1}, [r2], r4        @ load src Row 1
+    vabal.u8  q12, d20, d6
+    vabal.u8  q13, d21, d7
+    vld1.8    {d2, d3}, [r0], r3        @ load ref1 Row 1
+    vabal.u8  q8, d22, d6
+    vabal.u8  q9, d23, d7
+    vld1.8    {d4, d5}, [r1], r3        @ load ref2 Row 1
+
+    @Row 2
+    vld1.8    {d6, d7}, [r2], r4        @ load src Row 2
+    vabal.u8  q12, d2, d0
+    vabal.u8  q13, d3, d1
+    vld1.8    {d20, d21}, [r0], r3      @ load ref1 Row 2
+    vabal.u8  q8, d4, d0
+    vabal.u8  q9, d5, d1
+    vld1.8    {d22, d23}, [r1], r3      @ load ref2 Row 2
+
+    bne       loop_sad2_prog
+
+    vabal.u8  q12, d20, d6
+    vabal.u8  q13, d21, d7
+    vabal.u8  q8, d22, d6
+    vabal.u8  q9, d23, d7
+
+    @ Compute SAD
+
+    vadd.u16  q12, q12, q13             @ Q12 : sad_ref1
+    vadd.u16  q8, q8, q9                @ Q8  : sad_ref2
+
+    vadd.u16  d24, d24, d25
+    ldr       r5, [sp, #16]             @ loading pi4_sad to r5
+    vadd.u16  d16, d16, d17
+
+    vpadd.u16 d24, d24, d16             @ lanes 0-1: ref1, lanes 2-3: ref2
+    vpaddl.u16 d24, d24                 @ d24 = {sad_ref1, sad_ref2} as 32-bit
+
+    vst1.64   {d24}, [r5]!
+
+    ldmfd     sp!, {r4-r5, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : Calculate_Mad3_prog()
+@//
+@// Detail Description : This function finds the SAD values of the source MB
+@//                        against 3 reference MBs at one shot
+@//
+@// Platform           : CortexA8/NEON            .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad3_prog_a9q
+
+ime_calculate_sad3_prog_a9q:
+
+    @ r0    = ref1     <UWORD8 *>
+    @ r1    = ref2     <UWORD8 *>
+    @ r2    = ref3     <UWORD8 *>
+    @ r3    = src      <UWORD8 *>
+    @ stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
+    @
+    @ Computes the 16x16 SAD of src against ref1, ref2 and ref3 and stores
+    @ them as psad[0], psad[1] and psad[2].
+    @ d8-d15 are used as row buffers; they are callee-saved under the AAPCS,
+    @ so they are saved and restored here. The 64 bytes pushed for them are
+    @ included in every stack-argument offset below.
+
+
+    stmfd     sp!, {r4-r6, lr}
+    vpush     {d8-d15}
+
+    ldrd      r4, r5, [sp, #80]         @ load ref stride to r4, src stride to r5 (16 + 64 bytes pushed)
+    mov       r6, #14                   @ 7 loop passes, 2 rows per pass
+
+    @ Row 1
+    vld1.8    {d0, d1}, [r3], r5        @ load src Row 1
+    vld1.8    {d2, d3}, [r0], r4        @ load ref1 Row 1
+    vld1.8    {d4, d5}, [r1], r4        @ load ref2 Row 1
+    vabdl.u8  q8, d2, d0
+    vabdl.u8  q9, d3, d1
+    vld1.8    {d6, d7}, [r2], r4        @ load ref3 Row 1
+    vabdl.u8  q10, d4, d0
+    vabdl.u8  q11, d5, d1
+
+    @ Row 2
+    vld1.8    {d8, d9}, [r3], r5        @ load src Row 2
+    vabdl.u8  q12, d6, d0
+    vabdl.u8  q13, d7, d1
+    vld1.8    {d10, d11}, [r0], r4      @ load ref1 Row 2
+    vld1.8    {d12, d13}, [r1], r4      @ load ref2 Row 2
+    vabal.u8  q8, d10, d8
+    vabal.u8  q9, d11, d9
+    vld1.8    {d14, d15}, [r2], r4      @ load ref3 Row 2
+    vabal.u8  q10, d12, d8
+    vabal.u8  q11, d13, d9
+
+loop_sad3_prog:
+
+    @Row 1
+    vld1.8    {d0, d1}, [r3], r5        @ load src Row 1
+    vabal.u8  q12, d14, d8              @ pending ref3 of the previous Row 2
+    vabal.u8  q13, d15, d9
+    vld1.8    {d2, d3}, [r0], r4        @ load ref1 Row 1
+    vld1.8    {d4, d5}, [r1], r4        @ load ref2 Row 1
+    vabal.u8  q8, d2, d0
+    vabal.u8  q9, d3, d1
+    vld1.8    {d6, d7}, [r2], r4        @ load ref3 Row 1
+    vabal.u8  q10, d4, d0
+    vabal.u8  q11, d5, d1
+
+    @Row 2
+    vld1.8    {d8, d9}, [r3], r5        @ load src Row 2
+    vabal.u8  q12, d6, d0
+    vabal.u8  q13, d7, d1
+    vld1.8    {d10, d11}, [r0], r4      @ load ref1 Row 2
+    subs      r6, #2
+    vld1.8    {d12, d13}, [r1], r4      @ load ref2 Row 2
+    vabal.u8  q8, d10, d8
+    vabal.u8  q9, d11, d9
+    vld1.8    {d14, d15}, [r2], r4      @ load ref3 Row 2
+    vabal.u8  q10, d12, d8
+    vabal.u8  q11, d13, d9
+
+    bne       loop_sad3_prog
+
+    vabal.u8  q12, d14, d8              @ pending ref3 of the last row
+    vabal.u8  q13, d15, d9
+
+    @ Compute SAD
+
+    vadd.u16  q8, q8, q9                @ Q8  : sad_ref1
+    vadd.u16  q10, q10, q11             @ Q10 : sad_ref2
+    vadd.u16  q12, q12, q13             @ Q12 : sad_ref3
+
+    vadd.u16  d16, d16, d17
+    vadd.u16  d20, d20, d21
+    vadd.u16  d24, d24, d25
+
+    vpadd.u16 d16, d16, d20
+    vpadd.u16 d24, d24, d24
+
+    ldr       r6, [sp, #88]             @ loading pi4_sad to r6 (24 + 64 bytes pushed)
+    vpaddl.u16 d16, d16                 @ d16 = {sad_ref1, sad_ref2} as 32-bit
+    vpaddl.u16 d24, d24                 @ d24[0] = sad_ref3
+
+    vst1.64   {d16}, [r6]!
+    vst1.32   {d24[0]}, [r6]
+
+    vpop      {d8-d15}
+    ldmfd     sp!, {r4-r6, pc}
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) for sub-pel motion estimation
+@*
+@* @par   Description
+@*   This functions computes SAD for all the 8 half pel points
+@*
+@* @param[out] pi4_sad
+@*  integer evaluated sad
+@*  pi4_sad[0] - half x
+@*  pi4_sad[1] - half x - 1
+@*  pi4_sad[2] - half y
+@*  pi4_sad[3] - half y - strd
+@*  pi4_sad[4] - half xy
+@*  pi4_sad[5] - half xy - 1
+@*  pi4_sad[6] - half xy - strd
+@*  pi4_sad[7] - half xy - 1 - strd
+@*
+@* @remarks
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+    .global ime_sub_pel_compute_sad_16x16_a9q
+
+ime_sub_pel_compute_sad_16x16_a9q:
+
+    @ r0 = src, r1 = half-x plane, r2 = half-y plane, r3 = half-xy plane
+    @ stack = stride used for src (r9), stride used for the half-pel
+    @         planes (r10), pi4_sad
+    @ Eight 16x16 SADs are accumulated in q6-q13 and written to pi4_sad[0..7].
+    @ d8-d15 (row buffers d8-d11, accumulators q6/q7) are callee-saved under
+    @ the AAPCS, so they are saved and restored here. The 64 bytes pushed for
+    @ them are included in every stack-argument offset below.
+
+    stmfd     sp!, {r4-r11, lr}         @store register values to stack
+    vpush     {d8-d15}
+
+    ldr       r9, [sp, #100]            @ 36 (core regs) + 64 (d8-d15)
+    ldr       r10, [sp, #104]
+
+    sub       r4, r1, #1                @ x left
+    sub       r5, r2, r10               @ y top
+
+    sub       r6, r3, #1                @ xy left
+    sub       r7, r3, r10               @ xy top
+
+    sub       r8, r7, #1                @ xy top-left
+    mov       r11, #15                  @ 15 loop passes + the peeled first row
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR         r12,[r1]
+    @ LDR         r12,[sp,#12]
+
+    vld1.8    {d0, d1}, [r0], r9        @ src
+    vld1.8    {d2, d3}, [r5], r10       @ y top LOAD
+    vld1.8    {d4, d5}, [r7], r10       @ xy top LOAD
+    vld1.8    {d6, d7}, [r8], r10       @ xy top-left LOAD
+
+    vabdl.u8  q6, d2, d0                @ y top ABS1
+    vabdl.u8  q7, d4, d0                @ xy top ABS1
+    vld1.8    {d8, d9}, [r1], r10       @ x LOAD
+    vabdl.u8  q8, d6, d0                @ xy top-left ABS1
+    vabdl.u8  q9, d8, d0                @ x ABS1
+    vld1.8    {d10, d11}, [r4], r10     @ x left LOAD
+
+    vabal.u8  q6, d3, d1                @ y top ABS2
+    vabal.u8  q7, d5, d1                @ xy top ABS2
+    vld1.8    {d2, d3}, [r2], r10       @ y LOAD
+    vabal.u8  q8, d7, d1                @ xy top-left ABS2
+    vabal.u8  q9, d9, d1                @ x ABS2
+    vld1.8    {d4, d5}, [r3], r10       @ xy LOAD
+
+    vabdl.u8  q10, d10, d0              @ x left ABS1
+    vabdl.u8  q11, d2, d0               @ y ABS1
+    vld1.8    {d6, d7}, [r6], r10       @ xy left LOAD
+    vabdl.u8  q12, d4, d0               @ xy ABS1
+    vabdl.u8  q13, d6, d0               @ xy left ABS1
+
+loop_sub_pel_16x16:
+
+    vabal.u8  q10, d11, d1              @ x left ABS2
+    vabal.u8  q11, d3, d1               @ y ABS2
+    subs      r11, #1
+    vabal.u8  q12, d5, d1               @ xy ABS2
+    vabal.u8  q13, d7, d1               @ xy left ABS2
+
+    @ The y / xy / xy-left rows loaded for the previous source row are reused
+    @ below as the y-top / xy-top / xy-top-left rows of the new source row.
+    vld1.8    {d0, d1}, [r0], r9        @ src
+    vabal.u8  q6, d2, d0                @ y top ABS1
+    vabal.u8  q7, d4, d0                @ xy top ABS1
+    vld1.8    {d8, d9}, [r1], r10       @ x LOAD
+    vabal.u8  q8, d6, d0                @ xy top-left ABS1
+    vabal.u8  q9, d8, d0                @ x ABS1
+    vld1.8    {d10, d11}, [r4], r10     @ x left LOAD
+
+    vabal.u8  q6, d3, d1                @ y top ABS2
+    vabal.u8  q7, d5, d1                @ xy top ABS2
+    vld1.8    {d2, d3}, [r2], r10       @ y LOAD
+    vabal.u8  q8, d7, d1                @ xy top-left ABS2
+    vabal.u8  q9, d9, d1                @ x ABS2
+    vld1.8    {d4, d5}, [r3], r10       @ xy LOAD
+
+    vabal.u8  q10, d10, d0              @ x left ABS1
+    vabal.u8  q11, d2, d0               @ y ABS1
+    vld1.8    {d6, d7}, [r6], r10       @ xy left LOAD
+    vabal.u8  q12, d4, d0               @ xy ABS1
+    vabal.u8  q13, d6, d0               @ xy left ABS1
+
+    bne       loop_sub_pel_16x16
+
+    vabal.u8  q10, d11, d1              @ x left ABS2
+    vabal.u8  q11, d3, d1               @ y ABS2
+    vabal.u8  q12, d5, d1               @ xy ABS2
+    vabal.u8  q13, d7, d1               @ xy left ABS2
+
+    vadd.i16  d0, d18, d19              @ x
+    vadd.i16  d3, d12, d13              @ y top
+    vadd.i16  d6, d14, d15              @ xy top
+    vadd.i16  d5, d26, d27              @ xy left
+    vadd.i16  d1, d20, d21              @ x left
+    vadd.i16  d2, d22, d23              @ y
+    vadd.i16  d4, d24, d25              @ xy
+    vadd.i16  d7, d16, d17              @ xy top left
+
+    vpadd.i16 d0, d0, d1
+    vpadd.i16 d2, d2, d3
+    vpadd.i16 d4, d4, d5
+    vpadd.i16 d6, d6, d7
+
+    vpaddl.u16 d0, d0                   @ {x, x left}
+    vpaddl.u16 d2, d2                   @ {y, y top}
+    ldr       r11, [sp, #108]           @ pi4_sad: 44 + 64 (d8-d15)
+    vpaddl.u16 d4, d4                   @ {xy, xy left}
+    vpaddl.u16 d6, d6                   @ {xy top, xy top-left}
+
+    vst1.32   {d0}, [r11]!
+    vst1.32   {d2}, [r11]!
+    vst1.32   {d4}, [r11]!
+    vst1.32   {d6}, [r11]!
+
+    vpop      {d8-d15}
+    ldmfd     sp!, {r4-r11, pc}         @Restoring registers from stack
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks
+@*
+@* @par   Description
+@*   This function computes the SAD between two 16x16 blocks. The i4_max_sad
+@*   argument is not read by this implementation, so all 16 rows are always
+@*   evaluated (there is no early exit).
+@*
+@* @param[in] pu1_src
+@*  UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@*  UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@*  integer source stride
+@*
+@* @param[in] dst_strd
+@*  integer destination stride
+@*
+@* @param[in] i4_max_sad
+@*  integer maximum allowed distortion
+@*
+@* @param[in] pi4_mb_distortion
+@*  integer evaluated sad
+@*
+@* @remarks
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+    .global ime_compute_sad_16x16_a9q
+
+ime_compute_sad_16x16_a9q:
+
+    @ Inputs (argument order as listed in the header comment of this routine):
+    @   r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd
+    @   stack: i4_max_sad (never read here), pi4_mb_distortion
+    @ All 16 rows are compared and the sum is stored to *pi4_mb_distortion.
+    @ NEON usage: q0/q1 accumulate, d4-d7 and d16-d19 hold pixel rows. All of
+    @ these are caller-saved under the AAPCS, so the callee-saved d8-d15 are
+    @ never touched and need no save/restore.
+
+    @STMFD       sp!,{r12,lr}
+    stmfd     sp!, {r12, r14}           @store register values to stack
+
+    @for bringing buffer2 into cache..., dummy load instructions
+    @ LDR         r12,[r1]
+    @ LDR         r12,[sp,#12]
+
+    vld1.8    {d4, d5}, [r0], r2        @ src row A
+    vld1.8    {d6, d7}, [r1], r3        @ ref row A
+
+    mov       r12, #14                  @ 7 loop passes, 2 rows per pass
+    vld1.8    {d16, d17}, [r0], r2      @ src row B
+    vabdl.u8  q0, d4, d6                @ q0  = |src - ref|, pixels 0..7
+    vld1.8    {d18, d19}, [r1], r3      @ ref row B
+    vabdl.u8  q1, d5, d7                @ q1  = |src - ref|, pixels 8..15
+
+loop_sad_16x16:
+
+    vld1.8    {d4, d5}, [r0], r2        @ next src row A
+    vabal.u8  q0, d16, d18              @ accumulate pending row B
+    vld1.8    {d6, d7}, [r1], r3        @ next ref row A
+    vabal.u8  q1, d17, d19
+
+    vld1.8    {d16, d17}, [r0], r2      @ next src row B
+    vabal.u8  q0, d4, d6                @ accumulate row A
+    subs      r12, #2
+    vld1.8    {d18, d19}, [r1], r3      @ next ref row B
+    vabal.u8  q1, d5, d7
+
+    bne       loop_sad_16x16
+
+    vabal.u8  q0, d16, d18              @ last pending row B
+    vabal.u8  q1, d17, d19
+
+    vadd.i16  q0, q0, q1                @ fold the 16 column sums ...
+    vadd.i16  d0, d1, d0                @ ... down to 4 lanes
+    ldr       r12, [sp, #12]            @ pi4_mb_distortion (2 words were pushed)
+
+    vpaddl.u16 d0, d0
+    vpaddl.u32 d0, d0                   @ d0[0] = SAD of the 16 rows
+    vst1.32   {d0[0]}, [r12]
+
+    ldmfd     sp!, {r12, pc}            @Restoring registers from stack
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : ime_calculate_sad4_prog_a9q()
+@//
+@// Detail Description : Computes four 16x16 SAD values in one pass: the
+@//                      current block against the reference block shifted
+@//                      one pixel left, one pixel right, one row up and
+@//                      one row down.
+@//
+@// Output             : psad[0] = left, psad[1] = right,
+@//                      psad[2] = top,  psad[3] = bottom
+@//
+@// Platform           : CortexA8/NEON
+@//
+@// Register usage     : r4-r7 and d8-d15 are saved and restored;
+@//                      q0-q4, q8-q13 and flags are clobbered
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad4_prog_a9q
+
+ime_calculate_sad4_prog_a9q:
+    @ r0    = temp_frame     <UWORD8 *>
+    @ r1    = buffer_ptr     <UWORD8 *>
+    @ r2    = RefBufferWidth <UWORD32>
+    @ r3    = CurBufferWidth <UWORD32>
+    @ stack = psad           <UWORD32 *> {5th argument, at [sp] on entry}
+
+    stmfd     sp!, {r4-r7, lr}          @20 bytes
+    vpush     {d8-d15}                  @64 bytes; q4-q7 are callee-saved (AAPCS)
+
+    @UWORD8 *left_ptr       = temp_frame - 1;
+    @UWORD8 *right_ptr      = temp_frame + 1;
+    @UWORD8 *top_ptr        = temp_frame - RefBufferWidth;
+    @UWORD8 *bot_ptr        = temp_frame + RefBufferWidth;
+
+    mov       r7, #14                   @rows still to be handled inside the loop
+    sub       r4, r0, #0x01             @r4 = left_ptr
+    add       r5, r0, #0x1              @r5 = right_ptr
+    sub       r6, r0, r2                @r6 = top_ptr
+    add       r0, r0, r2                @r0 = bot_ptr
+                                        @r1 = buffer_ptr
+
+    @D0:D1  : buffer
+    @D2:D3  : top
+    @D4:D5  : left
+    @D6:D7  : right
+    @D8:D9  : bottom
+
+    @Row 1
+    vld1.8    {d0, d1}, [r1], r3        @ load src Row 1
+    vld1.8    {d2, d3}, [r6], r2        @ load top Row 1
+    vld1.8    {d4, d5}, [r4], r2        @ load left Row 1
+
+    vabdl.u8  q5, d2, d0
+    vld1.8    {d6, d7}, [r5], r2        @ load right Row 1
+    vabdl.u8  q6, d3, d1
+
+    vabdl.u8  q7, d0, d4
+    vld1.8    {d8, d9}, [r0], r2        @ load bottom Row 1
+    vabdl.u8  q8, d1, d5
+
+    @Row 2
+    vabdl.u8  q9, d0, d6
+    vld1.8    {d26, d27}, [r1], r3      @ load src Row 2
+    vabdl.u8  q10, d1, d7
+
+    vabdl.u8  q11, d0, d8
+    vld1.8    {d2, d3}, [r6], r2        @ load top Row 2
+    vabdl.u8  q12, d1, d9
+
+    @Each pass handles two rows; the src row alternates between d0:d1 and
+    @d26:d27 so the next loads can be issued while the previous row is summed.
+loop_sad4_prog:
+
+    vabal.u8  q5, d26, d2
+    vld1.8    {d4, d5}, [r4], r2        @ load left Row 2
+    vabal.u8  q6, d27, d3
+
+    vabal.u8  q7, d26, d4
+    vld1.8    {d6, d7}, [r5], r2        @ load right Row 2
+    vabal.u8  q8, d27, d5
+
+    vabal.u8  q9, d26, d6
+    vld1.8    {d8, d9}, [r0], r2        @ load bottom Row 2
+    vabal.u8  q10, d27, d7
+
+    @Row 1
+    vabal.u8  q11, d26, d8
+    vld1.8    {d0, d1}, [r1], r3        @ load src Row 1
+    vabal.u8  q12, d27, d9
+
+    vld1.8    {d2, d3}, [r6], r2        @ load top Row 1
+    subs      r7, #2
+    vld1.8    {d4, d5}, [r4], r2        @ load left Row 1
+
+    vabal.u8  q5, d0, d2
+
+    vld1.8    {d6, d7}, [r5], r2        @ load right Row 1
+    vabal.u8  q6, d1, d3
+
+    vabal.u8  q7, d0, d4
+    vld1.8    {d8, d9}, [r0], r2        @ load bottom Row 1
+    vabal.u8  q8, d1, d5
+
+    @Row 2
+    vabal.u8  q9, d0, d6
+    vld1.8    {d26, d27}, [r1], r3      @ load src Row 2
+    vabal.u8  q10, d1, d7
+
+    vabal.u8  q11, d0, d8
+    vld1.8    {d2, d3}, [r6], r2        @ load top Row 2
+    vabal.u8  q12, d1, d9
+
+    bne       loop_sad4_prog
+
+    @Tail: accumulate the last src row that was loaded inside the loop
+    vabal.u8  q5, d26, d2
+    vld1.8    {d4, d5}, [r4], r2        @ load left Row 2
+    vabal.u8  q6, d27, d3
+
+    vabal.u8  q7, d26, d4
+    vld1.8    {d6, d7}, [r5], r2        @ load right Row 2
+    vabal.u8  q8, d27, d5
+
+    vabal.u8  q9, d26, d6
+    vld1.8    {d8, d9}, [r0], r2        @ load bottom Row 2
+    vabal.u8  q10, d27, d7
+
+    vabal.u8  q11, d26, d8
+    vabal.u8  q12, d27, d9
+
+    @;Q5:Q6   : sad_top
+    @;Q7:Q8   : sad_left
+    @;Q9:Q10  : sad_right
+    @;Q11:Q12 : sad_bot
+
+    vadd.u16  q5, q5, q6
+    vadd.u16  q7, q7, q8
+    vadd.u16  q9, q9, q10
+    vadd.u16  q11, q11, q12
+
+    @; Free :-
+    @; Q6,Q8,Q10,Q12
+
+    @;Q5  -> D10:D11
+    @;Q7  -> D14:D15
+    @;Q9  -> D18:D19
+    @;Q11 -> D22:D23
+
+    vadd.u16  d10, d10, d11
+    vadd.u16  d14, d14, d15
+    vadd.u16  d18, d18, d19
+    vadd.u16  d22, d22, d23
+
+    @;D10  : sad_top
+    @;D14  : sad_left
+    @;D18  : sad_right
+    @;D22  : sad_bot
+
+
+    vpaddl.u16 d11, d10
+    vpaddl.u16 d15, d14
+    vpaddl.u16 d19, d18
+    vpaddl.u16 d23, d22
+
+    @;D11  : sad_top
+    @;D15  : sad_left
+    @;D19  : sad_right
+    @;D23  : sad_bot
+
+    vpaddl.u32 d10, d11
+    vpaddl.u32 d22, d23
+    vpaddl.u32 d14, d15
+    vpaddl.u32 d18, d19
+
+    @;D10  : sad_top
+    @;D14  : sad_left
+    @;D18  : sad_right
+    @;D22  : sad_bot
+
+    @psad is the first stack argument: 20 bytes (r4-r7, lr) + 64 bytes (d8-d15)
+    ldr       r4, [sp, #84]             @;Can be rearranged
+
+    vsli.64   d10, d22, #32             @d10 = { sad_top,  sad_bot   }
+    vsli.64   d14, d18, #32             @d14 = { sad_left, sad_right }
+
+    vst1.64   {d14}, [r4]!              @psad[0] = left, psad[1] = right
+    vst1.64   {d10}, [r4]!              @psad[2] = top,  psad[3] = bottom
+
+    vpop      {d8-d15}                  @restore callee-saved NEON registers
+    ldmfd     sp!, {r4-r7, pc}
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name        : ime_compute_satqd_16x16_lumainter_a9q
+@* Description          : This function computes SAD for a 16x16 block.
+@                       : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+@
+@  Arguments            :   R0 :pointer to src buffer
+@                           R1 :pointer to est buffer
+@                           R2 :source stride
+@                           R3 :est stride
+@                           STACK :pointer to thresholds (nine 16-bit values are read),
+@                                  pointer to distortion (SAD is stored here),
+@                                  pointer to is_nonzero (flag word is stored here)
+@*
+@* Values Returned   : NONE
+@*
+@* Register Usage    : R0-R11, Q0-Q15
+@*                     (R4-R12 and D8-D15 are saved and restored)
+@* Stack Usage       : 40 bytes of core registers + 64 bytes of NEON registers
+@* Cycles            : Around
+@* Interruptibility  : Interruptible
+@*
+@* Known Limitations
+@*   \Assumptions    :
+@*
+@* Revision History  :
+@*         DD MM YYYY    Author(s)          Changes
+@*         14 04 2014    Harinarayanan K K  First version
+@*
+@*****************************************************************************
+    .global ime_compute_satqd_16x16_lumainter_a9q
+ime_compute_satqd_16x16_lumainter_a9q:
+    @R0 :pointer to src buffer
+    @R1 :pointer to est buffer
+    @R2 :Source stride
+    @R3 :Pred stride
+    @R4 :Threshold pointer
+    @R5 :Distortion,ie SAD
+    @R6 :is nonzero
+
+    push      {r4-r12, lr}              @push all the variables first (40 bytes)
+    ldr       r4, [sp, #40]             @load the threshold address (first stack argument)
+    vpush     {d8-d15}                  @q4-q7 are callee-saved (AAPCS); popped at funcend_sad_16x16
+
+    mov       r8, #8                    @Number of 4x8 blocks to be processed
+    mov       r10, #0                   @Sad
+    mov       r7, #0                    @Nonzero info
+    @----------------------------------------------------
+
+    vld1.u8   d30, [r0], r2             @I  load 8 pix src row 1
+
+    vld1.u8   d31, [r1], r3             @I  load 8 pix pred row 1
+
+    vld1.u8   d28, [r0], r2             @I  load 8 pix src row 2
+
+    vld1.u8   d29, [r1], r3             @I  load 8 pix pred row 2
+
+    vld1.u8   d26, [r0], r2             @I  load 8 pix src row 3
+    vabdl.u8  q0, d30, d31              @I  Abs diff r1 blk 12
+
+    vld1.u8   d27, [r1], r3             @I  load 8 pix pred row 3
+
+    vld1.u8   d24, [r0], r2             @I  load 8 pix src row 4
+
+    vld1.u8   d25, [r1], r3             @I  load 8 pix pred row 4
+    vabdl.u8  q1, d28, d29              @I  Abs diff r2 blk 12
+
+    vld1.u16  {q11}, [r4]               @I  load the threshold
+    vabdl.u8  q2, d26, d27              @I  Abs diff r3 blk 12
+
+    vabdl.u8  q3, d24, d25              @I  Abs diff r4 blk 12
+
+
+
+    @Software-pipelined loop: instructions tagged "I" finish the block whose
+    @absolute differences are already in q0-q3, instructions tagged "II" load
+    @the next 4x8 block and compute its absolute differences.
+core_loop:
+                                        @S1  S2  S3  S4     A1  A2  A3  A4
+                                        @S5  S6  S7  S8     A5  A6  A7  A8
+                                        @S9  S10 S11 S12    A9  A10 A11 A12
+                                        @S13 S14 S15 S16    A13 A14 A15 A16
+    ands      r11, r8, #1               @II See if we are at even or odd block
+    vadd.u16  q4 , q0, q3               @I  Add r1 r4
+    lsl       r11, r2, #2               @II Move back src 4 rows
+
+    subeq     r0, r0, r11               @II Move back src 4 rows if we are at even block
+    vadd.u16  q5 , q1, q2               @I  Add r2 r3
+    addeq     r0, r0, #8                @II Move src 8 cols forward if we are at even block
+
+    lsl       r11, r3, #2               @II Move back pred 4 rows
+    vtrn.16   d8 , d10                  @I transpose 1
+    subeq     r1, r1, r11               @II Move back pred 4 rows if we are at even block
+
+    addeq     r1, r1, #8                @II Move pred 8 cols forward if we are at even block
+    vtrn.16   d9 , d11                  @I transpose 2
+    subne     r0, r0, #8                @II Src 8 cols back for odd rows
+
+    subne     r1, r1, #8                @II Pred 8 cols back for odd rows
+    vtrn.32   d10, d11                  @I transpose 4
+
+
+    vtrn.32   d8 , d9                   @I transpose 3
+    vswp      d10, d11                  @I rearrange so that the q4 and q5 add properly
+                                        @D8     S1 S4 A1 A4
+                                        @D9     S2 S3 A2 A3
+                                        @D11    S1 S4 A1 A4
+                                        @D10    S2 S3 A2 A3
+
+    vadd.s16  q6, q4, q5                @I  Get s1 s4
+    vld1.u8   d30, [r0], r2             @II load first 8 pix src row 1
+
+    vtrn.s16  d12, d13                  @I  Get s2 s3
+                                        @D12 S1 S4 A1 A4
+                                        @D13 S2 S3 A2 A3
+
+    vshl.s16  q7, q6 , #1               @I  si  = si<<1
+    vld1.u8   d31, [r1], r3             @II load first 8 pix pred row 1
+
+    vpadd.s16 d16, d12, d13             @I  (s1 + s4) (s2 + s3)
+    vld1.u8   d28, [r0], r2             @II load first 8 pix src row 2
+                                        @   D16  S14 A14 S23 A23
+    vrev32.16 d0, d16                   @I
+    vuzp.s16  d16, d0                   @I
+                                        @D16  S14 S23 A14 A23
+    vadd.s16  d17, d12, d13             @I  (s1 + s2) (s3 + s4)
+    vld1.u8   d29, [r1], r3             @II load first 8 pix pred row 2
+                                        @D17  S12 S34 A12 A34
+
+    vrev32.16 q9, q7                    @I  Rearrange si's
+                                        @Q9  Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+                                        @D12    S1 S4 A1 A4
+                                        @D19    Z3 Z2 Y3 Y2
+    vsub.s16  d8, d12, d19              @I  (s1 - (s3<<1)) (s4 - (s2<<1))
+    vld1.u8   d26, [r0], r2             @II load first 8 pix src row 3
+                                        @D13    S2 S3 A2 A3
+                                        @D18    Z4 Z1 Y4 Y1
+    vsub.s16  d9, d13, d18              @I  (s2 - (s4<<1)) (s3 - (s1<<1))
+    vld1.u8   d27, [r1], r3             @II load first 8 pix pred row 3
+                                        @Q10    S8 S5 A8 A5 S7 S4 A7 A4
+
+                                        @D16  S14 S23 A14 A23
+    vpadd.s16 d10, d16, d17             @I  Get sad by adding s1 s2 s3 s4
+    vld1.u8   d24, [r0], r2             @II load first 8 pix src row 4
+                                        @D22 SAD1 SAD2 junk junk
+
+
+                                        @Q8     S2 S1 A2 A1 S6 S3 A6 A3
+                                        @Q10    S8 S5 A8 A5 S7 S4 A7 A4
+    vtrn.32   q8, q4                    @I  Rearrange to make ls of each block together
+                                        @Q8     S2 S1 S8 S5 S6 S3 S7 S4
+                                        @Q10    A2 A1 A8 A5 A6 A3 A7 A4
+
+
+    ldrh      r11, [r4, #16]            @I  Load the threshold for DC val blk 1
+    vdup.s16  q6, d10[0]                @I  Get the sad blk 1
+    vabdl.u8  q0, d30, d31              @II Abs diff r1 blk 12
+
+    vshl.s16  q7, q6, #1                @I  sad_2 = sad_1<<1
+    vmov.s16  r9, d10[0]                @I  Get the sad for block 1
+
+    vsub.s16  q9, q7, q8                @I  sad_2 - ls, blk 1
+    vmov.s16  r5, d10[1]                @I  Get the sad for block 2
+
+    vcle.s16  q7, q11, q9               @I  lanes where threshold <= value, blk 1
+    vld1.u8   d25, [r1], r3             @II load first 8 pix pred row 4
+
+    vdup.s16  q15, d10[1]               @I  Get the sad blk 2
+    vabdl.u8  q1, d28, d29              @II Abs diff r2 blk 12
+
+
+    vshl.s16  q14, q15, #1              @I  sad_2 = sad_1<<1
+    vsub.s16  q3, q14, q4               @I  sad_2 - ls, blk 2
+    vcle.s16  q15, q11, q3              @I  lanes where threshold <= value, blk 2
+
+    ADD       R10, R10, R9              @I  Add to  the global sad blk 1
+    vtrn.u8   q15, q7                   @I  get all comparison bits to one reg
+    vabdl.u8  q2, d26, d27              @II Abs diff r3 blk 12
+
+    ADD       R10, R10, R5              @I  Add to  the global sad blk 2
+    vshr.u8   q14, q15, #7              @I  Shift the bits so that no  overflow occurs
+    cmp       r11, r9                   @I  Compare with threshold blk 1
+
+    movle     r7, #0xf                  @I  If not met mark it by moving non zero val to R7 blk 1
+    vadd.u8   d28, d28, d29             @I  Add the bits
+    cmp       r11, r5                   @I  Compare with threshold blk 2
+
+    movle     r7, #0xf                  @I  If not met mark it by moving non zero val to R7 blk 2
+    vpadd.u8  d28, d28, d29             @I  Add the bits
+
+    vmov.u32  r11, d28[0]               @I  A set bit now represents an unsatisfied condition, move it to r11
+    vabdl.u8  q3, d24, d25              @II Abs diff r4 blk 12
+
+    orr       r7, r7, r11               @I  merge the comparison bits into r7
+
+
+    sub       r8, r8, #1                @I  Decrement block count
+
+    cmp       r7, #0                    @I  If we have at least one non zero block
+    bne       compute_sad_only          @I  if a non zero block is there, from now on compute sad only
+
+    cmp       r8, #1                    @I  See if we are at the last block
+    bne       core_loop                 @I  If the blocks are zero, lets continue the satqd
+
+
+    @EPILOGUE for core loop: finishes the last loaded block, nothing more is loaded
+                                        @S1  S2  S3  S4     A1  A2  A3  A4
+                                        @S5  S6  S7  S8     A5  A6  A7  A8
+                                        @S9  S10 S11 S12    A9  A10 A11 A12
+                                        @S13 S14 S15 S16    A13 A14 A15 A16
+    vadd.u16  q4 , q0, q3               @Add r1 r4
+    vadd.u16  q5 , q1, q2               @Add r2 r3
+                                        @D8     S1 S2 S2 S1
+                                        @D10    S4 S3 S3 S4
+                                        @D9     A1 A2 A2 A1
+                                        @D11    A4 A3 A3 A4
+    vtrn.16   d8 , d10                  @I transpose 1
+    vtrn.16   d9 , d11                  @I transpose 2
+    vtrn.32   d8 , d9                   @I transpose 3
+    vtrn.32   d10, d11                  @I transpose 4
+
+    vswp      d10, d11                  @I rearrange so that the q4 and q5 add properly
+                                        @D8     S1 S4 A1 A4
+                                        @D9     S2 S3 A2 A3
+                                        @D11    S1 S4 A1 A4
+                                        @D10    S2 S3 A2 A3
+    vadd.s16  q6, q4, q5                @Get s1 s4
+    vtrn.s16  d12, d13                  @Get s2 s3
+                                        @D12 S1 S4 A1 A4
+                                        @D13 S2 S3 A2 A3
+
+    vshl.s16  q7, q6 , #1               @si  = si<<1
+
+    vpadd.s16 d16, d12, d13             @(s1 + s4) (s2 + s3)
+                                        @D16  S14 A14 S23 A23
+    vrev32.16 d30, d16                  @
+    vuzp.s16  d16, d30                  @
+                                        @D16  S14 S23 A14 A23
+    vadd.s16  d17, d12, d13             @(s1 + s2) (s3 + s4)
+                                        @D17  S12 S34 A12 A34
+
+    vrev32.16 q9, q7                    @Rearrange si's
+                                        @Q9  Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+                                        @D12    S1 S4 A1 A4
+                                        @D19    Z3 Z2 Y3 Y2
+    vsub.s16  d8, d12, d19              @(s1 - (s3<<1)) (s4 - (s2<<1))
+                                        @D13    S2 S3 A2 A3
+                                        @D18    Z4 Z1 Y4 Y1
+    vsub.s16  d9, d13, d18              @(s2 - (s4<<1)) (s3 - (s1<<1))
+                                        @Q10    S8 S5 A8 A5 S7 S4 A7 A4
+
+                                        @D16  S14 S23 A14 A23
+    vpadd.s16 d10, d16, d17             @I  Get sad by adding s1 s2 s3 s4
+                                        @D22 SAD1 SAD2 junk junk
+    vmov.u16  r9, d10[0]                @Get the sad for block 1
+    vmov.u16  r5, d10[1]                @Get the sad for block 2
+
+                                        @Q8     S2 S1 A2 A1 S6 S3 A6 A3
+                                        @Q10    S8 S5 A8 A5 S7 S4 A7 A4
+    ldrh      r11, [r4, #16]            @Load the threshold for DC val blk 1
+    vtrn.32   q8, q4                    @Rearrange to make ls of each block together
+    ADD       R10, R10, R9              @Add to  the global sad blk 1
+
+                                        @Q8     S2 S1 S8 S5 S6 S3 S7 S4
+                                        @Q10    A2 A1 A8 A5 A6 A3 A7 A4
+
+    vld1.u16  {q11}, [r4]               @load the threshold
+    ADD       R10, R10, R5              @Add to  the global sad blk 2
+
+    vdup.u16  q6, d10[0]                @Get the sad blk 1
+
+    cmp       r11, r9                   @Compare with threshold blk 1
+    vshl.u16  q7, q6, #1                @sad_2 = sad_1<<1
+
+    vsub.s16  q9, q7, q8                @sad_2 - ls, blk 1
+
+    vcle.s16  q15, q11, q9              @lanes where threshold <= value, blk 1
+    movle     r7, #0xf                  @If not met mark it by moving non zero val to R7 blk 1
+
+    cmp       r11, r5                   @Compare with threshold blk 2
+    vdup.u16  q14, d10[1]               @Get the sad blk 2
+
+    vshl.u16  q13, q14, #1              @sad_2 = sad_1<<1
+    vsub.s16  q12, q13, q4              @sad_2 - ls, blk 2
+    vcle.s16  q14, q11, q12             @lanes where threshold <= value, blk 2
+    movle     r7, #0xf                  @If not met mark it by moving non zero val to R7 blk 2
+
+    vtrn.u8   q14, q15                  @get all comparison bits to one reg
+    vshr.u8   q14, q14, #7              @Shift the bits so that no  overflow occurs
+    vadd.u8   d28, d28, d29             @Add the bits
+    vpadd.u8  d28, d28, d29             @Add the bits
+    vmov.u32  r11, d28[0]               @A set bit now represents an unsatisfied condition, move it to r11
+    orr       r7, r7, r11               @merge the comparison bits into r7
+
+    b         funcend_sad_16x16         @Since all blocks are processed now, go to end
+
+compute_sad_only:                       @This block computes SAD only, so will be lighter
+                                        @It will start processing at an odd block
+                                        @It will compute sad for the odd block,
+                                        @and then for two blocks at a time
+                                        @The counter is r8, hence r8 blocks will be processed
+
+    and       r11, r8, #1               @Get the last bit of counter
+    cmp       r11, #0                   @See if we are at even or odd block
+                                        @if the blk is even we just have to set the pointer to the
+                                        @start of current row
+
+    lsleq     r11, r2, #2               @I  Move back src 4 rows
+    subeq     r0, r0, r11               @I  Move back src 4 rows if we are at even block
+
+    lsleq     r11, r3, #2               @I  Move back pred 4 rows
+    subeq     r1, r1, r11               @I  Move back pred 4 rows if we are at even block
+    beq       skip_odd_blk              @If the blk is odd we have to compute sad
+
+
+    vadd.u16  q4, q0, q1                @Add SAD of row1 and row2
+    vadd.u16  q5, q2, q3                @Add SAD of row3 and row4
+    vadd.u16  q6, q4, q5                @Add SAD of row 1-4
+    vadd.u16  d14, d12, d13             @Add Blk1 and blk2
+    vpadd.u16 d16, d14, d15             @Add col 1-2 and 3-4
+    vpadd.u16 d18, d16, d17             @Add col 12-34
+
+    vmov.u16  r9, d18[0]                @Move sad to arm
+    ADD       R10, R10, R9              @Add to  the global sad
+
+    sub       r8, r8, #1                @Dec counter
+    cmp       r8, #0                    @See if we processed last block
+    beq       funcend_sad_16x16         @if processed last block goto end of func
+
+    sub       r0, r0, #8                @Since we processed odd block move back src by 8 cols
+    sub       r1, r1, #8                @Since we processed odd block move back pred by 8 cols
+
+skip_odd_blk:
+
+    vmov.s16  q0, #0                    @Initialize the accumulator
+    vmov.s16  q1, #0                    @Initialize the accumulator
+
+    vld1.u8   {q15}, [r0], r2           @load src r1
+    vld1.u8   {q14}, [r1], r3           @load pred r1
+
+    vld1.u8   {q13}, [r0], r2           @load src r2
+    vld1.u8   {q12}, [r1], r3           @load pred r2
+
+    vld1.u8   {q11}, [r0], r2           @load src r3
+    vld1.u8   {q10}, [r1], r3           @load pred r3
+
+    vld1.u8   {q9}, [r0], r2            @load src r4
+    vld1.u8   {q8}, [r1], r3            @load pred r4
+
+    cmp       r8, #2
+    beq       sad_epilouge
+
+sad_loop:
+
+    vabal.u8  q0, d30, d28              @I  accumulate Abs diff R1
+    vabal.u8  q1, d31, d29              @I  accumulate Abs diff R1
+
+    vld1.u8   {q15}, [r0], r2           @II load r1 src
+    vabal.u8  q0, d26, d24              @I  accumulate Abs diff R2
+
+    vld1.u8   {q14}, [r1], r3           @II load r1 pred
+    vabal.u8  q1, d27, d25              @I  accumulate Abs diff R2
+
+    vld1.u8   {q13}, [r0], r2           @II load r2 src
+    vabal.u8  q0, d22, d20              @I  accumulate Abs diff R3
+
+    vld1.u8   {q12}, [r1], r3           @II load r2 pred
+    vabal.u8  q1, d23, d21              @I  accumulate Abs diff R3
+
+    vld1.u8   {q11}, [r0], r2           @II load r3 src
+    vabal.u8  q0, d18, d16              @I  accumulate Abs diff R4
+
+
+    sub       r8, r8, #2                @Since we process 16 pix at a time, dec by 2
+    vld1.u8   {q10}, [r1], r3           @II load r3 pred
+    vabal.u8  q1, d19, d17              @I  accumulate Abs diff R4
+
+    cmp       r8, #2                    @Check if last loop
+    vld1.u8   {q9}, [r0], r2            @II load r4 src
+    vld1.u8   {q8}, [r1], r3            @II load r4 pred
+
+    bne       sad_loop                  @Go back to SAD computation
+
+sad_epilouge:
+    vabal.u8  q0, d30, d28              @Accumulate Abs diff R1
+    vabal.u8  q1, d31, d29              @Accumulate Abs diff R1
+
+    vabal.u8  q0, d26, d24              @Accumulate Abs diff R2
+    vabal.u8  q1, d27, d25              @Accumulate Abs diff R2
+
+    vabal.u8  q0, d22, d20              @Accumulate Abs diff R3
+    vabal.u8  q1, d23, d21              @Accumulate Abs diff R3
+
+    vabal.u8  q0, d18, d16              @Accumulate Abs diff R4
+    vabal.u8  q1, d19, d17              @Accumulate Abs diff R4
+
+    vadd.u16  q2, q0, q1                @ADD two accumulators
+    vadd.u16  d6, d4, d5                @Add two blk sad
+    vpadd.u16 d8, d6, d7                @Add col 1-2 and 3-4 sad
+    vpadd.u16 d10, d8, d9               @Add col 12-34 sad
+
+    vmov.u16  r9, d10[0]                @move SAD to ARM
+    ADD       R10, R10, R9              @Add to  the global sad
+
+funcend_sad_16x16:                      @End of function process
+    @Every exit path comes through here. Restoring d8-d15 first puts sp back
+    @where the push left it, so the argument offsets below remain #44 / #48.
+    vpop      {d8-d15}
+    ldr       r5, [sp, #44]             @second stack argument: distortion pointer
+    ldr       r6, [sp, #48]             @third stack argument: is_nonzero pointer
+
+    str       r7, [r6]                  @Store the is zero reg
+    str       r10, [r5]                 @Store sad
+
+    pop       {r4-r12, pc}
+
+
diff --git a/encoder/arm/ime_platform_macros.h b/encoder/arm/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/arm/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_platform_macros.h
+*
+* @brief
+*  Platform specific Macro definitions used in the codec
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+
+/*
+ * USADA8(src, est, sad)
+ *   Adds to 'sad' the sum of absolute differences of elements [0]..[3] of
+ *   'src' and 'est'.
+ *
+ *   Relies on an ABS() macro being visible at the point of use; it is not
+ *   defined in this header.
+ *   'src' and 'est' are each expanded four times, so they must not have side
+ *   effects. Every parameter and the whole expansion are parenthesized so
+ *   that expression arguments (e.g. p + 4, *psad) expand as intended.
+ */
+#define USADA8(src,est,sad) \
+                ((sad) +=  ABS((src)[0]-(est)[0]) + \
+                ABS((src)[1]-(est)[1]) + \
+                ABS((src)[2]-(est)[2]) + \
+                ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
new file mode 100755
index 0000000..c442077
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -0,0 +1,592 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+//*                and do the prediction.
+//*
+//* @par Description
+//*   This function evaluates  first three 16x16 modes and compute corresponding sad
+//*   and return the buffer predicted with best mode.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels_i16
+//*  UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return      none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+//                                      UWORD8 *pu1_ngbr_pels_i16,
+//                                      UWORD8 *pu1_dst,
+//                                      UWORD32 src_strd,
+//                                      UWORD32 dst_strd,
+//                                      WORD32 u4_n_avblty,
+//                                      UWORD32 *u4_intra_mode,
+//                                      WORD32 *pu4_sadmin,
+//                                       UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.globl ih264e_evaluate_intra16x16_modes_av8
+
+ih264e_evaluate_intra16x16_modes_av8:
+
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+
+
+
+    // STMFD sp!, {x4-x12, x14}          //store register values to stack
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+
+    ldr       x16, [sp, #80]
+    mov       x17, x4
+    mov       x18, x5
+    mov       x14, x6
+    mov       x15, x7
+
+
+    sub       v0.16b, v0.16b, v0.16b
+    sub       v1.16b, v1.16b, v1.16b
+    mov       w10, #0
+    mov       w11 , #3
+
+    ands      x6, x5, #0x01
+    beq       top_available             //LEFT NOT AVAILABLE
+    ld1       {v0.16b}, [x1]
+    add       w10, w10, #8
+    add       w11, w11, #1
+top_available:
+    ands      x6, x5, #0x04
+    beq       none_available
+    add       x6, x1, #17
+    ld1       {v1.16b}, [x6]
+    add       w10, w10, #8
+    add       w11, w11, #1
+    b         summation
+none_available:
+    cmp       x5, #0
+    bne       summation
+    mov       w6, #128
+    dup       v30.16b, w6
+    dup       v31.16b, w6
+    b         sad_comp
+summation:
+    uaddl     v2.8h, v0.8b, v1.8b
+    uaddl2    v3.8h, v0.16b, v1.16b
+    dup       v10.8h, w10
+    neg       w11, w11
+    dup       v20.8h, w11
+    add       v0.8h, v2.8h, v3.8h
+    mov       v1.d[0], v0.d[1]
+    add       v0.4h, v0.4h, v1.4h
+    addp      v0.4h, v0.4h , v0.4h
+    addp      v0.4h, v0.4h , v0.4h
+    add       v0.4h, v0.4h, v10.4h
+    uqshl     v0.8h, v0.8h, v20.8h
+    sqxtun    v0.8b, v0.8h
+
+    dup       v30.16b, v0.b[0]
+    dup       v31.16b, v0.b[0]
+
+
+sad_comp:
+    ld1       { v0.2s, v1.2s }, [x0], x3 // source x0w 0
+
+    ld1       { v2.2s, v3.2s}, [x0], x3 //row 1
+
+    ld1       { v4.2s, v5.2s}, [x0], x3 //row 2
+
+    ld1       { v6.2s, v7.2s}, [x0], x3 //row 3
+
+    //---------------------
+
+    //values for vertical prediction
+    add       x6, x1, #17
+    ld1       {v10.8b}, [x6], #8
+    ld1       {v11.8b}, [x6], #8
+    ld1       {v9.16b}, [x1]
+
+
+
+    dup       v20.8b, v9.b[15]          ///HORIZONTAL VALUE ROW=0//
+    dup       v21.8b, v9.b[15]          ///HORIZONTAL VALUE ROW=0//
+
+
+///* computing SADs for all three modes*/
+    ///vertical row 0@
+    uabdl     v16.8h, v0.8b, v10.8b
+    uabdl     v18.8h, v1.8b, v11.8b
+
+    ///HORZ row 0@
+    uabdl     v26.8h, v0.8b, v20.8b
+    uabdl     v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabdl     v22.8h, v0.8b, v30.8b
+    uabdl     v24.8h, v1.8b, v31.8b
+
+
+
+
+
+    dup       v20.8b, v9.b[14]          ///HORIZONTAL VALUE ROW=1//
+    dup       v21.8b, v9.b[14]
+
+
+    ///vertical row 1@
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    ld1       { v0.2s, v1.2s }, [x0], x3 //row 4
+    ///HORZ row 1@
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal     v22.8h, v2.8b, v30.8b
+    uabal     v24.8h, v3.8b, v31.8b
+
+    dup       v20.8b, v9.b[13]          ///HORIZONTAL VALUE ROW=2//
+    dup       v21.8b, v9.b[13]
+
+    ///vertical row 2@
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    ld1       { v2.2s, v3.2s}, [x0], x3 //row 5
+    ///HORZ row 2@
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal     v22.8h, v4.8b, v30.8b
+    uabal     v24.8h, v5.8b, v31.8b
+
+    dup       v20.8b, v9.b[12]          ///HORIZONTAL VALUE ROW=3//
+    dup       v21.8b, v9.b[12]
+
+    ///vertical row 3@
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    ld1       { v4.2s, v5.2s}, [x0], x3 //row 6
+    ///HORZ row 3@
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal     v22.8h, v6.8b, v30.8b
+    uabal     v24.8h, v7.8b, v31.8b
+//----------------------------------------------------------------------------------------------
+
+    dup       v20.8b, v9.b[11]          ///HORIZONTAL VALUE ROW=0//
+    dup       v21.8b, v9.b[11]
+
+    ///vertical row 0@
+    uabal     v16.8h, v0.8b, v10.8b
+    uabal     v18.8h, v1.8b, v11.8b
+
+    ld1       {  v6.2s, v7.2s}, [x0], x3 //row 7
+    ///HORZ row 0@
+    uabal     v26.8h, v0.8b, v20.8b
+    uabal     v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal     v22.8h, v0.8b, v30.8b
+    uabal     v24.8h, v1.8b, v31.8b
+
+    dup       v20.8b, v9.b[10]          ///HORIZONTAL VALUE ROW=1//
+    dup       v21.8b, v9.b[10]
+
+    ///vertical row 1@
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    ld1       { v0.2s, v1.2s }, [x0], x3 //row 8
+    ///HORZ row 1@
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal     v22.8h, v2.8b, v30.8b
+    uabal     v24.8h, v3.8b, v31.8b
+
+    dup       v20.8b, v9.b[9]           ///HORIZONTAL VALUE ROW=2//
+    dup       v21.8b, v9.b[9]
+
+    ///vertical row 2@
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    ld1       { v2.2s, v3.2s}, [x0], x3 //row 9
+
+    ///HORZ row 2@
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal     v22.8h, v4.8b, v30.8b
+    uabal     v24.8h, v5.8b, v31.8b
+
+    dup       v20.8b, v9.b[8]           ///HORIZONTAL VALUE ROW=3//
+    dup       v21.8b, v9.b[8]
+
+    ///vertical row 3@
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    ld1       { v4.2s, v5.2s}, [x0], x3 //row 10
+
+    ///HORZ row 3@
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal     v22.8h, v6.8b, v30.8b
+    uabal     v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+    dup       v20.8b, v9.b[7]           ///HORIZONTAL VALUE ROW=0//
+    dup       v21.8b, v9.b[7]
+
+    ///vertical row 0@
+    uabal     v16.8h, v0.8b, v10.8b
+    uabal     v18.8h, v1.8b, v11.8b
+
+    ld1       {  v6.2s, v7.2s}, [x0], x3 //row11
+
+    ///HORZ row 0@
+    uabal     v26.8h, v0.8b, v20.8b
+    uabal     v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal     v22.8h, v0.8b, v30.8b
+    uabal     v24.8h, v1.8b, v31.8b
+
+    dup       v20.8b, v9.b[6]           ///HORIZONTAL VALUE ROW=1//
+    dup       v21.8b, v9.b[6]
+
+    ///vertical row 1@
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    ld1       { v0.2s, v1.2s }, [x0], x3 //row12
+
+    ///HORZ row 1@
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal     v22.8h, v2.8b, v30.8b
+    uabal     v24.8h, v3.8b, v31.8b
+
+    dup       v20.8b, v9.b[5]           ///HORIZONTAL VALUE ROW=2//
+    dup       v21.8b, v9.b[5]
+
+    ///vertical row 2@
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    ld1       { v2.2s, v3.2s}, [x0], x3 //row13
+
+    ///HORZ row 2@
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal     v22.8h, v4.8b, v30.8b
+    uabal     v24.8h, v5.8b, v31.8b
+
+    dup       v20.8b, v9.b[4]           ///HORIZONTAL VALUE ROW=3//
+    dup       v21.8b, v9.b[4]
+
+    ///vertical row 3@
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    ld1       { v4.2s, v5.2s}, [x0], x3 //row14
+
+    ///HORZ row 3@
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal     v22.8h, v6.8b, v30.8b
+    uabal     v24.8h, v7.8b, v31.8b
+    //-----------------------------------------------------------------
+
+    dup       v20.8b, v9.b[3]           ///HORIZONTAL VALUE ROW=0//
+    dup       v21.8b, v9.b[3]
+
+    ///vertical row 0@
+    uabal     v16.8h, v0.8b, v10.8b
+    uabal     v18.8h, v1.8b, v11.8b
+
+    ld1       {  v6.2s, v7.2s}, [x0], x3 //row15
+
+    ///HORZ row 0@
+    uabal     v26.8h, v0.8b, v20.8b
+    uabal     v28.8h, v1.8b, v21.8b
+
+    ///dc row 0@
+    uabal     v22.8h, v0.8b, v30.8b
+    uabal     v24.8h, v1.8b, v31.8b
+
+    dup       v20.8b, v9.b[2]           ///HORIZONTAL VALUE ROW=1//
+    dup       v21.8b, v9.b[2]
+
+    ///vertical row 1@
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    ///HORZ row 1@
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v28.8h, v3.8b, v21.8b
+
+    ///dc row 1@
+    uabal     v22.8h, v2.8b, v30.8b
+    uabal     v24.8h, v3.8b, v31.8b
+
+    dup       v20.8b, v9.b[1]           ///HORIZONTAL VALUE ROW=2//
+    dup       v21.8b, v9.b[1]
+
+    ///vertical row 2@
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    ///HORZ row 2@
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v28.8h, v5.8b, v21.8b
+
+    ///dc row 2@
+    uabal     v22.8h, v4.8b, v30.8b
+    uabal     v24.8h, v5.8b, v31.8b
+
+    dup       v20.8b, v9.b[0]           ///HORIZONTAL VALUE ROW=3//
+    dup       v21.8b, v9.b[0]
+
+    ///vertical row 3@
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    ///HORZ row 3@
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v28.8h, v7.8b, v21.8b
+
+    ///dc row 3@
+    uabal     v22.8h, v6.8b, v30.8b
+    uabal     v24.8h, v7.8b, v31.8b
+    //------------------------------------------------------------------------------
+
+
+    //vert sum
+
+    add       v16.8h, v16.8h , v18.8h
+    mov       v18.d[0], v16.d[1]
+    add       v16.4h, v16.4h , v18.4h
+    uaddlp    v16.2s, v16.4h
+    addp      v16.2s, v16.2s, v16.2s
+    smov      x8, v16.s[0]              //dc
+
+
+    //horz sum
+
+    add       v26.8h, v26.8h , v28.8h
+    mov       v28.d[0], v26.d[1]
+    add       v26.4h, v26.4h , v28.4h
+    uaddlp    v26.2s, v26.4h
+    addp      v26.2s, v26.2s, v26.2s
+    smov      x9, v26.s[0]
+
+    //dc sum
+
+    add       v24.8h, v22.8h , v24.8h   ///DC
+    mov       v25.d[0], v24.d[1]
+    add       v24.4h, v24.4h , v25.4h   ///DC
+    uaddlp    v24.2s, v24.4h            ///DC
+    addp      v24.2s, v24.2s, v24.2s    ///DC
+    smov      x10, v24.s[0]             //dc
+
+
+    //-----------------------
+    mov       x11, #1
+    lsl       x11, x11, #30
+
+    mov       x0, x16
+    //--------------------------------------------
+    ands      x7, x0, #01               // vert mode valid????????????
+    csel      x8, x11, x8, eq
+
+
+    ands      x6, x0, #02               // horz mode valid????????????
+    csel      x9, x11, x9, eq
+
+    ands      x6, x0, #04               // dc mode valid????????????
+    csel      x10, x11, x10, eq
+
+
+
+
+//--------------------------------
+
+    mov       x4, x17
+    mov       x7, x15
+    mov       x6, x14
+
+    //---------------------------
+
+    //--------------------------
+
+    cmp       x8, x9
+    bgt       not_vert
+    cmp       x8, x10
+    bgt       do_dc
+
+    ///----------------------
+    //DO VERTICAL PREDICTION
+    str       x8 , [x7]                 //MIN SAD
+    mov       x8, #0
+    str       x8 , [x6]                 // MODE
+    add       x6, x1, #17
+    ld1       {v30.16b}, [x6]
+    b         do_dc_vert
+    //-----------------------------
+not_vert: cmp x9, x10
+    bgt       do_dc
+
+    ///----------------------
+    //DO HORIZONTAL
+    str       x9 , [x7]                 //MIN SAD
+    mov       x9, #1
+    str       x9 , [x6]                 // MODE
+
+    ld1       {v0.16b}, [x1]
+    dup       v10.16b, v0.b[15]
+    dup       v11.16b, v0.b[14]
+    dup       v12.16b, v0.b[13]
+    dup       v13.16b, v0.b[12]
+    st1       {v10.16b}, [x2], x4
+    dup       v14.16b, v0.b[11]
+    st1       {v11.16b}, [x2], x4
+    dup       v15.16b, v0.b[10]
+    st1       {v12.16b}, [x2], x4
+    dup       v16.16b, v0.b[9]
+    st1       {v13.16b}, [x2], x4
+    dup       v17.16b, v0.b[8]
+    st1       {v14.16b}, [x2], x4
+    dup       v18.16b, v0.b[7]
+    st1       {v15.16b}, [x2], x4
+    dup       v19.16b, v0.b[6]
+    st1       {v16.16b}, [x2], x4
+    dup       v20.16b, v0.b[5]
+    st1       {v17.16b}, [x2], x4
+    dup       v21.16b, v0.b[4]
+    st1       {v18.16b}, [x2], x4
+    dup       v22.16b, v0.b[3]
+    st1       {v19.16b}, [x2], x4
+    dup       v23.16b, v0.b[2]
+    st1       {v20.16b}, [x2], x4
+    dup       v24.16b, v0.b[1]
+    st1       {v21.16b}, [x2], x4
+    dup       v25.16b, v0.b[0]
+    st1       {v22.16b}, [x2], x4
+    st1       {v23.16b}, [x2], x4
+    st1       {v24.16b}, [x2], x4
+    st1       {v25.16b}, [x2], x4
+
+
+
+    b         end_func
+
+
+    ///-----------------------------
+
+do_dc: ///---------------------------------
+    //DO DC
+    str       x10 , [x7]                //MIN SAD
+    mov       x10, #2
+    str       x10 , [x6]                // MODE
+do_dc_vert:
+    st1       {v30.4s}, [x2], x4        //0
+    st1       {v30.4s}, [x2], x4        //1
+    st1       {v30.4s}, [x2], x4        //2
+    st1       {v30.4s}, [x2], x4        //3
+    st1       {v30.4s}, [x2], x4        //4
+    st1       {v30.4s}, [x2], x4        //5
+    st1       {v30.4s}, [x2], x4        //6
+    st1       {v30.4s}, [x2], x4        //7
+    st1       {v30.4s}, [x2], x4        //8
+    st1       {v30.4s}, [x2], x4        //9
+    st1       {v30.4s}, [x2], x4        //10
+    st1       {v30.4s}, [x2], x4        //11
+    st1       {v30.4s}, [x2], x4        //12
+    st1       {v30.4s}, [x2], x4        //13
+    st1       {v30.4s}, [x2], x4        //14
+    st1       {v30.4s}, [x2], x4        //15
+    ///------------------
+end_func:
+    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
new file mode 100755
index 0000000..b02afd1
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -0,0 +1,467 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate the best intra chroma mode (among VERT, HORZ and DC)
+//*                and do the prediction.
+//*
+//* @par Description
+//*   This function evaluates the first three intra chroma modes, computes the corresponding
+//*   SADs, and returns the buffer predicted with the best mode.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels_i16
+//*  UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which the best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which the minimum SAD is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return      none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+//                                      UWORD8 *pu1_ngbr_pels_i16,
+//                                      UWORD8 *pu1_dst,
+//                                      UWORD32 src_strd,
+//                                      UWORD32 dst_strd,
+//                                      WORD32 u4_n_avblty,
+//                                      UWORD32 *u4_intra_mode,
+//                                      WORD32 *pu4_sadmin,
+//                                       UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.global ih264e_evaluate_intra_chroma_modes_av8
+
+ih264e_evaluate_intra_chroma_modes_av8:
+// Register roles on entry (AAPCS64, first eight arguments in x0-x7):
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+// u4_valid_intra_modes is the ninth argument and is read from the caller's stack.
+// Results: 32-bit best mode at [u4_intra_mode], 32-bit minimum SAD at
+// [pu4_sadmin], and eight 16-byte rows of prediction at pu1_dst.
+    // Save the registers this routine modifies that the callee must preserve.
+    push_v_regs                         // macro from ih264_neon_macros.s (presumably saves d8-d15 - confirm)
+    stp       x19, x20, [sp, #-16]!
+    //-----------------------
+    ldr       x16, [sp, #80]            // u4_valid_intra_modes; #80 presumes push_v_regs pushed 64 bytes - TODO confirm
+    mov       w17, w4                   // x17 = dst_strd; the w-write clears bits 63:32, which the ABI leaves unspecified
+    mov       w3, w3                    // same zero-extension for src_strd (used as a 64-bit post-index below)
+    mov       x14, x6                   // x14 = u4_intra_mode (x6 is reused as scratch)
+    mov       x15, x7                   // x15 = pu4_sadmin (x7 is reused as scratch)
+
+    mov       x19, #5
+    ands      x6, x5, x19               // keep availability bits 0 and 2 only
+    beq       none_available
+    cmp       x6, #1
+    beq       left_only_available
+    cmp       x6, #4
+    beq       top_only_available
+
+all_available:
+    ld1       {v0.8b, v1.8b}, [x1]      // left neighbours, bytes 0-15
+    add       x6, x1, #18
+    ld1       {v2.8b, v3.8b}, [x6]      // top neighbours, bytes 18-33
+    uxtl      v0.8h, v0.8b              // widen: each 32-bit lane now holds one 2-byte pair
+    uxtl      v1.8h, v1.8b
+    addp      v0.4s, v0.4s , v0.4s      // .4s pairwise adds keep the two interleaved components apart
+    addp      v1.4s, v1.4s , v1.4s
+    addp      v0.4s, v0.4s , v0.4s      // h[0], h[1] = per-component sums of left bytes 0-7
+    addp      v1.4s, v1.4s , v1.4s      // h[0], h[1] = per-component sums of left bytes 8-15
+    uxtl      v2.8h, v2.8b
+    uxtl      v3.8h, v3.8b
+    addp      v2.4s, v2.4s , v2.4s
+    addp      v3.4s, v3.4s , v3.4s
+    addp      v2.4s, v2.4s , v2.4s      // h[0], h[1] = per-component sums of top bytes 0-7
+    addp      v3.4s, v3.4s , v3.4s      // h[0], h[1] = per-component sums of top bytes 8-15
+    rshrn     v5.8b, v0.8h, #2          // (sum + 2) >> 2 of left bytes 0-7
+    dup       v21.8h, v5.h[0]
+    rshrn     v6.8b, v3.8h, #2          // (sum + 2) >> 2 of top bytes 8-15
+    dup       v20.8h, v6.h[0]
+    add       v1.8h, v1.8h, v2.8h       // left bytes 8-15 + top bytes 0-7
+    rshrn     v1.8b, v1.8h, #3          // (sum + 4) >> 3
+    dup       v23.8h, v1.h[0]
+    mov       v20.d[0], v23.d[0]
+    add       v0.8h, v0.8h, v3.8h       // left bytes 0-7 + top bytes 8-15
+    rshrn     v0.8b, v0.8h, #3          // (sum + 4) >> 3
+    dup       v23.8h, v0.h[0]
+    mov       v31.d[0], v23.d[0]        // DC for rows 4-7, output bytes 8-15
+    mov       v28.d[0], v20.d[0]        // DC for rows 0-3, output bytes 0-7
+    mov       v29.d[0], v20.d[1]        // DC for rows 0-3, output bytes 8-15
+    mov       v30.d[0], v21.d[0]        // DC for rows 4-7, output bytes 0-7
+    b         sad_comp
+
+left_only_available:
+    ld1       {v0.8b, v1.8b}, [x1]      // left neighbours, bytes 0-15
+    uxtl      v0.8h, v0.8b
+    uxtl      v1.8h, v1.8b
+    addp      v0.4s, v0.4s , v0.4s
+    addp      v1.4s, v1.4s , v1.4s
+    addp      v0.4s, v0.4s , v0.4s
+    addp      v1.4s, v1.4s , v1.4s
+    rshrn     v0.8b, v0.8h, #2          // DC of left bytes 0-7
+    rshrn     v1.8b, v1.8h, #2          // DC of left bytes 8-15
+    // rows 0-3 use the DC of left bytes 8-15, rows 4-7 the DC of left bytes 0-7
+    dup       v28.8h , v1.h[0]
+    dup       v29.8h , v1.h[0]
+    dup       v30.8h, v0.h[0]
+    dup       v31.8h, v0.h[0]
+    b         sad_comp
+
+top_only_available:
+    add       x6, x1, #18
+    ld1       {v0.8b, v1.8b}, [x6]      // top neighbours, bytes 18-33
+    uxtl      v0.8h, v0.8b
+    uxtl      v1.8h, v1.8b
+    addp      v0.4s, v0.4s , v0.4s
+    addp      v1.4s, v1.4s , v1.4s
+    addp      v0.4s, v0.4s , v0.4s
+    addp      v1.4s, v1.4s , v1.4s
+    rshrn     v0.8b, v0.8h, #2          // DC of top bytes 0-7
+    rshrn     v1.8b, v1.8h, #2          // DC of top bytes 8-15
+    dup       v28.8h , v0.h[0]          // output bytes 0-7, rows 0-3
+    dup       v30.8h, v1.h[0]
+    mov       v29.d[0], v30.d[1]        // output bytes 8-15, rows 0-3
+    mov       v30.d[0], v28.d[0]        // output bytes 0-7, rows 4-7
+    mov       v31.d[0], v30.d[1]        // output bytes 8-15, rows 4-7
+    b         sad_comp
+none_available:
+    mov       w20, #128                 // no neighbours: every DC byte is 128
+    dup       v28.16b, w20
+    dup       v29.16b, w20
+    dup       v30.16b, w20
+    dup       v31.16b, w20
+
+// SAD of eight 16-byte source rows against the VERT, HORZ and DC predictions.
+// 16-bit accumulators: v16/v18 = VERT, v26/v14 = HORZ, v22/v24 = DC.
+sad_comp:
+    add       x6, x1, #18
+    ld1       {v10.8b, v11.8b}, [x6]    // vertical values
+
+    ld1       {v27.8h}, [x1]            // left neighbours; row 0 is halfword 7, row 7 is halfword 0
+
+    dup       v20.8h, v27.h[7]          // horizontal value, row 0
+    dup       v21.8h, v27.h[7]
+
+    ld1       { v0.8b, v1.8b}, [x0], x3 // source row 0
+
+
+    // VERT, row 0 (uabdl initialises the accumulators)
+    uabdl     v16.8h, v0.8b, v10.8b
+    uabdl     v18.8h, v1.8b, v11.8b
+
+    // HORZ, row 0
+    uabdl     v26.8h, v0.8b, v20.8b
+    uabdl     v14.8h, v1.8b, v21.8b
+
+    ld1       {v2.8b, v3.8b}, [x0], x3  // source row 1
+
+
+
+    // DC, row 0
+    uabdl     v22.8h, v0.8b, v28.8b
+    uabdl     v24.8h, v1.8b, v29.8b
+
+
+    dup       v20.8h, v27.h[6]
+    dup       v21.8h, v27.h[6]          // horizontal value, row 1
+
+    // VERT, row 1
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    ld1       { v4.8b, v5.8b}, [x0], x3 // source row 2
+
+    // HORZ, row 1
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v14.8h, v3.8b, v21.8b
+
+    // DC, row 1
+    uabal     v22.8h, v2.8b, v28.8b
+    uabal     v24.8h, v3.8b, v29.8b
+
+    dup       v20.8h, v27.h[5]
+    dup       v21.8h, v27.h[5]          // horizontal value, row 2
+
+    // VERT, row 2
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    ld1       { v6.8b, v7.8b}, [x0], x3 // source row 3
+    // HORZ, row 2
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v14.8h, v5.8b, v21.8b
+
+    // DC, row 2
+    uabal     v22.8h, v4.8b, v28.8b
+    uabal     v24.8h, v5.8b, v29.8b
+
+    dup       v20.8h, v27.h[4]
+    dup       v21.8h, v27.h[4]          // horizontal value, row 3
+
+    // VERT, row 3
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    // HORZ, row 3
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v14.8h, v7.8b, v21.8b
+
+    // DC, row 3
+    uabal     v22.8h, v6.8b, v28.8b
+    uabal     v24.8h, v7.8b, v29.8b
+
+    //---------------- rows 4-7: DC predictors switch to v30/v31 ----------------
+    ld1       { v0.8b, v1.8b}, [x0], x3 // source row 4
+
+
+    dup       v20.8h, v27.h[3]
+    dup       v21.8h, v27.h[3]          // horizontal value, row 4
+
+    // VERT, row 4
+    uabal     v16.8h, v0.8b, v10.8b
+    uabal     v18.8h, v1.8b, v11.8b
+
+    // HORZ, row 4
+    uabal     v26.8h, v0.8b, v20.8b
+    uabal     v14.8h, v1.8b, v21.8b
+
+    ld1       { v2.8b, v3.8b}, [x0], x3 // source row 5
+
+    // DC, row 4
+    uabal     v22.8h, v0.8b, v30.8b
+    uabal     v24.8h, v1.8b, v31.8b
+
+    dup       v20.8h, v27.h[2]
+    dup       v21.8h, v27.h[2]          // horizontal value, row 5
+
+    // VERT, row 5
+    uabal     v16.8h, v2.8b, v10.8b
+    uabal     v18.8h, v3.8b, v11.8b
+
+    // HORZ, row 5
+    uabal     v26.8h, v2.8b, v20.8b
+    uabal     v14.8h, v3.8b, v21.8b
+
+    ld1       { v4.8b, v5.8b}, [x0], x3 // source row 6
+
+    // DC, row 5
+    uabal     v22.8h, v2.8b, v30.8b
+    uabal     v24.8h, v3.8b, v31.8b
+
+    dup       v20.8h, v27.h[1]
+    dup       v21.8h, v27.h[1]          // horizontal value, row 6
+
+    // VERT, row 6
+    uabal     v16.8h, v4.8b, v10.8b
+    uabal     v18.8h, v5.8b, v11.8b
+
+    // HORZ, row 6
+    uabal     v26.8h, v4.8b, v20.8b
+    uabal     v14.8h, v5.8b, v21.8b
+
+    ld1       {v6.8b, v7.8b}, [x0], x3  // source row 7
+
+    // DC, row 6
+    uabal     v22.8h, v4.8b, v30.8b
+    uabal     v24.8h, v5.8b, v31.8b
+
+    dup       v20.8h, v27.h[0]
+    dup       v21.8h, v27.h[0]          // horizontal value, row 7
+
+    // VERT, row 7
+    uabal     v16.8h, v6.8b, v10.8b
+    uabal     v18.8h, v7.8b, v11.8b
+
+    // HORZ, row 7
+    uabal     v26.8h, v6.8b, v20.8b
+    uabal     v14.8h, v7.8b, v21.8b
+
+    // DC, row 7
+    uabal     v22.8h, v6.8b, v30.8b
+    uabal     v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+// Horizontal reduction of each pair of accumulators to one scalar SAD.
+
+//vert sum
+
+    add       v16.8h, v16.8h , v18.8h
+    mov       v18.d[0], v16.d[1]        // fold the upper four lanes onto the lower four
+    add       v16.4h, v16.4h , v18.4h
+    uaddlp    v16.2s, v16.4h            // widen to 32 bit while adding pairs
+    addp      v16.2s, v16.2s, v16.2s
+    smov      x8, v16.s[0]              // x8 = VERT SAD
+
+
+    //horz sum
+
+    add       v26.8h, v26.8h , v14.8h
+    mov       v14.d[0], v26.d[1]
+    add       v26.4h, v26.4h , v14.4h
+    uaddlp    v26.2s, v26.4h
+    addp      v26.2s, v26.2s, v26.2s
+    smov      x9, v26.s[0]              // x9 = HORZ SAD
+
+    //dc sum
+
+    add       v24.8h, v22.8h , v24.8h   ///DC
+    mov       v25.d[0], v24.d[1]
+    add       v24.4h, v24.4h , v25.4h   ///DC
+    uaddlp    v24.2s, v24.4h            ///DC
+    addp      v24.2s, v24.2s, v24.2s    ///DC
+    smov      x10, v24.s[0]             // x10 = DC SAD
+
+
+    // A mode whose bit is clear in u4_valid_intra_modes gets the cost 1 << 30
+    // so that it loses against any computed SAD.
+    mov       x11, #1
+//-----------------------
+    mov       x0, x16 // u4_valid_intra_modes
+
+//--------------------------------------------
+
+
+    lsl       x11, x11, #30
+
+    ands      x7, x0, #04               // bit 2: VERT mode valid?
+    csel      x8, x11, x8, eq
+
+    ands      x6, x0, #02               // bit 1: HORZ mode valid?
+    csel      x9, x11, x9, eq
+
+    ands      x6, x0, #01               // bit 0: DC mode valid?
+    csel      x10, x11, x10, eq
+
+
+    //---------------------------
+    // Restore the arguments that were parked in x17/x14/x15.
+    mov       x4, x17                   // dst_strd
+    mov       x6, x14                   // u4_intra_mode
+    mov       x7, x15                   // pu4_sadmin
+
+    //--------------------------
+    // Pick the smallest cost; on ties DC wins over HORZ, and HORZ over VERT.
+    cmp       x10, x9
+    bgt       not_dc
+    cmp       x10, x8
+    bgt       do_vert
+
+    ///----------------------
+    //DO DC PREDICTION
+    str       w10 , [x7]                //MIN SAD (32-bit store: *pu4_sadmin is a WORD32)
+
+    mov       x10, #0
+    str       w10 , [x6]                // MODE = 0 (32-bit store: *u4_intra_mode is a UWORD32)
+
+    b         do_dc_vert
+    //-----------------------------
+
+not_dc:
+    cmp       x9, x8
+    bgt       do_vert
+    ///----------------------
+    //DO HORIZONTAL
+    str       w9 , [x7]                 //MIN SAD (32-bit store)
+
+    mov       x10, #1
+    str       w10 , [x6]                // MODE = 1 (32-bit store)
+    ld1       {v0.8h}, [x1]             // left neighbours, one 2-byte pair per row
+
+    dup       v10.8h, v0.h[7]           // row 0 comes from the last pair
+    dup       v11.8h, v0.h[6]
+    dup       v12.8h, v0.h[5]
+    dup       v13.8h, v0.h[4]
+    st1       {v10.8h}, [x2], x4
+    dup       v14.8h, v0.h[3]
+    st1       {v11.8h}, [x2], x4
+    dup       v15.8h, v0.h[2]
+    st1       {v12.8h}, [x2], x4
+    dup       v16.8h, v0.h[1]
+    st1       {v13.8h}, [x2], x4
+    dup       v17.8h, v0.h[0]
+    st1       {v14.8h}, [x2], x4
+    st1       {v15.8h}, [x2], x4
+    st1       {v16.8h}, [x2], x4
+    st1       {v17.8h}, [x2], x4
+
+    b         end_func
+
+do_vert:
+    //DO VERTICAL PREDICTION
+    str       w8 , [x7]                 //MIN SAD (32-bit store)
+    mov       x8, #2
+    str       w8 , [x6]                 // MODE = 2 (32-bit store)
+    add       x6, x1, #18
+    ld1       {v28.8b, v29.8b}, [x6]    // vertical values
+    ld1       {v30.8b, v31.8b}, [x6]    // vertical values
+    // Shared store path: v28/v29 hold rows 0-3, v30/v31 hold rows 4-7.
+do_dc_vert:
+    st1       {v28.2s, v29.2s} , [x2], x4 //0
+    st1       {v28.2s, v29.2s} , [x2], x4 //1
+    st1       {v28.2s, v29.2s} , [x2], x4 //2
+    st1       {v28.2s, v29.2s} , [x2], x4 //3
+    st1       {v30.2s, v31.2s} , [x2], x4 //4
+    st1       {v30.2s, v31.2s} , [x2], x4 //5
+    st1       {v30.2s, v31.2s} , [x2], x4 //6
+    st1       {v30.2s, v31.2s} , [x2], x4 //7
+
+end_func:
+    // Restore the registers saved in the prologue, in reverse order.
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
new file mode 100755
index 0000000..6dbd8f8
--- /dev/null
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -0,0 +1,1024 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// *  ih264e_half_pel_av8.s
+// *
+// * @brief
+// *
+// *
+// * @author
+// *  Ittiam
+// *
+// * @par List of Functions:
+// *  ih264e_sixtapfilter_horz_av8
+// *  ih264e_sixtap_filter_2dvh_vert_av8
+//
+// *
+// * @remarks
+// *  None
+// *
+// *******************************************************************************
+// */
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+// /**
+///*******************************************************************************
+//*
+//* @brief
+//*     Inter-prediction luma filter for horizontal input (filter is run for width = 17 and height = 16)
+//*
+//* @par Description:
+//*    Applies a 6-tap horizontal filter. The output is clipped to 8 bits;
+//*    sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+//                                UWORD8 *pu1_dst,
+//                                WORD32 src_strd,
+//                                WORD32 dst_strd);
+
+
+.equ halfpel_width ,  17 + 1            //( make it even, two rows are processed at a time)
+
+
+        .global ih264e_sixtapfilter_horz_av8
+ih264e_sixtapfilter_horz_av8:
+    // x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd; writes 16 rows of 18 bytes
+    push_v_regs                         // macro from ih264_neon_macros.s (v8-v14 are used as accumulators below)
+    stp       x19, x20, [sp, #-16]!
+    sxtw      x2, w2                    // strides are WORD32: bits 63:32 of x2/x3 are unspecified on entry,
+    sxtw      x3, w3                    // so sign-extend before using them as 64-bit post-index offsets
+    movi      v0.8b, #5                 // tap weight for a1 and a4
+    sub       x0, x0, #2                // the filter window starts two pixels left of the output
+    sub       x3, x3, #16               // each row first post-increments x1 by 16, so step by the remainder
+    movi      v1.8b, #20                // tap weight for a2 and a3
+    mov       x14, #16                  // row counter
+
+filter_horz_loop:
+    // Two rows per iteration; each row is three 8-byte columns (only 2 bytes of column 3 are stored).
+    ld1       {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+    ld1       {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+
+    //// Processing row0 and row1
+
+    ext       v31.8b, v2.8b , v3.8b , #5
+    ext       v30.8b, v3.8b , v4.8b , #5
+
+    uaddl     v8.8h, v31.8b, v2.8b      //// a0 + a5                             (column1,row0)
+    ext       v29.8b, v4.8b , v4.8b , #5 // lanes that wrap within v4 only feed bytes that are never stored
+    uaddl     v10.8h, v30.8b, v3.8b     //// a0 + a5                             (column2,row0)
+    ext       v28.8b, v5.8b , v6.8b , #5
+    uaddl     v12.8h, v29.8b, v4.8b     //// a0 + a5                             (column3,row0)
+    ext       v27.8b, v6.8b , v7.8b , #5
+    uaddl     v14.8h, v28.8b, v5.8b     //// a0 + a5                             (column1,row1)
+    ext       v26.8b, v7.8b , v7.8b , #5
+
+    uaddl     v16.8h, v27.8b, v6.8b     //// a0 + a5                             (column2,row1)
+    ext       v31.8b, v2.8b , v3.8b , #2
+    uaddl     v18.8h, v26.8b, v7.8b     //// a0 + a5                             (column3,row1)
+    ext       v30.8b, v3.8b , v4.8b , #2
+    umlal     v8.8h, v31.8b, v1.8b      //// a0 + a5 + 20a2                         (column1,row0)
+    ext       v29.8b, v4.8b , v4.8b , #2
+    umlal     v10.8h, v30.8b, v1.8b     //// a0 + a5 + 20a2                         (column2,row0)
+    ext       v28.8b, v5.8b , v6.8b , #2
+    umlal     v12.8h, v29.8b, v1.8b     //// a0 + a5 + 20a2                         (column3,row0)
+    ext       v27.8b, v6.8b , v7.8b , #2
+    umlal     v14.8h, v28.8b, v1.8b     //// a0 + a5 + 20a2                         (column1,row1)
+    ext       v26.8b, v7.8b , v7.8b , #2
+
+    umlal     v16.8h, v27.8b, v1.8b     //// a0 + a5 + 20a2                         (column2,row1)
+    ext       v31.8b, v2.8b , v3.8b , #3
+    umlal     v18.8h, v26.8b, v1.8b     //// a0 + a5 + 20a2                         (column3,row1)
+    ext       v30.8b, v3.8b , v4.8b , #3
+    umlal     v8.8h, v31.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    ext       v29.8b, v4.8b , v4.8b , #3
+    umlal     v10.8h, v30.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column2,row0)
+    ext       v28.8b, v5.8b , v6.8b , #3
+    umlal     v12.8h, v29.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column3,row0)
+    ext       v27.8b, v6.8b , v7.8b , #3
+    umlal     v14.8h, v28.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column1,row1)
+    ext       v26.8b, v7.8b , v7.8b , #3
+
+    umlal     v16.8h, v27.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column2,row1)
+    ext       v31.8b, v2.8b , v3.8b , #1
+    umlal     v18.8h, v26.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column3,row1)
+    ext       v30.8b, v3.8b , v4.8b , #1
+    umlsl     v8.8h, v31.8b, v0.8b      //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    ext       v29.8b, v4.8b , v4.8b , #1
+    umlsl     v10.8h, v30.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column2,row0)
+    ext       v28.8b, v5.8b , v6.8b , #1
+    umlsl     v12.8h, v29.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column3,row0)
+    ext       v27.8b, v6.8b , v7.8b , #1
+    umlsl     v14.8h, v28.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row1)
+    ext       v26.8b, v7.8b , v7.8b , #1
+
+    umlsl     v16.8h, v27.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column2,row1)
+    ext       v31.8b, v2.8b , v3.8b , #4
+    umlsl     v18.8h, v26.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column3,row1)
+    ext       v30.8b, v3.8b , v4.8b , #4
+    umlsl     v8.8h, v31.8b, v0.8b      //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    ext       v29.8b, v4.8b , v4.8b , #4
+    umlsl     v10.8h, v30.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column2,row0)
+    ext       v28.8b, v5.8b , v6.8b , #4
+    umlsl     v12.8h, v29.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column3,row0)
+    ext       v27.8b, v6.8b , v7.8b , #4
+    umlsl     v14.8h, v28.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row1)
+    ext       v26.8b, v7.8b , v7.8b , #4
+
+    umlsl     v16.8h, v27.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column2,row1)
+    umlsl     v18.8h, v26.8b, v0.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column3,row1)
+    // sqrshrun reads the 16-bit sums as signed, rounds, shifts and saturates to unsigned 8 bit
+    sqrshrun  v20.8b, v8.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    sqrshrun  v21.8b, v10.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    sqrshrun  v22.8b, v12.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    sqrshrun  v23.8b, v14.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row1)
+    sqrshrun  v24.8b, v16.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row1)
+    sqrshrun  v25.8b, v18.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row1)
+
+    st1       {v20.8b, v21.8b}, [x1], #16 ////Store dest row0
+    st1       {v22.h}[0], [x1], x3      // bytes 16-17 of row0, then step to the next dst row
+    st1       {v23.8b, v24.8b}, [x1], #16 ////Store dest row1
+    st1       {v25.h}[0], [x1], x3      // bytes 16-17 of row1, then step to the next dst row
+
+    subs      x14, x14, #2              //    decrement counter
+
+    bne       filter_horz_loop
+
+
+    // Restore the registers saved in the prologue, in reverse order.
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//*   This function implements a two stage cascaded six tap filter. It
+//*    applies the six tap filter in the vertical direction on the
+//*    predictor values, followed by applying the same filter in the
+//*    horizontal direction on the output of the first stage. The six tap
+//*    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+//*    interpolation process"
+//*    (The filter is run for width = 17 and height = 17.)
+//* @par Description:
+//*    The function interpolates the predictors first in the vertical
+//*    direction and then in the horizontal direction to output the
+//*    (1/2,1/2) grid values. The output of the first stage of the filter is
+//*    stored, in 16-bit precision, in the buffer pointed to by pi16_pred1
+//*    (in the C implementation only).
+//*
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst1
+//*  UWORD8 pointer to the destination (vertically filtered output)
+//*
+//* @param[out] pu1_dst2
+//*  UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride, used for both pu1_dst1 and pu1_dst2
+//*
+//* @param[in] pi16_pred1
+//*  Pointer to 16-bit intermediate buffer (used only in C)
+//*
+//* @param[in] pi16_pred1_strd
+//*  integer destination stride of pi16_pred1
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+//                                UWORD8 *pu1_dst1,
+//                                UWORD8 *pu1_dst2,
+//                                WORD32 src_strd,
+//                                WORD32 dst_strd,
+//                                WORD32 *pi16_pred1,/* Pointer to 16-bit intermediate buffer (used only in C) */
+//                                WORD32 pi16_pred1_strd)
+
+
+
+
+        .global ih264e_sixtap_filter_2dvh_vert_av8
+
+ih264e_sixtap_filter_2dvh_vert_av8:
+    // STMFD sp!,{x10,x11,x12,x14}
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+
+////x0 - pu1_ref
+////x3 - u4_ref_width
+
+    //// Load six rows for vertical interpolation
+    lsl       x12, x3, #1
+    sub       x0, x0, x12
+    sub       x0, x0, #2
+    ld1       {v2.8b, v3.8b, v4.8b}, [x0], x3
+    ld1       {v5.8b, v6.8b, v7.8b}, [x0], x3
+    ld1       {v8.8b, v9.8b, v10.8b}, [x0], x3
+    mov       x12, #5
+    ld1       {v11.8b, v12.8b, v13.8b}, [x0], x3
+    mov       x14, #20
+    ld1       {v14.8b, v15.8b, v16.8b}, [x0], x3
+    mov       v0.4h[0], w12
+    mov       v0.4h[1], w14
+    ld1       {v17.8b, v18.8b, v19.8b}, [x0], x3
+    movi      v1.8b, #20
+
+//// x12 - u2_buff1_width
+//// x14 - u2_buff2_width
+    mov       x12, x4
+    add       x11, x1, #16
+
+    mov       x14, x12
+
+    mov       x10, #3 //loop counter
+    sub       x16 , x12, #8
+    sub       x19, x14, #16
+filter_2dvh_loop:
+
+    //// ////////////// ROW 1 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+    uaddl     v20.8h, v2.8b, v17.8b     //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v8.8b, v1.8b      //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v11.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v5.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v14.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+
+    uaddl     v22.8h, v3.8b, v18.8b     //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v9.8b, v1.8b      //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v12.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v6.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v15.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    ext       v30.8b, v20.8b , v21.8b , #4
+    mov       v23.d[0], v22.d[1]
+
+
+    uaddl     v24.8h, v4.8b, v19.8b     //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v10.8b, v1.8b     //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v13.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v7.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v16.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    sqrshrun  v2.8b, v20.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v3.8b, v22.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v4.8b, v24.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+    mov       v21.d[0], v20.d[1]
+    ext       v2.8b, v2.8b , v3.8b , #2
+    ext       v3.8b, v3.8b , v4.8b , #2
+    ext       v4.8b, v4.8b , v4.8b , #2
+
+    st1       {v2.8b, v3.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v4.h}[0], [x11], x12     //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v2.4s, v31.4h, v22.4h     //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v2.4s, v30.4h, v0.4h[1]   //// a0 + a5 + 20a2                         (set3)
+    smlal     v2.4s, v29.4h, v0.4h[1]   //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v2.4s, v28.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v2.4s, v23.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v2.4s, #8         //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v2.8b, v3.8b, v4.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+
+    ////VQRSHRUN.s16    D27,Q14,#2            ;// half,half gird set3,4
+    ////VSHRN.s32        D28,Q11,#8            ;// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    ////VQRSHRUN.s16    D28,Q14,#2            ;// half,half gird set5
+
+    ////VST1.8        {D26,D27,D28},[x2],x14    ;// store 1/2,1,2 grif values
+    //// ////////////// ROW 2 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+    uaddl     v20.8h, v5.8b, v2.8b      //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v11.8b, v1.8b     //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v14.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v8.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v17.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    uaddl     v22.8h, v6.8b, v3.8b      //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v12.8b, v1.8b     //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v15.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v9.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v18.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    mov       v23.d[0], v22.d[1]
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+    ext       v30.8b, v20.8b , v21.8b , #4
+
+    uaddl     v24.8h, v7.8b, v4.8b      //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v13.8b, v1.8b     //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v16.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v10.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v19.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+
+    sqrshrun  v5.8b, v20.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v6.8b, v22.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v7.8b, v24.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+
+    ext       v5.8b, v5.8b , v6.8b , #2
+    ext       v6.8b, v6.8b , v7.8b , #2
+    ext       v7.8b, v7.8b , v7.8b , #2
+
+    st1       {v5.8b, v6.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v7.h}[0], [x11], x12     //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v6.4s, v31.4h, v22.4h     //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v6.4s, v30.4h, v0.4h[1]   //// a0 + a5 + 20a2                         (set3)
+    smlal     v6.4s, v29.4h, v0.4h[1]   //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v6.4s, v28.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v6.4s, v23.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v6.4s, #8         //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v5.8b, v6.8b, v7.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+
+    ////VQRSHRUN.s16    D27,Q14,#2            ;// half,half gird set3,4
+    ////VSHRN.s32        D28,Q11,#8            ;// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    ////VQRSHRUN.s16    D28,Q14,#2            ;// half,half gird set5
+
+    ////VST1.8        {D26,D27,D28},[x2],x14    ;// store 1/2,1,2 grif values
+    //// ////////////// ROW 3 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+    uaddl     v20.8h, v8.8b, v5.8b      //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v14.8b, v1.8b     //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v17.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v11.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v2.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    uaddl     v22.8h, v9.8b, v6.8b      //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v15.8b, v1.8b     //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v18.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v12.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v3.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    mov       v23.d[0], v22.d[1]
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+    ext       v30.8b, v20.8b , v21.8b , #4
+
+    uaddl     v24.8h, v10.8b, v7.8b     //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v16.8b, v1.8b     //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v19.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v13.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v4.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       { v28.h}[0], [x2], x19    //// store 1/2,1,2 grif values
+
+    sqrshrun  v8.8b, v20.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v9.8b, v22.8h, #5         //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v10.8b, v24.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+
+    ext       v8.8b, v8.8b , v9.8b , #2
+    ext       v9.8b, v9.8b , v10.8b , #2
+    ext       v10.8b, v10.8b , v10.8b , #2
+
+    st1       {v8.8b, v9.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v10.h}[0], [x11], x12    //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v8.4s, v31.4h, v22.4h     //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v8.4s, v30.4h, v0.4h[1]   //// a0 + a5 + 20a2                         (set3)
+    smlal     v8.4s, v29.4h, v0.4h[1]   //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v8.4s, v28.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v8.4s, v23.4h, v0.4h[0]   //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v8.4s, #8         //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v8.8b, v9.8b, v10.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+
+    ////VQRSHRUN.s16    D27,Q14,#2            ;// half,half gird set3,4
+    ////VSHRN.s32        D28,Q11,#8            ;// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    ////VQRSHRUN.s16    D28,Q14,#2            ;// half,half gird set5
+
+    ////VST1.8        {D26,D27,D28},[x2],x14    ;// store 1/2,1,2 grif values
+    //// ////////////// ROW 4 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+    uaddl     v20.8h, v11.8b, v8.8b     //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v17.8b, v1.8b     //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v2.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v14.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v5.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    uaddl     v22.8h, v12.8b, v9.8b     //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v18.8b, v1.8b     //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v3.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v15.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v6.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    mov       v23.d[0], v22.d[1]
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+    ext       v30.8b, v20.8b , v21.8b , #4
+
+    uaddl     v24.8h, v13.8b, v10.8b    //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v19.8b, v1.8b     //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v4.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v16.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v7.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+
+    sqrshrun  v11.8b, v20.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v12.8b, v22.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v13.8b, v24.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+
+    ext       v11.8b, v11.8b , v12.8b , #2
+    ext       v12.8b, v12.8b , v13.8b , #2
+    ext       v13.8b, v13.8b , v13.8b , #2
+
+    st1       {v11.8b, v12.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v13.h}[0], [x11], x12    //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v12.4s, v31.4h, v22.4h    //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v12.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set3)
+    smlal     v12.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v12.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v12.4s, v23.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v12.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v11.8b, v12.8b, v13.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+
+    ////VQRSHRUN.s16    D27,Q14,#2            ;// half,half gird set3,4
+    ////VSHRN.s32        D28,Q11,#8            ;// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    ////VQRSHRUN.s16    D28,Q14,#2            ;// half,half gird set5
+
+    ////VST1.8        {D26,D27,D28},[x2],x14    ;// store 1/2,1,2 grif values
+    //// ////////////// ROW 5 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+    uaddl     v20.8h, v14.8b, v11.8b    //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v2.8b, v1.8b      //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v5.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v17.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v8.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    uaddl     v22.8h, v15.8b, v12.8b    //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v3.8b, v1.8b      //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v6.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v18.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v9.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    mov       v23.d[0], v22.d[1]
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+    ext       v30.8b, v20.8b , v21.8b , #4
+
+    uaddl     v24.8h, v16.8b, v13.8b    //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v4.8b, v1.8b      //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v7.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v19.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v10.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+
+    sqrshrun  v14.8b, v20.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v15.8b, v22.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v16.8b, v24.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+
+    ext       v14.8b, v14.8b , v15.8b , #2
+    ext       v15.8b, v15.8b , v16.8b , #2
+    ext       v16.8b, v16.8b , v16.8b , #2
+
+    st1       {v14.8b, v15.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v16.h}[0], [x11], x12    //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v14.4s, v31.4h, v22.4h    //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v14.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set3)
+    smlal     v14.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v14.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v14.4s, v23.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v14.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v14.8b, v15.8b, v16.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+
+    ////VQRSHRUN.s16    D27,Q14,#2            ;// half,half gird set3,4
+    ////VSHRN.s32        D28,Q11,#8            ;// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    ////VQRSHRUN.s16    D28,Q14,#2            ;// half,half gird set5
+
+    ////VST1.8        {D26,D27,D28},[x2],x14    ;// store 1/2,1,2 grif values
+    //// ////////////// ROW 6 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+    cmp       x10, #1                   //// if it 17 rows are complete skip
+    beq       filter_2dvh_skip_row
+    uaddl     v20.8h, v17.8b, v14.8b    //// a0 + a5                             (column1,row0)
+    movi      v31.8b, #5
+    umlal     v20.8h, v5.8b, v1.8b      //// a0 + a5 + 20a2                         (column1,row0)
+    umlal     v20.8h, v8.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                  (column1,row0)
+    umlsl     v20.8h, v2.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1           (column1,row0)
+    umlsl     v20.8h, v11.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4     (column1,row0)
+    mov       v21.d[0], v20.d[1]
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    uaddl     v22.8h, v18.8b, v15.8b    //// a0 + a5                                (column2,row0)
+    umlal     v22.8h, v6.8b, v1.8b      //// a0 + a5 + 20a2                        (column2,row0)
+    umlal     v22.8h, v9.8b, v1.8b      //// a0 + a5 + 20a2 + 20a3                (column2,row0)
+    umlsl     v22.8h, v3.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1            (column2,row0)
+    umlsl     v22.8h, v12.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column2,row0)
+    mov       v23.d[0], v22.d[1]
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+    ext       v30.8b, v20.8b , v21.8b , #4
+
+    uaddl     v24.8h, v19.8b, v16.8b    //// a0 + a5                                (column3,row0)
+    ext       v29.8b, v20.8b , v21.8b , #6
+    umlal     v24.8h, v7.8b, v1.8b      //// a0 + a5 + 20a2                        (column3,row0)
+    umlal     v24.8h, v10.8b, v1.8b     //// a0 + a5 + 20a2 + 20a3                (column3,row0)
+    umlsl     v24.8h, v4.8b, v31.8b     //// a0 + a5 + 20a2 + 20a3 - 5a1            (column3,row0)
+    umlsl     v24.8h, v13.8b, v31.8b    //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (column3,row0)
+    mov       v25.d[0], v24.d[1]
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+
+    sqrshrun  v17.8b, v20.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column1,row0)
+    ext       v31.8b, v21.8b , v22.8b , #2
+    sqrshrun  v18.8b, v22.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column2,row0)
+    ext       v28.8b, v20.8b , v21.8b , #2
+
+    saddl     v26.4s, v31.4h, v20.4h    //// a0 + a5                             (set1)
+    ext       v31.8b, v22.8b , v23.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set1)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set1)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set1)
+    smlsl     v26.4s, v21.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set1)
+    ext       v30.8b, v21.8b , v22.8b , #4
+
+    sqrshrun  v19.8b, v24.8h, #5        //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5    (column3,row0)
+    ext       v29.8b, v21.8b , v22.8b , #6
+
+    ext       v28.8b, v21.8b , v22.8b , #2
+    saddl     v20.4s, v31.4h, v21.4h    //// a0 + a5                             (set2)
+    smlal     v20.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set2)
+    smlal     v20.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set2)
+    smlsl     v20.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set2)
+    smlsl     v20.4s, v22.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set2)
+    ext       v31.8b, v23.8b , v24.8b , #2
+
+    ext       v17.8b, v17.8b , v18.8b , #2
+    ext       v18.8b, v18.8b , v19.8b , #2
+    ext       v19.8b, v19.8b , v19.8b , #2
+
+    st1       {v17.8b, v18.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+    st1       {v19.h}[0], [x11], x12    //// store row1 - 1,1/2 grid
+
+    ext       v30.8b, v22.8b , v23.8b , #4
+    ext       v29.8b, v22.8b , v23.8b , #6
+
+    saddl     v18.4s, v31.4h, v22.4h    //// a0 + a5                             (set3)
+    ext       v28.8b, v22.8b , v23.8b , #2
+    smlal     v18.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set3)
+    smlal     v18.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set3)
+    smlsl     v18.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set3)
+    smlsl     v18.4s, v23.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set3)
+    ext       v31.8b, v24.8b , v25.8b , #2
+
+    shrn      v21.4h, v20.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set2)
+    ext       v30.8b, v23.8b , v24.8b , #4
+    shrn      v20.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set1)
+    ext       v29.8b, v23.8b , v24.8b , #6
+
+    saddl     v26.4s, v31.4h, v23.4h    //// a0 + a5                             (set4)
+    ext       v28.8b, v23.8b , v24.8b , #2
+    ext       v31.8b, v25.8b , v25.8b , #2
+    smlal     v26.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set4)
+    smlal     v26.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set4)
+    smlsl     v26.4s, v28.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set4)
+    smlsl     v26.4s, v24.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set4)
+    ext       v30.8b, v24.8b , v25.8b , #4
+
+    saddl     v22.4s, v31.4h, v24.4h    //// a0 + a5                             (set5)
+    ext       v29.8b, v24.8b , v25.8b , #6
+
+    ext       v31.8b, v24.8b , v25.8b , #2
+    shrn      v28.4h, v18.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set3)
+
+    ld1       {v17.8b, v18.8b, v19.8b}, [x0], x3 //// Load next Row data
+    smlal     v22.4s, v30.4h, v0.4h[1]  //// a0 + a5 + 20a2                         (set5)
+    smlal     v22.4s, v29.4h, v0.4h[1]  //// a0 + a5 + 20a2 + 20a3                  (set5)
+    smlsl     v22.4s, v31.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1          (set5)
+    smlsl     v22.4s, v25.4h, v0.4h[0]  //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4    (set5)
+    shrn      v29.4h, v26.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set4)
+    mov       v20.d[1], v21.d[0]
+    sqrshrun  v26.8b, v20.8h, #2        //// half,half gird set1,2
+
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+
+    subs      x10, x10, #1              ////decrement loop counter
+
+    bne       filter_2dvh_loop
+
+
+//// Process first vertical interpolated row
+//// each column is
+    //// ////////////// ROW 13 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+    // LDMFD sp!,{x10,x11,x12,pc}
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+filter_2dvh_skip_row:
+    mov       v28.d[1], v29.d[0]
+    sqrshrun  v27.8b, v28.8h, #2        //// half,half gird set3,4
+    shrn      v28.4h, v22.4s, #8        //// shift by 8 and later we will shift by 2 more with rounding     (set5)
+
+    sqrshrun  v28.8b, v28.8h, #2        //// half,half gird set5
+
+    st1       {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+    st1       {v28.h}[0], [x2], x19     //// store 1/2,1,2 grif values
+    // LDMFD sp!,{x10,x11,x12,pc}
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+///*****************************************
+
+
+
+
+
+
+    .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ih264e_platform_macros.h b/encoder/armv8/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/armv8/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_platform_macros.h
+*
+* @brief
+*  Contains platform specific routines used for codec context initialization
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] pv_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s
new file mode 100755
index 0000000..99ebc8a
--- /dev/null
+++ b/encoder/armv8/ime_distortion_metrics_av8.s
@@ -0,0 +1,978 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+//**
+
+///**
+//******************************************************************************
+//*
+//*
+//* @brief
+//*  This file contains definitions of routines that compute distortion
+//*  between two macro/sub blocks of identical dimensions
+//*
+//* @author
+//*  Ittiam
+//*
+//* @par List of Functions:
+//*  - ime_compute_sad_16x16()
+//*  - ime_compute_sad_8x8()
+//*  - ime_compute_sad_4x4()
+//*  - ime_compute_sad_16x8()
+//*  - ime_compute_satqd_16x16_lumainter_av8()
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+//
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+//*
+//* @par   Description
+//*   This functions computes SAD between 2 16x16 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+.text
+.p2align 2
+
+// Save the low 64 bits of v8-v15 (d8-d15) in a 64-byte stack frame.
+// Frame layout, from the new sp upwards:
+//   [sp]      d14, d15
+//   [sp, #16] d12, d13
+//   [sp, #32] d10, d11
+//   [sp, #48] d8,  d9
+.macro push_v_regs
+    sub       sp, sp, #64
+    stp       d14, d15, [sp]
+    stp       d12, d13, [sp, #16]
+    stp       d10, d11, [sp, #32]
+    stp       d8, d9, [sp, #48]
+.endm
+// Restore d8-d15 from the frame written by push_v_regs and release it.
+.macro pop_v_regs
+    ldp       d14, d15, [sp]
+    ldp       d12, d13, [sp, #16]
+    ldp       d10, d11, [sp, #32]
+    ldp       d8, d9, [sp, #48]
+    add       sp, sp, #64
+.endm
+
+    .global ime_compute_sad_16x16_fast_av8
+ime_compute_sad_16x16_fast_av8:
+    // Register usage (argument order as listed in the header comment):
+    //   x0 = pu1_src, x1 = pu1_dst
+    //   w2 = src_strd, w3 = dst_strd
+    //   x5 = pi4_mb_distortion (receives one 32-bit result)
+    // Only every second row is read (strides are doubled) and the
+    // accumulated sum is doubled before it is stored.
+    push_v_regs
+
+    // The strides are 32-bit integers. AAPCS64 leaves the upper 32 bits
+    // of x2/x3 unspecified, so sign-extend before using them as 64-bit
+    // post-index increments.
+    sxtw      x2, w2
+    sxtw      x3, w3
+    lsl       x2, x2, #1                // step two source rows per load
+    lsl       x3, x3, #1                // step two reference rows per load
+
+    mov       x6, #2                    // 2 iterations x 4 loads = 8 rows
+    movi      v30.8h, #0                // v30 = eight 16-bit partial sums
+
+core_loop_ime_compute_sad_16x16_fast_av8:
+
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    uabal     v30.8h, v0.8b, v1.8b      // |src - ref|, bytes 0..7
+    uabal2    v30.8h, v0.16b, v1.16b    // |src - ref|, bytes 8..15
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x16_fast_av8
+
+
+    // Horizontal reduction of the eight partial sums into lane 0.
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+    shl       v30.2s, v30.2s, #1        // scale up for the skipped rows
+
+    st1       {v30.s}[0], [x5]
+    pop_v_regs
+    ret
+
+
+///**
+//******************************************************************************
+//*
+//*  @brief computes distortion (SAD) between 2 16x8  blocks
+//*
+//*
+//*  @par   Description
+//*   This functions computes SAD between 2 16x8 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] u4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+//
+    .global ime_compute_sad_16x8_av8
+ime_compute_sad_16x8_av8:
+
+    // Register usage (argument order as listed in the header comment):
+    //   x0 = pu1_src, x1 = pu1_dst
+    //   w2 = src_strd, w3 = dst_strd
+    //   x5 = pi4_mb_distortion (receives one 32-bit result)
+    // All 8 rows are read, so the strides are used unscaled.
+    push_v_regs
+
+    // The strides are 32-bit integers. AAPCS64 leaves the upper 32 bits
+    // of x2/x3 unspecified, so sign-extend before using them as 64-bit
+    // post-index increments.
+    sxtw      x2, w2
+    sxtw      x3, w3
+
+    mov       x6, #2                    // 2 iterations x 4 rows = 8 rows
+    movi      v30.8h, #0                // v30 = eight 16-bit partial sums
+
+core_loop_ime_compute_sad_16x8_av8:
+
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    uabal     v30.8h, v0.8b, v1.8b      // |src - ref|, bytes 0..7
+    uabal2    v30.8h, v0.16b, v1.16b    // |src - ref|, bytes 8..15
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x8_av8
+
+
+    // Horizontal reduction of the eight partial sums into lane 0.
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+
+    st1       {v30.s}[0], [x5]
+    pop_v_regs
+    ret
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+//*
+//* @par   Description
+//*   This functions computes SAD between 2 16x16 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+    .global ime_compute_sad_16x16_ea8_av8
+ime_compute_sad_16x16_ea8_av8:
+
+    // Register usage (argument order as listed in the header comment):
+    //   x0 = pu1_src, x1 = pu1_dst
+    //   w2 = src_strd, w3 = dst_strd
+    //   w4 = i4_max_sad
+    //   x5 = pi4_mb_distortion (receives one 32-bit result)
+    // Pass 1 accumulates rows 0,2,..,14. If that partial SAD is already
+    // greater than i4_max_sad (signed compare) it is stored and the
+    // function returns. Otherwise pass 2 adds rows 1,3,..,15 and the
+    // full SAD is stored.
+    push_v_regs
+    movi      v30.8h, #0                // v30 = eight 16-bit partial sums
+
+    // The strides are 32-bit integers. AAPCS64 leaves the upper 32 bits
+    // of x2/x3 unspecified, so sign-extend before they take part in
+    // 64-bit address arithmetic.
+    sxtw      x2, w2
+    sxtw      x3, w3
+
+    add       x7, x0, x2                // x7 = source row 1
+    add       x8, x1, x3                // x8 = reference row 1
+
+    lsl       x2, x2, #1                // step two rows per load
+    lsl       x3, x3, #1
+
+    // Pass 1: even rows.
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+    ld1       {v8.16b}, [x0], x2
+    ld1       {v9.16b}, [x1], x3
+    ld1       {v10.16b}, [x0], x2
+    ld1       {v11.16b}, [x1], x3
+    ld1       {v12.16b}, [x0], x2
+    ld1       {v13.16b}, [x1], x3
+    ld1       {v14.16b}, [x0], x2
+    ld1       {v15.16b}, [x1], x3
+    ld1       {v16.16b}, [x0], x2
+    ld1       {v17.16b}, [x1], x3
+    ld1       {v18.16b}, [x0], x2
+    ld1       {v19.16b}, [x1], x3
+
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    uabal     v30.8h, v8.8b, v9.8b
+    uabal2    v30.8h, v8.16b, v9.16b
+
+    uabal     v30.8h, v10.8b, v11.8b
+    uabal2    v30.8h, v10.16b, v11.16b
+
+    uabal     v30.8h, v12.8b, v13.8b
+    uabal2    v30.8h, v12.16b, v13.16b
+
+    uabal     v30.8h, v14.8b, v15.8b
+    uabal2    v30.8h, v14.16b, v15.16b
+
+    uabal     v30.8h, v16.8b, v17.8b
+    uabal2    v30.8h, v16.16b, v17.16b
+
+    uabal     v30.8h, v18.8b, v19.8b
+    uabal2    v30.8h, v18.16b, v19.16b
+
+    // Reduce into v31 so that v30 keeps accumulating in pass 2.
+    addp      v31.8h, v30.8h, v30.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+    mov       w6, v31.s[0]
+    cmp       w6, w4
+    bgt       end_func_16x16            // early exit with the partial SAD
+
+    // Pass 2: odd rows, starting from x7/x8.
+    ld1       {v0.16b}, [x7], x2
+    ld1       {v1.16b}, [x8], x3
+    ld1       {v2.16b}, [x7], x2
+    ld1       {v3.16b}, [x8], x3
+    ld1       {v8.16b}, [x7], x2
+    ld1       {v9.16b}, [x8], x3
+    ld1       {v10.16b}, [x7], x2
+    ld1       {v11.16b}, [x8], x3
+    ld1       {v12.16b}, [x7], x2
+    ld1       {v13.16b}, [x8], x3
+    ld1       {v14.16b}, [x7], x2
+    ld1       {v15.16b}, [x8], x3
+    ld1       {v16.16b}, [x7], x2
+    ld1       {v17.16b}, [x8], x3
+    ld1       {v18.16b}, [x7], x2
+    ld1       {v19.16b}, [x8], x3
+
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    uabal     v30.8h, v8.8b, v9.8b
+    uabal2    v30.8h, v8.16b, v9.16b
+
+    uabal     v30.8h, v10.8b, v11.8b
+    uabal2    v30.8h, v10.16b, v11.16b
+
+    uabal     v30.8h, v12.8b, v13.8b
+    uabal2    v30.8h, v12.16b, v13.16b
+
+    uabal     v30.8h, v14.8b, v15.8b
+    uabal2    v30.8h, v14.16b, v15.16b
+
+    uabal     v30.8h, v16.8b, v17.8b
+    uabal2    v30.8h, v16.16b, v17.16b
+
+    uabal     v30.8h, v18.8b, v19.8b
+    uabal2    v30.8h, v18.16b, v19.16b
+
+    addp      v31.8h, v30.8h, v30.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+
+end_func_16x16:
+    st1       {v31.s}[0], [x5]
+    pop_v_regs                          // v8-v15 were used as scratch above
+    ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : ime_calculate_sad2_prog_av8()
+////
+//// Detail Description : This function find the sad values of 4 Progressive MBs
+////                        at one shot
+////
+//// Platform           : CortexAv8/NEON            .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad2_prog_av8
+ime_calculate_sad2_prog_av8:
+
+    // x0    = ref1     <UWORD8 *>
+    // x1    = ref2     <UWORD8 *>
+    // x2    = src     <UWORD8 *>
+    // w3    = RefBufferWidth <UWORD32>  (row stride of ref1 and ref2)
+    // w4    = CurBufferWidth <UWORD32>  (row stride of src)
+    // x5    = psad <UWORD32 *>          (psad[0] <- ref1, psad[1] <- ref2)
+    //
+    // NOTE(review): the loop below reads 8 x 4 = 32 rows and the sums are
+    // doubled before the store; both are kept from the original code.
+    // Confirm the intended block height and scaling against the caller.
+    push_v_regs
+
+    // The widths are 32-bit unsigned values. AAPCS64 leaves the upper
+    // 32 bits of x3/x4 unspecified; writing the W view zero-extends them
+    // before they are used as 64-bit post-index increments.
+    mov       w3, w3
+    mov       w4, w4
+
+    mov       x6, #8
+    movi      v30.8h, #0                // partial sums: src vs ref1
+    movi      v31.8h, #0                // partial sums: src vs ref2
+
+core_loop_ime_calculate_sad2_prog_av8:
+
+    ld1       {v0.16b}, [x0], x3
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x2], x4        // src row comes from x2, not x3
+
+    ld1       {v3.16b}, [x0], x3
+    ld1       {v4.16b}, [x1], x3
+    ld1       {v5.16b}, [x2], x4
+
+
+    uabal     v30.8h, v0.8b, v2.8b
+    uabal2    v30.8h, v0.16b, v2.16b
+    uabal     v31.8h, v1.8b, v2.8b
+    uabal2    v31.8h, v1.16b, v2.16b
+
+    uabal     v30.8h, v3.8b, v5.8b
+    uabal2    v30.8h, v3.16b, v5.16b
+    uabal     v31.8h, v4.8b, v5.8b
+    uabal2    v31.8h, v4.16b, v5.16b
+
+
+    ld1       {v6.16b}, [x0], x3
+    ld1       {v7.16b}, [x1], x3
+    ld1       {v8.16b}, [x2], x4
+
+    ld1       {v9.16b}, [x0], x3
+    ld1       {v10.16b}, [x1], x3
+    ld1       {v11.16b}, [x2], x4
+
+    uabal     v30.8h, v6.8b, v8.8b
+    uabal2    v30.8h, v6.16b, v8.16b
+    uabal     v31.8h, v7.8b, v8.8b
+    uabal2    v31.8h, v7.16b, v8.16b
+
+    uabal     v30.8h, v9.8b, v11.8b
+    uabal2    v30.8h, v9.16b, v11.16b
+    uabal     v31.8h, v10.8b, v11.8b
+    uabal2    v31.8h, v10.16b, v11.16b  // high half of the same ref2 row (v10)
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_calculate_sad2_prog_av8
+
+    // Reduce both accumulators:
+    //   addp   .8h : lanes 0-3 = ref1 pair sums, lanes 4-7 = ref2 pair sums
+    //   uaddlp .4s : {ref1_a, ref1_b, ref2_a, ref2_b}
+    //   addp   .4s : {sad_ref1, sad_ref2, sad_ref1, sad_ref2}
+    // A 2s-wide addp here would only see the ref1 lanes and lose ref2.
+    addp      v30.8h, v30.8h, v31.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.4s, v30.4s, v30.4s
+    shl       v30.2s, v30.2s, #1
+
+    st1       {v30.2s}, [x5]            // psad[0], psad[1]
+    pop_v_regs                          // v8-v11 were used as scratch above
+    ret
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : ime_calculate_sad3_prog_av8()
+////
+//// Detail Description : This function find the sad values of 4 Progressive MBs
+////                        at one shot
+////
+//// Platform           : CortexA8/NEON            .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad3_prog_av8
+ime_calculate_sad3_prog_av8:
+
+    // x0    = ref1     <UWORD8 *>
+    // x1    = ref2     <UWORD8 *>
+    // x2    = ref3     <UWORD8 *>
+    // x3    = src     <UWORD8 *>
+    // w4    = RefBufferWidth <UWORD32>  (row stride of ref1/ref2/ref3)
+    // w5    = CurBufferWidth <UWORD32>  (row stride of src)
+    // x6    = psad <UWORD32 *>
+    //         presumably a 3-entry array (psad[0..2] <- ref1, ref2, ref3);
+    //         verify against the caller
+    //
+    // NOTE(review): the loop below reads 16 x 2 = 32 rows and the sums are
+    // doubled before the store; both are kept from the original code.
+    // Confirm the intended block height and scaling against the caller.
+    push_v_regs
+
+    // The widths are 32-bit unsigned values. AAPCS64 leaves the upper
+    // 32 bits of x4/x5 unspecified; writing the W view zero-extends them
+    // before they are used as 64-bit post-index increments.
+    mov       w4, w4
+    mov       w5, w5
+
+    mov       x7, #16                   // loop counter; x6 must keep psad
+    movi      v29.8h, #0                // partial sums: src vs ref1
+    movi      v30.8h, #0                // partial sums: src vs ref2
+    movi      v31.8h, #0                // partial sums: src vs ref3
+
+core_loop_ime_calculate_sad3_prog_av8:
+
+    ld1       {v0.16b}, [x0], x4
+    ld1       {v1.16b}, [x1], x4
+    ld1       {v2.16b}, [x2], x4
+    ld1       {v3.16b}, [x3], x5
+
+    uabal     v29.8h, v0.8b, v3.8b
+    uabal2    v29.8h, v0.16b, v3.16b
+    uabal     v30.8h, v1.8b, v3.8b
+    uabal2    v30.8h, v1.16b, v3.16b
+    uabal     v31.8h, v2.8b, v3.8b
+    uabal2    v31.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x4
+    ld1       {v5.16b}, [x1], x4
+    ld1       {v6.16b}, [x2], x4
+    ld1       {v7.16b}, [x3], x5
+
+    uabal     v29.8h, v4.8b, v7.8b
+    uabal2    v29.8h, v4.16b, v7.16b
+    uabal     v30.8h, v5.8b, v7.8b
+    uabal2    v30.8h, v5.16b, v7.16b
+    uabal     v31.8h, v6.8b, v7.8b
+    uabal2    v31.8h, v6.16b, v7.16b
+
+    subs      x7, x7, #1
+    bne       core_loop_ime_calculate_sad3_prog_av8
+
+    // Reduce the three accumulators:
+    //   v29 after addp.8h  : lanes 0-3 = ref1 pair sums, 4-7 = ref2 pair sums
+    //   v31 after addp.8h  : ref3 pair sums (duplicated in both halves)
+    //   after uaddlp.4s    : v29 = {r1a, r1b, r2a, r2b}, v31 = {r3a, r3b, ..}
+    //   after addp.4s      : v29 = {sad_ref1, sad_ref2, sad_ref3, sad_ref3}
+    addp      v29.8h, v29.8h, v30.8h
+    addp      v31.8h, v31.8h, v31.8h
+    uaddlp    v29.4s, v29.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v29.4s, v29.4s, v31.4s
+    shl       v29.4s, v29.4s, #1
+
+    st1       {v29.2s}, [x6], #8        // psad[0], psad[1]
+    st1       {v29.s}[2], [x6]          // psad[2]
+    pop_v_regs
+    ret
+
+
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) for sub-pel motion estimation
+//*
+//* @par   Description
+//*   This function computes the SAD for all 8 half-pel points
+//*
+//* @param[out] pi4_sad
+//*  integer evaluated sad
+//*  pi4_sad[0] - half x
+//*  pi4_sad[1] - half x - 1
+//*  pi4_sad[2] - half y
+//*  pi4_sad[3] - half y - 1
+//*  pi4_sad[4] - half xy
+//*  pi4_sad[5] - half xy - 1
+//*  pi4_sad[6] - half xy - strd
+//*  pi4_sad[7] - half xy - 1 - strd
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+.text
+.p2align 2
+
+    .global ime_sub_pel_compute_sad_16x16_av8
+ime_sub_pel_compute_sad_16x16_av8:
+    // Register roles, as used by the code below:
+    //   x0      = source block pointer, advanced by x4 after every row
+    //   x1      = half-x reference pointer
+    //   x2      = half-y reference pointer
+    //   x3      = half-xy reference pointer
+    //   x5      = stride applied to every reference pointer
+    //   x6      = output pointer, receives eight 32-bit SAD values
+    //   x7-x11  = neighbour pointers derived from x1-x3 (see below)
+    //   x12     = row counter (16 rows)
+    // push_v_regs/pop_v_regs are macros defined outside this section;
+    // presumably they preserve the callee-saved SIMD registers (v9-v13 are
+    // written here) - confirm against the macro definition.
+    push_v_regs
+    sub       x7, x1, #1                //x left
+    sub       x8, x2, x5                //y top
+    sub       x9, x3, #1                //xy  left
+    sub       x10, x3, x5               //xy top
+    sub       x11, x10, #1              //xy top left
+
+    // One 8x16-bit accumulator per candidate position: v24..v31
+    movi      v24.8h, #0
+    movi      v25.8h, #0
+    movi      v26.8h, #0
+    movi      v27.8h, #0
+    movi      v28.8h, #0
+    movi      v29.8h, #0
+    movi      v30.8h, #0
+    movi      v31.8h, #0
+
+    mov       x12, #16
+core_loop_ime_sub_pel_compute_sad_16x16_av8:
+
+    // Load one 16-byte row of the source and of each of the 8 candidates
+    ld1       {v0.16b}, [x0], x4        //src
+    ld1       {v1.16b}, [x1], x5        //x
+    ld1       {v2.16b}, [x7], x5        //x left
+    ld1       {v3.16b}, [x2], x5        //y
+    ld1       {v9.16b}, [x8], x5        //y top
+    ld1       {v10.16b}, [x3], x5       //xy
+    ld1       {v11.16b}, [x9], x5       //xy left
+    ld1       {v12.16b}, [x10], x5      //xy top
+    ld1       {v13.16b}, [x11], x5      //xy top left
+
+    // uabal handles bytes 0-7 and uabal2 bytes 8-15 of the row; both add the
+    // absolute differences into the same 16-bit lanes.
+    uabal     v24.8h, v0.8b, v1.8b
+    uabal2    v24.8h, v0.16b, v1.16b
+    uabal     v25.8h, v0.8b, v2.8b
+    uabal2    v25.8h, v0.16b, v2.16b
+    uabal     v26.8h, v0.8b, v3.8b
+    uabal2    v26.8h, v0.16b, v3.16b
+    uabal     v27.8h, v0.8b, v9.8b
+    uabal2    v27.8h, v0.16b, v9.16b
+    uabal     v28.8h, v0.8b, v10.8b
+    uabal2    v28.8h, v0.16b, v10.16b
+    uabal     v29.8h, v0.8b, v11.8b
+    uabal2    v29.8h, v0.16b, v11.16b
+    uabal     v30.8h, v0.8b, v12.8b
+    uabal2    v30.8h, v0.16b, v12.16b
+    uabal     v31.8h, v0.8b, v13.8b
+    uabal2    v31.8h, v0.16b, v13.16b
+
+    subs      x12, x12, #1
+    bne       core_loop_ime_sub_pel_compute_sad_16x16_av8
+
+    // Horizontal reduction: pack two accumulators per register ...
+    addp      v24.8h, v24.8h, v25.8h
+    addp      v26.8h, v26.8h, v27.8h
+    addp      v28.8h, v28.8h, v29.8h
+    addp      v30.8h, v30.8h, v31.8h
+
+    // ... widen the pair sums to 32 bits ...
+    uaddlp    v24.4s, v24.8h
+    uaddlp    v26.4s, v26.8h
+    uaddlp    v28.4s, v28.8h
+    uaddlp    v30.4s, v30.8h
+
+    // ... and finish so that v24 = {x, x left, y, y top} and
+    // v25 = {xy, xy left, xy top, xy top left}, one 32-bit SAD per lane.
+    addp      v24.4s, v24.4s, v26.4s
+    addp      v25.4s, v28.4s, v30.4s
+
+    // Write all eight SADs to the output array in that order
+    st1       {v24.4s-v25.4s}, [x6]
+
+
+    pop_v_regs
+    ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks
+//*
+//* @par   Description
+//*   This function computes the SAD between two 16x16 blocks. There is a provision
+//*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+//*   compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
+//*
+//* @param[in] pu1_src
+//*  UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//*  UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//*  integer source stride
+//*
+//* @param[in] dst_strd
+//*  integer destination stride
+//*
+//* @param[in] i4_max_sad
+//*  integer maximum allowed distortion
+//*
+//* @param[in] pi4_mb_distortion
+//*  integer evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+    .global ime_compute_sad_16x16_av8
+ime_compute_sad_16x16_av8:
+    // Register roles, as used by the code below:
+    //   x0 = first block pointer, advanced by x2 after every row
+    //   x1 = second block pointer, advanced by x3 after every row
+    //   x5 = address that receives the single 32-bit SAD
+    //   x6 = loop counter (4 iterations x 4 rows = 16 rows)
+    // x4 is never read in this routine, so the full 16x16 SAD is always
+    // computed; no early exit against a maximum SAD takes place here.
+    push_v_regs
+    mov       x6, #4
+    movi      v30.8h, #0                // 8 x 16-bit SAD accumulator
+
+core_loop_ime_compute_sad_16x16_av8:
+
+    // Rows 0 and 1 of this group of four
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    uabal     v30.8h, v0.8b, v1.8b
+    uabal2    v30.8h, v0.16b, v1.16b
+
+    uabal     v30.8h, v2.8b, v3.8b
+    uabal2    v30.8h, v2.16b, v3.16b
+
+    // Rows 2 and 3 of this group of four
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v30.8h, v4.8b, v5.8b
+    uabal2    v30.8h, v4.16b, v5.16b
+
+    uabal     v30.8h, v6.8b, v7.8b
+    uabal2    v30.8h, v6.16b, v7.16b
+
+    subs      x6, x6, #1
+    bne       core_loop_ime_compute_sad_16x16_av8
+
+
+    // Reduce the eight 16-bit partial sums to one 32-bit total in lane 0
+    addp      v30.8h, v30.8h, v30.8h
+    uaddlp    v30.4s, v30.8h
+    addp      v30.2s, v30.2s, v30.2s
+
+    st1       {v30.s}[0], [x5]
+    pop_v_regs
+    ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : Calculate_Mad4_prog()
+////
+//// Detail Description : This function finds the SAD values of 4 progressive MBs
+////                        in one shot
+////
+//// Platform           : CortexA8/NEON            .
+////
+////-----------------------------------------------------------------------------
+//*/
+
+    .global ime_calculate_sad4_prog_av8
+ime_calculate_sad4_prog_av8:
+    // Register roles, as used by the code below:
+    //   x0 = reference pointer at the centre position (only used to derive
+    //        the four neighbour pointers x5-x8)
+    //   x1 = source pointer, advanced by x3 after every row
+    //   x2 = stride applied to the four reference pointers
+    //   x4 = output pointer, receives four 32-bit SADs in the order
+    //        left, right, top, bottom
+    //   x9 = row counter (16 rows)
+    // push_v_regs/pop_v_regs are macros defined outside this section;
+    // presumably they preserve the callee-saved SIMD registers (v9 is
+    // written here) - confirm against the macro definition.
+    push_v_regs
+    sub       x5, x0, #1                //left
+    add       x6, x0, #1                //right
+    sub       x7, x0, x2                //top
+    add       x8, x0, x2                //bottom
+
+    // One 8x16-bit accumulator per neighbour: v28=left, v29=right,
+    // v30=top, v31=bottom
+    movi      v28.8h, #0
+    movi      v29.8h, #0
+    movi      v30.8h, #0
+    movi      v31.8h, #0
+
+    mov       x9, #16
+core_loop_ime_calculate_sad4_prog_av8:
+
+    ld1       {v0.16b}, [x1], x3
+    ld1       {v1.16b}, [x5], x2
+    ld1       {v2.16b}, [x6], x2
+    ld1       {v3.16b}, [x7], x2
+    ld1       {v9.16b}, [x8], x2
+
+    uabal     v28.8h, v0.8b, v1.8b
+    uabal2    v28.8h, v0.16b, v1.16b
+    uabal     v29.8h, v0.8b, v2.8b
+    uabal2    v29.8h, v0.16b, v2.16b
+    uabal     v30.8h, v0.8b, v3.8b
+    uabal2    v30.8h, v0.16b, v3.16b
+    uabal     v31.8h, v0.8b, v9.8b
+    uabal2    v31.8h, v0.16b, v9.16b
+
+    subs      x9, x9, #1
+    bne       core_loop_ime_calculate_sad4_prog_av8
+
+    // Horizontal reduction down to one 32-bit sum per neighbour
+    addp      v28.8h, v28.8h, v29.8h
+    addp      v30.8h, v30.8h, v31.8h
+
+    uaddlp    v28.4s, v28.8h
+    uaddlp    v30.4s, v30.8h
+
+    // v28 = {left, right, top, bottom}
+    addp      v28.4s, v28.4s, v30.4s
+    st1       {v28.4s}, [x4]
+    pop_v_regs
+    ret
+
+
+
+//*****************************************************************************
+//*
+//* Function Name         : ime_compute_satqd_16x16_lumainter_av8
+//* Description           : This function computes the SAD for a 16x16 block.
+//                        : It also determines whether any 4x4 block will have a nonzero coefficient after transform and quant
+//
+//  Arguments             :   x0 :pointer to src buffer
+//                            x1 :pointer to est buffer
+//                            x2 :source stride
+//                            x3 :est stride
+//                            x4-x6 :pointers to threshold, distortion and is_nonzero
+//*
+//* Values Returned   : NONE
+//*
+//* Register Usage    : x0-x11
+//* Stack Usage       :
+//* Cycles            : Around
+//* Interruptiaility  : Interruptable
+//*
+//* Known Limitations
+//*   \Assumptions    :
+//*
+//* Revision History  :
+//*         DD MM YYYY    Author(s)           Changes
+//*         14 04 2014    Harinarayanan K K  First version
+//*
+//*****************************************************************************
+    .global ime_compute_satqd_16x16_lumainter_av8
+ime_compute_satqd_16x16_lumainter_av8:
+    // Accumulates the SAD of a 16x16 block while testing, four rows at a
+    // time, a set of threshold comparisons on every 4x4 sub-block. As soon as
+    // one comparison fires, the remaining rows are processed by a plain SAD
+    // loop. On exit a 0/1 flag is stored through x6 and the SAD through x5.
+    //x0 :pointer to src buffer
+    //x1 :pointer to est buffer
+    //x2 :Source stride
+    //x3 :Pred stride
+    //x4 :Threshold pointer
+    //x5 :Distortion,ie SAD
+    //x6 :is nonzero
+    //x7 :loop counter
+    push_v_regs
+    // NOTE(review): d8-d15 are saved explicitly in addition to push_v_regs;
+    // this may be redundant - confirm against the macro definition.
+    stp       d8, d9, [sp, #-16]!
+    stp       d10, d11, [sp, #-16]!
+    stp       d12, d13, [sp, #-16]!
+    stp       d14, d15, [sp, #-16]!
+
+    // Load the first eight 16-bit thresholds
+    ld1       {v30.8h}, [x4]
+
+    // Broadcast each threshold across four lanes
+    dup       v20.4h, v30.h[1]          //ls1
+    dup       v24.4h, v30.h[0]          //ls2
+    dup       v21.4h, v30.h[5]          //ls3
+    dup       v25.4h, v30.h[7]          //ls4
+    dup       v22.4h, v30.h[3]          //ls5
+    dup       v26.4h, v30.h[4]          //ls6
+    dup       v23.4h, v30.h[6]          //ls7
+    dup       v27.4h, v30.h[2]          //ls8
+
+    // Pack two broadcast thresholds per 128-bit register:
+    // v20 = {ls1, ls2}, v21 = {ls3, ls4}, v22 = {ls5, ls6}, v23 = {ls7, ls8}
+    mov       v20.d[1], v24.d[0]
+    mov       v21.d[1], v25.d[0]
+    mov       v22.d[1], v26.d[0]
+    mov       v23.d[1], v27.d[0]
+
+    // Ninth 16-bit threshold, broadcast; compared against the 4x4 SADs below
+    add       x4, x4, #16
+    ld1       {v29.h}[0], [x4]
+    dup       v29.4h, v29.h[0]
+
+    // v31 accumulates the SAD
+    movi      v31.8h, #0
+
+    // Four iterations, each consuming four rows of src and est
+    mov       x7, #4
+core_loop_satqd_ime_compute_satqd_16x16_lumainter:
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    // Absolute differences widened to 16 bits: v10-v13 hold bytes 0-7 of
+    // rows 0-3, v15-v18 hold bytes 8-15 of rows 0-3
+    uabdl     v10.8h, v0.8b, v1.8b
+    uabdl2    v15.8h, v0.16b, v1.16b
+    uabdl     v11.8h, v2.8b, v3.8b
+    uabdl2    v16.8h, v2.16b, v3.16b
+    uabdl     v12.8h, v4.8b, v5.8b
+    uabdl2    v17.8h, v4.16b, v5.16b
+    uabdl     v13.8h, v6.8b, v7.8b
+    uabdl2    v18.8h, v6.16b, v7.16b
+
+    // Fold rows vertically: row0+row3 and row1+row2
+    add       v0.8h, v10.8h, v13.8h
+    add       v1.8h, v11.8h, v12.8h
+    add       v2.8h, v15.8h, v18.8h
+    add       v3.8h, v16.8h, v17.8h
+
+    //v0 : S1     S4     S4     S1        A1    A4    A4    A1
+    //v1 : S2     S3     S3     S2        A2    A3    A3    A2
+    //v2 : B1     B4     B4     B1        X1    X4    X4    X1
+    //v3 : B3     B2     B2     B3        X3    X2    X2    X3
+
+    trn1      v4.8h, v0.8h, v1.8h
+    trn2      v5.8h, v0.8h, v1.8h
+    trn1      v6.8h, v2.8h, v3.8h
+    trn2      v7.8h, v2.8h, v3.8h
+
+    trn1      v0.4s, v4.4s, v6.4s
+    trn2      v2.4s, v4.4s, v6.4s
+    trn1      v1.4s, v5.4s, v7.4s
+    trn2      v3.4s, v5.4s, v7.4s
+
+    add       v4.8h, v0.8h, v3.8h
+    add       v5.8h, v1.8h, v2.8h
+    //v4 : S1     S2     B1     B2      A1    A2    X1    X2
+    //v5 : S4     S3     B4     B3      A4    A3    X4    X3
+
+    //compute sad for each 4x4 block
+    add       v6.8h, v4.8h, v5.8h
+    addp      v19.8h, v6.8h, v6.8h
+    //accumulate the four 4x4 sads of this row group into the running SAD
+    //(v19 holds them twice, so they can also be compared 128 bits at a time)
+    add       v31.4h, v31.4h, v19.4h
+
+    //sad_2 = sad_1<<1;
+    shl       v28.8h, v19.8h, #1
+
+    //sad_2 - pu2_thrsh
+    sub       v24.8h, v28.8h, v20.8h
+    sub       v25.8h, v28.8h, v21.8h
+    sub       v26.8h, v28.8h, v22.8h
+    sub       v27.8h, v28.8h, v23.8h
+
+    trn1      v0.4s, v4.4s, v5.4s
+    trn2      v1.4s, v4.4s, v5.4s
+    //v0 : S1     S2     S4     S3      A1    A2    A4    A3
+    //v1 : B1     B2     B4     B3      X1    X2    X4    X3
+
+    trn1      v4.8h, v0.8h, v1.8h
+    trn2      v5.8h, v0.8h, v1.8h
+    //v4 : S1     B1     S4     B4      A1    X1    A4    X4
+    //v5 : S2     B2     S3     B3      A2    X2    A3    X3
+
+    // Gather one value per 4x4 block into the low 64 bits of v4-v7
+    mov       v7.s[0], v4.s[1]
+    mov       v7.s[1], v4.s[3]
+    mov       v6.s[0], v5.s[1]          // V4 //S1 B1 A1 X1
+    mov       v6.s[1], v5.s[3]          // V5 //S2 B2 A2 X2
+    mov       v4.s[1], v4.s[2]          // V6 //S3 B3 A3 X3
+    mov       v5.s[1], v5.s[2]          // V7 //S4 B4 A4 X4
+
+    shl       v0.4h, v4.4h, #1          //S1<<1
+    shl       v1.4h, v5.4h, #1          //S2<<1
+    shl       v2.4h, v6.4h, #1          //S3<<1
+    shl       v3.4h, v7.4h, #1          //S4<<1
+
+    add       v8.4h, v5.4h, v6.4h       //(s2[j] + s3[j]))
+    add       v9.4h, v4.4h, v7.4h       //(s1[j] + s4[j]))
+    add       v10.4h, v6.4h, v7.4h      //(s3[j] + s4[j]))
+    sub       v11.4h, v6.4h, v0.4h      //(s3[j] - (s1[j]<<1))
+    sub       v12.4h, v7.4h, v1.4h      //(s4[j] - (s2[j]<<1))
+    add       v13.4h, v4.4h, v5.4h      //(s1[j] + s2[j]))
+    sub       v14.4h, v5.4h, v3.4h      //(s2[j] - (s4[j]<<1)))
+    sub       v15.4h, v4.4h, v2.4h      //(s1[j] - (s3[j]<<1)))
+
+    // Pack the eight terms two per register to match v24-v27
+    mov       v8.d[1], v9.d[0]
+    mov       v10.d[1], v11.d[0]
+    mov       v12.d[1], v13.d[0]
+    mov       v14.d[1], v15.d[0]
+
+    // Signed compares: lane is all ones when (2*sad - threshold) >= term
+    cmge      v0.8h, v24.8h, v8.8h      //ls1 ls2
+    cmge      v1.8h, v25.8h, v10.8h     //ls3 ls4
+    cmge      v2.8h, v26.8h, v12.8h     //ls5 ls6
+    cmge      v3.8h, v27.8h, v14.8h     //ls7 ls8
+    cmge      v4.4h, v19.4h, v29.4h     //sad
+
+    // OR all comparison masks together into the low 64 bits of v2
+    orr       v0.16b, v0.16b, v1.16b
+    orr       v2.16b, v2.16b, v3.16b
+    orr       v2.16b, v0.16b, v2.16b
+    xtn       v2.8b, v2.8h
+    orr       v2.8b, v2.8b, v4.8b
+
+    //if the comparison is non zero, out
+    //(x4 is reused from here on as the "some comparison fired" flag)
+    mov       x4, v2.d[0]
+    cmp       x4, #0
+    bne       core_loop_compute_sad_pre
+
+    subs      x7, x7, #1
+    bne       core_loop_satqd_ime_compute_satqd_16x16_lumainter
+    b         satdq_end_func
+
+
+// Plain SAD over the remaining groups of four rows; only entered through
+// core_loop_compute_sad_pre after a comparison has fired.
+core_loop_compute_sad:
+    ld1       {v0.16b}, [x0], x2
+    ld1       {v1.16b}, [x1], x3
+    ld1       {v2.16b}, [x0], x2
+    ld1       {v3.16b}, [x1], x3
+
+    uabal     v31.8h, v0.8b, v1.8b
+    uabal2    v31.8h, v0.16b, v1.16b
+
+    uabal     v31.8h, v2.8b, v3.8b
+    uabal2    v31.8h, v2.16b, v3.16b
+
+    ld1       {v4.16b}, [x0], x2
+    ld1       {v5.16b}, [x1], x3
+    ld1       {v6.16b}, [x0], x2
+    ld1       {v7.16b}, [x1], x3
+
+    uabal     v31.8h, v4.8b, v5.8b
+    uabal2    v31.8h, v4.16b, v5.16b
+
+    uabal     v31.8h, v6.8b, v7.8b
+    uabal2    v31.8h, v6.16b, v7.16b
+
+// Entry point from the threshold loop: the current row group is already
+// included in v31, so count it off before loading any further rows.
+core_loop_compute_sad_pre:
+    subs      x7, x7, #1
+    bne       core_loop_compute_sad
+
+satdq_end_func:
+
+    // Store the 32-bit flag: 0 when x4 == 0, otherwise 1
+    mov       x7, #1
+    cmp       x4, #0
+    csel      x7, x4, x7, eq
+    str       w7, [x6]
+
+    // Reduce the 16-bit partial sums in v31 to one 32-bit SAD and store it
+    addp      v31.8h, v31.8h, v31.8h
+    uaddlp    v31.4s, v31.8h
+    addp      v31.2s, v31.2s, v31.2s
+    st1       {v31.s}[0], [x5]
+
+
+    // Restore in the reverse order of the saves at function entry
+    ldp       d14, d15, [sp], #16
+    ldp       d12, d13, [sp], #16
+    ldp       d10, d11, [sp], #16
+    ldp       d8, d9, [sp], #16
+    pop_v_regs
+    ret
+    .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ime_platform_macros.h b/encoder/armv8/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/armv8/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_platform_macros.h
+*
+* @brief
+*  Platform specific Macro definitions used in the codec
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+
+/**
+ * Adds to sad the sum of absolute differences between elements 0..3 of src
+ * and the corresponding elements of est.
+ *
+ * src, est : indexable operands (pointers or arrays); each is evaluated four
+ *            times, so they must be free of side effects
+ * sad      : modifiable lvalue that receives the accumulated sum
+ *
+ * ABS is not defined in this header and must be supplied by the includer.
+ * Every argument and the whole expansion are parenthesised so the macro
+ * stays correct when passed expressions such as (p + offset) or when used
+ * inside a larger expression.
+ */
+#define USADA8(src,est,sad) \
+                ((sad) += ABS((src)[0] - (est)[0]) + \
+                          ABS((src)[1] - (est)[1]) + \
+                          ABS((src)[2] - (est)[2]) + \
+                          ABS((src)[3] - (est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/ih264e.h b/encoder/ih264e.h
new file mode 100755
index 0000000..15a9d8f
--- /dev/null
+++ b/encoder/ih264e.h
@@ -0,0 +1,620 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : ih264e.h                                    */
+/*                                                                           */
+/*  Description       : This file contains all the necessary structure and   */
+/*                      enumeration definitions needed for the Application   */
+/*                      Program Interface(API) of the Ittiam H.264        */
+/*                      Encoder on Cortex A8 - Neon platform                 */
+/*                                                                           */
+/*  List of Functions : ih264e_api_function                              */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         26 08 2010   100239(RCY)     Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef _IH264E_H_
+#define _IH264E_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "iv2.h"
+#include "ive2.h"
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* API Function Prototype                                                    */
+/*****************************************************************************/
+/* API entry point declared by this header. It takes the codec handle plus  */
+/* untyped pointers to an input and an output argument structure.           */
+/* NOTE(review): presumably the ih264e_*_ip_t / ih264e_*_op_t structures     */
+/* declared below are what callers pass here - confirm against the           */
+/* implementation.                                                           */
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op);
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+/** Control sub-commands specific to this encoder; one value is declared so far */
+typedef enum
+{
+    IH264E_CMD_CTL_SET_ME_INFO_ENABLE,
+}IH264E_CMD_CTL_SUB_CMDS;
+
+
+/*****************************************************************************/
+/* Extended Structures                                                       */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/*  Get Number of Memory Records                                             */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for "get number of memory records"; it only wraps iv_num_mem_rec_ip_t */
+typedef struct
+{
+    iv_num_mem_rec_ip_t                    s_ive_ip;
+}ih264e_num_mem_rec_ip_t;
+
+
+/** Output (op) structure for "get number of memory records"; it only wraps iv_num_mem_rec_op_t */
+typedef struct
+{
+    iv_num_mem_rec_op_t                    s_ive_op;
+}ih264e_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*  Fill Memory Records                                                      */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for "fill memory records"; it only wraps iv_fill_mem_rec_ip_t */
+typedef struct
+{
+    iv_fill_mem_rec_ip_t                   s_ive_ip;
+}ih264e_fill_mem_rec_ip_t;
+
+
+/** Output (op) structure for "fill memory records"; it only wraps iv_fill_mem_rec_op_t */
+typedef struct
+{
+    iv_fill_mem_rec_op_t                   s_ive_op;
+}ih264e_fill_mem_rec_op_t;
+
+/*****************************************************************************/
+/*  Retrieve Memory Records                                                  */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for "retrieve memory records"; it only wraps iv_retrieve_mem_rec_ip_t */
+typedef struct
+{
+    iv_retrieve_mem_rec_ip_t               s_ive_ip;
+}ih264e_retrieve_mem_rec_ip_t;
+
+
+/** Output (op) structure for "retrieve memory records"; it only wraps iv_retrieve_mem_rec_op_t */
+typedef struct
+{
+    iv_retrieve_mem_rec_op_t               s_ive_op;
+}ih264e_retrieve_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*   Initialize encoder                                                      */
+/*****************************************************************************/
+
+/** Input (ip) structure for encoder initialization; it only wraps ive_init_ip_t */
+typedef struct
+{
+    ive_init_ip_t                           s_ive_ip;
+}ih264e_init_ip_t;
+
+
+/** Output (op) structure for encoder initialization; it only wraps ive_init_op_t */
+typedef struct
+{
+    ive_init_op_t                           s_ive_op;
+}ih264e_init_op_t;
+
+
+/*****************************************************************************/
+/*   Queue Input raw buffer - Send the YUV buffer to be encoded              */
+/*****************************************************************************/
+/** Input (ip) structure for queuing an input raw buffer; it only wraps ive_queue_inp_ip_t */
+typedef struct
+{
+    ive_queue_inp_ip_t                      s_ive_ip;
+}ih264e_queue_inp_ip_t;
+
+/** Output (op) structure for queuing an input raw buffer; it only wraps ive_queue_inp_op_t */
+typedef struct
+{
+    ive_queue_inp_op_t                      s_ive_op;
+}ih264e_queue_inp_op_t;
+
+/*****************************************************************************/
+/*   Dequeue Input raw buffer - Get free YUV buffer from the encoder         */
+/*****************************************************************************/
+/** Input (ip) structure for dequeuing an input raw buffer; it only wraps ive_dequeue_inp_ip_t */
+typedef struct
+{
+    ive_dequeue_inp_ip_t                      s_ive_ip;
+}ih264e_dequeue_inp_ip_t;
+
+/** Output (op) structure for dequeuing an input raw buffer; it only wraps ive_dequeue_inp_op_t */
+typedef struct
+{
+    ive_dequeue_inp_op_t                      s_ive_op;
+}ih264e_dequeue_inp_op_t;
+
+
+/*****************************************************************************/
+/*   Queue Output bitstream buffer - Send the bitstream buffer to be filled  */
+/*****************************************************************************/
+/** Input (ip) structure for queuing an output bitstream buffer; it only wraps ive_queue_out_ip_t */
+typedef struct
+{
+    ive_queue_out_ip_t                      s_ive_ip;
+}ih264e_queue_out_ip_t;
+
+/** Output (op) structure for queuing an output bitstream buffer; it only wraps ive_queue_out_op_t */
+typedef struct
+{
+    ive_queue_out_op_t                      s_ive_op;
+}ih264e_queue_out_op_t;
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the bitstream buffer filled         */
+/*****************************************************************************/
+/** Input (ip) structure for dequeuing an output bitstream buffer; it only wraps ive_dequeue_out_ip_t */
+typedef struct
+{
+    ive_dequeue_out_ip_t                      s_ive_ip;
+}ih264e_dequeue_out_ip_t;
+
+/** Output (op) structure for dequeuing an output bitstream buffer; it only wraps ive_dequeue_out_op_t */
+typedef struct
+{
+    ive_dequeue_out_op_t                      s_ive_op;
+}ih264e_dequeue_out_op_t;
+
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder                  */
+/*****************************************************************************/
+/** Input (ip) structure for fetching reconstructed data; it only wraps ive_get_recon_ip_t */
+typedef struct
+{
+    ive_get_recon_ip_t                        s_ive_ip;
+}ih264e_get_recon_ip_t;
+
+/** Output (op) structure for fetching reconstructed data; it only wraps ive_get_recon_op_t */
+typedef struct
+{
+    ive_get_recon_op_t                        s_ive_op;
+}ih264e_get_recon_op_t;
+/*****************************************************************************/
+/*   Video control  Flush                                                    */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for the flush control call; it only wraps ive_ctl_flush_ip_t */
+typedef struct
+{
+    ive_ctl_flush_ip_t                      s_ive_ip;
+}ih264e_ctl_flush_ip_t;
+
+
+/** Output (op) structure for the flush control call; it only wraps ive_ctl_flush_op_t */
+typedef struct
+{
+    ive_ctl_flush_op_t                      s_ive_op;
+}ih264e_ctl_flush_op_t;
+
+/*****************************************************************************/
+/*   Video control reset                                                     */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for the reset control call; it only wraps ive_ctl_reset_ip_t */
+typedef struct
+{
+    ive_ctl_reset_ip_t                      s_ive_ip;
+}ih264e_ctl_reset_ip_t;
+
+
+/** Output (op) structure for the reset control call; it only wraps ive_ctl_reset_op_t */
+typedef struct
+{
+    ive_ctl_reset_op_t                      s_ive_op;
+}ih264e_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/*   Video control:Get Buf Info                                              */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for the "get buf info" control call; it only wraps ive_ctl_getbufinfo_ip_t */
+typedef struct
+{
+    ive_ctl_getbufinfo_ip_t             s_ive_ip;
+}ih264e_ctl_getbufinfo_ip_t;
+
+
+
+/** Output (op) structure for the "get buf info" control call; it only wraps ive_ctl_getbufinfo_op_t */
+typedef struct
+{
+    ive_ctl_getbufinfo_op_t             s_ive_op;
+}ih264e_ctl_getbufinfo_op_t;
+
+
+
+/*****************************************************************************/
+/*   Video control:Get Version Info                                          */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for the "get version info" control call; it only wraps ive_ctl_getversioninfo_ip_t */
+typedef struct
+{
+    ive_ctl_getversioninfo_ip_t         s_ive_ip;
+}ih264e_ctl_getversioninfo_ip_t;
+
+
+
+/** Output (op) structure for the "get version info" control call; it only wraps ive_ctl_getversioninfo_op_t */
+typedef struct
+{
+    ive_ctl_getversioninfo_op_t         s_ive_op;
+}ih264e_ctl_getversioninfo_op_t;
+
+/*****************************************************************************/
+/*   Video control:Set default params                                       */
+/*****************************************************************************/
+
+
+/** Input (ip) structure for the "set default params" control call; it only wraps ive_ctl_setdefault_ip_t */
+typedef struct
+{
+    ive_ctl_setdefault_ip_t         s_ive_ip;
+}ih264e_ctl_setdefault_ip_t;
+
+
+
+/** Output (op) structure for the "set default params" control call; it only wraps ive_ctl_setdefault_op_t */
+typedef struct
+{
+    ive_ctl_setdefault_op_t         s_ive_op;
+}ih264e_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set IPE params                                           */
+/*****************************************************************************/
+/** Input (ip) structure for the "set IPE params" control call; it only wraps ive_ctl_set_ipe_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_ipe_params_ip_t     s_ive_ip;
+}ih264e_ctl_set_ipe_params_ip_t;
+
+/** Output (op) structure for the "set IPE params" control call; it only wraps ive_ctl_set_ipe_params_op_t */
+typedef struct
+{
+    ive_ctl_set_ipe_params_op_t     s_ive_op;
+}ih264e_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Frame dimensions                                     */
+/*****************************************************************************/
+/** Input (ip) structure for the "set frame dimensions" control call; it only wraps ive_ctl_set_dimensions_ip_t */
+typedef struct
+{
+    ive_ctl_set_dimensions_ip_t     s_ive_ip;
+}ih264e_ctl_set_dimensions_ip_t;
+
+/** Output (op) structure for the "set frame dimensions" control call; it only wraps ive_ctl_set_dimensions_op_t */
+typedef struct
+{
+    ive_ctl_set_dimensions_op_t     s_ive_op;
+}ih264e_ctl_set_dimensions_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Frame rates                                          */
+/*****************************************************************************/
+/** Input (ip) structure for the "set frame rates" control call; it only wraps ive_ctl_set_frame_rate_ip_t */
+typedef struct
+{
+    ive_ctl_set_frame_rate_ip_t     s_ive_ip;
+}ih264e_ctl_set_frame_rate_ip_t;
+/** Output (op) structure for the "set frame rates" control call; it only wraps ive_ctl_set_frame_rate_op_t */
+typedef struct
+{
+    ive_ctl_set_frame_rate_op_t     s_ive_op;
+}ih264e_ctl_set_frame_rate_op_t;
+
+
+/*****************************************************************************/
+/*   Video control  Set Bitrate                                              */
+/*****************************************************************************/
+/** Input (ip) structure for the "set bitrate" control call; it only wraps ive_ctl_set_bitrate_ip_t */
+typedef struct
+{
+    ive_ctl_set_bitrate_ip_t        s_ive_ip;
+}ih264e_ctl_set_bitrate_ip_t;
+
+/** Output (op) structure for the "set bitrate" control call; it only wraps ive_ctl_set_bitrate_op_t */
+typedef struct
+{
+    ive_ctl_set_bitrate_op_t        s_ive_op;
+}ih264e_ctl_set_bitrate_op_t;
+
+
+/*****************************************************************************/
+/*   Video control  Set Frame type                                           */
+/*****************************************************************************/
+/** Input (ip) structure for the "set frame type" control call; it only wraps ive_ctl_set_frame_type_ip_t */
+typedef struct
+{
+    ive_ctl_set_frame_type_ip_t     s_ive_ip;
+}ih264e_ctl_set_frame_type_ip_t;
+
+/** Output (op) structure for the "set frame type" control call; it only wraps ive_ctl_set_frame_type_op_t */
+typedef struct
+{
+    ive_ctl_set_frame_type_op_t     s_ive_op;
+}ih264e_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Encode mode                                          */
+/*****************************************************************************/
+/** Input (ip) structure for the "set encode mode" control call; it only wraps ive_ctl_set_enc_mode_ip_t */
+typedef struct
+{
+    ive_ctl_set_enc_mode_ip_t       s_ive_ip;
+}ih264e_ctl_set_enc_mode_ip_t;
+
+/** Output (op) structure for the "set encode mode" control call; it only wraps ive_ctl_set_enc_mode_op_t */
+typedef struct
+{
+    ive_ctl_set_enc_mode_op_t       s_ive_op;
+}ih264e_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set QP                                                   */
+/*****************************************************************************/
+/** Input (ip) structure for the "set QP" control call; it only wraps ive_ctl_set_qp_ip_t */
+typedef struct
+{
+    ive_ctl_set_qp_ip_t             s_ive_ip;
+}ih264e_ctl_set_qp_ip_t;
+
+/** Output (op) structure for the "set QP" control call; it only wraps ive_ctl_set_qp_op_t */
+typedef struct
+{
+    ive_ctl_set_qp_op_t             s_ive_op;
+}ih264e_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set AIR params                                           */
+/*****************************************************************************/
+/** Input (ip) structure for the "set AIR params" control call; it only wraps ive_ctl_set_air_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_air_params_ip_t     s_ive_ip;
+}ih264e_ctl_set_air_params_ip_t;
+
+/** Output (op) structure for the "set AIR params" control call; it only wraps ive_ctl_set_air_params_op_t */
+typedef struct
+{
+    ive_ctl_set_air_params_op_t     s_ive_op;
+}ih264e_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set VBV params                                           */
+/*****************************************************************************/
+/** Input (ip) structure for the "set VBV params" control call; it only wraps ive_ctl_set_vbv_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_vbv_params_ip_t     s_ive_ip;
+}ih264e_ctl_set_vbv_params_ip_t;
+
+/** Output (op) structure for the "set VBV params" control call; it only wraps ive_ctl_set_vbv_params_op_t */
+typedef struct
+{
+    ive_ctl_set_vbv_params_op_t     s_ive_op;
+}ih264e_ctl_set_vbv_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Processor Details                                    */
+/*****************************************************************************/
+/** Input (ip) structure for the "set processor details" control call; it only wraps ive_ctl_set_num_cores_ip_t */
+typedef struct
+{
+    ive_ctl_set_num_cores_ip_t      s_ive_ip;
+}ih264e_ctl_set_num_cores_ip_t;
+
+/** Output (op) structure for the "set processor details" control call; it only wraps ive_ctl_set_num_cores_op_t */
+typedef struct
+{
+    ive_ctl_set_num_cores_op_t      s_ive_op;
+}ih264e_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Motion estimation params                             */
+/*****************************************************************************/
+/** Input (ip) structure for the "set motion estimation params" control call; it only wraps ive_ctl_set_me_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_me_params_ip_t      s_ive_ip;
+}ih264e_ctl_set_me_params_ip_t;
+
+/** Output (op) structure for the "set motion estimation params" control call; it only wraps ive_ctl_set_me_params_op_t */
+typedef struct
+{
+    ive_ctl_set_me_params_op_t      s_ive_op;
+}ih264e_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set GOP params                                           */
+/*****************************************************************************/
+/** Input (ip) structure for the "set GOP params" control call; it only wraps ive_ctl_set_gop_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_gop_params_ip_t     s_ive_ip;
+}ih264e_ctl_set_gop_params_ip_t;
+
+/** Output (op) structure for the "set GOP params" control call; it only wraps ive_ctl_set_gop_params_op_t */
+typedef struct
+{
+    ive_ctl_set_gop_params_op_t     s_ive_op;
+}ih264e_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Deblock params                                       */
+/*****************************************************************************/
+/** Input (ip) structure for the "set deblock params" control call; it only wraps ive_ctl_set_deblock_params_ip_t */
+typedef struct
+{
+    ive_ctl_set_deblock_params_ip_t s_ive_ip;
+}ih264e_ctl_set_deblock_params_ip_t;
+
+/** Output (op) structure for the "set deblock params" control call; it only wraps ive_ctl_set_deblock_params_op_t */
+typedef struct
+{
+    ive_ctl_set_deblock_params_op_t s_ive_op;
+}ih264e_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Profile params                                       */
+/*****************************************************************************/
+typedef struct /* ih264e-level input type for the set-profile-params control */
+{
+    ive_ctl_set_profile_params_ip_t s_ive_ip;   /* common ive input struct; sole member */
+}ih264e_ctl_set_profile_params_ip_t;
+
+typedef struct /* ih264e-level output type for the set-profile-params control */
+{
+    ive_ctl_set_profile_params_op_t s_ive_op;   /* common ive output struct; sole member */
+}ih264e_ctl_set_profile_params_op_t;
+
+/*****************************************************************************/
+/*   Synchronous video encode call                                           */
+/*****************************************************************************/
+typedef struct /* ih264e-level input type for the synchronous video encode call */
+{
+    ive_video_encode_ip_t s_ive_ip;             /* common ive input struct; sole member */
+}ih264e_video_encode_ip_t;
+
+typedef struct /* ih264e-level output type for the synchronous video encode call */
+{
+    ive_video_encode_op_t s_ive_op;             /* common ive output struct; sole member */
+}ih264e_video_encode_op_t;
+
+
+/* Macroblock type. Every enumerator must fit in 8 bits, because the value is assigned to a WORD8 */
+typedef enum
+{
+    INTRA16x16 = 0,
+    INTRA4x4,       /* = 1 (implicit) */
+    INTER16x16      /* = 2 (implicit) */
+}IV_MB_TYPE_T;
+
+/*****************************************************************************/
+/*   Pic info structures                                                     */
+/*****************************************************************************/
+typedef struct /* Picture info, variant 1: Qp and picture coding type */
+{
+    /** Quantization parameter (Qp) */
+    UWORD32                                     u4_qp;
+
+    /** Picture coding type */
+    IV_PICTURE_CODING_TYPE_T                    e_frame_type;
+
+}ih264e_pic_info1_t;
+
+typedef struct /* Picture info, variant 2: variant 1 fields plus the deblock disable level */
+{
+    /** Quantization parameter (Qp) */
+    UWORD32                                     u4_qp;
+
+    /** Picture coding type */
+    IV_PICTURE_CODING_TYPE_T                    e_frame_type;
+
+    /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+    UWORD32                                     u4_disable_deblock_level;
+
+}ih264e_pic_info2_t;
+
+
+/*****************************************************************************/
+/*   MB info structures                                                     */
+/*****************************************************************************/
+typedef struct /* Motion vector: signed 16-bit X and Y components */
+{
+    /** Motion vector, X component */
+    WORD16                                  i2_mv_x;
+
+    /** Motion vector, Y component */
+    WORD16                                  i2_mv_y;
+}ih264e_mv_t;
+
+typedef struct /* MB info, variant 1: MB type plus one motion vector or one intra mode */
+{
+    /** Intra / Inter (presumably an IV_MB_TYPE_T value - confirm against users) */
+    WORD8                                       i1_mb_type;
+    union   /* anonymous union: the two members below share storage */
+    {
+        ih264e_mv_t                                 as_mv[1];   /* one motion vector */
+
+        /** Intra mode (one entry) */
+        WORD8                                       ai1_intra_mode[1];
+    };
+}ih264e_mb_info1_t;
+
+typedef struct /* MB info, variant 2: variant 1 fields plus a 16-bit SAD */
+{
+    /** Intra / Inter (presumably an IV_MB_TYPE_T value - confirm against users) */
+    WORD8                                       i1_mb_type;
+
+
+    /** SAD (sum of absolute differences), unsigned 16-bit */
+    UWORD16                                     u2_sad;
+
+    union   /* anonymous union: the two members below share storage */
+    {
+        ih264e_mv_t                                 as_mv[1];   /* one motion vector */
+
+        /** Intra mode (one entry) */
+        WORD8                                       ai1_intra_mode[1];
+    };
+
+
+}ih264e_mb_info2_t;
+
+typedef struct /* MB info, variant 3: MB type plus 4 motion vectors or 16 intra modes */
+{
+    /** Intra / Inter (presumably an IV_MB_TYPE_T value - confirm against users) */
+    WORD8                                       i1_mb_type;
+
+    union   /* anonymous union: the two members below share storage */
+    {
+        ih264e_mv_t                                 as_mv[4];   /* four motion vectors */
+
+        /** Intra modes (16 entries) */
+        WORD8                                       ai1_intra_mode[16];
+    };
+
+}ih264e_mb_info3_t;
+
+typedef struct /* MB info, variant 4: MB type, intra mode, SAD, plus 16 motion vectors or 16 intra modes */
+{
+    /** Intra / Inter (presumably an IV_MB_TYPE_T value - confirm against users) */
+    WORD8                                       i1_mb_type;
+
+    /** Intra mode (single value; NOTE(review): its relation to ai1_intra_mode[] below is not visible here) */
+    WORD8                                       i1_intra_mode;
+
+    /** SAD (sum of absolute differences), unsigned 16-bit */
+    UWORD16                                     u2_sad;
+
+    union   /* anonymous union: the two members below share storage */
+    {
+        ih264e_mv_t                                 as_mv[16];  /* sixteen motion vectors */
+
+        /** Intra modes (16 entries) */
+        WORD8                                       ai1_intra_mode[16];
+    };
+
+
+
+}ih264e_mb_info4_t;
+
+/* Add every new MB info structure to the union below: this type is used to calculate the maximum size needed for memory allocation */
+typedef struct
+{
+    union   /* anonymous union of all MB info variants; its size is that of the largest member */
+    {
+        ih264e_mb_info1_t               s_mb_info1;
+        ih264e_mb_info2_t               s_mb_info2;
+        ih264e_mb_info3_t               s_mb_info3;
+        ih264e_mb_info4_t               s_mb_info4;
+    };
+}ih264e_mb_info_t;
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+#endif /* _IH264E_H_ */
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
new file mode 100755
index 0000000..e5c66ea
--- /dev/null
+++ b/encoder/ih264e_api.c
@@ -0,0 +1,5559 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_api.c
+*
+* @brief
+*  Contains api function definitions for H264 encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - api_check_struct_sanity()
+*  - ih264e_codec_update_config()
+*  - ih264e_set_default_params()
+*  - ih264e_init()
+*  - ih264e_get_num_rec()
+*  - ih264e_fill_num_mem_rec()
+*  - ih264e_init_mem_rec()
+*  - ih264e_retrieve_memrec()
+*  - ih264e_set_flush_mode()
+*  - ih264e_get_buf_info()
+*  - ih264e_set_dimensions()
+*  - ih264e_set_frame_rate()
+*  - ih264e_set_bit_rate()
+*  - ih264e_set_frame_type()
+*  - ih264e_set_qp()
+*  - ih264e_set_enc_mode()
+*  - ih264e_set_vbv_params()
+*  - ih264_set_air_params()
+*  - ih264_set_me_params()
+*  - ih264_set_ipe_params()
+*  - ih264_set_gop_params()
+*  - ih264_set_profile_params()
+*  - ih264_set_deblock_params()
+*  - ih264e_set_num_cores()
+*  - ih264e_reset()
+*  - ih264e_ctl()
+*  - ih264e_api_function()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_core_coding.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_list.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_common_tables.h"
+#include "ih264e_master.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
+                                       iv_mem_rec_t *ps_mem,
+                                       ITT_FUNC_TYPE_E e_func_type);
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to test arguments for corresponding API call
+*
+* @par Description:
+*  For each command the arguments are validated
+*
+* @param[in] ps_handle
+*  Codec handle at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input structure
+*
+* @param[out] pv_api_op
+*  Pointer to output structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
+                                           void *pv_api_ip,
+                                           void *pv_api_op)
+{
+    /* api call */
+    WORD32 command = IV_CMD_NA;
+
+    /* input structure expected by the api call */
+    UWORD32 *pu4_api_ip = pv_api_ip;
+
+    /* output structure expected by the api call */
+    UWORD32 *pu4_api_op = pv_api_op;
+
+    /* temp var */
+    WORD32 i, j;
+
+    if (NULL == pv_api_op || NULL == pv_api_ip)
+    {
+        return (IV_FAIL);
+    }
+
+    /* get command */
+    command = pu4_api_ip[1];
+
+    /* set error code */
+    pu4_api_op[1] = 0;
+
+    /* error checks on handle */
+    switch (command)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+        case IV_CMD_FILL_NUM_MEM_REC:
+            break;
+
+        case IV_CMD_INIT:
+            if (ps_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+                return IV_FAIL;
+            }
+
+            if (ps_handle->u4_size != sizeof(iv_obj_t))
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+                return IV_FAIL;
+            }
+            break;
+
+        case IVE_CMD_QUEUE_INPUT:
+        case IVE_CMD_QUEUE_OUTPUT:
+        case IVE_CMD_DEQUEUE_OUTPUT:
+        case IVE_CMD_GET_RECON:
+        case IV_CMD_RETRIEVE_MEMREC:
+        case IVE_CMD_VIDEO_CTL:
+        case IVE_CMD_VIDEO_ENCODE:
+
+            if (ps_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+                return IV_FAIL;
+            }
+
+            if (ps_handle->u4_size != sizeof(iv_obj_t))
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+                return IV_FAIL;
+            }
+
+            if (ps_handle->pv_fxns != ih264e_api_function)
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_API_FUNCTION_PTR_NULL;
+                return IV_FAIL;
+            }
+
+            if (ps_handle->pv_codec_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVE_ERR_INVALID_CODEC_HANDLE;
+                return IV_FAIL;
+            }
+            break;
+
+        default:
+            *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+            *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+            return IV_FAIL;
+    }
+
+    /* error checks on input output structures */
+    switch (command)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+        {
+            ih264e_num_mem_rec_ip_t *ps_ip = pv_api_ip;
+            ih264e_num_mem_rec_op_t *ps_op = pv_api_op;
+
+            ps_op->s_ive_op.u4_error_code = 0;
+
+            if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_num_mem_rec_ip_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (ps_op->s_ive_op.u4_size != sizeof(ih264e_num_mem_rec_op_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+            break;
+        }
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+        {
+            ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+            ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+            iv_mem_rec_t *ps_mem_rec = NULL;
+
+            WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+            WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+            ps_op->s_ive_op.u4_error_code = 0;
+
+            if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_fill_mem_rec_ip_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (ps_op->s_ive_op.u4_size != sizeof(ih264e_fill_mem_rec_op_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (max_wd < MIN_WD || max_wd > MAX_WD)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (max_ht < MIN_HT || max_ht > MAX_HT)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            /* verify number of mem rec ptr */
+            if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+                return (IV_FAIL);
+            }
+
+            /* verify number of mem records */
+            if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+                return IV_FAIL;
+            }
+
+            /* check mem records sizes are correct */
+            ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+            for (i = 0; i < MEM_REC_CNT; i++)
+            {
+                if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                {
+                    ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                    ps_op->s_ive_op.u4_error_code |=
+                                    IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+                    return IV_FAIL;
+                }
+            }
+            break;
+        }
+
+        case IV_CMD_INIT:
+        {
+            ih264e_init_ip_t *ps_ip = pv_api_ip;
+            ih264e_init_op_t *ps_op = pv_api_op;
+
+            iv_mem_rec_t *ps_mem_rec = NULL;
+
+            WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+            WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+            ps_op->s_ive_op.u4_error_code = 0;
+
+            if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_init_ip_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (ps_op->s_ive_op.u4_size != sizeof(ih264e_init_op_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (max_wd < MIN_WD || max_wd > MAX_WD)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (max_ht < MIN_HT || max_ht > MAX_HT)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_reorder_cnt != 0)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if ((ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_10)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_1B)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_11)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_12)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_13)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_20)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_21)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_22)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_30)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_31)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_32)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_40)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_41)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_42)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_50)
+                            && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_51))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+                            && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+                            && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+                            && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if ((ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420P)
+                            && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_UV)
+                            && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_VU))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if ((ps_ip->s_ive_ip.e_rc_mode != IVE_RC_NONE)
+                            && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_STORAGE)
+                            && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_CBR_NON_LOW_DELAY))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_framerate > DEFAULT_MAX_FRAMERATE)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_FRAME_RATE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_bitrate > DEFAULT_MAX_BITRATE)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_num_bframes != 0)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.e_content_type != IV_PROGRESSIVE)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_CONTENT_TYPE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_srch_rng_x > DEFAULT_MAX_SRCH_RANGE_X)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.u4_max_srch_rng_y > DEFAULT_MAX_SRCH_RANGE_Y)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+                return (IV_FAIL);
+            }
+
+            if ((ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_NONE)
+                            && (ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_BLOCKS))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IH264E_SLICE_TYPE_INPUT_INVALID;
+                return (IV_FAIL);
+            }
+
+            if (ps_ip->s_ive_ip.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+            {
+                if (ps_ip->s_ive_ip.u4_slice_param == 0
+                                || ps_ip->s_ive_ip.u4_slice_param > ((UWORD32)max_ht >> 4))
+                {
+                    ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                    ps_op->s_ive_op.u4_error_code |=
+                                    IH264E_SLICE_PARAM_INPUT_INVALID;
+                    return (IV_FAIL);
+                }
+            }
+
+            if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+                return (IV_FAIL);
+            }
+
+            /* verify number of mem records */
+            if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+                return (IV_FAIL);
+            }
+
+            ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+            /* check memrecords sizes are correct */
+            for (i = 0; i <((WORD32)ps_ip->s_ive_ip.u4_num_mem_rec); i++)
+            {
+                if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                {
+                    ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                    ps_op->s_ive_op.u4_error_code |=
+                                    IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+                    return IV_FAIL;
+                }
+
+                /* check memrecords pointers are not NULL */
+                if (ps_mem_rec[i].pv_base == NULL)
+                {
+                    ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                    ps_op->s_ive_op.u4_error_code |=
+                                    IVE_ERR_MEM_REC_BASE_POINTER_NULL;
+                    return IV_FAIL;
+                }
+            }
+
+            /* verify memtabs for overlapping regions */
+            {
+                void *start[MEM_REC_CNT];
+                void *end[MEM_REC_CNT];
+
+                start[0] = (ps_mem_rec[0].pv_base);
+                end[0] = ((UWORD8 *) ps_mem_rec[0].pv_base)
+                                + ps_mem_rec[0].u4_mem_size - 1;
+
+                for (i = 1; i < MEM_REC_CNT; i++)
+                {
+                    /* This array is populated to check memtab overlap */
+                    start[i] = (ps_mem_rec[i].pv_base);
+                    end[i] = ((UWORD8 *) ps_mem_rec[i].pv_base)
+                                    + ps_mem_rec[i].u4_mem_size - 1;
+
+                    for (j = 0; j < i; j++)
+                    {
+                        if ((start[i] >= start[j]) && (start[i] <= end[j]))
+                        {
+                            ps_op->s_ive_op.u4_error_code |= 1
+                                            << IVE_UNSUPPORTEDPARAM;
+                            ps_op->s_ive_op.u4_error_code |=
+                                            IVE_ERR_MEM_REC_OVERLAP_ERR;
+                            return IV_FAIL;
+                        }
+
+                        if ((end[i] >= start[j]) && (end[i] <= end[j]))
+                        {
+                            ps_op->s_ive_op.u4_error_code |= 1
+                                            << IVE_UNSUPPORTEDPARAM;
+                            ps_op->s_ive_op.u4_error_code |=
+                                            IVE_ERR_MEM_REC_OVERLAP_ERR;
+                            return IV_FAIL;
+                        }
+
+                        if ((start[i] < start[j]) && (end[i] > end[j]))
+                        {
+                            ps_op->s_ive_op.u4_error_code |= 1
+                                            << IVE_UNSUPPORTEDPARAM;
+                            ps_op->s_ive_op.u4_error_code |=
+                                            IVE_ERR_MEM_REC_OVERLAP_ERR;
+                            return IV_FAIL;
+                        }
+                    }
+                }
+            }
+
+            /* re-validate mem records with init config */
+            {
+                /* mem records */
+                iv_mem_rec_t s_mem_rec_ittiam_api[MEM_REC_CNT];
+
+                /* api interface structs */
+                ih264e_fill_mem_rec_ip_t s_ip;
+                ih264e_fill_mem_rec_op_t s_op;
+
+                /* error status */
+                IV_STATUS_T e_status;
+
+                /* temp var */
+                WORD32 i;
+
+                s_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t);
+                s_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t);
+
+                s_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC;
+                s_ip.s_ive_ip.ps_mem_rec = s_mem_rec_ittiam_api;
+                s_ip.s_ive_ip.u4_max_wd = max_wd;
+                s_ip.s_ive_ip.u4_max_ht = max_ht;
+                s_ip.s_ive_ip.u4_num_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+                s_ip.s_ive_ip.u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+                s_ip.s_ive_ip.u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+                s_ip.s_ive_ip.u4_max_reorder_cnt =
+                                ps_ip->s_ive_ip.u4_max_reorder_cnt;
+                s_ip.s_ive_ip.e_color_format = ps_ip->s_ive_ip.e_inp_color_fmt;
+                s_ip.s_ive_ip.u4_max_srch_rng_x =
+                                ps_ip->s_ive_ip.u4_max_srch_rng_x;
+                s_ip.s_ive_ip.u4_max_srch_rng_y =
+                                ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+                for (i = 0; i < MEM_REC_CNT; i++)
+                {
+                    s_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t);
+                }
+
+                /* fill mem records */
+                e_status = ih264e_api_function(NULL, (void *) &s_ip,
+                                               (void *) &s_op);
+
+                if (IV_FAIL == e_status)
+                {
+                    ps_op->s_ive_op.u4_error_code = s_op.s_ive_op.u4_error_code;
+                    return (IV_FAIL);
+                }
+
+                /* verify mem records */
+                for (i = 0; i < MEM_REC_CNT; i++)
+                {
+                    if (ps_mem_rec[i].u4_mem_size
+                                    < s_mem_rec_ittiam_api[i].u4_mem_size)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_MEM_REC_INSUFFICIENT_SIZE;
+
+                        return IV_FAIL;
+                    }
+
+                    if (ps_mem_rec[i].u4_mem_alignment
+                                    != s_mem_rec_ittiam_api[i].u4_mem_alignment)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_MEM_REC_ALIGNMENT_ERR;
+
+                        return IV_FAIL;
+                    }
+
+                    if (ps_mem_rec[i].e_mem_type
+                                    != s_mem_rec_ittiam_api[i].e_mem_type)
+                    {
+                        UWORD32 check = IV_SUCCESS;
+                        UWORD32 diff = s_mem_rec_ittiam_api[i].e_mem_type
+                                        - ps_mem_rec[i].e_mem_type;
+
+                        if ((ps_mem_rec[i].e_mem_type
+                                        <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM)
+                                        && (s_mem_rec_ittiam_api[i].e_mem_type
+                                                        >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM))
+                        {
+                            check = IV_FAIL;
+                        }
+
+                        if (3 != (s_mem_rec_ittiam_api[i].e_mem_type % 4))
+                        {
+                            /* It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or
+                             * IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM */
+
+                            if ((diff < 1) || (diff > 3))
+                            {
+                                /* A difference in the inclusive range 1 to 3 is acceptable for all
+                                 * cases other than the two excluded by the MOD condition above */
+                                check = IV_FAIL;
+                            }
+                        }
+                        else
+                        {
+                            if (diff == 1)
+                            {
+                                /* This particular case is when codec asked for External Persistent,
+                                 * but got Internal Scratch */
+                                check = IV_FAIL;
+                            }
+                            if ((diff != 2) && (diff != 3))
+                            {
+                                check = IV_FAIL;
+                            }
+                        }
+
+                        if (check == IV_FAIL)
+                        {
+                            ps_op->s_ive_op.u4_error_code |= 1
+                                            << IVE_UNSUPPORTEDPARAM;
+                            ps_op->s_ive_op.u4_error_code |=
+                                            IVE_ERR_MEM_REC_INCORRECT_TYPE;
+
+                            return IV_FAIL;
+                        }
+                    }
+                }
+            }
+            break;
+        }
+
+        case IVE_CMD_QUEUE_INPUT:
+        case IVE_CMD_QUEUE_OUTPUT:
+        case IVE_CMD_DEQUEUE_OUTPUT:
+        case IVE_CMD_GET_RECON:
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC:
+        {
+            ih264e_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip;
+            ih264e_retrieve_mem_rec_op_t *ps_op = pv_api_op;
+
+            iv_mem_rec_t *ps_mem_rec = NULL;
+
+            ps_op->s_ive_op.u4_error_code = 0;
+
+            if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_retrieve_mem_rec_ip_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (ps_op->s_ive_op.u4_size != sizeof(ih264e_retrieve_mem_rec_op_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+                return (IV_FAIL);
+            }
+
+            ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+            /* check memrecords sizes are correct */
+            for (i = 0; i < MEM_REC_CNT; i++)
+            {
+                if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                {
+                    ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                    ps_op->s_ive_op.u4_error_code |=
+                                    IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+                    return IV_FAIL;
+                }
+            }
+            break;
+        }
+
+        case IVE_CMD_VIDEO_ENCODE:
+        {
+            ih264e_video_encode_ip_t *ps_ip = pv_api_ip;
+            ih264e_video_encode_op_t *ps_op = pv_api_op;
+
+            if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_video_encode_ip_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+
+            if (ps_op->s_ive_op.u4_size != sizeof(ih264e_video_encode_op_t))
+            {
+                ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+                ps_op->s_ive_op.u4_error_code |=
+                                IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+                return (IV_FAIL);
+            }
+            break;
+        }
+
+        case IVE_CMD_VIDEO_CTL:
+        {
+            /* ptr to input structure */
+            WORD32 *pu4_ptr_cmd = pv_api_ip;
+
+            /* sub command */
+            WORD32 sub_command = pu4_ptr_cmd[2];
+
+            switch (sub_command)
+            {
+                case IVE_CMD_CTL_SETDEFAULT:
+                {
+                    ih264e_ctl_setdefault_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_setdefault_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_setdefault_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+                    break;
+                }
+
+                case IVE_CMD_CTL_GETBUFINFO:
+                {
+                    codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+                    ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_getbufinfo_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_getbufinfo_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_max_wd < MIN_WD)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_WIDTH_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_max_wd > ps_codec->s_cfg.u4_max_wd)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_WIDTH_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_max_ht < MIN_HT)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_HEIGHT_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_max_ht > ps_codec->s_cfg.u4_max_ht)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_HEIGHT_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+                                    && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+                                    && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+                                    && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+                    break;
+                }
+
+                case IVE_CMD_CTL_GETVERSION:
+                {
+                    ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_getversioninfo_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_getversioninfo_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.pu1_version == NULL)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_FLUSH:
+                {
+                    ih264e_ctl_flush_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_flush_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_flush_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_flush_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_RESET:
+                {
+                    ih264e_ctl_reset_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_reset_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_reset_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_reset_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_NUM_CORES:
+                {
+                    ih264e_ctl_set_num_cores_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_num_cores_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_num_cores_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_num_cores_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_num_cores < 1)
+                                    || (ps_ip->s_ive_ip.u4_num_cores > MAX_NUM_CORES))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_NUM_CORES;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_DIMENSIONS:
+                {
+                    codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+                    ih264e_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_dimensions_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_dimensions_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_dimensions_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_wd < MIN_WD)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_WIDTH_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_wd > ps_codec->s_cfg.u4_max_wd)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_WIDTH_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_ht < MIN_HT)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_HEIGHT_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_ht > ps_codec->s_cfg.u4_max_ht)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_HEIGHT_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_FRAMERATE:
+                {
+                    ih264e_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_frame_rate_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_frame_rate_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_frame_rate_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (((ps_ip->s_ive_ip.u4_src_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE)
+                                    || ((ps_ip->s_ive_ip.u4_tgt_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_FRAME_RATE_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_src_frame_rate == 0)
+                                    || (ps_ip->s_ive_ip.u4_tgt_frame_rate == 0))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_FRAME_RATE_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_tgt_frame_rate
+                                    > ps_ip->s_ive_ip.u4_src_frame_rate)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE;
+                        return (IV_FAIL);
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_BITRATE:
+                {
+                    ih264e_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_bitrate_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_bitrate_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_bitrate_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_target_bitrate > DEFAULT_MAX_BITRATE)
+                                    || (ps_ip->s_ive_ip.u4_target_bitrate == 0))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_BITRATE_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_FRAMETYPE:
+                {
+                    ih264e_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_frame_type_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_frame_type_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_frame_type_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.e_frame_type != IV_NA_FRAME)
+                                    && (ps_ip->s_ive_ip.e_frame_type != IV_I_FRAME)
+                                    && (ps_ip->s_ive_ip.e_frame_type != IV_P_FRAME)
+                                    && (ps_ip->s_ive_ip.e_frame_type != IV_IDR_FRAME))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_FORCE_FRAME_INPUT;
+                        return IV_FAIL;
+                    }
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_ME_PARAMS:
+                {
+                    codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+                    ih264e_ctl_set_me_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_me_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_me_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_me_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_me_speed_preset != FULL_SRCH)
+                                    && (ps_ip->s_ive_ip.u4_me_speed_preset != DMND_SRCH)
+                                    && (ps_ip->s_ive_ip.u4_me_speed_preset != HEX_SRCH))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_ME_SPEED_PRESET;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_enable_hpel != 0)
+                                    && (ps_ip->s_ive_ip.u4_enable_hpel != 1))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_HALFPEL_OPTION;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_enable_qpel != 0)
+                                    && (ps_ip->s_ive_ip.u4_enable_qpel != 1))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_QPEL_OPTION;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_enable_fast_sad != 0)
+                                    && (ps_ip->s_ive_ip.u4_enable_fast_sad != 1))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_FAST_SAD_OPTION;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_enable_alt_ref > 255)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_ALT_REF_OPTION;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_srch_rng_x
+                                    > ps_codec->s_cfg.u4_max_srch_rng_x)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_srch_rng_y
+                                    > ps_codec->s_cfg.u4_max_srch_rng_y)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+                        return (IV_FAIL);
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_IPE_PARAMS:
+                {
+                    ih264e_ctl_set_ipe_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_ipe_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_ipe_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_ipe_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_enable_intra_4x4 != 0)
+                                    && (ps_ip->s_ive_ip.u4_enable_intra_4x4 != 1))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_INTRA4x4_OPTION;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_CONFIG)
+                                    && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_SLOWEST)
+                                    && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_NORMAL)
+                                    && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FAST)
+                                    && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_HIGH_SPEED)
+                                    && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FASTEST))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_ENC_SPEED_PRESET;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_GOP_PARAMS:
+                {
+                    ih264e_ctl_set_gop_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_gop_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_gop_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_gop_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_i_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+                                    || (ps_ip->s_ive_ip.u4_i_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_INTRA_FRAME_INTERVAL;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_idr_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+                                    || (ps_ip->s_ive_ip.u4_idr_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_IDR_FRAME_INTERVAL;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_num_b_frames != 0)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_BFRAMES_NOT_SUPPORTED;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+                {
+                    ih264e_ctl_set_deblock_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_deblock_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_deblock_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_deblock_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_0)
+                                    && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_2)
+                                    && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_3)
+                                    && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_4))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_DEBLOCKING_TYPE_INPUT;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_QP:
+                {
+                    ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_qp_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_qp_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_qp_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_i_qp_max > MAX_H264_QP)
+                                    || (ps_ip->s_ive_ip.u4_p_qp_max > MAX_H264_QP)
+                                    || (ps_ip->s_ive_ip.u4_b_qp_max > MAX_H264_QP))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_MAX_FRAME_QP;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max)
+                                    || (ps_ip->s_ive_ip.u4_p_qp_min > ps_ip->s_ive_ip.u4_p_qp_max)
+                                    || (ps_ip->s_ive_ip.u4_b_qp_min > ps_ip->s_ive_ip.u4_b_qp_max))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_MIN_FRAME_QP;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_i_qp > ps_ip->s_ive_ip.u4_i_qp_max)
+                                    || (ps_ip->s_ive_ip.u4_p_qp > ps_ip->s_ive_ip.u4_p_qp_max)
+                                    || (ps_ip->s_ive_ip.u4_b_qp > ps_ip->s_ive_ip.u4_b_qp_max))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_i_qp < ps_ip->s_ive_ip.u4_i_qp_min)
+                                    || (ps_ip->s_ive_ip.u4_p_qp < ps_ip->s_ive_ip.u4_p_qp_min)
+                                    || (ps_ip->s_ive_ip.u4_b_qp < ps_ip->s_ive_ip.u4_b_qp_min))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_ENC_MODE:
+                {
+                    ih264e_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_enc_mode_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_enc_mode_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_enc_mode_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_HEADER)
+                                    && (ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_PICTURE))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_ENC_OPERATION_MODE;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_VBV_PARAMS:
+                {
+                    ih264e_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_vbv_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_vbv_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_vbv_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.u4_vbv_buffer_delay < DEFAULT_MIN_BUFFER_DELAY)
+                                    || (ps_ip->s_ive_ip.u4_vbv_buffer_delay > DEFAULT_MAX_BUFFER_DELAY))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_BUFFER_DELAY;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_AIR_PARAMS:
+                {
+                    ih264e_ctl_set_air_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_air_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_air_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_air_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if ((ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_NONE)
+                                    && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_CYCLIC)
+                                    && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_RANDOM))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_AIR_MODE;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.u4_air_refresh_period == 0)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_INVALID_AIR_REFRESH_PERIOD;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+                {
+                    ih264e_ctl_set_profile_params_ip_t *ps_ip = pv_api_ip;
+                    ih264e_ctl_set_profile_params_op_t *ps_op = pv_api_op;
+
+                    if (ps_ip->s_ive_ip.u4_size
+                                    != sizeof(ih264e_ctl_set_profile_params_ip_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_op->s_ive_op.u4_size
+                                    != sizeof(ih264e_ctl_set_profile_params_op_t))
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+
+                    if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE)
+                    {
+                        ps_op->s_ive_op.u4_error_code |= 1
+                                        << IVE_UNSUPPORTEDPARAM;
+                        ps_op->s_ive_op.u4_error_code |=
+                                        IH264E_PROFILE_NOT_SUPPORTED;
+                        return IV_FAIL;
+                    }
+
+                    break;
+                }
+
+                default:
+                    *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+                    *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_SUB_CMD;
+                    return IV_FAIL;
+            }
+
+            break;
+        }
+
+        default:
+            *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+            *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+            return IV_FAIL;
+    }
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief update encoder configuration parameters
+*
+* @par Description:
+*  Updates the encoder configuration parameters from the given config set and
+*  initializes/reinitializes the codec parameters affected by the new configuration.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_cfg
+*  Pointer to config param set
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
+                                          cfg_params_t *ps_cfg)
+{
+    /* config params */
+    cfg_params_t *ps_curr_cfg = &ps_codec->s_cfg;
+
+    /* error status */
+    IH264E_ERROR_T err = IH264E_SUCCESS;
+
+    /* temp var */
+    UWORD32 u4_init_rc = 0;
+
+    /***********************/
+    /* UPDATE CODEC CONFIG */
+    /***********************/
+    if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DIMENSIONS)
+    {
+        UWORD32 wd_aln = ALIGN16(ps_cfg->u4_wd);
+        UWORD32 ht_aln = ALIGN16(ps_cfg->u4_ht);
+
+        if (ps_curr_cfg->u4_wd != wd_aln || ps_curr_cfg->u4_ht != ht_aln
+                        || ps_curr_cfg->u4_strd != ps_cfg->u4_strd
+                        || ps_curr_cfg->u4_disp_wd != ps_cfg->u4_disp_wd
+                        || ps_curr_cfg->u4_disp_ht != ps_cfg->u4_disp_ht)
+        {
+            ps_curr_cfg->u4_wd = wd_aln;
+            ps_curr_cfg->u4_ht = ht_aln;
+            ps_curr_cfg->u4_strd = ps_cfg->u4_strd;
+
+            if (ps_curr_cfg->u4_strd == 0)
+            {
+                ps_curr_cfg->u4_strd = ps_curr_cfg->u4_wd;
+            }
+
+            ps_curr_cfg->u4_disp_wd = ps_cfg->u4_disp_wd;
+            ps_curr_cfg->u4_disp_ht = ps_cfg->u4_disp_ht;
+
+            ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4;
+            ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4;
+
+            ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+            ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD;
+
+            /* If the number of MBs in a frame changes, the air map changes too.
+             * Hence recompute the air map and reset the air pic cnt */
+            if (ps_codec->s_cfg.e_air_mode != IVE_AIR_MODE_NONE)
+            {
+                /* re-init the air map */
+                ih264e_init_air_map(ps_codec);
+
+                /* reset air counter */
+                ps_codec->i4_air_pic_cnt = -1;
+            }
+
+            /* initialize mv bank buffer manager */
+            err = ih264e_mv_buf_mgr_add_bufs(ps_codec);
+            if (err != IH264E_SUCCESS)
+                return err;
+
+            /* initialize ref bank buffer manager */
+            err = ih264e_pic_buf_mgr_add_bufs(ps_codec);
+            if (err != IH264E_SUCCESS)
+                return err;
+
+            /* since dimension changed, start new sequence by forcing IDR */
+            ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+            /* in case dimension changes, we need to reinitialize RC as the
+             * old model shall not fit further */
+            u4_init_rc = 1;
+
+            /* when the dimension changes, the header needs to be regenerated */
+            ps_codec->i4_header_mode = 1;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMERATE)
+    {
+        /* temp var */
+        UWORD32 u4_src_ticks, u4_tgt_ticks;
+
+        u4_src_ticks = ih264e_frame_time_get_src_ticks(
+                        ps_codec->s_rate_control.pps_frame_time);
+
+        u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(
+                        ps_codec->s_rate_control.pps_frame_time);
+
+        /* Change frame rate */
+        if (ps_codec->s_cfg.u4_src_frame_rate
+                        != ps_cfg->u4_src_frame_rate * 1000)
+        {
+            ps_codec->s_cfg.u4_src_frame_rate = ps_cfg->u4_src_frame_rate
+                            * 1000;
+
+            ih264e_frame_time_update_src_frame_rate(
+                            ps_codec->s_rate_control.pps_frame_time,
+                            ps_codec->s_cfg.u4_src_frame_rate);
+
+            ih264_time_stamp_update_frame_rate(
+                            ps_codec->s_rate_control.pps_time_stamp,
+                            ps_codec->s_cfg.u4_src_frame_rate);
+
+            irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+                                  ps_codec->s_cfg.u4_src_frame_rate,
+                                  u4_src_ticks, u4_tgt_ticks);
+        }
+
+        if (ps_codec->s_cfg.u4_tgt_frame_rate
+                        != ps_cfg->u4_tgt_frame_rate * 1000)
+        {
+            ps_codec->s_cfg.u4_tgt_frame_rate = ps_cfg->u4_tgt_frame_rate
+                            * 1000;
+
+            ih264e_frame_time_update_tgt_frame_rate(
+                            ps_codec->s_rate_control.pps_frame_time,
+                            ps_codec->s_cfg.u4_tgt_frame_rate);
+
+            irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+                                  ps_codec->s_cfg.u4_src_frame_rate,
+                                  u4_src_ticks, u4_tgt_ticks);
+
+            irc_change_frm_rate_for_bit_alloc(
+                            ps_codec->s_rate_control.pps_rate_control_api,
+                            ps_codec->s_cfg.u4_tgt_frame_rate);
+        }
+
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_BITRATE)
+    {
+        if (ps_curr_cfg->u4_target_bitrate != ps_cfg->u4_target_bitrate)
+        {
+            if (IVE_RC_NONE != ps_curr_cfg->e_rc_mode)
+                irc_change_avg_bit_rate(
+                                ps_codec->s_rate_control.pps_rate_control_api,
+                                ps_cfg->u4_target_bitrate);
+
+            ps_curr_cfg->u4_target_bitrate = ps_cfg->u4_target_bitrate;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMETYPE)
+    {
+        switch (ps_cfg->e_frame_type)
+        {
+            case IV_I_FRAME:
+                ps_codec->force_curr_frame_type = IV_I_FRAME;
+                break;
+
+            case IV_IDR_FRAME:
+                ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+                break;
+
+            case IV_P_FRAME:
+            default:
+                break;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ME_PARAMS)
+    {
+        if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+        {
+            ps_codec->s_cfg.u4_enable_hpel = ps_cfg->u4_enable_hpel;
+            ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+            ps_codec->s_cfg.u4_me_speed_preset = ps_cfg->u4_me_speed_preset;
+            ps_codec->s_cfg.u4_enable_qpel = ps_cfg->u4_enable_qpel;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+        {
+            ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+        }
+        ps_codec->s_cfg.u4_srch_rng_x = ps_cfg->u4_srch_rng_x;
+        ps_codec->s_cfg.u4_srch_rng_y = ps_cfg->u4_srch_rng_y;
+
+        if (ps_codec->s_cfg.u4_enable_alt_ref != ps_cfg->u4_enable_alt_ref)
+        {
+            ps_codec->s_cfg.u4_enable_alt_ref = ps_cfg->u4_enable_alt_ref;
+            ps_codec->u4_is_curr_frm_ref = 1;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_IPE_PARAMS)
+    {
+        ps_curr_cfg->u4_enc_speed_preset = ps_cfg->u4_enc_speed_preset;
+
+        if (ps_curr_cfg->u4_enc_speed_preset == IVE_SLOWEST)
+        {/* high quality */
+            /* enable diamond search */
+            ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+            ps_curr_cfg->u4_enable_fast_sad = 0;
+
+            /* enable intra 4x4 */
+            ps_curr_cfg->u4_enable_intra_4x4 = 1;
+            ps_codec->luma_energy_compaction[1] =
+                            ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+            /* sub pel (hpel) on */
+            ps_curr_cfg->u4_enable_hpel = 1;
+
+            /* deblocking: use disable level 0 */
+            ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* disabled intra inter gating in Inter slices */
+            ps_codec->u4_inter_gate = 0;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_NORMAL)
+        {/* normal */
+            /* enable diamond search */
+            ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+            ps_curr_cfg->u4_enable_fast_sad = 0;
+
+            /* enable intra 4x4 */
+            ps_curr_cfg->u4_enable_intra_4x4 = 1;
+
+            /* sub pel (hpel) on */
+            ps_curr_cfg->u4_enable_hpel = 1;
+
+            /* deblocking: use disable level 0 */
+            ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* disabled intra inter gating in Inter slices */
+            ps_codec->u4_inter_gate = 0;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FAST)
+        {/* normal */
+            /* enable diamond search */
+            ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+            ps_curr_cfg->u4_enable_fast_sad = 0;
+
+            /* disable intra 4x4 */
+            ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+            /* sub pel (hpel) on */
+            ps_curr_cfg->u4_enable_hpel = 1;
+
+            /* deblocking: use disable level 0 */
+            ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* enable intra inter gating in Inter slices */
+            ps_codec->u4_inter_gate = 1;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED)
+        {/* fast */
+            /* enable diamond search */
+            ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+            ps_curr_cfg->u4_enable_fast_sad = 0;
+
+            /* disable intra 4x4 */
+            ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+            /* sub pel off */
+            ps_curr_cfg->u4_enable_hpel = 0;
+
+            /* deblocking off */
+            ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+            /* disabled intra inter gating in Inter slices */
+            ps_codec->u4_inter_gate = 0;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+        {/* fastest */
+            /* enable diamond search */
+            ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+            //u4_num_layers = 4;
+
+            /* disable intra 4x4 */
+            ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+            /* sub pel off */
+            ps_curr_cfg->u4_enable_hpel = 0;
+
+            /* deblocking off */
+            ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+            /* enable intra inter gating in Inter slices */
+            ps_codec->u4_inter_gate = 1;
+        }
+        else if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+        {
+            ps_curr_cfg->u4_enable_intra_4x4 = ps_cfg->u4_enable_intra_4x4;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_GOP_PARAMS)
+    {
+        if (ps_curr_cfg->u4_i_frm_interval != ps_cfg->u4_i_frm_interval)
+        {
+            ps_curr_cfg->u4_i_frm_interval = ps_cfg->u4_i_frm_interval;
+
+            /* reset air counter */
+            ps_codec->i4_air_pic_cnt = -1;
+
+            /* re-init air map */
+            ih264e_init_air_map(ps_codec);
+
+            /*Effect intra frame interval change*/
+
+            irc_change_intra_frm_int_call(
+                            ps_codec->s_rate_control.pps_rate_control_api,
+                            ps_curr_cfg->u4_i_frm_interval);
+        }
+
+        ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval;
+
+        ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames;
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS)
+    {
+        if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+        {
+            ps_curr_cfg->u4_disable_deblock_level =
+                            ps_cfg->u4_disable_deblock_level;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_QP)
+    {
+        UWORD8 au1_init_qp[MAX_PIC_TYPE];
+        UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+        ps_codec->s_cfg.u4_i_qp_max = ps_cfg->u4_i_qp_max;
+        ps_codec->s_cfg.u4_i_qp_min = ps_cfg->u4_i_qp_min;
+        ps_codec->s_cfg.u4_i_qp = ps_cfg->u4_i_qp;
+
+        ps_codec->s_cfg.u4_p_qp_max = ps_cfg->u4_p_qp_max;
+        ps_codec->s_cfg.u4_p_qp_min = ps_cfg->u4_p_qp_min;
+        ps_codec->s_cfg.u4_p_qp = ps_cfg->u4_p_qp;
+
+        ps_codec->s_cfg.u4_b_qp_max = ps_cfg->u4_b_qp_max;
+        ps_codec->s_cfg.u4_b_qp_min = ps_cfg->u4_b_qp_min;
+        ps_codec->s_cfg.u4_b_qp = ps_cfg->u4_b_qp;
+
+        /* update rc lib with modified qp */
+        au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+        au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+        au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+        irc_change_init_qp(ps_codec->s_rate_control.pps_rate_control_api,
+                           au1_init_qp);
+
+        au1_min_max_qp[2 * I_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+        au1_min_max_qp[2 * I_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+        au1_min_max_qp[2 * P_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+        au1_min_max_qp[2 * P_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+        au1_min_max_qp[2 * B_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+        au1_min_max_qp[2 * B_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+        irc_change_min_max_qp(ps_codec->s_rate_control.pps_rate_control_api,
+                              au1_min_max_qp);
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ENC_MODE)
+    {
+        ps_codec->s_cfg.e_enc_mode = ps_cfg->e_enc_mode;
+
+        if (ps_codec->s_cfg.e_enc_mode == IVE_ENC_MODE_HEADER)
+        {
+            ps_codec->i4_header_mode = 1;
+            ps_codec->s_cfg.e_enc_mode = IVE_ENC_MODE_PICTURE;
+        }
+        else
+        {
+            ps_codec->i4_header_mode = 0;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_VBV_PARAMS
+                    && IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+    {
+        ps_codec->s_cfg.u4_vbv_buf_size = ps_cfg->u4_vbv_buf_size;
+        ps_codec->s_cfg.u4_vbv_buffer_delay = ps_cfg->u4_vbv_buffer_delay;
+
+        // irc_change_buffer_delay(ps_codec->s_rate_control.pps_rate_control_api, ps_codec->s_cfg.u4_vbv_buffer_delay);
+
+        // TODO: remove this forced RC re-init once support for changing the
+        // buffer delay dynamically has been added.
+        u4_init_rc = 1;
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_AIR_PARAMS)
+    {
+        if (ps_curr_cfg->e_air_mode != ps_cfg->e_air_mode
+                        || ps_curr_cfg->u4_air_refresh_period
+                                        != ps_cfg->u4_air_refresh_period)
+        {
+            ps_curr_cfg->e_air_mode = ps_cfg->e_air_mode;
+            ps_curr_cfg->u4_air_refresh_period = ps_cfg->u4_air_refresh_period;
+
+            ih264e_init_air_map(ps_codec);
+
+            /* reset air counter */
+            ps_codec->i4_air_pic_cnt = -1;
+        }
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS)
+    {
+        ps_codec->s_cfg.e_profile = ps_cfg->e_profile;
+    }
+    else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES)
+    {
+        ps_codec->s_cfg.u4_num_cores = ps_cfg->u4_num_cores;
+    }
+
+    /* reset RC model */
+    if (u4_init_rc)
+    {
+        /* init qp */
+        UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+        /* min max qp */
+        UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+        /* init i,p,b qp */
+        au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+        au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+        au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+        /* init min max qp */
+        au1_min_max_qp[2 * I_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+        au1_min_max_qp[2 * I_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+        au1_min_max_qp[2 * P_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+        au1_min_max_qp[2 * P_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+        au1_min_max_qp[2 * B_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+        au1_min_max_qp[2 * B_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+        /* get rc mode */
+        switch (ps_codec->s_cfg.e_rc_mode)
+        {
+            case IVE_RC_STORAGE:
+                ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+                break;
+
+            case IVE_RC_CBR_NON_LOW_DELAY:
+                ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+                break;
+
+            case IVE_RC_CBR_LOW_DELAY:
+                ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+                break;
+
+            case IVE_RC_NONE:
+                ps_codec->s_rate_control.e_rc_type = CONST_QP;
+                break;
+
+            default:
+                break;
+        }
+
+        /* init rate control */
+        ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+                       ps_codec->s_rate_control.pps_frame_time,
+                       ps_codec->s_rate_control.pps_time_stamp,
+                       ps_codec->s_rate_control.pps_pd_frm_rate,
+                       ps_codec->s_cfg.u4_max_framerate,
+                       ps_codec->s_cfg.u4_src_frame_rate,
+                       ps_codec->s_cfg.u4_tgt_frame_rate,
+                       ps_codec->s_rate_control.e_rc_type,
+                       ps_codec->s_cfg.u4_target_bitrate,
+                       ps_codec->s_cfg.u4_max_bitrate,
+                       ps_codec->s_cfg.u4_vbv_buffer_delay,
+                       ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+                       H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+                       ps_codec->s_cfg.u4_max_level);
+    }
+
+    return err;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Loads the built-in default value into every encoder config parameter
+*
+* @par Description:
+*  Writes a default into each field of the supplied config structure so that
+*  the codec has a usable configuration even if set_params is never called.
+*  NOTE(review): the call site is outside this section - confirm where this is
+*  invoked during initialisation.
+*
+* @param[in] ps_cfg
+*  Pointer to encoder config params; every field listed below is overwritten
+*
+* @returns  IV_SUCCESS (there is no failure path)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
+{
+    /* upper bounds (u4_max_* fields) */
+    ps_cfg->u4_max_wd = MAX_WD;
+    ps_cfg->u4_max_ht = MAX_HT;
+    ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+    ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE;
+    ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+    ps_cfg->u4_max_num_bframes = 0;
+    ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+    ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+
+    /* picture geometry: coded size, display size and stride all start at max */
+    ps_cfg->u4_wd = MAX_WD;
+    ps_cfg->u4_ht = MAX_HT;
+    ps_cfg->u4_disp_wd = MAX_WD;
+    ps_cfg->u4_disp_ht = MAX_HT;
+    ps_cfg->u4_strd = ALIGN16(MAX_WD);
+
+    /* size in macroblocks: max dimension divided by 16 */
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+
+    /* input / recon formats and content type */
+    ps_cfg->e_inp_color_fmt = IV_YUV_420SP_UV;
+    ps_cfg->u4_enable_recon = DEFAULT_RECON_ENABLE;
+    ps_cfg->e_recon_color_fmt = IV_YUV_420P;
+    ps_cfg->e_content_type = IV_PROGRESSIVE;
+
+    /* target platform */
+    ps_cfg->e_arch = ih264e_default_arch();
+    ps_cfg->e_soc = SOC_GENERIC;
+    ps_cfg->u4_num_cores = DEFAULT_NUM_CORES;
+
+    /* rate control, frame rates, bitrate and vbv */
+    ps_cfg->e_rc_mode = DEFAULT_RC;
+    ps_cfg->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+    ps_cfg->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+    ps_cfg->u4_target_bitrate = DEFAULT_BITRATE;
+    ps_cfg->u4_vbv_buffer_delay = DEFAULT_VBV_DELAY;
+    ps_cfg->u4_vbv_buf_size = DEFAULT_VBV_SIZE;
+
+    /* initial qp per picture type */
+    ps_cfg->u4_i_qp = DEFAULT_I_QP;
+    ps_cfg->u4_p_qp = DEFAULT_P_QP;
+    ps_cfg->u4_b_qp = DEFAULT_B_QP;
+
+    /* qp limits: the same min / max pair is used for i, p and b */
+    ps_cfg->u4_i_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_i_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_p_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_p_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_b_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_b_qp_max = DEFAULT_QP_MAX;
+
+    /* frame intervals, b frame count and frame type */
+    ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL;
+    ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL;
+    ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES;
+    ps_cfg->e_frame_type = IV_NA_FRAME;
+
+    /* encode mode, speed presets and profile */
+    ps_cfg->u4_enc_speed_preset = IVE_FASTEST;
+    ps_cfg->e_enc_mode = IVE_ENC_MODE_DEFAULT;
+    ps_cfg->u4_me_speed_preset = DEFAULT_ME_SPEED_PRESET;
+    ps_cfg->e_profile = DEFAULT_PROFILE;
+
+    /* search range, sub-pel and intra tool switches */
+    ps_cfg->u4_srch_rng_x = DEFAULT_SRCH_RNG_X;
+    ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
+    ps_cfg->u4_enable_hpel = DEFAULT_HPEL;
+    ps_cfg->u4_enable_qpel = DEFAULT_QPEL;
+    ps_cfg->u4_enable_intra_4x4 = DEFAULT_I4;
+    ps_cfg->u4_enable_intra_8x8 = DEFAULT_I8;
+    ps_cfg->u4_enable_intra_16x16 = DEFAULT_I16;
+    ps_cfg->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD;
+    ps_cfg->u4_enable_satqd = DEFAULT_ENABLE_SATQD;
+
+    /* min sad is chosen by comparing u4_enable_satqd with the default that */
+    /* was stored into it on the line above                                 */
+    ps_cfg->i4_min_sad = (ps_cfg->u4_enable_satqd == DEFAULT_ENABLE_SATQD) ?
+                    DEFAULT_MIN_SAD_ENABLE : DEFAULT_MIN_SAD_DISABLE;
+
+    /* slice mode, adaptive intra refresh, deblocking and bitstream tools */
+    ps_cfg->e_slice_mode = IVE_SLICE_MODE_NONE;
+    ps_cfg->u4_slice_param = DEFAULT_SLICE_PARAM;
+    ps_cfg->e_air_mode = DEFAULT_AIR_MODE;
+    ps_cfg->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD;
+    ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL;
+    ps_cfg->u4_entropy_coding_mode = CAVLC;
+    ps_cfg->u4_weighted_prediction = 0;
+    ps_cfg->u4_constrained_intra_pred = 0;
+
+    /* timestamps, validity flag, command id and info types */
+    ps_cfg->u4_timestamp_low = 0;
+    ps_cfg->u4_timestamp_high = 0;
+    ps_cfg->u4_is_valid = 1;
+    ps_cfg->e_cmd = IVE_CMD_CT_NA;
+    ps_cfg->u4_pic_info_type = 0;
+    ps_cfg->u4_mb_info_type = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize encoder context. This will be called by init_mem_rec and during
+*  codec reset
+*
+* @par Description:
+*  Resets the counters, flags and ids kept in the codec context, initialises
+*  the control and entropy mutexes, marks all SPS / PPS entries invalid, sets
+*  up the process and entropy job queues, hands the queues to every process
+*  context, initialises the MV / reference / input / output buffer managers
+*  and the dpb manager, clears the reference sets, installs the function
+*  pointers and resets the rate control status fields.
+*
+* @param[in] ps_codec
+*  Codec context pointer. The buffer pointers read here (pv_ctl_mutex,
+*  pv_entropy_mutex, ps_sps_base, ps_pps_base, pv_*_jobq_buf, pv_*_buf_mgr_base,
+*  pv_pic_buf_base, pv_dpb_mgr) and s_cfg.i4_ht_mbs must already be set up.
+*
+* @returns IV_SUCCESS on completion; the RETURN_IF checks hand back IV_FAIL
+*  when the process or the entropy job queue could not be initialised
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init(codec_t *ps_codec)
+{
+    /* enc config param set */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* temp var */
+    WORD32 i;
+
+    /* coded pic count */
+    ps_codec->i4_coded_pic_cnt = 0;
+
+    /* Number of API calls to encode are made */
+    ps_codec->i4_encode_api_call_cnt = -1;
+
+    /* Indicates no header has been generated yet */
+    ps_codec->u4_header_generated = 0;
+
+    /* Number of pictures encoded */
+    ps_codec->i4_pic_cnt = -1;
+
+    /* Number of threads created */
+    ps_codec->i4_proc_thread_cnt = 0;
+
+    /* ctl mutex init */
+    ithread_mutex_init(ps_codec->pv_ctl_mutex);
+
+    /* Set encoder chroma format: VU interleaved only when the input is VU, */
+    /* UV interleaved for every other input format                           */
+    ps_codec->e_codec_color_format =
+                    (ps_cfg->e_inp_color_fmt == IV_YUV_420SP_VU) ?
+                                    IV_YUV_420SP_VU : IV_YUV_420SP_UV;
+
+    /* Number of continuous frames where deblocking was disabled */
+    ps_codec->i4_disable_deblk_pic_cnt = 0;
+
+    /* frame num */
+    ps_codec->i4_frame_num = -1;
+
+    /* forced frame type starts out as IV_NA_FRAME (not as an I frame type) */
+    ps_codec->force_curr_frame_type = IV_NA_FRAME;
+
+    /* idr_pic_id */
+    ps_codec->i4_idr_pic_id = -1;
+
+    /* Flush mode */
+    ps_codec->i4_flush_mode = 0;
+
+    /* Encode header mode */
+    ps_codec->i4_header_mode = 0;
+
+    /* Encode generate header */
+    ps_codec->i4_gen_header = 0;
+
+    /* To signal successful completion of init */
+    /* NOTE(review): this is set before the job queue checks below, which can */
+    /* still leave the function early - confirm callers tolerate that         */
+    ps_codec->i4_init_done = 1;
+
+    /* To signal that at least one picture was encoded */
+    ps_codec->i4_first_pic_done = 0;
+
+    /* Reset Codec */
+    ps_codec->i4_reset_flag = 0;
+
+    /* Current error code */
+    ps_codec->i4_error_code = IH264E_SUCCESS;
+
+    /* threshold residue */
+    ps_codec->u4_thres_resi = 1;
+
+    /* inter gating enable */
+    ps_codec->u4_inter_gate = 0;
+
+    /* entropy mutex init */
+    ithread_mutex_init(ps_codec->pv_entropy_mutex);
+
+    /* sps id */
+    ps_codec->i4_sps_id = 0;
+
+    /* pps id */
+    ps_codec->i4_pps_id = 0;
+
+    /* Process thread created status.                                        */
+    /* The clear is sized per element: passing MAX_PROCESS_THREADS alone as  */
+    /* the byte count would zero only part of the array whenever an element  */
+    /* is wider than one byte (the ai4_ prefix suggests WORD32 elements).    */
+    /* Assumes the array holds MAX_PROCESS_THREADS entries.                  */
+    memset(ps_codec->ai4_process_thread_created, 0,
+           MAX_PROCESS_THREADS
+                           * sizeof(ps_codec->ai4_process_thread_created[0]));
+
+    /* Number of MBs processed together */
+    ps_codec->i4_proc_nmb = 8;
+
+    /* Previous POC msb */
+    ps_codec->i4_prev_poc_msb = 0;
+
+    /* Previous POC lsb */
+    ps_codec->i4_prev_poc_lsb = -1;
+
+    /* max Previous POC lsb */
+    ps_codec->i4_max_prev_poc_lsb = -1;
+
+    /* sps, pps status: mark every entry as not valid */
+    {
+        sps_t *ps_sps = ps_codec->ps_sps_base;
+        pps_t *ps_pps = ps_codec->ps_pps_base;
+
+        for (i = 0; i < MAX_SPS_CNT; i++)
+        {
+            ps_sps->i1_sps_valid = 0;
+            ps_sps++;
+        }
+
+        for (i = 0; i < MAX_PPS_CNT; i++)
+        {
+            ps_pps->i1_pps_valid = 0;
+            ps_pps++;
+        }
+    }
+
+    {
+        WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
+
+        /* two entries per row of mbs */
+        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 clz;
+
+        /* Use next power of two number of entries*/
+        clz = CLZ(num_jobs);
+        num_jobs = 1 << (32 - clz);
+
+        /* init process jobq */
+        ps_codec->pv_proc_jobq = ih264_list_init(
+                        ps_codec->pv_proc_jobq_buf,
+                        ps_codec->i4_proc_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_proc_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_proc_jobq);
+
+        /* init entropy jobq */
+        ps_codec->pv_entropy_jobq = ih264_list_init(
+                        ps_codec->pv_entropy_jobq_buf,
+                        ps_codec->i4_entropy_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_entropy_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_entropy_jobq);
+    }
+
+    /* Update the jobq context to all the threads */
+    for (i = 0; i < MAX_PROCESS_CTXT; i++)
+    {
+        ps_codec->as_process[i].pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].pv_entropy_jobq = ps_codec->pv_entropy_jobq;
+
+        /* i4_id always stays between 0 and MAX_PROCESS_THREADS */
+        ps_codec->as_process[i].i4_id =
+                        (i >= MAX_PROCESS_THREADS) ?
+                                        (i - MAX_PROCESS_THREADS) : i;
+        ps_codec->as_process[i].ps_codec = ps_codec;
+
+        ps_codec->as_process[i].s_entropy.pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].s_entropy.pv_entropy_jobq =
+                        ps_codec->pv_entropy_jobq;
+        ps_codec->as_process[i].s_entropy.i4_abs_pic_order_cnt = -1;
+    }
+
+    /* Initialize MV Bank buffer manager */
+    ps_codec->pv_mv_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_mv_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for reference buffers*/
+    ps_codec->pv_ref_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_ref_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for input buffers*/
+    ps_codec->pv_inp_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_inp_buf_mgr_base);
+
+    /* Initialize buffer manager for output buffers*/
+    ps_codec->pv_out_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_out_buf_mgr_base);
+
+    /* buffer cnt in buffer manager */
+    ps_codec->i4_inp_buf_cnt = 0;
+    ps_codec->i4_out_buf_cnt = 0;
+    ps_codec->i4_ref_buf_cnt = 0;
+
+    /* picture buffer descriptors live at pv_pic_buf_base; clear all of them */
+    ps_codec->ps_pic_buf = (pic_buf_t *) ps_codec->pv_pic_buf_base;
+    memset(ps_codec->ps_pic_buf, 0, BUF_MGR_MAX_CNT * sizeof(pic_buf_t));
+
+    /* Initialize dpb manager */
+    ih264_dpb_mgr_init((dpb_mgr_t*) ps_codec->pv_dpb_mgr);
+
+    /* clear the reference sets, then give each one a pic count of -1 */
+    memset(ps_codec->as_ref_set, 0,
+           sizeof(ref_set_t) * (MAX_DPB_SIZE + MAX_CTXT_SETS));
+    for (i = 0; i < (MAX_DPB_SIZE + MAX_CTXT_SETS); i++)
+    {
+        ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+    }
+
+    /* fn ptr init */
+    ih264e_init_function_ptr(ps_codec);
+
+    /* reset status flags */
+    for (i = 0; i < MAX_CTXT_SETS; i++)
+    {
+        ps_codec->au4_entropy_thread_active[i] = 0;
+        ps_codec->ai4_pic_cnt[i] = -1;
+
+        ps_codec->s_rate_control.pre_encode_skip[i] = 0;
+        ps_codec->s_rate_control.post_encode_skip[i] = 0;
+    }
+
+    ps_codec->s_rate_control.num_intra_in_prev_frame = 0;
+    ps_codec->s_rate_control.i4_avg_activity = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Reports how many memory records the codec asks for
+*
+* @par Description:
+*  Stores the fixed record count MEM_REC_CNT in the output structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used by this call)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure; accessed as ih264e_num_mem_rec_op_t
+*  and its s_ive_op.u4_num_mem_rec field is written
+*
+* @returns  IV_SUCCESS (there is no failure path)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_num_rec(void *pv_api_ip, void *pv_api_op)
+{
+    /* output structure of the api call */
+    ih264e_num_mem_rec_op_t *ps_num_rec_op =
+                    (ih264e_num_mem_rec_op_t *) pv_api_op;
+
+    /* nothing is read from the input structure */
+    UNUSED(pv_api_ip);
+
+    /* the number of records is the same for every call */
+    ps_num_rec_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills memory records of the codec
+*
+* @par Description:
+*  Fills codec memory requirements
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
+{
+    /* api call I/O structures */
+    ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+    ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+    /* profile / level info */
+    WORD32 level;
+    WORD32 num_reorder_frames;
+    WORD32 num_ref_frames;
+
+    /* mem records */
+    WORD32 no_of_mem_rec;
+    iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+    /* frame dimensions */
+    WORD32 max_wd_luma, max_ht_luma;
+    WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+    /* temp var */
+    WORD32 i;
+
+    /* error status */
+    IV_STATUS_T status = IV_SUCCESS;
+
+    /* profile / level info */
+    level = ps_ip->s_ive_ip.u4_max_level;
+    num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+    num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
+
+    /* mem records */
+    ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+    no_of_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+
+    /* frame dimensions */
+    max_ht_luma = ps_ip->s_ive_ip.u4_max_ht;
+    max_wd_luma = ps_ip->s_ive_ip.u4_max_wd;
+    max_ht_luma = ALIGN16(max_ht_luma);
+    max_wd_luma = ALIGN16(max_wd_luma);
+    max_mb_rows = max_ht_luma / MB_SIZE;
+    max_mb_cols = max_wd_luma / MB_SIZE;
+    max_mb_cnt = max_mb_rows * max_mb_cols;
+
+    /* validate params */
+    if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+        level = MAX_LEVEL;
+    }
+
+    if (num_ref_frames > MAX_REF_CNT)
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+        num_ref_frames = MAX_REF_CNT;
+    }
+
+    if (num_reorder_frames > MAX_REF_CNT)
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+        num_reorder_frames = MAX_REF_CNT;
+    }
+
+    /* Set all memory records as persistent and alignment as 128 by default */
+    ps_mem_rec = ps_mem_rec_base;
+    for (i = 0; i < no_of_mem_rec; i++)
+    {
+        ps_mem_rec->u4_mem_alignment = 128;
+        ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_mem_rec++;
+    }
+
+    /************************************************************************
+     * Request memory for h264 encoder handle                               *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_IV_OBJ];
+    {
+        ps_mem_rec->u4_mem_size = sizeof(iv_obj_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_IV_OBJ, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory for h264 encoder context                              *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+    {
+        ps_mem_rec->u4_mem_size = sizeof(codec_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  Request memory for entropy context                                  *
+     *  In multi core encoding, each row is assumed to be launched on a     *
+     *  thread. The rows below can only start after its neighbors are coded *
+     *  The status of an mb coded/uncoded is signaled via entropy map.     *
+     *         1. One word32 to store skip run cnt                          *
+     *         2. mb entropy map (mb status entropy coded/uncoded). The size*
+     *            of the entropy map is max mb cols. Further allocate one   *
+     *            more additional row to evade checking for row -1.         *
+     *         3. size of bit stream buffer to store bit stream ctxt.       *
+     *         4. Entropy coding is dependent on nnz coefficient count for  *
+     *            the neighbor blocks. It is sufficient to maintain one row *
+     *            worth of nnz as entropy for lower row waits on entropy map*
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size of skip mb run */
+        total_size += sizeof(WORD32);
+        total_size = ALIGN8(total_size);
+
+        /* size in bytes to store entropy status of an entire frame */
+        total_size += (max_mb_cols * max_mb_rows);
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+        total_size = ALIGN128(total_size);
+
+        /* size of bit stream buffer */
+        total_size += sizeof(bitstrm_t);
+        total_size = ALIGN128(total_size);
+
+        /* top nnz luma */
+        total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+        total_size = ALIGN128(total_size);
+
+        /* top nnz cbcr */
+        total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+        total_size = ALIGN128(total_size);
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  The residue coefficients that needs to be entropy coded are packed  *
+     *  at a buffer space by the proc threads. The entropy thread shall     *
+     *  read from the buffer space, unpack them and encode the same. The    *
+     *  buffer space required to pack a row of mbs are as follows.          *
+     *  Assuming transform_8x8_flag is disabled,                            *
+     *  In the worst case, 1 mb contains 1 dc 4x4 luma sub block, followed  *
+     *  by 16 ac 4x4 luma sub blocks, 2 dc chroma 2x2 sub blocks, followed  *
+     *  by 8 ac 4x4 chroma sub blocks.                                      *
+     *  For the sake of simplicity we assume that all sub blocks are of     *
+     *  type 4x4. The packing of each 4x4 is depicted by the structure      *
+     *  tu_sblk_coeff_data_t                                                *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+    {
+        /* temp var */
+        WORD32 size = 0;
+
+        /* size of coeff data of 1 mb */
+        size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+        /* size of coeff data of 1 row of mb's */
+        size *= max_mb_cols;
+
+        /* align to avoid any false sharing across threads */
+        size = ALIGN64(size);
+
+        /* size for one full frame */
+        size *= max_mb_rows;
+
+        /* size of each proc buffer set (ping, pong) */
+        size *= MAX_CTXT_SETS;
+
+        ps_mem_rec->u4_mem_size = size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_COEFF_DATA, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  while encoding an mb, the mb header data is signaled to the entropy*
+     *  thread by writing to a buffer space. the size of header data per mb *
+     *  is assumed to be 40 bytes                                           *
+     *  TODO: revisit this inference                                        *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+    {
+        /* temp var */
+        WORD32 size;
+
+        /* size per MB */
+        size = 40;
+
+        /* size for 1 row of mbs */
+        size = size * max_mb_cols;
+
+        /* align to avoid any false sharing across threads */
+        size = ALIGN64(size);
+
+        /* size for one full frame */
+        size *= max_mb_rows;
+
+        /* size of each proc buffer set (ping, pong) */
+        size *= MAX_CTXT_SETS;
+
+        ps_mem_rec->u4_mem_size = size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_HEADER_DATA, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  Size for holding mv_buf_t for each MV Bank.                         *
+     *  Note this allocation is done for BUF_MGR_MAX_CNT instead of         *
+     *  MAX_DPB_SIZE or max_dpb_size for following reasons                  *
+     *  max_dpb_size will be based on max_wd and max_ht                     *
+     *  For higher max_wd and max_ht this number will be smaller than       *
+     *  MAX_DPB_SIZE But during actual initialization number of buffers     *
+     *  allocated can be more.                                              *
+     *                                                                      *
+     *  One extra MV Bank is needed to hold current pics MV bank.           *
+     *  Since this is only a structure allocation and not actual buffer     *
+     *  allocation, it is allocated for BUF_MGR_MAX_CNT entries             *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+    {
+        /* max luma samples */
+        WORD32 max_luma_samples = 0;
+
+        /* determine max luma samples */
+        for (i = 0; i < 16; i++)
+            if (level ==(WORD32)gas_ih264_lvl_tbl[i].u4_level_idc)
+                max_luma_samples = gas_ih264_lvl_tbl[i].u4_max_fs
+                                << (BLK_SIZE + BLK_SIZE);
+
+        ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+
+        /************************************************************************
+         * Allocate for pu_map, enc_pu_t and pic_pu_idx for each MV bank        *
+         * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+         * is set to maximum number of luma samples allowed at the given level. *
+         * This is done to ensure that any stream with width and height lesser  *
+         * than max_wd and max_ht is supported. Number of buffers required can  *
+         * be greater for lower width and heights at a given level and this     *
+         * increased number of buffers might require more memory than what      *
+         * max_wd and max_ht buffer would have required Also note one extra     *
+         * buffer is allocated to store current pictures MV bank.                *
+         ***********************************************************************/
+
+        ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+        ps_mem_rec->u4_mem_size += (num_ref_frames + num_reorder_frames
+                        + MAX_CTXT_SETS)
+                        * ih264e_get_pic_mv_bank_size(max_luma_samples);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBANK, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  While encoding inter slices, to compute the cost of encoding an mb  *
+     *  with the mv's at hand, we employ the expression cost = sad + lambda *
+     *  x mv_bits. Here mv_bits is the total number of bits taken to represe*
+     *  nt the mv in the stream. The mv bits for all the possible mv are    *
+     *  stored in the look up table. The mem record for this look up table  *
+     *  is given below.                                                     *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+    {
+        /* max srch range x */
+        UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+        /* max srch range y */
+        UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+        /* max srch range */
+        UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+        /* due to subpel */
+        u4_max_srch_range <<= 2;
+
+        /* due to mv on either direction */
+        u4_max_srch_range = (u4_max_srch_range << 1);
+
+        /* due to pred mv + zero */
+        u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+        u4_max_srch_range = ALIGN128(u4_max_srch_range);
+
+        ps_mem_rec->u4_mem_size = u4_max_srch_range;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBITS, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory for SPS                                               *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+    {
+        ps_mem_rec->u4_mem_size = MAX_SPS_CNT * sizeof(sps_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SPS, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory for PPS                                               *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+    {
+        ps_mem_rec->u4_mem_size = MAX_PPS_CNT * sizeof(pps_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PPS, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory for Slice Header                                      *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+    {
+        ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * MAX_SLICE_HDR_CNT
+                        * sizeof(slice_header_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_HDR, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory for Adaptive Intra Refresh                            *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* intra coded map */
+        total_size += max_mb_cnt;
+        total_size *= MAX_CTXT_SETS;
+
+        /* mb refresh map */
+        total_size += sizeof(UWORD16) * max_mb_cnt;
+
+        /* alignment */
+        total_size = ALIGN128(total_size);
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_AIR_MAP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  In multi slice encoding, this memory record helps tracking the start*
+     *  of slice with reference to mb.                                      *
+     *  MEM RECORD for holding                                              *
+     *         1. mb slice map                                              *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to slice index of all mbs of a frame */
+        total_size = ALIGN64(max_mb_cnt);
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_MAP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold thread handles for each processing thread     *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+    {
+        WORD32 handle_size = ithread_get_handle_size();
+
+        ps_mem_rec->u4_mem_size = MAX_PROCESS_THREADS * handle_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_THREAD_HANDLE, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold mutex for control calls                       *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+    {
+        ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CTL_MUTEX, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold mutex for entropy calls                       *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+    {
+        ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_MUTEX, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold process jobs                                  *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+    {
+        /* One process job per row of MBs */
+        /* Allocate for two pictures, so that wrap around can be handled easily */
+        WORD32 num_jobs = max_mb_rows * 2;
+
+        WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+        ps_mem_rec->u4_mem_size = job_queue_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_JOBQ, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold entropy jobs                                  *
+     ***********************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+    {
+        /* One process job per row of MBs */
+        /* Allocate for two pictures, so that wrap around can be handled easily */
+        WORD32 num_jobs = max_mb_rows * 2;
+
+        WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+        ps_mem_rec->u4_mem_size = job_queue_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_JOBQ, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  In multi core encoding, each row is assumed to be launched on a     *
+     *  thread. The rows below can only start after its neighbors are coded *
+     *  The status of an mb coded/uncoded is signaled via proc map.        *
+     *  MEM RECORD for holding                                              *
+     *         1. mb proc map (mb status core coded/uncoded)                *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to mb core coding status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_MAP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  mem record for holding a particular MB is deblocked or not          *
+     *         1. mb deblk map (mb status deblocked/not deblocked)          *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to store mb deblock status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        total_size = ALIGN64(total_size);
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DBLK_MAP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  mem record for holding a particular MB's me is done or not          *
+     *         1. mb me map                                                 *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to store mb me status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ME_MAP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * size for holding dpb manager context                                 *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+    {
+        ps_mem_rec->u4_mem_size = sizeof(dpb_mgr_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DPB_MGR, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  luma or chroma core coding involves mb estimation, error computation*
+     *  between the estimated signal and the actual signal, transform the   *
+     *  error, quantize the error, then inverse transform and inverse quant *
+     *  ize the residue and add the result back to estimated signal.        *
+     *  To perform all these, a set of temporary buffers are needed.        *
+     *  MEM RECORD for holding scratch buffers                              *
+     *         1. prediction buffer used during mb mode analysis            *
+     *         2  temp. reference buffer when intra 4x4 with rdopt on is    *
+     *            enabled                                                   *
+     *            - when intra 4x4 is enabled, rdopt is on, to store the    *
+     *            reconstructed values and use them later this temp. buffer *
+     *            is used.                                                  *
+     *         3. prediction buffer used during intra mode analysis         *
+     *         4. prediction buffer used during intra 16x16 plane mode      *
+     *            analysis
+     *         5. prediction buffer used during intra chroma mode analysis  *
+     *         6. prediction buffer used during intra chroma 16x16 plane    *
+     *            mode analysis
+     *         7. forward transform output buffer                           *
+     *            - to store the error between estimated and the actual inp *
+     *              ut and to store the fwd transformed quantized output    *
+     *         8. forward transform output buffer                           *
+     *            - when intra 4x4 is enabled, rdopt is on, to store the    *
+     *            fwd transform values and use them later this temp. buffer *
+     *            is used.                                                  *
+     *         9. temporary buffer for inverse transform                    *
+     *            - temporary buffer used in inverse transform and inverse  *
+     *              quantization                                            *
+     *         A. Buffers for holding half_x , half_y and half_xy planes    *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+    {
+        WORD32 total_size = 0;
+
+        /* size to hold prediction buffer */
+        total_size += sizeof(UWORD8) * 16 * 16;
+        total_size = ALIGN64(total_size);
+
+        /* size to hold recon for intra 4x4 buffer */
+        total_size += sizeof(UWORD8) * 16 * 16;
+        total_size = ALIGN64(total_size);
+
+        /* prediction buffer intra 16x16 */
+        total_size += sizeof(UWORD8) * 16 * 16;
+        total_size = ALIGN64(total_size);
+
+        /* prediction buffer intra 16x16 plane*/
+        total_size += sizeof(UWORD8) * 16 * 16;
+        total_size = ALIGN64(total_size);
+
+        /* prediction buffer intra chroma*/
+        total_size += sizeof(UWORD8) * 16 * 8;
+        total_size = ALIGN64(total_size);
+
+        /* prediction buffer intra chroma plane*/
+        total_size += sizeof(UWORD8) * 16 * 8;
+        total_size = ALIGN64(total_size);
+
+        /* size to hold fwd transform output */
+        total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+        total_size = ALIGN64(total_size);
+
+        /* size to hold fwd transform output */
+        total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+        total_size = ALIGN64(total_size);
+
+        /* size to hold temporary data during inverse transform */
+        total_size += sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+        total_size = ALIGN64(total_size);
+
+        /* Buffers for holding half_x , half_y and half_xy planes */
+        total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+        total_size = ALIGN64(total_size);
+
+        total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+        total_size = ALIGN64(total_size);
+
+        total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+        total_size = ALIGN64(total_size);
+
+        /* Allocate for each process thread */
+        total_size *= MAX_PROCESS_CTXT;
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_SCRATCH, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  When transform_8x8_flag is disabled, the size of a sub block is     *
+     *  4x4 and when the transform_8x8_flag is enabled the size of the sub  *
+     *  block is 8x8. The threshold matrix and the forward scaling list     *
+     *  is of the size of the sub block.                                    *
+     *  MEM RECORD for holding                                              *
+     *         1. quantization parameters for plane y, cb, cr               *
+     *            - threshold matrix for quantization                       *
+     *            - forward weight matrix                                   *
+     *            - satqd threshold matrix                                  *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* quantization parameter list for planes y,cb and cr */
+        total_size += ALIGN64(sizeof(quant_params_t)) * 3;
+
+        /* size of threshold matrix for quantization
+         * (assuming the transform_8x8_flag is disabled).
+         * for all 3 planes */
+        total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+        /* size of forward weight matrix for quantization
+         * (assuming the transform_8x8_flag is disabled).
+         * for all 3 planes */
+        total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+        /* Size for SATQD threshold matrix for planes y, cb and cr */
+        total_size += ALIGN64(sizeof(UWORD16) * 9) * 3;
+
+        /* total size per each proc thread */
+        total_size *= MAX_PROCESS_CTXT;
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_QUANT_PARAM, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  While computing blocking strength for the current mb, the csbp, mb  *
+     *  type for the neighboring mbs are necessary. memtab for storing top  *
+     *  row mbtype and csbp is evaluated here.                              *
+     *                                                                      *
+     *  when encoding intra 4x4 or intra 8x8 the submb types are estimated  *
+     *  and sent. The estimation is dependent on neighbor mbs. For this     *
+     *  store the top row sub mb types for intra mbs                        *
+     *                                                                      *
+     *  During motion vector prediction, the curr mb mv is predicted from   *
+     *  neighbors left, top, top right and sometimes top left depending on  *
+     *  the availability. The top and top right content is accessed from    *
+     *  the memtab specified below.                                         *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to store  1 row of mb_info_t */
+        /* one additional mb, to avoid checking end of row condition */
+        total_size += (max_mb_cols + 1) * sizeof(mb_info_t);
+
+        /* size in bytes to store  1 row of intra macroblock sub modes */
+        total_size += max_mb_cols * sizeof(UWORD8) * 16;
+
+        /* size in bytes to store  1 row + 1 of enc_pu_t */
+        /* one additional mb, to avoid checking end of row condition */
+        total_size += (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+        /* total size per proc ctxt */
+        total_size = ALIGN128(total_size);
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_TOP_ROW_SYN_INFO, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  When transform_8x8_flag is disabled, the mb is partitioned into     *
+     *  16 sub blocks. This corresponds to 1 vertical left edge and 3       *
+     *  vertical inner edges, 1 horizontal top edge and 3 horizontal        *
+     *  inner edges per mb. Further, when transform_8x8_flag is enabled,    *
+     *  the mb is partitioned into 4 sub blocks. This corresponds to        *
+     *  1 vertical left edge and 1 vertical inner edge, 1 horizontal top    *
+     *  edge and 1 horizontal inner edge per mb.                            *
+     *  MEM RECORD for holding                                              *
+     *         1. vertical edge blocking strength                           *
+     *         2. horizontal edge blocking strength                         *
+     *         3. mb qp                                                     *
+     *         all are frame level                                          *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+        WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+        /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+        /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+         * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+        vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+        /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+        /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+         * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+        horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+        /* qp of each mb requires 1 byte */
+        qp_size = ALIGN64(max_mb_cnt);
+
+        /* total size */
+        total_size = vert_bs_size + horz_bs_size + qp_size;
+
+        /* total size per each proc ctxt */
+        total_size *= MAX_CTXT_SETS;
+
+        ps_mem_rec->u4_mem_size = total_size;
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BS_QP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * size for holding input picture buffer manager context                *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+    {
+        ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_INP_PIC, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * size for holding output buffer manager context                       *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+    {
+        ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_OUT, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Size for color space conversion                                      *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+    {
+        /* We need a total a memory for a single frame of 420 sp, ie
+         * (wd * ht) for luma and (wd * ht / 2) for chroma*/
+        ps_mem_rec->u4_mem_size = MAX_CTXT_SETS
+                        * ((3 * max_ht_luma * max_wd_luma) >> 1);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CSC, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     *  Size for holding pic_buf_t for each reference picture               *
+     *  Note this allocation is done for BUF_MGR_MAX_CNT instead of         *
+     *  MAX_DPB_SIZE or max_dpb_size for following reasons                  *
+     *  max_dpb_size will be based on max_wd and max_ht                     *
+     *  For higher max_wd and max_ht this number will be smaller than       *
+     *  MAX_DPB_SIZE But during actual initialization number of buffers     *
+     *  allocated can be more.                                              *
+     *                                                                      *
+     *  Also to handle display depth application can allocate more than     *
+     *  what codec asks for in case of non-shared mode                      *
+     *  Since this is only a structure allocation and not actual buffer     *
+     *  allocation, it is allocated for BUF_MGR_MAX_CNT entries             *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+    {
+        ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+        ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+        /************************************************************************
+         * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+         * is set to maximum number of luma samples allowed at the given level. *
+         * This is done to ensure that any stream with width and height lesser  *
+         * than max_wd and max_ht is supported. Number of buffers required can  *
+         * be greater for lower width and heights at a given level and this     *
+         * increased number of buffers might require more memory than what      *
+         * max_wd and max_ht buffer would have required. Number of buffers is   *
+         * doubled in order to return one frame at a time instead of sending    *
+         * multiple outputs during dpb full case. Also note one extra buffer is *
+         * allocated to store current picture.                                  *
+         *                                                                      *
+         * Half-pel planes for each reference buffer are allocated along with   *
+         * the reference buffer. So each reference buffer is 4 times the        *
+         * required size. This way buffer management for the half-pel planes is *
+         * easier and while using the half-pel planes in MC, an offset can be   *
+         * used from a single pointer                                           *
+         ***********************************************************************/
+        ps_mem_rec->u4_mem_size += HPEL_PLANES_CNT
+                        * ih264e_get_total_pic_buf_size(
+                                        max_wd_luma * max_ht_luma, level,
+                                        PAD_WD, PAD_HT, num_ref_frames,
+                                        num_reorder_frames);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_REF_PIC, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * Request memory to hold mem recs to be returned during retrieve call  *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+    {
+        ps_mem_rec->u4_mem_size = MEM_REC_CNT * sizeof(iv_mem_rec_t);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BACKUP, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * size for memory required by NMB info structs and buffer for storing  *
+     * half pel plane                                                       *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+    {
+        ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
+                        * (sizeof(mb_info_nmb_t)
+                                        + MB_SIZE * MB_SIZE * sizeof(UWORD8));
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
+
+    /************************************************************************
+     * RC mem records                                                       *
+     ************************************************************************/
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+    {
+        ih264e_get_rate_control_mem_tab(NULL, ps_mem_rec, FILL_MEMTAB);
+    }
+    DEBUG("\nMemory record Id %d = %d \n", MEM_REC_RC, ps_mem_rec->u4_mem_size);
+
+    /* Each memtab size is aligned to next multiple of 128 bytes */
+    /* This is to ensure all the memtabs start at different cache lines */
+    ps_mem_rec = ps_mem_rec_base;
+    for (i = 0; i < MEM_REC_CNT; i++)
+    {
+        ps_mem_rec->u4_mem_size = ALIGN128(ps_mem_rec->u4_mem_size);
+        ps_mem_rec++;
+    }
+
+    ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+    DEBUG("Num mem recs in fill call : %d\n", ps_op->s_ive_op.u4_num_mem_rec);
+
+    return (status);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initializes from mem records passed to the codec
+*
+* @par Description:
+*  Initializes pointers based on mem records passed
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
+                                  void *pv_api_ip,
+                                  void *pv_api_op)
+{
+    /* api call I/O structures */
+    ih264e_init_ip_t *ps_ip = pv_api_ip;
+    ih264e_init_op_t *ps_op = pv_api_op;
+
+    /* mem records */
+    iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+    /* codec variables */
+    codec_t * ps_codec;
+    cfg_params_t *ps_cfg;
+
+    /* frame dimensions */
+    WORD32 max_wd_luma, max_ht_luma;
+    WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+    /* temp var */
+    WORD32 i;
+    WORD32 status = IV_SUCCESS;
+
+    /* frame dimensions */
+    max_ht_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    max_wd_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    max_mb_rows = max_ht_luma / MB_SIZE;
+    max_mb_cols = max_wd_luma / MB_SIZE;
+    max_mb_cnt = max_mb_rows * max_mb_cols;
+
+    /* mem records */
+    ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+
+    /* Init mem records */
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+    {
+        ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base;
+        ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
+    }
+
+    /* Note this memset can not be done in init() call, since init will be
+     called during reset as well. And calling this during reset will mean all
+     pointers need to be reinitialized */
+    memset(ps_codec, 0, sizeof(codec_t));
+
+    /* Set default Config Params */
+    ps_cfg = &ps_codec->s_cfg;
+    ih264e_set_default_params(ps_cfg);
+
+    /* Update config params as per input */
+    ps_cfg->u4_max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    ps_cfg->u4_max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+    ps_cfg->u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+    ps_cfg->u4_max_reorder_cnt = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+    ps_cfg->u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+    ps_cfg->e_inp_color_fmt = ps_ip->s_ive_ip.e_inp_color_fmt;
+    ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt;
+    ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate;
+    ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate;
+    ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes;
+    ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type;
+    ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+    ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+    ps_cfg->e_slice_mode = ps_ip->s_ive_ip.e_slice_mode;
+    ps_cfg->u4_slice_param = ps_ip->s_ive_ip.u4_slice_param;
+    ps_cfg->e_arch = ps_ip->s_ive_ip.e_arch;
+    ps_cfg->e_soc = ps_ip->s_ive_ip.e_soc;
+    ps_cfg->u4_enable_recon = ps_ip->s_ive_ip.u4_enable_recon;
+    ps_cfg->e_rc_mode = ps_ip->s_ive_ip.e_rc_mode;
+
+    /* Validate params */
+    if ((ps_ip->s_ive_ip.u4_max_level < MIN_LEVEL)
+                    || (ps_ip->s_ive_ip.u4_max_level > MAX_LEVEL))
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+        ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+    }
+
+    if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_CNT)
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+        ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+    }
+
+    if (ps_ip->s_ive_ip.u4_max_reorder_cnt > MAX_REF_CNT)
+    {
+        ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+        ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+    {
+        ps_codec->ps_mem_rec_backup = (iv_mem_rec_t *) ps_mem_rec->pv_base;
+
+        memcpy(ps_codec->ps_mem_rec_backup, ps_mem_rec_base,
+               MEM_REC_CNT * sizeof(iv_mem_rec_t));
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+    {
+        /* temp var */
+        WORD32 size = 0, offset;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                /* base ptr */
+                UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+                /* reset size */
+                size = 0;
+
+                /* skip mb run */
+                ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+                                (void *) (pu1_buf + size);
+                size += sizeof(WORD32);
+                size = ALIGN8(size);
+
+                /* entropy map */
+                ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+                                (void *) (pu1_buf + size + max_mb_cols);
+                /* size in bytes to store entropy status of an entire frame */
+                size += (max_mb_cols * max_mb_rows);
+                /* add an additional 1 row of bytes to evade the special case of row 0 */
+                size += max_mb_cols;
+                size = ALIGN128(size);
+
+                /* bit stream ptr */
+                ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+                                + size);
+                size += sizeof(bitstrm_t);
+                size = ALIGN128(size);
+
+                /* nnz luma */
+                ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+                                (void *) (pu1_buf + size);
+                size += (max_mb_cols * 4 * sizeof(UWORD8));
+                size = ALIGN128(size);
+
+                /* nnz chroma */
+                ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+                                (void *) (pu1_buf + size);
+                size += (max_mb_cols * 4 * sizeof(UWORD8));
+                size = ALIGN128(size);
+                offset = size;
+            }
+            else
+            {
+                /* base ptr */
+                UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+                /* reset size */
+                size = offset;
+
+                /* skip mb run */
+                ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+                                (void *) (pu1_buf + size);
+                size += sizeof(WORD32);
+                size = ALIGN8(size);
+
+                /* entropy map */
+                ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+                                (void *) (pu1_buf + size + max_mb_cols);
+                /* size in bytes to store entropy status of an entire frame */
+                size += (max_mb_cols * max_mb_rows);
+                /* add an additional 1 row of bytes to evade the special case of row 0 */
+                size += max_mb_cols;
+                size = ALIGN128(size);
+
+                /* bit stream ptr */
+                ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+                                + size);
+                size += sizeof(bitstrm_t);
+                size = ALIGN128(size);
+
+                /* nnz luma */
+                ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+                                (void *) (pu1_buf + size);
+                size += (max_mb_cols * 4 * sizeof(UWORD8));
+                size = ALIGN128(size);
+
+                /* nnz chroma */
+                ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+                                (void *) (pu1_buf + size);
+                size += (max_mb_cols * 4 * sizeof(UWORD8));
+                size = ALIGN128(size);
+           }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+    {
+        /* temp var */
+        WORD32 size = 0, size_of_row;
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* size of coeff data of 1 mb */
+        size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+        /* size of coeff data of 1 row of mb's */
+        size *= max_mb_cols;
+
+        /* align to avoid false sharing */
+        size = ALIGN64(size);
+        size_of_row = size;
+
+        /* size for one full frame */
+        size *= max_mb_rows;
+
+        ps_codec->u4_size_coeff_data = size_of_row;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
+                ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
+                                pu1_buf;
+            }
+            else
+            {
+                ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size;
+                ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf
+                                + size;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+    {
+        /* temp var */
+        WORD32 size, size_of_row;
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* size of header data of 1 mb */
+        size = 40;
+
+        /* size for 1 row of mbs */
+        size = size * max_mb_cols;
+
+        /* align to avoid any false sharing across threads */
+        size = ALIGN64(size);
+        size_of_row = size;
+
+        /* size for one full frame */
+        size *= max_mb_rows;
+
+        ps_codec->u4_size_header_data = size_of_row;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
+                ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+                                pu1_buf;
+            }
+            else
+            {
+                ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size;
+                ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+                                pu1_buf + size;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+    {
+        /* size of buf mgr struct */
+        WORD32 size = ih264_buf_mgr_size();
+
+        /* temp var */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* mv buffer mgr */
+        ps_codec->pv_mv_buf_mgr_base = pu1_buf;
+
+        /* mv bank */
+        ps_codec->pv_mv_bank_buf_base = pu1_buf + size;
+        ps_codec->i4_total_mv_bank_size = ps_mem_rec->u4_mem_size - size;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+    {
+        /* max srch range x */
+        UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+        /* max srch range y */
+        UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+        /* max srch range */
+        UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+        /* temp var */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* due to subpel */
+        u4_max_srch_range <<= 2;
+
+//        /* due to mv on either direction */
+//        u4_max_srch_range = (u4_max_srch_range << 1);
+
+        /* due to pred mv + zero */
+        u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            /* me ctxt */
+            me_ctxt_t *ps_mem_ctxt = &(ps_codec->as_process[i].s_me_ctxt);
+
+            /* init at zero mv */
+            ps_mem_ctxt->pu1_mv_bits = pu1_buf + u4_max_srch_range;
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+    {
+        ps_codec->ps_sps_base = (sps_t *) ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+    {
+        ps_codec->ps_pps_base = (pps_t *) ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+    {
+        ps_codec->ps_slice_hdr_base = ps_mem_rec->pv_base;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
+            }
+            else
+            {
+                /* temp var */
+                WORD32 size = MAX_SLICE_HDR_CNT * sizeof(slice_header_t);
+                void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size;
+
+                ps_codec->as_process[i].ps_slice_hdr_base = pv_buf;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+    {
+        /* temp var */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
+            }
+            else
+            {
+                ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf
+                                + max_mb_cnt;
+            }
+        }
+
+        ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * 2);
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+        /* init pointer */
+        pu1_buf_ping = ps_mem_rec->pv_base;
+        pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt);
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
+            }
+            else
+            {
+                ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+    {
+        WORD32 handle_size = ithread_get_handle_size();
+
+        for (i = 0; i < MAX_PROCESS_THREADS; i++)
+        {
+            ps_codec->apv_proc_thread_handle[i] = (UWORD8 *) ps_mem_rec->pv_base
+                            + (i * handle_size);
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+    {
+        ps_codec->pv_ctl_mutex = ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+    {
+        ps_codec->pv_entropy_mutex = ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+    {
+        ps_codec->pv_proc_jobq_buf = ps_mem_rec->pv_base;
+        ps_codec->i4_proc_jobq_buf_size = ps_mem_rec->u4_mem_size;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+    {
+        ps_codec->pv_entropy_jobq_buf = ps_mem_rec->pv_base;
+        ps_codec->i4_entropy_jobq_buf_size = ps_mem_rec->u4_mem_size;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to mb core coding status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
+            }
+            else
+            {
+                ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size
+                                + max_mb_cols;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to mb core coding status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        /*Align the memory offsets*/
+        total_size = ALIGN64(total_size);
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
+
+            }
+            else
+            {
+                ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size
+                                + max_mb_cols;
+
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to mb core coding status of an entire frame */
+        total_size = max_mb_cnt;
+
+        /* add an additional 1 row of bytes to evade the special case of row 0 */
+        total_size += max_mb_cols;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
+            }
+            else
+            {
+                ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size
+                                + max_mb_cols;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+    {
+        ps_codec->pv_dpb_mgr = ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+        /* size of pred buffer, fwd transform output, temp buffer for inv tra */
+        WORD32 size_pred_luma, size_pred_chroma, size_fwd, size_inv, size_hp;
+
+        /* temp var */
+        WORD32 size = 0;
+
+        /* size to hold intra/inter prediction buffer */
+        size_pred_luma = sizeof(UWORD8) * 16 * 16;
+        size_pred_chroma = sizeof(UWORD8) * 8 * 16;
+
+        /* size to hold fwd transform output */
+        size_fwd = sizeof(WORD16) * SIZE_TRANS_BUFF;
+
+        /* size to hold temporary data during inverse transform */
+        size_inv = sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+
+        /* size to hold half pel plane buffers */
+        size_hp = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            /* prediction buffer */
+            ps_codec->as_process[i].pu1_pred_mb = (void *) (pu1_buf + size);
+            ps_codec->as_process[i].i4_pred_strd = 16;
+            size += size_pred_luma;
+            size = ALIGN64(size);
+
+            /* prediction buffer */
+            ps_codec->as_process[i].pu1_ref_mb_intra_4x4 = (void *) (pu1_buf
+                            + size);
+            size += size_pred_luma;
+            size = ALIGN64(size);
+
+            /* prediction buffer intra 16x16 */
+            ps_codec->as_process[i].pu1_pred_mb_intra_16x16 = (void *) (pu1_buf
+                            + size);
+            size += size_pred_luma;
+            size = ALIGN64(size);
+
+            /* prediction buffer intra 16x16 plane*/
+            ps_codec->as_process[i].pu1_pred_mb_intra_16x16_plane =
+                            (void *) (pu1_buf + size);
+            size += size_pred_luma;
+            size = ALIGN64(size);
+
+            /* prediction buffer intra chroma*/
+            ps_codec->as_process[i].pu1_pred_mb_intra_chroma = (void *) (pu1_buf
+                            + size);
+            size += size_pred_chroma;
+            size = ALIGN64(size);
+
+            /* prediction buffer intra chroma plane*/
+            ps_codec->as_process[i].pu1_pred_mb_intra_chroma_plane =
+                            (void *) (pu1_buf + size);
+            size += size_pred_chroma;
+            size = ALIGN64(size);
+
+            /* Fwd transform output */
+            ps_codec->as_process[i].pi2_res_buf = (void *) (pu1_buf + size);
+            ps_codec->as_process[i].i4_res_strd = 16;
+            size += size_fwd;
+            size = ALIGN64(size);
+
+            /* Fwd transform output */
+            ps_codec->as_process[i].pi2_res_buf_intra_4x4 = (void *) (pu1_buf
+                            + size);
+            size += size_fwd;
+            size = ALIGN64(size);
+
+            /* scratch buffer used during inverse transform */
+            ps_codec->as_process[i].pv_scratch_buff = (void *) (pu1_buf + size);
+            size += size_inv;
+            size = ALIGN64(size);
+
+            /* Buffers for holding half_x , half_y and half_xy values */
+            ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size);
+            size += size_hp;
+            size = ALIGN64(size);
+
+            ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size);
+            size += size_hp;
+            size = ALIGN64(size);
+
+            ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size);
+            size += size_hp;
+            size = ALIGN64(size);
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+    {
+        /* pointer to storage space */
+        UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+        /* size of qp, threshold matrix, fwd scaling list for one plane */
+        WORD32 size_quant_param, size_thres_mat, size_fwd_weight_mat,
+                        size_satqd_weight_mat;
+
+        /* temp var */
+        WORD32 total_size = 0;
+
+        /* size of quantization parameter list of 1 plane */
+        size_quant_param = ALIGN64(sizeof(quant_params_t));
+
+        /* size of threshold matrix for quantization
+         * (assuming the transform_8x8_flag is disabled).
+         * for 1 plane */
+        size_thres_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+        /* size of forward weight matrix for quantization
+         * (assuming the transform_8x8_flag is disabled).
+         * for 1 plane */
+        size_fwd_weight_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+        /* size of SATQD matrix*/
+        size_satqd_weight_mat = ALIGN64(sizeof(UWORD16) * 9);
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            quant_params_t **ps_qp_params = ps_codec->as_process[i].ps_qp_params;
+
+            /* quantization param structure */
+            ps_qp_params[0] = (quant_params_t *) (pu1_buf + total_size);
+            total_size = total_size + size_quant_param;
+            ps_qp_params[1] = (quant_params_t *) (pu1_buf + total_size);
+            total_size = total_size + size_quant_param;
+            ps_qp_params[2] = (quant_params_t *) (pu1_buf + total_size);
+            total_size = total_size + size_quant_param;
+
+            /* threshold matrix for quantization */
+            ps_qp_params[0]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_thres_mat;
+            ps_qp_params[1]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_thres_mat;
+            ps_qp_params[2]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_thres_mat;
+
+            /* fwd weight matrix */
+            ps_qp_params[0]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_fwd_weight_mat;
+            ps_qp_params[1]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_fwd_weight_mat;
+            ps_qp_params[2]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_fwd_weight_mat;
+
+            /* threshold matrix for SATQD */
+            ps_qp_params[0]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_satqd_weight_mat;
+            ps_qp_params[1]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_satqd_weight_mat;
+            ps_qp_params[2]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+            total_size = total_size + size_satqd_weight_mat;
+
+            total_size = ALIGN128(total_size);
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+    {
+        /* total size of the mem record */
+        WORD32 total_size = 0, size_csbp, size_intra_modes, size_mv;
+
+        /* pointer to buffer */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* size in bytes to store  1 row of mb_info_t */
+        /* one additional mb, to avoid checking end of row condition */
+        size_csbp = (max_mb_cols + 1) * sizeof(mb_info_t);
+
+        /* size in bytes to store  1 row of intra macroblock sub modes */
+        size_intra_modes = max_mb_cols * sizeof(UWORD8) * 16;
+
+        /* size in bytes to store  1 row + 1 of enc_pu_t */
+        /* one additional mb, to avoid checking end of row condition */
+        size_mv = (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+        /* total size per proc ctxt */
+        total_size = size_csbp + size_intra_modes + size_mv;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+                                (mb_info_t *) pu1_buf;
+                ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+                                + size_csbp;
+                ps_codec->as_process[i].ps_top_row_pu_base =
+                                (enc_pu_t *) (pu1_buf + size_csbp
+                                                + size_intra_modes);
+            }
+            else
+            {
+                ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+                                (mb_info_t *) (pu1_buf + total_size);
+                ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+                                + total_size + size_csbp;
+                ps_codec->as_process[i].ps_top_row_pu_base =
+                                (enc_pu_t *) (pu1_buf + total_size + size_csbp
+                                                + size_intra_modes);
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+    {
+        UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+        /* total size of the mem record */
+        WORD32 total_size = 0;
+
+        /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+        WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+        /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+        /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+         * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+        vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+        /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+        /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+         * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+        horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+        /* qp of each mb requires 1 byte */
+        qp_size = ALIGN64(max_mb_cnt);
+
+        /* total size */
+        total_size = vert_bs_size + horz_bs_size + qp_size;
+
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            if (i < MAX_PROCESS_CTXT / 2)
+            {
+                pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
+
+                /* vertical edge bs storage space */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+                                (UWORD32 *) pu1_buf_ping;
+                pu1_buf_ping += vert_bs_size;
+
+                /* horizontal edge bs storage space */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+                                (UWORD32 *) pu1_buf_ping;
+                pu1_buf_ping += horz_bs_size;
+
+                /* qp */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+                                (UWORD8 *) pu1_buf_ping;
+                pu1_buf_ping += qp_size;
+            }
+            else
+            {
+                pu1_buf_pong = (UWORD8 *) ps_mem_rec->pv_base;
+                pu1_buf_pong += total_size;
+
+                /* vertical edge bs storage space */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+                                (UWORD32 *) pu1_buf_pong;
+                pu1_buf_pong += vert_bs_size;
+
+                /* horizontal edge bs storage space */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+                                (UWORD32 *) pu1_buf_pong;
+                pu1_buf_pong += horz_bs_size;
+
+                /* qp */
+                ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+                                (UWORD8 *) pu1_buf_pong;
+                pu1_buf_pong += qp_size;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+    {
+        ps_codec->pv_inp_buf_mgr_base = ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+    {
+        ps_codec->pv_out_buf_mgr_base = ps_mem_rec->pv_base;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+    {
+        ps_codec->pu1_y_csc_buf_base = ps_mem_rec->pv_base;
+        ps_codec->pu1_uv_csc_buf_base = (UWORD8 *) ps_mem_rec->pv_base
+                        + (max_ht_luma * max_wd_luma);
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+    {
+        /* size of buf mgr struct */
+        WORD32 size = ih264_buf_mgr_size();
+
+        /* temp var */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* pic buffer mgr */
+        ps_codec->pv_ref_buf_mgr_base = pu1_buf;
+
+        /* picture bank */
+        ps_codec->pv_pic_buf_base = pu1_buf + size;
+        ps_codec->i4_total_pic_buf_size = ps_mem_rec->u4_mem_size - size;
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+    {
+        /* temp var */
+        UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+        /* size of nmb ctxt */
+        WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
+
+        UWORD32 nmb_cntr, subpel_buf_size;
+
+        /* init nmb info structure pointer in all proc ctxts */
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            ps_codec->as_process[i].ps_nmb_info = (mb_info_nmb_t *) (pu1_buf);
+
+            pu1_buf += size;
+        }
+
+        subpel_buf_size = MB_SIZE * MB_SIZE * sizeof(UWORD8);
+
+        /* adjusting pointers for nmb halfpel buffer */
+        for (i = 0; i < MAX_PROCESS_CTXT; i++)
+        {
+            mb_info_nmb_t* ps_mb_info_nmb =
+                            &ps_codec->as_process[i].ps_nmb_info[0];
+
+            for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
+            {
+                ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;
+
+                pu1_buf = pu1_buf + subpel_buf_size;
+
+                ps_mb_info_nmb[nmb_cntr].u4_bst_spel_buf_strd = MB_SIZE;
+            }
+        }
+    }
+
+    ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+    {
+        ih264e_get_rate_control_mem_tab(&ps_codec->s_rate_control, ps_mem_rec,
+                                        USE_BASE);
+    }
+
+    /* init codec ctxt */
+    status = ih264e_init(ps_codec);
+
+    return status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Retrieves mem records passed to the codec
+*
+* @par Description:
+*  Retrieves mem recs passed during init
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_retrieve_memrec(iv_obj_t *ps_codec_obj,
+                                     void *pv_api_ip,
+                                     void *pv_api_op)
+{
+    /* typed views of the API arguments, and the encoder context */
+    ih264e_retrieve_mem_rec_ip_t *ps_retrieve_ip = pv_api_ip;
+    ih264e_retrieve_mem_rec_op_t *ps_retrieve_op = pv_api_op;
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* refuse the request unless i4_init_done is set */
+    if (1 != ps_enc->i4_init_done)
+    {
+        ps_retrieve_op->s_ive_op.u4_error_code |= (1 << IVE_FATALERROR)
+                        | IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* join the encoder threads before anything is released */
+    ih264e_join_threads(ps_enc);
+
+    /* copy the backed up list of memory records into the caller's array */
+    memcpy(ps_retrieve_ip->s_ive_ip.ps_mem_rec, ps_enc->ps_mem_rec_backup,
+           sizeof(iv_mem_rec_t) * MEM_REC_CNT);
+    ps_retrieve_op->s_ive_op.u4_num_mem_rec_filled = MEM_REC_CNT;
+
+    /* free the entropy and proc job queues */
+    ih264_list_free(ps_enc->pv_entropy_jobq);
+    ih264_list_free(ps_enc->pv_proc_jobq);
+    /* destroy the ctl and entropy mutexes */
+    ithread_mutex_destroy(ps_enc->pv_ctl_mutex);
+    ithread_mutex_destroy(ps_enc->pv_entropy_mutex);
+
+    /* free the mv, ref, inp and out buffer managers, in that order */
+    ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_mv_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_ref_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_inp_buf_mgr);
+    ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_out_buf_mgr);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the encoder in flush mode.
+*
+* @par Description:
+*  Sets the encoder in flush mode
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks This call has no real effect on encoder
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_flush_mode(iv_obj_t *ps_codec_obj,
+                                    void *pv_api_ip,
+                                    void *pv_api_op)
+{
+    /* encoder context and typed view of the output structure */
+    codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
+    ih264e_ctl_flush_op_t *ps_ctl_op = pv_api_op;
+
+    /* input structure is not read; kept below the declarations for C90 */
+    UNUSED(pv_api_ip);
+
+    ps_ctl_op->s_ive_op.u4_error_code = 0;
+
+    /* record the flush request in the codec context */
+    ps_codec->i4_flush_mode = 1;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets encoder buffer requirements
+*
+* @par Description:
+*  Gets the encoder buffer requirements. Based on the max width and max height
+*  configuration settings, this routine computes the sizes of the necessary
+*  input and output buffers and returns this info to the caller.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
+                                  void *pv_api_ip,
+                                  void *pv_api_op)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+    /* max width and height after ALIGN16, and a loop counter */
+    WORD32 wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    WORD32 ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    WORD32 i;
+
+    /* codec handle is not read; kept below the declarations for C90 */
+    UNUSED(ps_codec_obj);
+
+    /* Clear the error code and input description first: a color format that
+     * matches no case below then reports zeros instead of unwritten fields */
+    ps_op->s_ive_op.u4_error_code = 0;
+    ps_op->s_ive_op.u4_inp_comp_cnt = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[0] = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[1] = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+
+    /* Number of components in input buffers required for codec  &
+     * Minimum sizes of each component in input buffer required */
+    if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420P)
+    {
+        /* three entries: wd * ht, then two of (wd / 2) * (ht / 2) */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420_COMP;
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = (wd >> 1) * (ht >> 1);
+        ps_op->s_ive_op.au4_min_in_buf_size[2] = (wd >> 1) * (ht >> 1);
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_422ILE)
+    {
+        /* only entry 0 is non-zero: wd * ht * 2 */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_422ILE_COMP;
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGB_565)
+    {
+        /* only entry 0 is non-zero: wd * ht * 2 */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGB565_COMP;
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+    }
+    else if (ps_ip->s_ive_ip.e_inp_color_fmt == IV_RGBA_8888)
+    {
+        /* only entry 0 is non-zero: wd * ht * 4 */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGBA8888_COMP;
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 4;
+    }
+    else if ((ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_UV)
+                    || (ps_ip->s_ive_ip.e_inp_color_fmt == IV_YUV_420SP_VU))
+    {
+        /* entry 0: wd * ht, entry 1: wd * (ht / 2); entry 2 stays 0 */
+        ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420SP_COMP;
+        ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+        ps_op->s_ive_op.au4_min_in_buf_size[1] = wd * (ht >> 1);
+    }
+
+    /* Number of components in output buffers required for codec  &
+     * Minimum sizes of each component in output buffer required */
+    ps_op->s_ive_op.u4_out_comp_cnt = MIN_BITS_BUFS_COMP;
+    for (i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++)
+    {
+        ps_op->s_ive_op.au4_min_out_buf_size[i] = (wd * ht * 3) >> 1;
+    }
+
+    ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS;
+    ps_op->s_ive_op.u4_min_out_bufs = MIN_OUT_BUFS;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the picture dimensions
+*
+* @par Description:
+*  Sets width, height, display width, display height and strides
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_dimensions(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    ih264e_ctl_set_dimensions_ip_t *ps_dim_ip = pv_api_ip;
+    ih264e_ctl_set_dimensions_op_t *ps_dim_op = pv_api_op;
+
+    ps_dim_op->s_ive_op.u4_error_code = 0;
+
+    /* per dimension: ALIGN16'd size, that size >> 4, and size as requested */
+    ps_cfg->u4_wd = ALIGN16(ps_dim_ip->s_ive_ip.u4_wd);
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_wd >> 4;
+    ps_cfg->u4_disp_wd = ps_dim_ip->s_ive_ip.u4_wd;
+    ps_cfg->u4_ht = ALIGN16(ps_dim_ip->s_ive_ip.u4_ht);
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_ht >> 4;
+    ps_cfg->u4_disp_ht = ps_dim_ip->s_ive_ip.u4_ht;
+    /* stride and timestamp are copied through unchanged */
+    ps_cfg->u4_strd = ps_dim_ip->s_ive_ip.u4_strd;
+    ps_cfg->u4_timestamp_high = ps_dim_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_dim_ip->s_ive_ip.u4_timestamp_low;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets source and target frame rates
+*
+* @par Description:
+*  Sets source and target frame rates
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_rate(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_frame_rate_ip_t *ps_rate_ip =
+                    (ih264e_ctl_set_frame_rate_ip_t *) pv_api_ip;
+    ih264e_ctl_set_frame_rate_op_t *ps_rate_op =
+                    (ih264e_ctl_set_frame_rate_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_rate_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_rate_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_rate_ip->s_ive_ip.u4_timestamp_low;
+
+    /* target and source frame rates, copied unchanged */
+    ps_cfg->u4_tgt_frame_rate = ps_rate_ip->s_ive_ip.u4_tgt_frame_rate;
+    ps_cfg->u4_src_frame_rate = ps_rate_ip->s_ive_ip.u4_src_frame_rate;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets target bit rate
+*
+* @par Description:
+*  Sets target bit rate
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_bit_rate(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_bitrate_ip_t *ps_br_ip =
+                    (ih264e_ctl_set_bitrate_ip_t *) pv_api_ip;
+    ih264e_ctl_set_bitrate_op_t *ps_br_op =
+                    (ih264e_ctl_set_bitrate_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_br_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_br_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_br_ip->s_ive_ip.u4_timestamp_low;
+
+    /* target bit rate, copied unchanged */
+    ps_cfg->u4_target_bitrate = ps_br_ip->s_ive_ip.u4_target_bitrate;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets frame type
+*
+* @par Description:
+*  Sets frame type
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks not a sticky tag
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_type(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_frame_type_ip_t *ps_ftype_ip =
+                    (ih264e_ctl_set_frame_type_ip_t *) pv_api_ip;
+    ih264e_ctl_set_frame_type_op_t *ps_ftype_op =
+                    (ih264e_ctl_set_frame_type_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_ftype_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_ftype_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ftype_ip->s_ive_ip.u4_timestamp_low;
+
+    /* requested frame type, copied unchanged */
+    ps_cfg->e_frame_type = ps_ftype_ip->s_ive_ip.e_frame_type;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets quantization params
+*
+* @par Description:
+*  Sets the max, min and default qp for I frame, P frame and B frame
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_qp(void *pv_api_ip,
+                                 void *pv_api_op,
+                                 cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_qp_ip_t *ps_qp_ip = (ih264e_ctl_set_qp_ip_t *) pv_api_ip;
+    ih264e_ctl_set_qp_op_t *ps_qp_op = (ih264e_ctl_set_qp_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_qp_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_qp_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_qp_ip->s_ive_ip.u4_timestamp_low;
+
+    /* lower qp bounds for I, P and B frames */
+    ps_cfg->u4_i_qp_min = ps_qp_ip->s_ive_ip.u4_i_qp_min;
+    ps_cfg->u4_p_qp_min = ps_qp_ip->s_ive_ip.u4_p_qp_min;
+    ps_cfg->u4_b_qp_min = ps_qp_ip->s_ive_ip.u4_b_qp_min;
+
+    /* upper qp bounds for I, P and B frames */
+    ps_cfg->u4_i_qp_max = ps_qp_ip->s_ive_ip.u4_i_qp_max;
+    ps_cfg->u4_p_qp_max = ps_qp_ip->s_ive_ip.u4_p_qp_max;
+    ps_cfg->u4_b_qp_max = ps_qp_ip->s_ive_ip.u4_b_qp_max;
+
+    /* qp values for I, P and B frames */
+    ps_cfg->u4_i_qp = ps_qp_ip->s_ive_ip.u4_i_qp;
+    ps_cfg->u4_p_qp = ps_qp_ip->s_ive_ip.u4_p_qp;
+    ps_cfg->u4_b_qp = ps_qp_ip->s_ive_ip.u4_b_qp;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets encoding mode
+*
+* @par Description:
+*  Sets encoding mode
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_enc_mode(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_enc_mode_ip_t *ps_mode_ip =
+                    (ih264e_ctl_set_enc_mode_ip_t *) pv_api_ip;
+    ih264e_ctl_set_enc_mode_op_t *ps_mode_op =
+                    (ih264e_ctl_set_enc_mode_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_mode_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_mode_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_mode_ip->s_ive_ip.u4_timestamp_low;
+
+    /* requested encoding mode, copied unchanged */
+    ps_cfg->e_enc_mode = ps_mode_ip->s_ive_ip.e_enc_mode;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets vbv parameters
+*
+* @par Description:
+*  Sets vbv parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_vbv_params(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_vbv_params_ip_t *ps_vbv_ip =
+                    (ih264e_ctl_set_vbv_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_vbv_params_op_t *ps_vbv_op =
+                    (ih264e_ctl_set_vbv_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_vbv_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_vbv_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_vbv_ip->s_ive_ip.u4_timestamp_low;
+
+    /* vbv buffer delay and buffer size, copied unchanged */
+    ps_cfg->u4_vbv_buffer_delay = ps_vbv_ip->s_ive_ip.u4_vbv_buffer_delay;
+    ps_cfg->u4_vbv_buf_size = ps_vbv_ip->s_ive_ip.u4_vbv_buf_size;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets AIR parameters
+*
+* @par Description:
+*  Sets AIR parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_air_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_air_params_ip_t *ps_air_ip =
+                    (ih264e_ctl_set_air_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_air_params_op_t *ps_air_op =
+                    (ih264e_ctl_set_air_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_air_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_air_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_air_ip->s_ive_ip.u4_timestamp_low;
+
+    /* AIR refresh period and mode, copied unchanged */
+    ps_cfg->u4_air_refresh_period = ps_air_ip->s_ive_ip.u4_air_refresh_period;
+    ps_cfg->e_air_mode = ps_air_ip->s_ive_ip.e_air_mode;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets motion estimation parameters
+*
+* @par Description:
+*  Sets motion estimation parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_me_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_me_params_ip_t *ps_me_ip =
+                    (ih264e_ctl_set_me_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_me_params_op_t *ps_me_op =
+                    (ih264e_ctl_set_me_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_me_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_me_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_me_ip->s_ive_ip.u4_timestamp_low;
+
+    /* speed preset and search range in x and y */
+    ps_cfg->u4_me_speed_preset = ps_me_ip->s_ive_ip.u4_me_speed_preset;
+    ps_cfg->u4_srch_rng_x = ps_me_ip->s_ive_ip.u4_srch_rng_x;
+    ps_cfg->u4_srch_rng_y = ps_me_ip->s_ive_ip.u4_srch_rng_y;
+
+    /* enable flags: hpel, qpel, fast sad and alt ref */
+    ps_cfg->u4_enable_hpel = ps_me_ip->s_ive_ip.u4_enable_hpel;
+    ps_cfg->u4_enable_qpel = ps_me_ip->s_ive_ip.u4_enable_qpel;
+    ps_cfg->u4_enable_fast_sad = ps_me_ip->s_ive_ip.u4_enable_fast_sad;
+    ps_cfg->u4_enable_alt_ref = ps_me_ip->s_ive_ip.u4_enable_alt_ref;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets Intra/Inter Prediction estimation parameters
+*
+* @par Description:
+*  Sets Intra/Inter Prediction estimation parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_ipe_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_ipe_params_ip_t *ps_ipe_ip =
+                    (ih264e_ctl_set_ipe_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_ipe_params_op_t *ps_ipe_op =
+                    (ih264e_ctl_set_ipe_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_ipe_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_ipe_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_ipe_ip->s_ive_ip.u4_timestamp_low;
+
+    /* encoder speed preset and intra 4x4 enable flag, copied unchanged */
+    ps_cfg->u4_enc_speed_preset = ps_ipe_ip->s_ive_ip.u4_enc_speed_preset;
+    ps_cfg->u4_enable_intra_4x4 = ps_ipe_ip->s_ive_ip.u4_enable_intra_4x4;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets GOP parameters
+*
+* @par Description:
+*  Sets GOP parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_gop_params_ip_t *ps_gop_ip =
+                    (ih264e_ctl_set_gop_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_gop_params_op_t *ps_gop_op =
+                    (ih264e_ctl_set_gop_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_gop_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_gop_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_gop_ip->s_ive_ip.u4_timestamp_low;
+
+    /* number of B frames, IDR frame interval and I frame interval */
+    ps_cfg->u4_num_b_frames = ps_gop_ip->s_ive_ip.u4_num_b_frames;
+    ps_cfg->u4_idr_frm_interval = ps_gop_ip->s_ive_ip.u4_idr_frm_interval;
+    ps_cfg->u4_i_frm_interval = ps_gop_ip->s_ive_ip.u4_i_frm_interval;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets profile parameters
+*
+* @par Description:
+*  Sets profile parameters
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip,
+                                            void *pv_api_op,
+                                            cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_profile_params_ip_t *ps_prof_ip =
+                    (ih264e_ctl_set_profile_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_profile_params_op_t *ps_prof_op =
+                    (ih264e_ctl_set_profile_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_prof_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_prof_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_prof_ip->s_ive_ip.u4_timestamp_low;
+
+    /* requested profile, copied unchanged */
+    ps_cfg->e_profile = ps_prof_ip->s_ive_ip.e_profile;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets disable deblock level
+*
+* @par Description:
+*  Sets disable deblock level. Level 0 means no disabling  and level 4 means
+*  disable completely. 1, 2, 3 are intermediate levels that control amount
+*  of deblocking done.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264_set_deblock_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_deblock_params_ip_t *ps_dblk_ip =
+                    (ih264e_ctl_set_deblock_params_ip_t *) pv_api_ip;
+    ih264e_ctl_set_deblock_params_op_t *ps_dblk_op =
+                    (ih264e_ctl_set_deblock_params_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_dblk_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_dblk_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_dblk_ip->s_ive_ip.u4_timestamp_low;
+
+    /* disable deblock level, copied unchanged */
+    ps_cfg->u4_disable_deblock_level =
+                    ps_dblk_ip->s_ive_ip.u4_disable_deblock_level;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets number of cores
+*
+* @par Description:
+*  Sets number of cores
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns error status
+*
+* @remarks The number of encoder threads is limited to MAX_PROCESS_THREADS
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_num_cores(void *pv_api_ip,
+                                   void *pv_api_op,
+                                   cfg_params_t *ps_cfg)
+{
+    /* typed views of the generic control call arguments */
+    ih264e_ctl_set_num_cores_ip_t *ps_cores_ip =
+                    (ih264e_ctl_set_num_cores_ip_t *) pv_api_ip;
+    ih264e_ctl_set_num_cores_op_t *ps_cores_op =
+                    (ih264e_ctl_set_num_cores_op_t *) pv_api_op;
+
+    /* clear the error code reported back to the caller */
+    ps_cores_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp supplied with the request */
+    ps_cfg->u4_timestamp_high = ps_cores_ip->s_ive_ip.u4_timestamp_high;
+    ps_cfg->u4_timestamp_low = ps_cores_ip->s_ive_ip.u4_timestamp_low;
+
+    /* requested core count, capped at MAX_PROCESS_THREADS */
+    ps_cfg->u4_num_cores = MIN(ps_cores_ip->s_ive_ip.u4_num_cores, MAX_PROCESS_THREADS);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets encoder state
+*
+* @par Description:
+*  Resets encoder state by calling ih264e_init()
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_reset(iv_obj_t *ps_codec_obj,
+                           void *pv_api_ip,
+                           void *pv_api_op)
+{
+    /* codec context held by the API level object */
+    codec_t *ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
+
+    /* output structure of the reset control call */
+    ih264e_ctl_reset_op_t *ps_reset_op = (ih264e_ctl_reset_op_t *) pv_api_op;
+
+    /* the input structure carries nothing this call needs */
+    UNUSED(pv_api_ip);
+
+    /* without a codec context there is nothing to reset; this is flagged
+     * through the error code only, the return status stays IV_SUCCESS */
+    if (NULL == ps_codec)
+    {
+        ps_reset_op->s_ive_op.u4_error_code = IH264E_INIT_NOT_DONE;
+        return IV_SUCCESS;
+    }
+
+    /* clear the error code, then re-run the codec init */
+    ps_reset_op->s_ive_op.u4_error_code = 0;
+    ih264e_init(ps_codec);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec control call
+*
+* @par Description:
+*  Codec control call which in turn calls appropriate calls  based on sub-command
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj,
+                         void *pv_api_ip,
+                         void *pv_api_op)
+{
+    /* codec ctxt */
+    codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* ctrl call I/O structures. Every sub command arrives through the same
+     * pointers; this function itself only reads s_ive_ip.e_sub_cmd and
+     * writes s_ive_op.u4_error_code through these views */
+    ih264e_ctl_setdefault_ip_t *ps_ctl_ip = pv_api_ip;
+    ih264e_ctl_setdefault_op_t *ps_ctl_op = pv_api_op;
+
+    /* ctrl call sub cmd */
+    IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd;
+
+    /* error status */
+    IV_STATUS_T ret = 0;
+
+    /* temp var */
+    WORD32 i;
+    cfg_params_t *ps_cfg = NULL;
+
+    /* control call is for configuring encoding params, this is not to be called
+     * before a successful init call */
+    if (ps_codec->i4_init_done != 1)
+    {
+        ps_ctl_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR;
+        ps_ctl_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* make it thread safe */
+    ithread_mutex_lock(ps_codec->pv_ctl_mutex);
+
+    /* find a free config param set to hold current parameters */
+    for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+    {
+        if (0 == ps_codec->as_cfg[i].u4_is_valid)
+        {
+            ps_cfg = &ps_codec->as_cfg[i];
+            break;
+        }
+    }
+
+    /* If every set is already in use, start overwriting from the head config
+     * params. Note that in this case i == MAX_ACTIVE_CONFIG_PARAMS, so the
+     * selected set must only ever be accessed through ps_cfg and never
+     * through as_cfg[i] */
+    if (NULL == ps_cfg)
+    {
+        ps_cfg = &ps_codec->as_cfg[0];
+    }
+
+    ps_cfg->u4_is_valid = 1;
+
+    ps_cfg->e_cmd = sub_cmd;
+
+    switch (sub_cmd)
+    {
+        /* the commands below fill ps_cfg and leave it marked valid */
+        case IVE_CMD_CTL_SET_DIMENSIONS:
+            ret = ih264e_set_dimensions(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMERATE:
+            ret = ih264e_set_frame_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_BITRATE:
+            ret = ih264e_set_bit_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMETYPE:
+            ret = ih264e_set_frame_type(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_QP:
+            ret = ih264e_set_qp(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ENC_MODE:
+            ret = ih264e_set_enc_mode(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_VBV_PARAMS:
+            ret = ih264e_set_vbv_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_AIR_PARAMS:
+            ret = ih264_set_air_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ME_PARAMS:
+            ret = ih264_set_me_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_IPE_PARAMS:
+            ret = ih264_set_ipe_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_GOP_PARAMS:
+            ret = ih264_set_gop_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+            ret = ih264_set_profile_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+            ret = ih264_set_deblock_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_RESET:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_reset(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_SETDEFAULT:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ret = ih264e_set_default_params(ps_cfg);
+
+            ps_op->s_ive_op.u4_error_code = ret;
+
+            break;
+        }
+
+        case IVE_CMD_CTL_FLUSH:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_set_flush_mode(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETBUFINFO:
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_get_buf_info(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETVERSION:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+            ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+            /* invalidate config param struct as it is being served right away */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ps_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+            /* an empty version buffer is reported as insufficient without
+             * attempting to fetch the version string */
+            if (ps_ip->s_ive_ip.u4_version_bufsize <= 0)
+            {
+                ps_op->s_ive_op.u4_error_code =
+                                IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                ret = IV_FAIL;
+            }
+            else
+            {
+                ret = ih264e_get_version((CHAR *) ps_ip->s_ive_ip.pu1_version,
+                                         ps_ip->s_ive_ip.u4_version_bufsize);
+
+                if (ret != IV_SUCCESS)
+                {
+                    ps_op->s_ive_op.u4_error_code =
+                                    IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                    ret = IV_FAIL;
+                }
+            }
+            break;
+        }
+
+        case IVE_CMD_CTL_SET_NUM_CORES:
+            ret = ih264e_set_num_cores(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        default:
+            /* nothing is queued for an unknown command, so release the config
+             * param struct again; ret keeps its initial value of 0 */
+            ps_cfg->u4_is_valid = 0;
+
+            DEBUG("Warning !! unrecognized control api command \n");
+            break;
+    }
+
+    ithread_mutex_unlock(ps_codec->pv_ctl_mutex);
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec entry point function. All the function calls to  the codec are done
+*  using this function with different values specified in command
+*
+* @par Description:
+*  Arguments are tested for validity and then based on the command
+*  appropriate function is called
+*
+* @param[in] ps_handle
+*  API level handle for codec
+*
+* @param[in] pv_api_ip
+*  Input argument structure
+*
+* @param[out] pv_api_op
+*  Output argument structure
+*
+* @returns  error_status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    /* api command, read from the input structure once it has been checked */
+    WORD32 i4_cmd = IV_CMD_NA;
+
+    /* status of the requested operation */
+    WORD32 i4_status;
+
+    /* validate input / output structures before touching their contents */
+    if (IV_SUCCESS != api_check_struct_sanity(ps_handle, pv_api_ip, pv_api_op))
+    {
+        DEBUG("error code = %d\n", *((UWORD32 *)pv_api_op + 1));
+        return IV_FAIL;
+    }
+
+    /* the command is the second 32 bit word of the input structure */
+    i4_cmd = ((WORD32 *) pv_api_ip)[1];
+
+    /* hand over to the routine that serves the command */
+    switch (i4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            i4_status = ih264e_get_num_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+            i4_status = ih264e_fill_num_mem_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_INIT:
+            i4_status = ih264e_init_mem_rec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC:
+            i4_status = ih264e_retrieve_memrec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_CTL:
+            i4_status = ih264e_ctl(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_ENCODE:
+            i4_status = ih264e_encode(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        default:
+            /* any other command value is rejected */
+            i4_status = IV_FAIL;
+            break;
+    }
+
+    return (IV_STATUS_T) i4_status;
+}
diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c
new file mode 100755
index 0000000..e5bfbe4
--- /dev/null
+++ b/encoder/ih264e_bitstream.c
@@ -0,0 +1,472 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_bitstream.c
+*
+* @brief
+*  This file contains function definitions related to bitstream generation
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_bitstrm_init()
+*  - ih264e_put_bits()
+*  - ih264e_put_bit()
+*  - ih264e_put_rbsp_trailing_bits()
+*  - ih264e_put_uev()
+*  - ih264e_put_sev()
+*  - ih264e_put_nal_start_code_prefix()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_debug.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief Initializes the encoder bitstream engine
+*
+*  @par   Description
+*  This routine needs to be called at start of slice/frame encode
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   pu1_bitstrm_buf
+*  bitstream buffer pointer where the encoded stream is generated in byte order
+*
+*  @param[in]   u4_max_bitstrm_size
+*  indicates maximum bitstream buffer size. (in bytes)
+*  If actual stream size exceeds the maximum size, encoder should
+*   1. Not corrupt data beyond u4_max_bitstrm_size bytes
+*   2. Report an error back to application indicating overflow
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_bitstrm_init(bitstrm_t *ps_bitstrm,
+                                   UWORD8 *pu1_bitstrm_buf,
+                                   UWORD32 u4_max_bitstrm_size)
+{
+    /* start with an empty scratch word and no pending zero-byte run */
+    ps_bitstrm->i4_zero_bytes_run  = 0;
+    ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+    ps_bitstrm->u4_cur_word        = 0;
+    /* attach the caller's buffer; writing begins at its first byte */
+    ps_bitstrm->u4_max_strm_size   = u4_max_bitstrm_size;
+    ps_bitstrm->pu1_strm_buffer    = pu1_bitstrm_buf;
+    ps_bitstrm->u4_strm_buf_offset = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+*  @brief puts a code with specified number of bits into the bitstream
+*
+*  @par   Description
+*  inserts code_len number of bits from lsb of code_val into the
+*  bitstream. updates context members like u4_cur_word, u4_strm_buf_offset and
+*  i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+*  available size (u4_max_strm_size), returns error without corrupting data
+*  beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_val
+*  code value that needs to be inserted in the stream.
+*
+*  @param[in]    code_len
+*  number of bits of code_val (taken from its lsb) that are inserted into
+*  the bitstream buffer. Valid range is [1:WORD_SIZE]
+*
+*  @remarks     Assumptions: all bits from bit position code_len to msb of
+*   code_val shall be zero
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm,
+                               UWORD32 u4_code_val,
+                               WORD32 code_len)
+{
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32  bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+
+
+    /* check assumptions made in the module */
+    ASSERT(code_len > 0 && code_len <= WORD_SIZE);
+
+    if(code_len < WORD_SIZE)
+        ASSERT((u4_code_val >> code_len) == 0);
+
+
+    /* sanity check on the bitstream engine state */
+    ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE);
+
+    ASSERT(ps_bitstrm->i4_zero_bytes_run <= EPB_ZERO_BYTES);
+
+    ASSERT(ps_bitstrm->pu1_strm_buffer != NULL);
+
+
+    if(bits_left_in_cw > code_len)
+    {
+        /*******************************************************************/
+        /* insert the code in local bitstream word and return              */
+        /* code is inserted in position of bits left (post decrement)      */
+        /*******************************************************************/
+        bits_left_in_cw -= code_len;
+        u4_cur_word     |= (u4_code_val << bits_left_in_cw);
+
+        ps_bitstrm->u4_cur_word         = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw  = bits_left_in_cw;
+
+        return(IH264E_SUCCESS);
+    }
+    else
+    {
+        /********************************************************************/
+        /* 1. insert partial code corresponding to bits left in cur word    */
+        /* 2. flush all the bits of cur word to bitstream                   */
+        /* 3. insert emulation prevention bytes while flushing the bits     */
+        /* 4. insert remaining bits of code starting from msb of cur word   */
+        /* 5. update bitsleft in current word and stream buffer offset      */
+        /********************************************************************/
+        UWORD32 u4_strm_buf_offset  = ps_bitstrm->u4_strm_buf_offset;
+
+        UWORD32 u4_max_strm_size    = ps_bitstrm->u4_max_strm_size;
+
+        WORD32  zero_run            = ps_bitstrm->i4_zero_bytes_run;
+
+        UWORD8* pu1_strm_buf        = ps_bitstrm->pu1_strm_buffer;
+
+        WORD32  i, rem_bits = (code_len - bits_left_in_cw);
+        UWORD32 u4_flush_bytes = (WORD_SIZE>>3) + (WORD_SIZE>>3)/EPB_ZERO_BYTES;
+
+        /*********************************************************************/
+        /* Bitstream overflow check                                          */
+        /* room for the word and up to 2 epb bytes (worst case) is reserved  */
+        /*********************************************************************/
+        if((u4_strm_buf_offset + u4_flush_bytes) >= u4_max_strm_size)
+        {
+            /* return without corrupting the buffer beyond its size */
+            return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+        }
+
+        /* insert partial code corresponding to bits left in cur word */
+        u4_cur_word |= u4_code_val >> rem_bits;
+
+        for(i = WORD_SIZE; i > 0; i -= 8)
+        {
+            /* flush the bits in cur word byte by byte and copy to stream */
+            UWORD8   u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+        }
+
+        /* insert the remaining bits from code val into current word */
+        u4_cur_word = rem_bits ? (u4_code_val << (WORD_SIZE - rem_bits)) : 0;
+
+        /* update the state variables and return success */
+        ps_bitstrm->u4_cur_word         = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw  = WORD_SIZE - rem_bits;
+        ps_bitstrm->i4_zero_bytes_run   = zero_run;
+        ps_bitstrm->u4_strm_buf_offset  = u4_strm_buf_offset;
+        return (IH264E_SUCCESS);
+    }
+}
+
+/**
+******************************************************************************
+*
+*  @brief inserts a 1-bit code into the bitstream
+*
+*  @par   Description
+*  inserts 1bit lsb of code_val into the bitstream
+*  updates context members like u4_cur_word, u4_strm_buf_offset and
+*  i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+*  available size (u4_max_strm_size), returns error without corrupting data
+*  beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_val
+*  code value that needs to be inserted in the stream.
+*
+*  @remarks     Assumptions: all bits from bit position 1 to msb of code_val
+*  shall be zero
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val)
+{
+    const WORD32 i4_one_bit = 1; /* a lone bit is a put_bits() of length one */
+    return ih264e_put_bits(ps_bitstrm, u4_code_val, i4_one_bit);
+}
+
+/**
+******************************************************************************
+*
+*  @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+*  @par   Description
+*  inserts rbsp trailing bits, updates context members like u4_cur_word and
+*  i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+*  total bytes (u4_strm_buf_offset) exceeds max available size
+*  (u4_max_strm_size), returns error without corrupting data beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm)
+{
+    WORD32 i;
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32  bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+    WORD32  bytes_left_in_cw = (bits_left_in_cw - 1) >> 3;
+
+    UWORD32 u4_strm_buf_offset  = ps_bitstrm->u4_strm_buf_offset;
+    UWORD32 u4_max_strm_size    = ps_bitstrm->u4_max_strm_size;
+    WORD32  zero_run            = ps_bitstrm->i4_zero_bytes_run;
+    UWORD8* pu1_strm_buf        = ps_bitstrm->pu1_strm_buffer;
+
+    /* bytes flushed below, plus at worst one epb per EPB_ZERO_BYTES of them */
+    UWORD32 u4_flush_bytes = (WORD_SIZE>>3) - bytes_left_in_cw;
+    UWORD32 u4_max_epb = (u4_flush_bytes + EPB_ZERO_BYTES - 1) / EPB_ZERO_BYTES;
+
+    /*********************************************************************/
+    /* Bitstream overflow check                                          */
+    /* room for the flushed bytes and their worst case epb is reserved   */
+    /*********************************************************************/
+    if((u4_strm_buf_offset + u4_flush_bytes + u4_max_epb) >= u4_max_strm_size)
+    {
+        /* return without corrupting the buffer beyond its size */
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* insert a 1 at the end of current word (unsigned shift: bit 31 is legal) */
+    u4_cur_word |= ((UWORD32)1 << (bits_left_in_cw - 1));
+
+    for(i = WORD_SIZE; i > (bytes_left_in_cw*8); i -= 8)
+    {
+        /* flush the bits in cur word byte by byte  and copy to stream */
+        UWORD8   u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+    }
+
+    /* update the stream offset */
+    ps_bitstrm->u4_strm_buf_offset  = u4_strm_buf_offset;
+
+    /* Default init values for scratch variables of bitstream context */
+    ps_bitstrm->u4_cur_word         = 0;
+    ps_bitstrm->i4_bits_left_in_cw  = WORD_SIZE;
+    ps_bitstrm->i4_zero_bytes_run   = 0;
+
+    return (IH264E_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+*  @brief puts exponential golomb code of a unsigned integer into bitstream
+*
+*  @par   Description
+*  computes uev code for given syntax element and inserts the same into
+*  bitstream by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_num
+*  unsigned integer input whose golomb code is written in stream
+*
+*  @remarks     Assumptions: code value can be represented in less than 16bits
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num)
+{
+    /* exp-golomb info bits are codeNum + 1 (Table 9-2 JCTVC-J1003_d7) */
+    UWORD32 u4_info = u4_code_num + 1;
+
+    /* position of the leading 1 in the info bits */
+    UWORD32 u4_msb_pos;
+    WORD32 i4_code_len;
+
+    GETRANGE(u4_msb_pos, u4_info);
+
+    /* (msb_pos - 1) leading zeros followed by msb_pos info bits */
+    i4_code_len = (WORD32)(2 * u4_msb_pos - 1);
+    return ih264e_put_bits(ps_bitstrm, u4_info, i4_code_len);
+}
+
+/**
+******************************************************************************
+*
+*  @brief puts exponential golomb code of a signed integer into bitstream
+*
+*  @par   Description
+*  computes sev code for given syntax element and inserts the same into
+*  bitstream by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    syntax_elem
+*  signed integer input whose golomb code is written in stream
+*
+*  @remarks     Assumptions: code value can be represented in less than 16bits
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem)
+{
+    UWORD32 u4_mapped, u4_info, u4_msb_pos;
+    IH264E_ERROR_T e_status;
+
+    /************************************************************************/
+    /* map the signed value onto an unsigned codeNum as per Table 9-3 of    */
+    /* JCTVC-J1003_d7: a positive x becomes 2x - 1, any other x becomes     */
+    /* 2 * abs(x)                                                           */
+    /************************************************************************/
+    if(syntax_elem > 0)
+    {
+        u4_mapped = (syntax_elem << 1) - 1;
+    }
+    else
+    {
+        u4_mapped = ((-syntax_elem) << 1);
+    }
+
+    /* exp-golomb info bits are codeNum + 1 (Table 9-2 JCTVC-J1003_d7) */
+    u4_info = u4_mapped + 1;
+
+    /* position of the leading 1 in the info bits */
+    GETRANGE(u4_msb_pos, u4_info);
+
+    /* (msb_pos - 1) leading zeros followed by msb_pos info bits */
+    e_status = ih264e_put_bits(ps_bitstrm, u4_info, (2 * u4_msb_pos - 1));
+
+    return e_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief insert NAL start code prefix (0x000001) into bitstream with an option
+*  of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+*  @par   Description
+*  Although start code prefix could have been put by calling ih264e_put_bits(),
+*  ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+*  prevention insertion is not done for the NAL start code prefix which will
+*  surely happen otherwise by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    insert_leading_zero_8bits
+*  flag indicating whether one more zero byte is to be prefixed to the start code
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm,
+                                                WORD32 insert_leading_zero_8bits)
+{
+    /* longest form of the prefix: leading_zero_8bits + 0x00 00 01 */
+    static const UWORD8 au1_prefix[4] = { 0x00, 0x00, 0x00, 0x01 };
+
+    UWORD8 *pu1_out   = ps_bitstrm->pu1_strm_buffer;
+    UWORD32 u4_offset = ps_bitstrm->u4_strm_buf_offset;
+    WORD32 i4_idx;
+
+    /* refuse to write unless the 4 byte worst case is sure to fit */
+    if(ps_bitstrm->u4_max_strm_size <= (u4_offset + 4))
+    {
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* skip the first table entry when no leading zero byte is requested */
+    i4_idx = insert_leading_zero_8bits ? 0 : 1;
+
+    /********************************************************************/
+    /* copy the prefix bytes straight into the stream buffer, bypassing */
+    /* the emulation prevention logic of ih264e_put_bits()              */
+    /********************************************************************/
+    for(; i4_idx < 4; i4_idx++)
+    {
+        pu1_out[u4_offset] = au1_prefix[i4_idx];
+        u4_offset++;
+    }
+
+    /* publish the advanced write position */
+    ps_bitstrm->u4_strm_buf_offset = u4_offset;
+
+    return IH264E_SUCCESS;
+}
+
diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h
new file mode 100755
index 0000000..21360cc
--- /dev/null
+++ b/encoder/ih264e_bitstream.h
@@ -0,0 +1,401 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_bitstream.h
+*
+* @brief
+*  This file contains encoder bitstream engine related structures and
+*  interface prototypes
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_BITSTREAM_H_
+#define IH264E_BITSTREAM_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      defines the maximum number of bits in a bitstream word
+******************************************************************************
+ */
+#define WORD_SIZE         32
+
+/**
+******************************************************************************
+ *  @brief  The number of consecutive zero bytes for emulation prevention check
+******************************************************************************
+ */
+#define EPB_ZERO_BYTES      2
+
+/**
+******************************************************************************
+ *  @brief  Emulation prevention insertion byte
+******************************************************************************
+ */
+#define EPB_BYTE            0x03
+
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief   True when an emulation prevention byte must precede next_byte
+******************************************************************************
+ */
+#define INSERT_EPB(zero_run, next_byte)                                       \
+    (((zero_run) == EPB_ZERO_BYTES) && (0 == ((next_byte) & 0xFC)))
+
+/**
+******************************************************************************
+ *  @brief   r = 1-based bit position of the leading 1 (msb) of value; 1 if 0
+******************************************************************************
+ */
+#if !MSVC
+#define GETRANGE(r,value)   \
+{                           \
+    (r) = 0;                \
+    if(0 == (value))        \
+        (r) = 1;            \
+    else                    \
+    {                       \
+        (r) = 32-CLZ(value);\
+    }\
+}
+#else
+#define GETRANGE(r,value)                 \
+{                                         \
+    unsigned long  msb_one_bit = 0;       \
+    (r) = _BitScanReverse(&msb_one_bit, (value)) ? (UWORD32)(msb_one_bit + 1) : 1 ; \
+}
+#endif
+
+/**
+******************************************************************************
+ *  @brief   bits = length (in bits) of the ue(v) exp-golomb code of x
+******************************************************************************
+ */
+#define UE_LENGTH(bits,x)       \
+{                           \
+    UWORD32 r_bit;              \
+    GETRANGE(r_bit,(x)+1)       \
+    (bits) =(((r_bit - 1) << 1)+1);   \
+}
+
+/**
+******************************************************************************
+ *  @brief  Inserts 1 byte and Emulation Prevention Byte(if any) into bitstream
+ *          Advances off and updates zero_run; does no buffer bound check
+******************************************************************************
+ */
+#define PUTBYTE_EPB(ptr,off,byte,zero_run)                      \
+{                                                               \
+    if( INSERT_EPB(zero_run, byte) )                            \
+    {                                                           \
+        (ptr)[off] = EPB_BYTE;                                  \
+        (off)++;                                                \
+        (zero_run) = 0;                                         \
+    }                                                           \
+                                                                \
+    (ptr)[off] = (byte);                                        \
+    (off)++;                                                    \
+    (zero_run) = (byte) ? 0 : (zero_run) + 1;                   \
+}
+
+/**
+******************************************************************************
+ *  @brief  Ensures Byte alignment of the slice header
+******************************************************************************
+ */
+#define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm)
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      Bitstream context for encoder
+******************************************************************************
+ */
+typedef struct bitstrm
+{
+    /** points to start of stream buffer.    */
+    UWORD8  *pu1_strm_buffer;
+
+    /**
+     *  max bitstream size (in bytes).
+     *  Encoded stream shall not exceed this size.
+     */
+    UWORD32 u4_max_strm_size;
+
+    /**
+     *  byte offset (w.r.t pu1_strm_buffer) where the next byte will be
+     *  written. The bitstream engine is expected not to corrupt data beyond
+     *  u4_max_strm_size bytes
+     */
+    UWORD32 u4_strm_buf_offset;
+
+    /**
+     *  current bitstream word; a scratch word holding at most WORD_SIZE
+     *  bits, filled from the msb downwards. It is copied to the stream
+     *  buffer when the word is full
+     */
+    UWORD32 u4_cur_word;
+
+    /**
+     *  number of bits still available in u4_cur_word.
+     *  Bits from the msb down to position i4_bits_left_in_cw are already
+     *  in use; the next bits go in from position [i4_bits_left_in_cw-1].
+     *  Range of this variable [1 : WORD_SIZE]
+     */
+    WORD32  i4_bits_left_in_cw;
+
+    /**
+     *  number of consecutive zero bytes propagated from the previous
+     *  word. It is used for emulation prevention byte insertion in the stream
+     */
+    WORD32  i4_zero_bytes_run;
+
+} bitstrm_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief Initializes the encoder bitstream engine
+*
+*  @par   Description
+*  This routine needs to be called at start of slice/frame encode
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   pu1_bitstrm_buf
+*  bitstream buffer pointer where the encoded stream is generated in byte order
+*
+*  @param[in]   u4_max_bitstrm_size
+*  indicates maximum bitstream buffer size. (in bytes)
+*  If actual stream size exceeds the maximum size, encoder should
+*   1. Not corrupt data beyond u4_max_bitstrm_size bytes
+*   2. Report an error back to application indicating overflow
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_bitstrm_init
+        (
+            bitstrm_t   *ps_bitstrm,
+            UWORD8      *pu1_bitstrm_buf,
+            UWORD32     u4_max_bitstrm_size
+        );
+
+/**
+******************************************************************************
+*
+*  @brief puts a code with specified number of bits into the bitstream
+*
+*  @par   Description
+*  inserts code_len number of bits from lsb of code_val into the
+*  bitstream.  If the total bytes (u4_strm_buf_offset) exceeds max
+*  available size (u4_max_strm_size), returns error without corrupting data
+*  beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_val
+*  code value that needs to be inserted in the stream.
+*
+*  @param[in]    code_len
+*  number of bits of code_val (taken from its lsb) that are inserted into
+*  the bitstream buffer.
+*
+*  @remarks     Assumptions: all bits from bit position code_len to msb of
+*   code_val shall be zero
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_bits
+        (
+            bitstrm_t   *ps_bitstrm,
+            UWORD32     u4_code_val,
+            WORD32      code_len
+        );
+
+/**
+******************************************************************************
+*
+*  @brief inserts a 1-bit code into the bitstream
+*
+*  @par   Description
+*  inserts 1bit lsb of code_val into the bitstream
+*  updates context members like u4_cur_word, u4_strm_buf_offset and
+*  i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+*  available size (u4_max_strm_size), returns error without corrupting data
+*  beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_val
+*  code value that needs to be inserted in the stream.
+*
+*  @remarks     Assumptions: all bits from bit position 1 to msb of code_val
+*  shall be zero
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_bit
+        (
+            bitstrm_t   *ps_bitstrm,
+            UWORD32     u4_code_val
+        );
+
+/**
+******************************************************************************
+*
+*  @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+*  @par   Description
+*  inserts rbsp trailing bits, updates context members like u4_cur_word and
+*  i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+*  total bytes (u4_strm_buf_offset) exceeds max available size
+*  (u4_max_strm_size), returns error without corrupting data beyond it
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_rbsp_trailing_bits
+        (
+            bitstrm_t   *ps_bitstrm
+        );
+
+/**
+******************************************************************************
+*
+*  @brief puts exponential golomb code of a unsigned integer into bitstream
+*
+*  @par   Description
+*  computes uev code for given syntax element and inserts the same into
+*  bitstream by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    u4_code_num
+*  unsigned integer input whose golomb code is written in stream
+*
+*  @remarks     Assumptions: code value can be represented in less than 16bits
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_uev
+        (
+            bitstrm_t   *ps_bitstrm,
+            UWORD32     u4_code_num
+        );
+
+/**
+******************************************************************************
+*
+*  @brief puts exponential golomb code of a signed integer into bitstream
+*
+*  @par   Description
+*  computes sev code for given syntax element and inserts the same into
+*  bitstream by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    syntax_elem
+*  signed integer input whose golomb code is written in stream
+*
+*  @remarks     Assumptions: code value can be represented in less than 16bits
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_sev
+        (
+            bitstrm_t   *ps_bitstrm,
+            WORD32      syntax_elem
+        );
+
+/**
+******************************************************************************
+*
+*  @brief insert NAL start code prefix (0x000001) into bitstream with an option
+*  of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+*  @par   Description
+*  Although start code prefix could have been put by calling ih264e_put_bits(),
+*  ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+*  prevention insertion is not done for the NAL start code prefix which will
+*  surely happen otherwise by calling ih264e_put_bits() interface.
+*
+*  @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]    insert_leading_zero_8bits
+*  flag indicating whether one more zero byte is to be prefixed to the start code
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_put_nal_start_code_prefix
+        (
+            bitstrm_t   *ps_bitstrm,
+            WORD32      insert_leading_zero_8bits
+        );
+
+#endif /* IH264E_BITSTREAM_H_ */
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
new file mode 100755
index 0000000..1341dcd
--- /dev/null
+++ b/encoder/ih264e_cavlc.c
@@ -0,0 +1,1448 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_cavlc.c
+*
+* @brief
+*  Contains all the routines to code syntax elements and residuals when entropy
+*  coding chosen is CAVLC
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_compute_zeroruns_and_trailingones()
+*  - ih264e_write_coeff4x4_cavlc()
+*  - ih264e_write_coeff8x8_cavlc()
+*  - ih264e_encode_residue()
+*  - ih264e_write_islice_mb()
+*  - ih264e_write_pslice_mb()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function computes the run of zeros preceding every non-zero
+*  coefficient, the number of trailing ones and the sign of the trailing ones,
+*  based on the significant coeff map, residual block and total nnz.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in that sub block
+*
+* @param[out] pu1_zero_run
+*  Pointer to array to store run of zeros. The first 4 * sizeof(UWORD32)
+*  entries are cleared, then entry k receives the number of zeros that precede
+*  the k-th non-zero coefficient in the significant coefficient map.
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map, consumed from bit 0 upwards
+*
+* @returns u4_totzero_sign_trailone
+*  Bits 0-7 contain the number of trailing ones.
+*  Bits 8-15 contain the bitwise sign information of the trailing ones
+*  (bit set => the corresponding trailing one is -1).
+*  Bits 16-23 contain the total number of zeros.
+*
+* @remarks
+*  Both loops advance only when a set bit is found in u4_sig_coeff_map, so the
+*  map must contain at least u4_total_coeff set bits for them to terminate.
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_compute_zeroruns_and_trailingones(WORD16 *pi2_res_block,
+                                                        UWORD32 u4_total_coeff,
+                                                        UWORD8 *pu1_zero_run,
+                                                        UWORD32 u4_sig_coeff_map)
+{
+    UWORD32 i = 0;
+    UWORD32 u4_nnz_coeff = 0;
+    WORD32  i4_run = -1;
+    UWORD32 u4_sign = 0;
+    UWORD32 u4_tot_zero = 0;
+    UWORD32 u4_trailing1 = 0;
+    WORD32 i4_val;
+    UWORD32 u4_totzero_sign_trailone;
+    UWORD32 u4_idx;
+
+    /*
+     * Clear the run array byte by byte. Writing it through a UWORD32 alias
+     * (as four word stores) violates the strict aliasing rule and assumes the
+     * byte array is word aligned; the byte loop clears the same number of
+     * bytes without either assumption.
+     */
+    for (u4_idx = 0; u4_idx < 4 * sizeof(UWORD32); u4_idx++)
+    {
+        pu1_zero_run[u4_idx] = 0;
+    }
+
+    /* Compute Runs of zeros for all nnz coefficients except the last 3 */
+    if (u4_total_coeff > 3)
+    {
+        for (i = 0; u4_nnz_coeff < (u4_total_coeff-3); i++)
+        {
+            i4_run++;
+
+            i4_val = (u4_sig_coeff_map & 0x1);
+            u4_sig_coeff_map >>= 1;
+
+            if (i4_val != 0)
+            {
+                pu1_zero_run[u4_nnz_coeff++] = i4_run;
+                i4_run = -1;
+            }
+        }
+    }
+
+    /* Compute T1's, Signof(T1's) and Runs of zeros for the last 3 */
+    while (u4_nnz_coeff != u4_total_coeff)
+    {
+        i4_run++;
+
+        i4_val = (u4_sig_coeff_map & 0x1);
+        u4_sig_coeff_map >>= 1;
+
+        if (i4_val != 0)
+        {
+            WORD16 i2_level = pi2_res_block[u4_nnz_coeff];
+
+            /* the run is recorded for every non-zero coefficient */
+            pu1_zero_run[u4_nnz_coeff] = i4_run;
+
+            if (i2_level == 1)
+            {
+                u4_trailing1++;
+            }
+            else if (i2_level == -1)
+            {
+                /* sign bit position == index of this trailing one */
+                u4_sign |= 1 << u4_trailing1;
+                u4_trailing1++;
+            }
+            else
+            {
+                /* a level other than +/-1 restarts the trailing ones count */
+                u4_trailing1 = 0;
+                u4_sign = 0;
+            }
+            i4_run = -1;
+            u4_nnz_coeff++;
+        }
+        i++;
+    }
+
+    /* i == number of map positions scanned up to the last non-zero coeff */
+    u4_tot_zero = i - u4_total_coeff;
+    u4_totzero_sign_trailone = (u4_tot_zero << 16)|(u4_sign << 8)|u4_trailing1;
+
+    return (u4_totzero_sign_trailone);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes a code word with ih264e_put_bits() while remembering the first
+*  failure reported so far.
+*
+* @param[in, out] ps_bit_stream
+*  structure pointing to a buffer holding output bit stream
+*
+* @param[in] u4_codeword
+*  code word to be written
+*
+* @param[in] u4_codesize
+*  length of the code word in bits
+*
+* @param[in] e_prev_status
+*  status accumulated by the earlier writes of the same block
+*
+* @returns
+*  e_prev_status when it already holds an error, otherwise the status of this
+*  write. The write itself is always attempted.
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_cavlc_put_bits_keep_err(bitstrm_t *ps_bit_stream,
+                                                     UWORD32 u4_codeword,
+                                                     UWORD32 u4_codesize,
+                                                     IH264E_ERROR_T e_prev_status)
+{
+    IH264E_ERROR_T e_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+    return (e_prev_status != IH264E_SUCCESS) ? e_prev_status : e_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given residual block
+*
+* @par Description
+*  The syntax elements are written in this order: coeff_token, sign of the
+*  trailing ones, level codes (last non-trailing-one level first), total zeros
+*  and run before.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in the sub block
+*
+* @param[in] u4_block_type
+*  block type
+*
+* @param[out] pu1_zero_run
+*  Pointer to array to store run of zeros; it is filled by
+*  ih264e_compute_zeroruns_and_trailingones() and read back while coding the
+*  run before syntax
+*
+* @param[in] u4_nc
+*  average of non zero coeff from top and left blocks (when available)
+*
+* @param[in, out] ps_bit_stream
+*  structure pointing to a buffer holding output bit stream
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map of the residual block
+*
+* @returns
+*  error code; when several bitstream writes fail, the first failure is
+*  reported
+*
+* @remarks
+*  If the block type is CAVLC_CHROMA_4x4_DC, then u4_nc is non-significant
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff4x4_cavlc(WORD16 *pi2_res_block,
+                                                  UWORD32 u4_total_coeff,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD8 *pu1_zero_run,
+                                                  UWORD32 u4_nc,
+                                                  bitstrm_t *ps_bit_stream,
+                                                  UWORD32 u4_sig_coeff_map)
+{
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    UWORD32 u4_totzero_sign_trailone = 0;
+    UWORD32 u4_trailing_ones = 0;
+    UWORD32 u4_tot_zeros = 0;
+    UWORD32 u4_remaining_coeff = 0;
+    UWORD32 u4_sign1 = 0;
+    UWORD32 u4_max_num_coeff = 0;
+    /* maximum number of coefficients, indexed by u4_block_type */
+    static const UWORD32 au4_max_num_nnz_coeff[] = {16, 15, 16, 4, 15};
+
+    /* validate inputs */
+    ASSERT(u4_block_type <= CAVLC_CHROMA_4x4_AC);
+
+    u4_max_num_coeff = au4_max_num_nnz_coeff[u4_block_type];
+
+    ASSERT(u4_total_coeff <= u4_max_num_coeff);
+
+    if (!u4_total_coeff)
+    {
+        /* empty block: only the coeff_token is written */
+        UWORD32 u4_codeword = 15;
+        UWORD32 u4_codesize = 1;
+        if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+        {
+            u4_codeword = 1;
+            u4_codesize = 2;
+            DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, 0);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnumber of trailing ones ",0);
+        }
+        else
+        {
+            UWORD32 u4_vlcnum = u4_nc >> 1;
+
+            /* write coeff_token */
+            if (u4_vlcnum > 3)
+            {
+                /* Num-FLC */
+                u4_codeword = 3;
+                u4_codesize = 6;
+            }
+            else
+            {
+                /* Num-VLC 0, 1, 2 */
+                if (u4_vlcnum > 1)
+                {
+                    u4_vlcnum = 2;
+                }
+                /* code size becomes 1, 2 or 4 bits, code word is all ones */
+                u4_codesize <<= u4_vlcnum;
+                u4_codeword >>= (4 - u4_codesize);
+            }
+
+            DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, 0, u4_nc);
+            ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+            ENTROPY_TRACE("\tnC ",u4_nc);
+        }
+
+
+        DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+        ENTROPY_TRACE("\tcodeword ",u4_codeword);
+        ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+        error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+        return error_status;
+    }
+    else
+    {
+        /* Compute zero run, number of trailing ones and their sign. */
+        u4_totzero_sign_trailone =
+                ih264e_compute_zeroruns_and_trailingones(pi2_res_block,
+                        u4_total_coeff,
+                        pu1_zero_run,
+                        u4_sig_coeff_map);
+        u4_trailing_ones = u4_totzero_sign_trailone & 0xFF;
+        u4_sign1 = (u4_totzero_sign_trailone >> 8)& 0xFF;
+        u4_tot_zeros = (u4_totzero_sign_trailone >> 16) & 0xFF;
+        u4_remaining_coeff = u4_total_coeff - u4_trailing_ones;
+
+        /* write coeff_token */
+        {
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+            if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+            {
+                u4_codeword = gu1_code_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+                u4_codesize = gu1_size_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+
+                DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, u4_trailing_ones);
+                ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+                ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+            }
+            else
+            {
+                UWORD32 u4_vlcnum = u4_nc >> 1;
+
+                if (u4_vlcnum > 3)
+                {
+                    /* Num-FLC */
+                    u4_codeword = ((u4_total_coeff-1) << 2 ) + u4_trailing_ones;
+                    u4_codesize = 6;
+                }
+                else
+                {
+                    /* Num-VLC 0, 1, 2 */
+                    if (u4_vlcnum > 1)
+                    {
+                        u4_vlcnum = 2;
+                    }
+                    u4_codeword = gu1_code_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+                    u4_codesize = gu1_size_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+                }
+
+                DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, u4_trailing_ones, u4_nc);
+                ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+                ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+                ENTROPY_TRACE("\tnC ",u4_nc);
+            }
+
+            DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ",u4_codeword);
+            ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+            error_status = ih264e_cavlc_put_bits_keep_err(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+        }
+
+        /* write sign of trailing ones */
+        if (u4_trailing_ones)
+        {
+            DEBUG("\nT1's: %d u4_codeword, %d u4_codesize",u4_sign1, u4_trailing_ones);
+            error_status = ih264e_cavlc_put_bits_keep_err(ps_bit_stream, u4_sign1, u4_trailing_ones, error_status);
+            ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+            ENTROPY_TRACE("\tsign of trailing ones ",u4_sign1);
+        }
+
+        /* write level codes */
+        if (u4_remaining_coeff)
+        {
+            WORD32 i4_level = pi2_res_block[u4_remaining_coeff-1];
+            UWORD32 u4_escape;
+            UWORD32 u4_suffix_length = 0; // Level-VLC[N]
+            UWORD32 u4_abs_level, u4_abs_level_actual = 0;
+            WORD32 i4_sign;
+            /* rounding term added before the shift, indexed by suffix length */
+            static const UWORD32 u4_rndfactor[] = {0, 0, 1, 3, 7, 15, 31};
+
+            DEBUG("\n \t%d coeff,",i4_level);
+            ENTROPY_TRACE("\tcoeff ",i4_level);
+
+            if (u4_trailing_ones < 3)
+            {
+                /* If there are less than 3 T1s, then the first non-T1 level is incremented if negative (decremented if positive)*/
+                if (i4_level < 0)
+                {
+                    i4_level += 1;
+                }
+                else
+                {
+                    i4_level -= 1;
+                }
+
+                /* compensates the adjustment above when the threshold is checked */
+                u4_abs_level_actual = 1;
+
+                /* Initialize VLC table (Suffix Length) to encode the level */
+                if (u4_total_coeff > 10)
+                {
+                    u4_suffix_length = 1;
+                }
+            }
+
+            /* i4_sign is 0 for a non-negative level and -1 for a negative one
+             * (relies on the right shift of a negative value being arithmetic) */
+            i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+            u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+            u4_abs_level_actual += u4_abs_level;
+
+            u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+
+            while (1)
+            {
+                UWORD32 u4_codesize;
+                UWORD32 u4_codeword;
+                UWORD32 u4_codeval;
+
+                u4_remaining_coeff--;
+
+GATHER_CAVLC_STATS1();
+
+                {
+                    /* 2 * |level| - 2 for positive, 2 * |level| - 1 for negative levels */
+                    u4_codeval = u4_abs_level << 1;
+                    u4_codeval = u4_codeval - 2 - i4_sign;
+
+                    if ((!u4_suffix_length) && (u4_escape > 7) && (u4_abs_level < 16))
+                    {
+                        /* 19 bit code word used with suffix length 0 */
+                        u4_codeword = (1 << 4) + (u4_codeval - 14);
+                        u4_codesize = 19;
+                    }
+                    else if (u4_escape > 7)
+                    {
+                        /* 28 bit escape code word */
+                        u4_codeword = (1 << 12) + (u4_codeval - (15 << u4_suffix_length));
+                        u4_codesize = 28;
+                        if (!u4_suffix_length)
+                        {
+                            u4_codeword -= 15;
+                        }
+                    }
+                    else
+                    {
+                        /* regular prefix + suffix code word */
+                        u4_codeword = (1 << u4_suffix_length) + (u4_codeval & ((1 << u4_suffix_length)-1));
+                        u4_codesize = (u4_codeval >> u4_suffix_length) + 1 + u4_suffix_length;
+                    }
+                }
+
+                /*put the level code in bitstream*/
+                DEBUG("\nLEVEL: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+                ENTROPY_TRACE("\tcodeword ",u4_codeword);
+                ENTROPY_TRACE("\tcodesize ",u4_codesize);
+                error_status = ih264e_cavlc_put_bits_keep_err(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+
+                if (u4_remaining_coeff == 0) break;
+
+                /*update suffix length for next level*/
+                if (u4_suffix_length == 0)
+                {
+                    u4_suffix_length++;
+                }
+                if (u4_suffix_length < 6)
+                {
+                    if (u4_abs_level_actual > gu1_threshold_vlc_level[u4_suffix_length])
+                    {
+                        u4_suffix_length++;
+                    }
+                }
+
+                /* next level */
+                i4_level      = pi2_res_block[u4_remaining_coeff-1];
+
+                DEBUG("\n \t%d coeff,",i4_level);
+                ENTROPY_TRACE("\tcoeff ",i4_level);
+
+                i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+                u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+                u4_abs_level_actual = u4_abs_level;
+
+                u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+            }
+        }
+
+        DEBUG("\n \t %d totalzeros",u4_tot_zeros);
+        ENTROPY_TRACE("\ttotal zeros ",u4_tot_zeros);
+
+        /* Write Total Zeros (skipped when the block holds the maximum number of coefficients) */
+        if (u4_total_coeff < u4_max_num_coeff)
+        {
+            WORD32 index;
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+
+            if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+            {
+                /* start offset into the chroma total zeros tables, indexed by (total coeff - 1) */
+                static const UWORD8 au1_index_zero_table_chroma[] = {0, 4, 7};
+                index = au1_index_zero_table_chroma[u4_total_coeff-1] + u4_tot_zeros;
+                u4_codesize = gu1_size_zero_table_chroma[index];
+                u4_codeword = gu1_code_zero_table_chroma[index];
+            }
+            else
+            {
+                index = gu1_index_zero_table[u4_total_coeff-1] + u4_tot_zeros;
+                u4_codesize = gu1_size_zero_table[index];
+                u4_codeword = gu1_code_zero_table[index];
+            }
+
+            DEBUG("\nTOTAL ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+            ENTROPY_TRACE("\tcodeword ",u4_codeword);
+            ENTROPY_TRACE("\tcodesize ",u4_codesize);
+            error_status = ih264e_cavlc_put_bits_keep_err(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+        }
+
+        /* Write Run Before */
+        if (u4_tot_zeros)
+        {
+            /* walk the coefficients from the last one down to index 1 */
+            UWORD32 u4_max_num_coef = u4_total_coeff-1;
+            UWORD32 u4_codeword;
+            UWORD32 u4_codesize;
+            UWORD32 u4_zeros_left = u4_tot_zeros;
+
+            while (u4_max_num_coef)
+            {
+                UWORD32 u4_run_before = pu1_zero_run[u4_max_num_coef];
+                UWORD32 u4_index;
+
+                if (u4_zeros_left > MAX_ZERO_LEFT)
+                {
+                    u4_index = gu1_index_run_table[MAX_ZERO_LEFT];
+                }
+                else
+                {
+                    u4_index = gu1_index_run_table[u4_zeros_left - 1];
+                }
+
+                u4_codesize = gu1_size_run_table[u4_index + u4_run_before];
+                u4_codeword = gu1_code_run_table[u4_index + u4_run_before];
+
+                DEBUG("\nRUN BEFORE ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+                ENTROPY_TRACE("\tcodeword ",u4_codeword);
+                ENTROPY_TRACE("\tcodesize ",u4_codesize);
+                error_status = ih264e_cavlc_put_bits_keep_err(ps_bit_stream, u4_codeword, u4_codesize, error_status);
+
+                /* stop as soon as every zero has been accounted for */
+                u4_zeros_left -= u4_run_before;
+                if (!u4_zeros_left)
+                {
+                    break;
+                }
+                u4_max_num_coef--;
+            }
+        }
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for the given subblock
+*
+* @par Description
+*  The four 4x4 blocks are coded in index order 0..3. For each block, nC is
+*  derived from the left/top nnz entries that are flagged available, the
+*  left/top nnz entries are then overwritten with the nnz of the current block,
+*  and finally the block is written with ih264e_write_coeff4x4_cavlc().
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context
+*
+* @param[in] pi2_res_block
+*  Pointers to residual blocks of all the partitions for the current subblk
+*  (containing levels in scan order)
+*
+* @param[in] pu1_nnz
+*  Total non-zero coefficients of all the partitions for the current subblk
+*
+* @param[in] pu2_sig_coeff_map
+*  Significant coefficient map of all the partitions for the current subblk
+*
+* @param[in] u4_block_type
+*  entropy coding block type
+*
+* @param[in] u4_ngbr_avlb
+*  top and left availability of all the partitions for the current subblk
+*  (packed, one byte per 4x4 block: low nibble non-zero => left available,
+*  high nibble non-zero => top available)
+*
+* @param[in, out] pu1_top_nnz
+*  pointer to the buffer containing nnz of all the subblks to the top;
+*  entries 0 and 1 are updated with the nnz of the blocks coded here
+*
+* @param[in, out] pu1_left_nnz
+*  pointer to the buffer containing nnz of all the subblks to the left;
+*  entries 0 and 1 are updated with the nnz of the blocks coded here
+*
+* @returns error status; when several 4x4 writes fail, the first failure is
+*  reported
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff8x8_cavlc(entropy_ctxt_t *ps_ent_ctxt,
+                                                  WORD16 **pi2_res_block,
+                                                  UWORD8 *pu1_nnz,
+                                                  UWORD16 *pu2_sig_coeff_map,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD32 u4_ngbr_avlb,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz)
+{
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+    UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run, *pu1_ngbr_avbl;
+    UWORD32 u4_nC;
+    UWORD8 u1_mb_a, u1_mb_b;
+    UWORD32 u4_blk;
+
+    /* byte k of the packed word holds the availability flags of block k */
+    pu1_ngbr_avbl = (void *)(&u4_ngbr_avlb);
+
+    for (u4_blk = 0; u4_blk < 4; u4_blk++)
+    {
+        IH264E_ERROR_T e_blk_status;
+
+        /* blocks 0,1 use left entry 0 and blocks 2,3 use left entry 1;
+         * blocks 0,2 use top entry 0 and blocks 1,3 use top entry 1 */
+        UWORD32 u4_left_idx = u4_blk >> 1;
+        UWORD32 u4_top_idx = u4_blk & 1;
+
+        /* encode ac block index 4x4 = u4_blk */
+        u1_mb_a = pu1_ngbr_avbl[u4_blk] & 0x0F;
+        u1_mb_b = pu1_ngbr_avbl[u4_blk] & 0xF0;
+
+        /* nC: sum of the available neighbours, rounded average when both exist */
+        u4_nC = 0;
+        if (u1_mb_a)
+            u4_nC += pu1_left_nnz[u4_left_idx];
+        if (u1_mb_b)
+            u4_nC += pu1_top_nnz[u4_top_idx];
+        if (u1_mb_a && u1_mb_b)
+            u4_nC = (u4_nC + 1) >> 1;
+
+        /* the current block becomes the left/top neighbour of later blocks */
+        pu1_left_nnz[u4_left_idx] = pu1_top_nnz[u4_top_idx] = pu1_nnz[u4_blk];
+
+        e_blk_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[u4_blk], pu1_nnz[u4_blk], u4_block_type, pu1_zero_run, u4_nC, ps_bitstream, pu2_sig_coeff_map[u4_blk]);
+
+        /* keep the first failure so that a later successful write cannot mask it */
+        if (error_status == IH264E_SUCCESS)
+        {
+            error_status = e_blk_status;
+        }
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function encodes luma and chroma residues of a macro block when
+*  the entropy coding mode chosen is cavlc.
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context
+*
+* @param[in] u4_mb_type
+*  current mb type
+*
+* @param[in] u4_cbp
+*  coded block pattern for the current mb
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
+                                            UWORD32 u4_mb_type,
+                                            UWORD32 u4_cbp)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* packed residue */
+    void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+
+    /* bit stream buffer */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* zero run */
+    UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run;
+
+    /* temp var */
+    UWORD32 u4_nC, u4_ngbr_avlb;
+    UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz;
+    UWORD16 au2_sig_coeff_map[4];
+    WORD16 *pi2_res_block[4];
+    UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+    tu_sblk_coeff_data_t *ps_mb_coeff_data;
+    ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4;
+
+    /* ngbr availability */
+    UWORD8 u1_mb_a, u1_mb_b;
+
+    /* cbp */
+    UWORD32 u4_cbp_luma = u4_cbp & 0xF, u4_cbp_chroma = u4_cbp >> 4;
+
+    /* mb indices */
+    WORD32 i4_mb_x, i4_mb_y;
+
+    /* derive neighbor availability */
+    i4_mb_x = ps_ent_ctxt->i4_mb_x;
+    i4_mb_y = ps_ent_ctxt->i4_mb_y;
+    pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);
+    /* left macroblock availability */
+    u1_mb_a = (i4_mb_x == 0 ||
+                    (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+    /* top macroblock availability */
+    u1_mb_b = (i4_mb_y == 0 ||
+                    (pu1_slice_idx[i4_mb_x-ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+
+    pu1_ngbr_avlb = (void *)(&u4_ngbr_avlb);
+    pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+    pu1_left_nnz = (UWORD8 *)&ps_ent_ctxt->u4_left_nnz_luma;
+
+    /* encode luma residue */
+
+    /* mb type intra 16x16 */
+    if (u4_mb_type == I16x16)
+    {
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        /* estimate nnz for the current mb */
+        u4_nC = 0;
+        if (u1_mb_a)
+            u4_nC += pu1_left_nnz[0];
+        if (u1_mb_b)
+            u4_nC += pu1_top_nnz[0];
+        if (u1_mb_a && u1_mb_b)
+            u4_nC = (u4_nC + 1) >> 1;
+
+        /* encode dc block */
+        ENTROPY_TRACE("Luma DC blk idx %d",0);
+        error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_LUMA_4x4_DC, pu1_zero_run, u4_nC, ps_bitstream, au2_sig_coeff_map[0]);
+
+        e_entropy_blk_type = CAVLC_LUMA_4x4_AC;
+    }
+
+    if (u4_cbp_luma & 1)
+    {
+        /* encode ac block index 8x8 = 0*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+        /* derive sub block neighbor availability */
+
+        pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[3] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d",0);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    if (u4_cbp_luma & 2)
+    {
+        /* encode ac block index 8x8 = 1*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[1] = pu1_ngbr_avlb[0] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[3] = pu1_ngbr_avlb[2] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d",1);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz);
+    }
+    else
+    {
+        (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    if (u4_cbp_luma & 0x4)
+    {
+        /* encode ac block index 8x8 = 2*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[2] = pu1_ngbr_avlb[0] = (1 << 4) | u1_mb_a;
+        pu1_ngbr_avlb[1] = pu1_ngbr_avlb[3] = 0x11;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d",2);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, (pu1_left_nnz+2));
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+    }
+
+    if (u4_cbp_luma & 0x8)
+    {
+        /* encode ac block index 8x8 = 3*/
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        /* derive sub block neighbor availability */
+        u4_ngbr_avlb = 0x11111111;
+        /* encode sub blk */
+        ENTROPY_TRACE("Luma blk idx %d",3);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz+2);
+    }
+    else
+    {
+        (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+        (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+    }
+
+    /* encode chroma residue */
+    if (u4_cbp_chroma & 3)
+    {
+        /* parse packed coeff data structure for residual data */
+        /* cb, cr */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+
+        /* encode dc block */
+        /* cb, cr */
+        ENTROPY_TRACE("Chroma DC blk idx %d",0);
+        error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[0]);
+        ENTROPY_TRACE("Chroma DC blk idx %d",1);
+        error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[1], au1_nnz[1], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[1]);
+    }
+
+    pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+    pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_cbcr;
+
+    /* encode sub blk */
+    if (u4_cbp_chroma & 0x2)
+    {
+        /* encode ac block index 8x8 = 0*/
+        /* derive sub block neighbor availability */
+        pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+        pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+        pu1_ngbr_avlb[3] = 0x11;
+
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        ENTROPY_TRACE("Chroma AC blk idx %d",0);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    pu1_top_nnz += 2;
+    pu1_left_nnz += 2;
+
+    /* encode sub blk */
+    if (u4_cbp_chroma & 0x2)
+    {
+        /* parse packed coeff data structure for residual data */
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+        PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+        ENTROPY_TRACE("Chroma AC blk idx %d",1);
+        error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+    }
+    else
+    {
+        pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+    }
+
+    /* store the index of the next mb coeff data */
+    ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+
+    return error_status;
+}
+/* Stream offset in bits: (u4_strm_buf_offset << 3) + 32 - i4_bits_left_in_cw. NOTE(review): argument is not parenthesised - pass a plain pointer name */
+#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + 32 - ps_bitstream->i4_bits_left_in_cw)
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes the CAVLC coded macroblock layer of one mb of an Intra Slice.
+*
+* @description
+*  The packed mb header is read from pv_mb_header_data and written as mb type,
+*  luma sub modes (if present), chroma mb mode, coded block pattern, mb qp
+*  delta and luma/chroma residue. These syntax elements are written as directed
+*  by table 7.3.5 of h264 specification.
+*
+* @param[in,out] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns status returned by ih264e_encode_residue()
+*
+* @remarks advances pv_mb_header_data; adds to u4_header_bits[0], u4_residue_bits[0]
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* bit stream ptr */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* packed header data */
+    UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+    /* mb header info */
+    /*
+     * mb_tpm : first header byte, mb type (bits 0-3) plus modes (bits 4-7)
+     * mb_type : low nibble of mb_tpm
+     * cbp : second header byte (I16x16 splits it: luma bits 0-3, chroma 4-7)
+     * mb_qp_delta : third header byte
+     * chroma_intra_mode : mb_tpm bits 6-7
+     * luma_intra_mode : mb_tpm bits 4-5 (used for I16x16 only)
+     */
+    WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+    WORD8 mb_qp_delta;
+
+    /* temp var */
+    WORD32 i, mb_type_stream;
+
+    WORD32 bitstream_start_offset, bitstream_end_offset;
+
+    /* Starting bitstream offset for header in bits */
+    bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    /* mb header info : three bytes common to every mb type handled here */
+    mb_tpm = *pu1_byte++;
+    cbp = *pu1_byte++;
+    mb_qp_delta = *pu1_byte++;
+
+    /* mb type */
+    mb_type = mb_tpm & 0xF;
+    /* intra 16x16 : pred mode and cbp are folded into the mb type code */
+    if (mb_type == I16x16)
+    {
+        UWORD32 u4_cbp_l, u4_cbp_c;
+
+        u4_cbp_c = (cbp >> 4);
+        u4_cbp_l = (cbp & 0xF);
+        luma_intra_mode = (mb_tpm >> 4) & 3;
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        mb_type_stream =  luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I4x4)
+    {
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+        for (i = 0; i < 16; i += 2)
+        {
+            /* first sub blk of the pair : low nibble of the next header byte */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* second sub blk of the pair : high nibble of the same byte */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I8x8)
+    {
+        /* transform 8x8 flag */
+        UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+        /* NOTE(review): unconditional assert - I8x8 looks unsupported here; confirm */
+        ASSERT(0);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+        /* u4_transform_size_8x8_flag */
+        PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+        /* write sub block modes : four header bytes, two packed modes each */
+        for (i = 0; i < 4; i++)
+        {
+            /* first mode of the pair : low nibble of the next header byte */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* second mode of the pair : high nibble of the same byte */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else /* any other mb type : no mb type / mode syntax is written here */
+    {
+    }
+
+    /* coded_block_pattern : column 0 of gu1_cbp_map_tables, not sent for I16x16 */
+    if (mb_type != I16x16)
+    {
+        PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][0], error_status, "coded_block_pattern");
+    }
+
+    if (cbp || mb_type == I16x16)
+    {
+        /* mb_qp_delta */
+        PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+    }
+
+    /* Ending bitstream offset for header in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* Starting bitstream offset for residue */
+    bitstream_start_offset = bitstream_end_offset;
+
+    /* residual. NOTE(review): this assignment replaces whatever the PUT_BITS* macros above may have stored in error_status - confirm intended */
+    error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+    /* Ending bitstream offset for residue in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+    ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+    /* store the index of the next mb syntax layer */
+    ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes the CAVLC coded macroblock layer of one mb of an Inter slice.
+*
+* @description
+*  A skipped mb only increments the skip run. For any other mb the skip run is
+*  written first, then mb type, luma sub modes or motion vectors, chroma mb
+*  mode (intra only), coded block pattern, mb qp delta and luma/chroma residue.
+*  These syntax elements are written as directed by table 7.3.5 of h264 spec.
+*
+* @param[in,out] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns status returned by ih264e_encode_residue()
+*
+* @remarks advances pv_mb_header_data; adds to u4_header_bits[], u4_residue_bits[]
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* bit stream ptr */
+    bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+    /* packed header data */
+    UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+    /* mb header info */
+    /*
+     * mb_tpm : first header byte, mb type (bits 0-3) plus modes (bits 4-7)
+     * mb_type : low nibble of mb_tpm
+     * cbp : second header byte (not present for a skipped mb)
+     * mb_qp_delta : third header byte (not present for a skipped mb)
+     * chroma_intra_mode : mb_tpm bits 6-7
+     * luma_intra_mode : mb_tpm bits 4-5 (used for I16x16 only)
+     * cbptable : column of gu1_cbp_map_tables, 0 for I4x4 / I8x8 and 1
+     * otherwise; is_inter : index into u4_header_bits / u4_residue_bits
+     */
+    WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+    WORD8 mb_qp_delta;
+
+    /* temp var */
+    WORD32 i, mb_type_stream, cbptable = 1;
+
+    WORD32 is_inter = 0;
+
+    WORD32 bitstream_start_offset, bitstream_end_offset;
+
+    /* Starting bitstream offset for header in bits */
+    bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    /* mb header info */
+    mb_tpm = *pu1_byte++;
+
+    /* mb type */
+    mb_type = mb_tpm & 0xF;
+
+    /* check for skip : the header of a skipped mb is this single byte */
+    if (mb_type == PSKIP)
+    {
+        UWORD32 *nnz;
+
+        is_inter = 1;
+
+        /* increment skip counter */
+        (*ps_ent_ctxt->pi4_mb_skip_run)++;
+
+        /* store the index of the next mb syntax layer */
+        ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+        /* set nnz to zero : left and top entries of this mb, luma and cbcr (top entries cleared through one UWORD32 store each) */
+        ps_ent_ctxt->u4_left_nnz_luma = 0;
+        nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+        *nnz = 0;
+        ps_ent_ctxt->u4_left_nnz_cbcr = 0;
+        nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+        *nnz = 0;
+
+        /* residual : called with P16x16 and cbp 0 for a skipped mb */
+        error_status = ih264e_encode_residue(ps_ent_ctxt, P16x16, 0);
+
+        bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+        ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+        return error_status;
+    }
+
+    /* remaining mb header info */
+    cbp = *pu1_byte++;
+    mb_qp_delta = *pu1_byte++;
+
+    /* mb skip run : skip count accumulated since the last coded mb */
+    PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
+
+    /* reset skip counter */
+    *ps_ent_ctxt->pi4_mb_skip_run = 0;
+
+    /* intra 16x16 : pred mode and cbp are folded into the mb type code */
+    if (mb_type == I16x16)
+    {
+        UWORD32 u4_cbp_l, u4_cbp_c;
+
+        is_inter = 0;
+
+        u4_cbp_c = (cbp >> 4);
+        u4_cbp_l = (cbp & 0xF);
+        luma_intra_mode = (mb_tpm >> 4) & 3;
+        chroma_intra_mode = (mb_tpm >> 6);
+
+        mb_type_stream =  luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+        /* intra mb type codes are offset by 5 in this function (I4x4 below is written as 5) */
+        mb_type_stream += 5;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I4x4)
+    {
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        is_inter = 0;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+        cbptable = 0;
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+        for (i = 0; i < 16; i += 2)
+        {
+            /* first sub blk of the pair : low nibble of the next header byte */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* second sub blk of the pair : high nibble of the same byte */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else if (mb_type == I8x8)
+    {
+        /* transform 8x8 flag */
+        UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+        /* mb sub blk modes */
+        WORD32 intra_pred_mode_flag, rem_intra_mode;
+        WORD32 byte;
+
+        is_inter = 0;
+
+        chroma_intra_mode = (mb_tpm >> 6);
+        cbptable = 0;
+        /* NOTE(review): unconditional assert - I8x8 looks unsupported here; confirm */
+        ASSERT(0);
+
+        /* write mb type */
+        PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+        /* u4_transform_size_8x8_flag */
+        PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+        /* write sub block modes : four header bytes, two packed modes each */
+        for (i = 0; i < 4; i++)
+        {
+            /* first mode of the pair : low nibble of the next header byte */
+            byte = *pu1_byte++;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+
+            /* second mode of the pair : high nibble of the same byte */
+            byte >>= 4;
+
+            intra_pred_mode_flag = byte & 0x1;
+
+            /* prev_intra4x4_pred_mode_flag */
+            PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+            /* rem_intra4x4_pred_mode : nibble bits 1-3, only when the flag is 0 */
+            if (!intra_pred_mode_flag)
+            {
+                rem_intra_mode = (byte & 0xF) >> 1;
+                PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+            }
+        }
+
+        /* intra_chroma_pred_mode */
+        PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+    }
+    else
+    {
+        /* inter macro block partition cnt, indexed by mb_type - 3 */
+        const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
+
+        /* mv ptr : motion vectors follow the three header bytes as WORD16 pairs */
+        WORD16 *pi2_mv_ptr = (WORD16 *)pu1_byte;
+
+        /* number of partitions for the current mb. NOTE(review): assumes 3 <= mb_type <= 6 on this path, there is no range check - confirm */
+        UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3];
+
+        is_inter = 1;
+
+        /* write mb type : inter types are coded as mb_type - 3 */
+        PUT_BITS_UEV(ps_bitstream, mb_type - 3, error_status, "mb type");
+
+        for (i = 0; i < (WORD32)u4_part_cnt; i++)
+        {
+            PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x");
+
+            PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y");
+        }
+
+        pu1_byte = (UWORD8 *)pi2_mv_ptr;
+    }
+
+    /* coded_block_pattern : column cbptable of gu1_cbp_map_tables, not sent for I16x16 */
+    if (mb_type != I16x16)
+    {
+        PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern");
+    }
+
+    if (cbp || mb_type == I16x16)
+    {
+        /* mb_qp_delta */
+        PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+    }
+
+
+    /* Ending bitstream offset for header in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+    /* start bitstream offset for residue in bits */
+    bitstream_start_offset = bitstream_end_offset;
+
+    /* residual. NOTE(review): this assignment replaces whatever the PUT_BITS* macros above may have stored in error_status - confirm intended */
+    error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+    /* Ending bitstream offset for residue in bits */
+    bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+    ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+    /* store the index of the next mb syntax layer */
+    ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+    return error_status;
+}
diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h
new file mode 100755
index 0000000..86f4cd4
--- /dev/null
+++ b/encoder/ih264e_cavlc.h
@@ -0,0 +1,112 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_cavlc.h
+*
+* @brief
+*  This file contains enumerations, macros and extern declarations of H264
+*  cavlc tables
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef IH264E_CAVLC_H_
+#define IH264E_CAVLC_H_
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+/* Reads one block record at pv_mb_coeff_data and advances pv_mb_coeff_data past it; u4_sig_coeff_map and pi2_res_block are assigned only when u4_nnz != 0 */
+#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block)   \
+    {\
+                ps_mb_coeff_data = pv_mb_coeff_data; \
+                u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff;    /* nnz : low 8 bits */ \
+                if (u4_nnz)\
+                {\
+                    u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; /* sig map : bits 16 and up */ \
+                    pi2_res_block = ps_mb_coeff_data->ai2_residue; \
+                    pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + u4_nnz; /* skip u4_nnz residue entries */ \
+                }\
+                else\
+                {\
+                  pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue; /* no residue stored for this block */ \
+                }\
+    }
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for an Intra Slice.
+*
+* @description
+*  The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification.
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function generates CAVLC coded bit stream for Inter slices
+*
+* @description
+*  The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+*  (if present), mb qp delta, coded block pattern, chroma mb mode and
+*  luma/chroma residue. These syntax elements are written as directed by table
+*  7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+*  pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+#endif /* IH264E_CAVLC_H_ */
diff --git a/encoder/ih264e_config.h b/encoder/ih264e_config.h
new file mode 100755
index 0000000..2446cdb
--- /dev/null
+++ b/encoder/ih264e_config.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_config.h
+*
+* @brief
+*  contains any necessary declarations/definitions that are used during codec
+*  build
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef IH264E_CONFIG_H_
+#define IH264E_CONFIG_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+#define CAVLC_LEVEL_STATS 0
+#define GATING_STATS 0
+#define DEBUG_PRINT 0
+#define ENABLE_TRACE 0
+#define DEBUG_RC 0
+#define TRACE_SUPPORT 0
+
+#endif /* IH264E_CONFIG_H_ */
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c
new file mode 100755
index 0000000..5ba18de
--- /dev/null
+++ b/encoder/ih264e_core_coding.c
@@ -0,0 +1,2365 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_core_coding.c
+ *
+ * @brief
+ *  This file contains routines that perform luma and chroma core coding for
+ *  intra macroblocks
+ *
+ * @author
+ *  ittiam
+ *
+ * @par List of Functions:
+ *  - ih264e_pack_l_mb_i16()
+ *  - ih264e_pack_c_mb_i8()
+ *  - ih264e_code_luma_intra_macroblock_16x16()
+ *  - ih264e_code_luma_intra_macroblock_4x4()
+ *  - ih264e_code_chroma_intra_macroblock_8x8()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264_trans_data.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the DCT transform, then the Hadamard transform
+*  and quantization for a macroblock when the mb mode is intra 16x16 mode
+*
+* @par Description:
+*  First  cf4 is done on all 16 4x4 blocks of the 16x16 input block.
+*  Then hadamard transform is done on the DC coefficients
+*  Quantization is then performed on the 16x16 block, 4x4 wise
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*
+* @param[out] pi2_out
+*  Pointer to residual sub-block
+*  The output will be in linear format
+*  The first 16 contiguous locations will contain the values of Dc block
+*  After DC block and a stride 1st AC block will follow
+*  After one more stride next AC block will follow
+*  The blocks will be in raster scan order
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] dst_strd
+*  Destination stride
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz
+*  Memory to store the non-zeros after transform
+*  The first byte will be the nnz of DC block
+*  From the next byte the AC nnzs will be stored in raster scan order
+*
+* @param[in] u4_dc_flag
+*  Signals if Dc transform is to be done or not
+*   1 -> Dc transform will be done
+*   0 -> Dc transform will not be done
+*
+* @remarks
+*  ps_codec supplies pf_resi_trans_quant_4x4 and pf_hadamard_quant_4x4
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
+                                                UWORD8 *pu1_src,
+                                                UWORD8 *pu1_pred,
+                                                WORD16 *pi2_out,
+                                                WORD32 src_strd,
+                                                WORD32 pred_strd,
+                                                WORD32 dst_strd,
+                                                const UWORD16 *pu2_scale_matrix,
+                                                const UWORD16 *pu2_threshold_matrix,
+                                                UWORD32 u4_qbits,
+                                                UWORD32 u4_round_factor,
+                                                UWORD8 *pu1_nnz,
+                                                UWORD32 u4_dc_flag)
+
+{
+    WORD32 blk_cntr;
+    WORD32 i4_offsetx, i4_offsety;
+    UWORD8 *pu1_curr_src, *pu1_curr_pred;
+    /* start of pi2_out : one dc slot per 4x4 block is handed to the 4x4 kernel below */
+    WORD16 *pi2_dc_str = pi2_out;
+
+    /* Move to the ac addresses */
+    pu1_nnz++;
+    pi2_out += dst_strd;
+
+    for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
+    {
+        IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
+
+        pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
+        pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
+
+        ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
+                                          pi2_out + blk_cntr * dst_strd,
+                                          src_strd, pred_strd, pu2_scale_matrix,
+                                          pu2_threshold_matrix, u4_qbits,
+                                          u4_round_factor, &pu1_nnz[blk_cntr],
+                                          &pi2_dc_str[blk_cntr]);
+
+    }
+    /* without the dc transform only the per-block results above are produced */
+    if (!u4_dc_flag)
+        return;
+
+    /*
+     * In case of i16x16, we need to remove the contribution of dc coeffs into
+     * nnz of each block. We are doing that in the packing function
+     */
+
+    /* Adjust pointers to point to dc values */
+    pi2_out -= dst_strd;
+    pu1_nnz--;
+    /* the dc pass is called with one more qbit and a doubled round factor */
+    u4_qbits++;
+    u4_round_factor <<= 1;
+    /* source and destination are the same buffer here (pi2_dc_str == pi2_out) */
+    ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
+                                    pu2_threshold_matrix, u4_qbits,
+                                    u4_round_factor, &pu1_nnz[0]);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the intra 16x16 inverse transform process for H264
+*  it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+*  Input data, 16x16 size
+*  First 16 mem locations will have the DC coeffs in raster scan order in linear fashion
+*  after a stride, the 1st AC block will be present, again in raster scan order
+*  Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+*  The predicted data, 16x16 size
+*  Block by block form
+*
+* @param[in] pu1_out
+*  Output 16x16
+*  In block by block form
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  input stride for prediction buffer
+*
+* @param[in] out_strd
+*  input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+*  Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+*  weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+*  QP/6
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+*  Controls the transform path
+*  A total of 17 bits are used
+*  the 16th bit will correspond to the DC block
+*  and bits 32-17 will correspond to the AC blocks in raster scan order
+*  a bit equaling zero indicates that the entire 4x4 block is zero for DC
+*  For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+*  none
+*
+* @remarks
+*  The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+                                                    WORD16 *pi2_src,
+                                                    UWORD8 *pu1_pred,
+                                                    UWORD8 *pu1_out,
+                                                    WORD32 src_strd,
+                                                    WORD32 pred_strd,
+                                                    WORD32 out_strd,
+                                                    const UWORD16 *pu2_iscale_mat,
+                                                    const UWORD16 *pu2_weigh_mat,
+                                                    UWORD32 qp_div,
+                                                    UWORD32 u4_cntrl,
+                                                    UWORD32 u4_dc_trans_flag,
+                                                    WORD32 *pi4_tmp)
+{
+    /* Start index for inverse quant in a 4x4 block: 1 when dc is handled separately */
+    WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
+
+    /* Cntrl bits for 4x4 transforms
+     * u4_blk_cntrl       : controls if a 4x4 block should be processed in ac path
+     * u4_dc_cntrl        : controls if a 4x4 block is to be processed in dc path
+     *                    : dc block must contain only single dc coefficient
+     * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+     *                    : ie not (ac or dc)
+     */
+    UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+
+    /* tmp registers for block ids */
+    UWORD32 u4_blk_id;
+
+    /* Subscripts */
+    WORD32 i4_offset_x, i4_offset_y;
+
+    UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
+
+    /* Src and stride for dc coeffs */
+    UWORD32 u4_dc_inc;
+    WORD16 *pi2_dc_src;
+
+    /*
+     * For intra blocks we need to do inverse dc transform
+     * In case of intra blocks, it is here that we populate the dc bits in cntrl
+     * as they cannot be populated any earlier
+     */
+    if (u4_dc_trans_flag)
+    {
+        UWORD32 cntr;
+        /* Do inv hadamard in place: source and destination are both pi2_src */
+        ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
+                                           pu2_weigh_mat, qp_div, pi4_tmp);
+
+        /* Update the cntrl flag: bit (15 - n) marks a nonzero dc coeff n */
+        u4_dc_cntrl = 0;
+        for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
+        {
+            u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
+        }
+        /* Keep a dc bit only if the corresponding ac bit is 0 */
+        u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+        /* Combine both ac and dc bits */
+        u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
+                        | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
+    }
+
+    /* Source for dc coeffs
+     * If the block is intra, we have to read dc values from first row of src
+     * then stride for each block is 1, otherwise it is the src stride
+     */
+    pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
+    u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
+
+    /* The AC blocks start from the 2nd row */
+    pi2_src += src_strd;
+
+    /* Get the block bits (dc bits are moved up to the top 16 bits) */
+    u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
+    u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
+    u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
+
+    /* Get first block to process */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        /* Compute address of src blocks */
+        WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
+
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        /* Compute address of out and pred blocks */
+        pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        /* Do inv dc transform */
+        ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
+                                                pu1_cur_prd_blk,
+                                                pu1_cur_out_blk, pred_strd,
+                                                out_strd, pu2_iscale_mat,
+                                                pu2_weigh_mat, qp_div, NULL,
+                                                iq_start_idx,
+                                                pi2_dc_src + i4_src_offset);
+        /* Get next DC block to process */
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    }
+
+    /* now process ac/mixed blocks; the dc value is addressed with u4_dc_inc */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        WORD32 i4_src_offset = src_strd * u4_blk_id;
+        WORD32 i4_dc_offset = u4_dc_inc * u4_blk_id;
+
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
+                                             pu1_cur_prd_blk, pu1_cur_out_blk,
+                                             pred_strd, out_strd,
+                                             pu2_iscale_mat, pu2_weigh_mat,
+                                             qp_div, (WORD16*) pi4_tmp,
+                                             iq_start_idx,
+                                             pi2_dc_src + i4_dc_offset);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    }
+
+    /* Now process empty blocks: the prediction is copied to the output */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
+                                          pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
+                                          SIZE_4X4_BLK_VERT, 0, 0);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the DCT transform, then the Hadamard transform
+*  and quantization for a chroma macroblock
+*
+* @par Description:
+*  First cf4 is done on all 8 4x4 blocks of the 8x8 input block (both planes)
+*  Then hadamard transform is done on the DC coefficients
+*  Quantization is then performed on the 8x8 block, 4x4 wise
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*  The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*  Prediction is in inter leaved format
+*
+* @param[in] pi2_out
+*  Pointer to residual sub-block
+*  The output will be in linear format
+*  The first 4 continuous locations will contain the values of DC block for U
+*  and then next 4 will contain for V.
+*  After DC block and a stride 1st AC block of U plane will follow
+*  After one more stride next AC block of V plane will follow
+*  The blocks will be in raster scan order
+*
+*  After all the AC blocks of U plane AC blocks of V plane will follow in exact
+*  same way
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] dst_strd
+*  Destination stride
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz_c
+*  Memory to store the non-zeros after transform
+*  The first byte will be the nnz of DC block for U plane
+*  From the next byte the AC nnzs will be stored in raster scan order
+*  The byte at index 5 will be nnz of DC block of V plane
+*  Then AC blocks will follow
+*
+* @note
+*  Unlike the luma 16x16 variant, there is no u4_dc_flag argument:
+*  the 2x2 hadamard transform and quantization of the DC coefficients
+*  is always done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
+                                                UWORD8 *pu1_src,
+                                                UWORD8 *pu1_pred,
+                                                WORD16 *pi2_out,
+                                                WORD32 src_strd,
+                                                WORD32 pred_strd,
+                                                WORD32 out_strd,
+                                                const UWORD16 *pu2_scale_matrix,
+                                                const UWORD16 *pu2_threshold_matrix,
+                                                UWORD32 u4_qbits,
+                                                UWORD32 u4_round_factor,
+                                                UWORD8 *pu1_nnz_c)
+{
+    /* DC slots filled through the 4x4 calls, input of the 2x2 hadamard */
+    WORD16 ai2_dc_coeffs[8];
+
+    /* nnz of the two DC blocks as written by the hadamard stage */
+    UWORD8 au1_dc_nnz[2];
+
+    /* AC coeffs start one stride after the DC row, AC nnz after byte 0 */
+    WORD16 *pi2_ac_out = pi2_out + out_strd;
+    UWORD8 *pu1_ac_nnz = pu1_nnz_c + 1;
+
+    WORD32 i4_blk;
+
+    for (i4_blk = 0; i4_blk < NUM_CHROMA4x4_BLOCKS_IN_MB; i4_blk++)
+    {
+        WORD32 i4_x, i4_y;
+        UWORD8 *pu1_blk_src, *pu1_blk_pred;
+
+        /* For chroma, v plane nnz is populated from position 5 */
+        WORD32 i4_nnz_idx = i4_blk + ((i4_blk > 3) ? 1 : 0);
+
+        IND2SUB_CHROMA_MB(i4_blk, i4_x, i4_y);
+
+        pu1_blk_src = pu1_src + i4_x + i4_y * src_strd;
+        pu1_blk_pred = pu1_pred + i4_x + i4_y * pred_strd;
+
+        ps_codec->pf_resi_trans_quant_chroma_4x4(
+                        pu1_blk_src, pu1_blk_pred,
+                        pi2_ac_out + i4_blk * out_strd, src_strd, pred_strd,
+                        pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
+                        u4_round_factor, &pu1_ac_nnz[i4_nnz_idx],
+                        &ai2_dc_coeffs[i4_blk]);
+    }
+
+    /* DC pass: output goes to row 0 of pi2_out, with qbits + 1, 2 * round */
+    ps_codec->pf_hadamard_quant_2x2_uv(ai2_dc_coeffs, pi2_out,
+                                       pu2_scale_matrix,
+                                       pu2_threshold_matrix, u4_qbits + 1,
+                                       u4_round_factor << 1, au1_dc_nnz);
+
+    /* Copy the dc nnzs: byte 0 and byte 5 are the two DC slots */
+    pu1_nnz_c[0] = au1_dc_nnz[0];
+    pu1_nnz_c[5] = au1_dc_nnz[1];
+}
+
+/**
+*******************************************************************************
+* @brief
+*  This function performs the inverse transform with process for chroma MB of H264
+*
+* @par Description:
+*  Does inverse DC transform ,inverse quantization inverse transform
+*
+* @param[in] pi2_src
+*  Input data, 16x16 size
+*  The input is in the form of, first 4 locations will contain DC coeffs of
+*  U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
+*  in raster scan order will follow, each block as linear array in raster scan order.
+*  After a stride next AC block will follow. After all AC blocks of U plane
+*  V plane AC blocks will follow in exact same order.
+*
+* @param[in] pu1_pred
+*  The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+*  Output 8x16, U and V interleaved
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  input stride for prediction buffer
+*
+* @param[in] out_strd
+*  input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+*  Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+*  weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+*  QP/6
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
+*  in size
+*
+* @param[in] u4_cntrl
+*  Controls the transform path
+*  the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
+*  32-28 bits will indicate AC blocks of U plane in raster scan order
+*  27-23 bits will indicate AC blocks of V plane in raster scan order
+*  The bit 1 implies that there is at least one non zero coeff in a block
+*
+* @returns
+*  none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+                                                    WORD16 *pi2_src,
+                                                    UWORD8 *pu1_pred,
+                                                    UWORD8 *pu1_out,
+                                                    WORD32 src_strd,
+                                                    WORD32 pred_strd,
+                                                    WORD32 out_strd,
+                                                    const UWORD16 *pu2_iscale_mat,
+                                                    const UWORD16 *pu2_weigh_mat,
+                                                    UWORD32 qp_div,
+                                                    UWORD32 u4_cntrl,
+                                                    WORD32 *pi4_tmp)
+{
+    /* Cntrl bits for 4x4 transforms
+     * u4_blk_cntrl       : controls if a 4x4 block should be processed in ac path
+     * u4_dc_cntrl        : controls if a 4x4 block is to be processed in dc path
+     *                    : dc block must contain only single dc coefficient
+     * u4_empty_blk_cntrl : control for 4x4 block with no coeffs, ie no dc and ac
+     *                    : ie not (ac or dc)
+     */
+
+    UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
+
+    /* tmp registers for block ids */
+    WORD32 u4_blk_id;
+
+    /* Offsets for pointers */
+    WORD32 i4_offset_x, i4_offset_y;
+
+    /* Pointer to 4x4 blocks */
+    UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
+
+    /* Tmp register for pointer to dc coeffs */
+    WORD16 *pi2_dc_src;
+
+    /* Stand-in dc value used when the dc blocks are not coded */
+    WORD16 i2_zero = 0;
+
+    /* Increment for dc block */
+    WORD32 i4_dc_inc;
+
+    /*
+     * Lets do the inverse transform for dc coeffs in chroma
+     */
+    if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
+    {
+        UWORD32 cntr;
+        /* Do inv hadamard for u and v block, in place on pi2_src */
+
+        ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
+                                              pu2_weigh_mat, qp_div, NULL);
+        /*
+         * Update the cntrl flag
+         * Bit (15 - n) is set when dc coeff n (n = 0..7) is nonzero
+         */
+        u4_dc_cntrl = 0;
+        for (cntr = 0; cntr < 8; cntr++)
+        {
+            u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
+        }
+
+        /* Keep a dc bit only if the corresponding ac bit is 0 */
+        u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
+        /* Combine both ac and dc bits */
+        u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
+                        | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
+
+        /* Since we populated the dc coeffs, we have to read them from there */
+        pi2_dc_src = pi2_src;
+        i4_dc_inc = 1;
+    }
+    else
+    {
+        /* No dc block coded: every block reads the same zero dc value */
+        u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
+        pi2_dc_src = &i2_zero;
+        i4_dc_inc = 0;
+    }
+
+    /* Get the block bits (dc bits are moved up to the top 16 bits) */
+    u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
+    u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
+    u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
+
+    /* The AC blocks start from the 2nd row */
+    pi2_src += src_strd;
+
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    while (u4_blk_id < 8)
+    {
+        WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
+
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
+                        pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
+                        pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
+                        NULL, pi2_dc_src + dc_src_offset);
+        /* Get next DC block to process */
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
+    }
+
+    /* now process ac/mixed blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    while (u4_blk_id < 8)
+    {
+        WORD32 i4_src_offset = src_strd * u4_blk_id;
+        WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
+
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
+                                                    pu1_cur_4x4_prd_blk,
+                                                    pu1_cur_4x4_out_blk,
+                                                    pred_strd, out_strd,
+                                                    pu2_iscale_mat,
+                                                    pu2_weigh_mat, qp_div,
+                                                    (WORD16 *) pi4_tmp,
+                                                    pi2_dc_src + dc_src_offset);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
+    }
+
+    /* Now process empty blocks: the prediction is copied to the output */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    while (u4_blk_id < 8)
+    {
+        IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
+
+        pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
+        pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
+
+        ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
+                                     pred_strd, out_strd, SIZE_4X4_BLK_VERT,
+                                     SIZE_4X4_BLK_HRZ);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief  This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par   Description
+*  An i16 macro block contains two classes of units, dc 4x4 block and
+*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
+*  the 16 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  The first byte of each block will be nnz of the block, if it is non zero,
+*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
+*  This is repeated for 1 dc + 16 ac blocks.
+*
+* @param[in]  pi2_res_mb
+*  pointer to residue mb
+*
+* @param[in, out]  pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients
+*
+* @param[in]  i4_res_strd
+*  residual block stride
+*
+* @param[out]  u1_cbp_l
+*  coded block pattern luma
+*
+* @param[in]   pu1_nnz
+*  number of non zero coefficients in each 4x4 unit
+*
+* @param[out]  pu4_cntrl
+*  Control signal for inverse transform of 16x16 blocks
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
+                          void **pv_mb_coeff_data,
+                          WORD32 i4_res_strd,
+                          UWORD8 *u1_cbp_l,
+                          UWORD8 *pu1_nnz,
+                          UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* order in which the 4x4 blocks are visited */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4, u4_cntrl;
+
+    /*DC and AC coeff pointers*/
+    WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+
+    pi2_res_mb_dc = pi2_res_mb;
+    pu1_scan_order = gu1_luma_scan_order_dc;
+
+    u4_nnz = *pu1_nnz;
+    u4_cntrl = 0;
+
+    /* write number of non zero coefficients */
+    ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+    if (u4_nnz)
+    {
+        for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+        {
+            if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
+            {
+                /* write residue */
+                ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
+                u4_s_map |= mask;
+            }
+            mask <<= 1;
+        }
+        /* write significant coeff map into the upper 16 bits */
+        ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+        u4_cntrl = 0x00008000; /* Set DC bit in ctrl code */
+    }
+    else
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_nnz ++;
+    pu1_scan_order = gu1_luma_scan_order;
+    pi2_res_mb += i4_res_strd; /*Move to AC block*/
+
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /*
+         * Since this is a i16x16 block, we should not count dc coeff on indi
+         * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
+         * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
+         * here
+         */
+        u4_nnz -= (pi2_res_mb_ac[0] != 0);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map into the upper 16 bits */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_l = 15;
+
+            /* 1U: a signed 1 << 31 (block 0) would be undefined behaviour */
+            u4_cntrl |= (1U << (31 - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+    }
+
+    /* cbp still 0: rewind so that none of the AC block headers are kept */
+    if (!(*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    /* Store the cntrl signal */
+    (*pu4_cntrl) = u4_cntrl;
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief  This function packs residue of an p16x16 luma mb for entropy coding
+*
+* @par   Description
+*  A p16x16 macro block contains a single class of units: 16 4x4 blocks,
+*  each holding its own dc and ac coefficients. No separate dc block is
+*  sent; the 16 blocks are sent in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  The first byte of each block will be nnz of the block, if it is non zero,
+*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
+*  This is repeated for each of the 16 blocks.
+*
+* @param[in]  pi2_res_mb
+*  pointer to residue mb
+*
+* @param[in, out]  pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients
+*
+* @param[in]  i4_res_strd
+*  residual block stride
+*
+* @param[out]  u1_cbp_l
+*  coded block pattern luma
+*
+* @param[in]   pu1_nnz
+*  number of non zero coefficients in each 4x4 unit
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform
+*
+* @return none
+*
+* @remarks Killing coffs not yet coded
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_l,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block */
+    WORD16  *pi2_res_sb;
+
+    /* order in which the 4x4 blocks are visited */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* coeff cost */
+    const UWORD8  *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* temp var */
+    UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
+
+    /* temp var */
+    WORD32 i4_res_val, i4_run = -1, dcac_block;
+
+    /* When Hadamard transform is disabled, first row values are dont care, ignore them */
+    pi2_res_mb += i4_res_strd;
+
+    /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
+    pu1_nnz ++;
+
+    ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /********************************************************/
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        /* index of the 8x8 unit this 4x4 block belongs to */
+        b8 = b4 >> 2;
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                /* number of runs of zero before, this is used to compute coeff cost */
+                i4_run++;
+
+                i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                if (i4_res_val)
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
+                    u4_s_map |= mask;
+
+                    if (u4_thres_resi)
+                    {
+                        /* compute coeff cost */
+                        if (i4_res_val == 1 || i4_res_val == -1)
+                        {
+                            if (i4_run < 6)
+                                u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
+                        }
+                        else
+                            u4_b8_coeff_cost += 9;
+
+                        i4_run = -1;
+                    }
+                }
+
+                mask <<= 1;
+            }
+
+            /* write significant coeff map into the upper 16 bits */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+            /* cbp */
+            *u1_cbp_l |= (1 << b8);
+
+            /* Cntrl map for inverse transform computation
+             *
+             * A dc-only block is flagged at bit (15 - u1_scan_order[b4]), any
+             * other coded block at bit (31 - u1_scan_order[b4]).
+             * NOTE(review): coeff_cnt is >= 1 once the loop above has run, so
+             * the dc-only branch is never taken as written - confirm intent */
+            dcac_block = (coeff_cnt == 0) ? 15 : 31;
+            u4_cntrl |= (1U << (dcac_block - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        /* Decide if the 8x8 unit has to be sent for entropy coding? */
+        if ((b4+1) % 4 == 0)
+        {
+            if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
+                            (*u1_cbp_l & (1 << b8)) )
+            {
+
+
+                /*
+                 * When we want to reset the full 8x8 block, we have to reset
+                 * both the dc and ac coeff bits hence we have the symmetric
+                 * arrangement of bits
+                 */
+                const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
+
+                /* clear this 8x8 unit's cbp bit */
+                *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
+
+                /* correct cntrl flag */
+                u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
+
+                /* correct nnz */
+                pu1_nnz[u1_scan_order[b4 - 3]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 2]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 1]] = 0;
+                pu1_nnz[u1_scan_order[b4]] = 0;
+
+                /* reset blk cost */
+                u4_b8_coeff_cost = 0;
+            }
+
+            /* 8x8 unit not coded: rewind the output to where the unit began */
+            if (!(*u1_cbp_l & (1 << b8)))
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
+            }
+
+            u4_mb_coeff_cost += u4_b8_coeff_cost;
+
+            u4_b8_coeff_cost = 0;
+            i4_run = -1;
+            ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+        }
+    }
+
+    /* Whole mb below the cost threshold: drop every block packed above */
+    if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
+                    && (*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
+        *u1_cbp_l = 0;
+        u4_cntrl = 0;
+        memset(pu1_nnz, 0, 16);
+    }
+
+    (*pu4_cntrl) = u4_cntrl;
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief  This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par   Description
+*  An i8 chroma macro block contains two classes of units, dc 2x2 block and
+*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
+*  the 4 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  The first byte of each block will be nnz of the block, if it is non zero,
+*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
+*  This is repeated for 1 dc + 4 ac blocks.
+*
+* @param[in]  pi2_res_mb
+*  pointer to residue mb
+*
+* @param[in, out]  pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients
+*
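+* @param[in]  i4_res_strd
+*  residual block stride
+*
+* @param[in, out]  u1_cbp_c
+*  coded block pattern chroma
+*
+* @param[in, out]   pu1_nnz
+*  number of non zero coefficients in each 4x4 unit; the ac entries of a
+*  plane are cleared when the ac coefficients of that plane are dropped
+*
+* @param[in]   u4_thres_resi
+*  when non zero, the ac coefficients of a plane are dropped if their
+*  accumulated cost is below CHROMA_BLOCK_SKIP_THRESHOLD
+*
+* @param[out]   pu4_cntrl
+*  Control signal for inverse transform
+*
+* @param[in]   u4_swap_uv
+*  Swaps the order of U and V planes in entropy bitstream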
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_c,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl,
+                      UWORD32 u4_swap_uv)
+{
+    /*
+     * Layout used by this function (as indexed by the code below):
+     *   pi2_res_mb : row 0 holds the dc coeffs (U at +0, V at +4), rows 1..4
+     *                hold the 4 U ac blocks, rows 5..8 hold the 4 V ac blocks
+     *                (one row == i4_res_strd elements)
+     *   pu1_nnz    : [0] U dc, [1..4] U ac, [5] V dc, [6..9] V ac
+     *   *pu4_cntrl : bit 15 U dc, bit 14 V dc, bits 31..28 U ac blocks,
+     *                bits 27..24 V ac blocks
+     */
+
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
+    tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
+
+    /* nnz pointer */
+    UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
+
+    /* nnz counter */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block, res val */
+    WORD16 *pi2_res_sb, i2_res_val;
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4,plane;
+
+    /* temp var */
+    UWORD32 u4_coeff_cost;
+    WORD32 i4_run;
+
+    /* coeff cost */
+    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* pointer to packed buffer space */
+    UWORD32 *pu4_mb_coeff_data = NULL;
+
+    /* ac coded block pattern */
+    UWORD8 u1_cbp_ac;
+
+    /* Variable to store the current bit pos in cntrl variable*/
+    UWORD32 cntrl_pos = 0;
+
+    /*
+     * Iteration of the plane loops in which the U plane is handled.
+     * u4_swap_uv is treated as a boolean everywhere else in this function,
+     * so it is normalised here: any non zero value means V is packed first.
+     */
+    const UWORD32 u4_u_plane = (u4_swap_uv != 0) ? 1 : 0;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+    pu1_scan_order = gu1_chroma_scan_order_dc;
+    pi2_res_sb = pi2_res_mb;
+    pu1_nnz_dc = pu1_nnz;
+    (*pu4_cntrl) = 0;
+    cntrl_pos = 15;
+    ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
+
+    /* Color space conversion between SP_UV and SP_VU
+     * We always assume SP_UV for all the processing
+     * Hence to get proper stream output we need to swap U and V channels here
+     *
+     * For that there are two paths we need to look for
+     * One is the path to bitstream, these variables should have the proper
+     * input configured UV or VU
+     * For the other path the inverse transform variables should have whatever
+     * ordering the input had
+     */
+
+    if (u4_swap_uv)
+    {
+        pu1_nnz_dc += 5;/* Move to NNZ of V plane */
+        pi2_res_sb += 4;/* Move to DC coeff of V plane */
+
+        cntrl_pos = 14; /* Control bit for V plane */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = *pu1_nnz_dc;
+        /* write number of non zero coefficients U/V */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            /* walk the dc block in scan order until all nnz coeffs are found */
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+                if (i2_res_val)
+                {
+                    /* write residue U/V */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_c = 1;
+
+            (*pu4_cntrl) |= (1 << cntrl_pos);
+        }
+        else
+        {
+            /* empty block: only the nnz word is sent */
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        if (u4_swap_uv)
+        {
+            cntrl_pos++; /* Control bit for U plane */
+            pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
+            pi2_res_sb -= 4; /* Move to DC coeff of U plane */
+
+        }
+        else
+        {
+            cntrl_pos--; /* Control bit for V plane */
+            pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
+            pi2_res_sb += 4; /* Move to DC coeff of V plane */
+        }
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_scan_order = gu1_chroma_scan_order;
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    if (u4_swap_uv)
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
+        cntrl_pos = 27; /* The control bits are to be added for V block ie 31-4 th bit */
+        pu1_nnz_ac = pu1_nnz + 6;/* Move the nnz to V block NNZ 1 dc + 4 ac + 1 dc */
+    }
+    else
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
+        cntrl_pos = 31;
+        pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to U block NNZ, skipping 1 dc */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        /* start of this plane's ac data, needed if the plane gets reset */
+        pu4_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_coeff_cost = 0;
+        i4_run = -1;
+
+        /* get the current cbp, so that it automatically
+         * gets reverted in case of zero ac values */
+        u1_cbp_ac = *u1_cbp_c;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+            u4_nnz = *pu1_nnz_ac;
+
+            /*
+             * We are scanning only ac coeffs, but the nnz is for the
+             * complete 4x4 block. Hence we have to discount the nnz contributed
+             * by the dc coefficient
+             */
+            u4_nnz -= (pi2_res_sb[0]!=0);
+
+            /* write number of non zero coefficients U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+            if (u4_nnz)
+            {
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+                {
+                    i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                    /* zeros seen since the last costed coefficient */
+                    i4_run++;
+
+                    if (i2_res_val)
+                    {
+                        /* write residue U/V */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                        u4_s_map |= mask;
+
+                        /* the cost is not accumulated any further once it has
+                         * reached the skip threshold */
+                        if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
+                        {
+                            /* compute coeff cost */
+                            if (i2_res_val == 1 || i2_res_val == -1)
+                            {
+                                if (i4_run < 6)
+                                    u4_coeff_cost += pu1_coeff_cost[i4_run];
+                            }
+                            else
+                                u4_coeff_cost += 9;
+
+                            i4_run = -1;
+                        }
+                    }
+                    mask <<= 1;
+                }
+
+                /* write significant coeff map U/V */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+                u1_cbp_ac = 2;
+
+                (*pu4_cntrl) |= 1 << cntrl_pos;
+            }
+            else
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+
+            pu1_nnz_ac++;
+            pi2_res_sb += i4_res_strd;
+            cntrl_pos--;
+        }
+
+        /* reset block */
+        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
+        {
+            /* replace this plane's packed ac data by 4 words of zero, i.e.
+             * presumably 4 sub blocks with nnz == 0 (an empty sub block only
+             * advances the buffer up to ai2_residue, see above) */
+            pu4_mb_coeff_data[0] = 0;
+            pu4_mb_coeff_data[1] = 0;
+            pu4_mb_coeff_data[2] = 0;
+            pu4_mb_coeff_data[3] = 0;
+            (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
+
+            /* Generate the control signal */
+            /* Zero out the current plane's AC bits: 31..28 for U, 27..24 for V */
+            (*pu4_cntrl) &= ((plane == u4_u_plane) ? 0x0FFFFFFF : 0xF0FFFFFF);
+
+            /* Similarly do for the NNZ also */
+            *(pu1_nnz_ac - 4) = 0;
+            *(pu1_nnz_ac - 3) = 0;
+            *(pu1_nnz_ac - 2) = 0;
+            *(pu1_nnz_ac - 1) = 0;
+        }
+        else
+        {
+            *u1_cbp_c = u1_cbp_ac;
+        }
+
+        if (u4_swap_uv)
+        {
+            /* V was packed first, now go back to the U plane */
+            pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
+            cntrl_pos = 31; /* The control bits of the U block start at bit 31 */
+            pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to U block NNZ, skipping 1 dc */
+        }
+        else
+        {
+            /* pi2_res_sb and cntrl_pos have already advanced onto the V plane
+             * in the b4 loop; only the nnz pointer has to skip the V dc entry */
+            pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
+        }
+    }
+
+    /* restore the ptr basing on cbp */
+    if (*u1_cbp_c == 0)
+    {
+        /* nothing coded at all: drop dc and ac data */
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
+    }
+    else if (*u1_cbp_c == 1)
+    {
+        /* only dc coded: drop the ac data */
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i16x16, the mb is first
+*  predicted using one of i16x16 prediction filters, basing on the intra mode
+*  chosen. Then, error is computed between the input blk and the estimated blk.
+*  This error is transformed (hierarchical transform i.e., dct followed by hada-
+*  -mard), quantized. The quantized coefficients are packed in scan order for
+*  entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+    /* codec context and luma quantization parameters */
+    codec_t *ps_codec = ps_proc->ps_codec;
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* source, reconstruction and residue buffers of the current mb */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides of the buffers above and of the prediction buffer */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* chosen i16x16 mode; the plane mode has its own prediction buffer */
+    UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
+    UWORD8 *pu1_pred_mb = (u1_intra_mode == PLANE_I16x16) ?
+                    ps_proc->pu1_pred_mb_intra_16x16_plane :
+                    ps_proc->pu1_pred_mb_intra_16x16;
+
+    /* nnz of the sub blocks: 5 words, all zero, accessed byte wise */
+    UWORD32 au4_nnz[5] = {0};
+    UWORD8  *pu1_nnz = (UWORD8 *)au4_nnz;
+
+    /* coded block pattern that is returned to the caller */
+    UWORD8 u1_cbp_l = 0;
+
+    /* control signal for the inverse transform, written by the packer */
+    UWORD32 u4_cntrl;
+
+    /* packed mb coeff data, advanced by the packer */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* residue computation, forward transform (incl. dc transform), quant */
+    ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                               pu1_pred_mb, pi2_res_mb,
+                                               i4_src_strd, i4_pred_strd,
+                                               i4_res_strd,
+                                               ps_qp_params->pu2_scale_mat,
+                                               ps_qp_params->pu2_thres_mat,
+                                               ps_qp_params->u1_qbits,
+                                               ps_qp_params->u4_dead_zone,
+                                               pu1_nnz, ENABLE_DC_TRANSFORM);
+
+    /* pack the quantized coefficients for entropy coding */
+    ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+                         pu1_nnz, &u4_cntrl);
+
+    /*
+     * When the reference frame is not going to be computed, only the right
+     * and bottom border 4x4 blocks are needed to predict the following intra
+     * blocks, so all other blocks are masked out of the control signal.
+     */
+    if (!ps_proc->u4_compute_recon)
+    {
+        u4_cntrl &= 0x111F8000;
+    }
+
+    /* reconstruction */
+    if (0 == u4_cntrl)
+    {
+        /* nothing to inverse transform: recon is a copy of the prediction */
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
+                                          i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
+                                          0);
+    }
+    else
+    {
+        /* inverse dc transform, inverse quant, inverse transform, recon */
+        ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+                        ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+                        i4_res_strd, i4_pred_strd, i4_rec_strd,
+                        ps_qp_params->pu2_iscale_mat,
+                        ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+                        u4_cntrl, ENABLE_DC_TRANSFORM,
+                        ps_proc->pv_scratch_buff);
+    }
+
+    return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i4x4, the mb is first
+*  predicted using one of i4x4 prediction filters, basing on the intra mode
+*  chosen. Then, error is computed between the input blk and the estimated blk.
+*  This error is dct transformed and quantized. The quantized coefficients are
+*  packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to ref macro block (re-pointed to each 4x4 block in the loop) */
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* pointer to src macro block (re-pointed to each 4x4 block in the loop) */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+    /* pointer to prediction buffer; the same buffer is reused for every
+     * 4x4 block since it is never advanced in this function */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual buffer; likewise reused for every 4x4 block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* pointer to neighbors: left (a), top (b), top-right (c), top-left (d) */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+
+    /* intra mode; the i16 mode loaded here is never used, the variable is
+     * overwritten with the per sub block i4x4 mode before its first use */
+    UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+
+    /* neighbor pels for intra prediction, filled below as:
+     * [0..3] left pels (bottom to top), [4] top-left, [5..8] top,
+     * [9..12] top-right */
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_l = 0;
+
+    /* number of non zero coeffs*/
+    UWORD8  u1_nnz;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* pointer to packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* pointer to packed mb coeff data */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* Dummy variable for 4x4 trans function */
+    WORD16 i2_dc_dummy;
+
+    /* temp var */
+    UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
+
+    /* Process 16 4x4 lum sub-blocks of the MB in scan order */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        /* pel offset of the 8x8 block inside the MB */
+        u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
+        u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
+
+        /* if in case cbp for the 8x8 block is zero, send no residue */
+        ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            /* index of pel in MB */
+            u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
+            u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
+
+            /* Initialize source and reference pointers */
+            pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
+            pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
+
+            /* pointer to left of ref macro block */
+            pu1_mb_a = pu1_ref_mb - 1;
+            /* pointer to top of ref macro block */
+            pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+            /* pointer to topright of ref macro block */
+            pu1_mb_c = pu1_mb_b + 4;
+            /* pointer to topleft macro block */
+            pu1_mb_d = pu1_mb_b - 1;
+
+            /* compute neighbor availability */
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+
+            /* sub block intra mode */
+            u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
+
+            /********************************************************/
+            /* gather prediction pels from neighbors for prediction */
+            /********************************************************/
+            /* left pels, stored bottom row first */
+            if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
+            {
+                for (i = 0; i < 4; i++)
+                    pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4, 0, 4);
+            }
+
+            /* top pels */
+            if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+            }
+            /* top left pels */
+            if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
+            {
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            }
+            else
+            {
+                pu1_ngbr_pels_i4[4] = 0;
+            }
+            /* top right pels: copied when available, otherwise replicated
+             * from the last top pel when top is available. When neither is
+             * available entries [9..12] are left untouched */
+            if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
+            {
+                memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
+            }
+            else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+            {
+                memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
+            }
+
+            /********************************************************/
+            /*  prediction                                          */
+            /********************************************************/
+            (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
+                                                          pu1_pred_mb, 0,
+                                                          i4_pred_strd,
+                                                          i4_ngbr_avbl);
+
+            /********************************************************/
+            /*  error estimation,                                   */
+            /*  transform                                           */
+            /*  quantization                                        */
+            /********************************************************/
+            ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
+                                              pi2_res_mb, i4_src_strd,
+                                              i4_pred_strd,
+                                              ps_qp_params->pu2_scale_mat,
+                                              ps_qp_params->pu2_thres_mat,
+                                              ps_qp_params->u1_qbits,
+                                              ps_qp_params->u4_dead_zone,
+                                              &u1_nnz, &i2_dc_dummy);
+
+            /********************************************************/
+            /*  pack coeff data for entropy coding                  */
+            /********************************************************/
+            ps_mb_coeff_data = *pv_mb_coeff_data;
+
+            /* write number of non zero coefficients */
+            ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
+
+            if (u1_nnz)
+            {
+                /* walk the block in scan order until all u1_nnz non zero
+                 * coefficients have been written; mask tracks the scan
+                 * position in the significance map */
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
+                {
+                    if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
+                    {
+                        /* write residue */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+                        u4_s_map |= mask;
+                    }
+                    mask <<= 1;
+                }
+                /* write significant coeff map (upper 16 bits) */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+                /* update ptr to coeff data */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+                /* cbp */
+                u1_cbp_l |= (1 << b8);
+            }
+            else
+            {
+                /* empty block: only the nnz word is sent */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+
+            /********************************************************/
+            /*  ierror estimation,                                  */
+            /*  itransform                                          */
+            /*  iquantization                                       */
+            /********************************************************/
+            /* If the frame is not to be used for P frame reference or dumping recon
+             * we only will use the recon for only predicting intra Mbs
+             * This will need only right and bottom edge 4x4 blocks recon
+             * Hence we selectively enable them
+             *
+             * NOTE(review): 0xF888 has bits 3, 7, 11 and 12..15 set, which are
+             * the right column and bottom row when the 16 blocks are numbered
+             * in raster order. The index tested here, (b8 << 2) + b4, looks
+             * like a scan order index (it is converted to a raster position
+             * with the GET_*_RASTER_POS_* macros above) - TODO confirm that
+             * the mask selects the intended blocks. Also note that the
+             * neighbor pels gathered above are read from the recon buffer, so
+             * blocks skipped here presumably still feed the prediction of
+             * later blocks of this MB - verify.
+             */
+            if (ps_proc->u4_compute_recon || (0xF888 & (1 << ((b8 << 2) + b4))))
+            {
+                if (u1_nnz)
+                    ps_codec->pf_iquant_itrans_recon_4x4(
+                                    pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+                                    /*No input stride,*/i4_pred_strd,
+                                    i4_rec_strd, ps_qp_params->pu2_iscale_mat,
+                                    ps_qp_params->pu2_weigh_mat,
+                                    ps_qp_params->u1_qp_div,
+                                    ps_proc->pv_scratch_buff, 0, 0);
+                else
+                    /* no residue: recon is a copy of the prediction */
+                    ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
+                                                      i4_pred_strd, i4_rec_strd,
+                                                      BLK_SIZE, BLK_SIZE, NULL,
+                                                      0);
+            }
+
+        }
+
+        /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
+        if (!(u1_cbp_l & (1 << b8)))
+        {
+            *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+        }
+    }
+
+    return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief packs the luma residue of an i4x4 mb when rdopt is on
+*
+* @par Description:
+*  Unlike ih264e_code_luma_intra_macroblock_4x4, this function performs no
+*  prediction, transform or quantization. It reads the quantized residue
+*  (pi2_res_buf_intra_4x4) and the nnz of each 4x4 sub block
+*  (au4_nnz_intra_4x4) from the process context (presumably filled while the
+*  i4x4 modes were evaluated), packs the coefficients in scan order for
+*  entropy coding and copies the reconstructed mb (pu1_ref_mb_intra_4x4) to
+*  the luma recon buffer.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
+{
+    /* codec context, only needed for the final recon copy */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* packed mb coeff data, advanced while packing */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* scanning matrix used to order the coefficients of a 4x4 block */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* residue and nnz of the current 4x4 block, stepped once per block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+    UWORD8  *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+    /* coded block pattern, one bit per 8x8 block */
+    UWORD8 u1_cbp_l = 0;
+
+    /* 8x8 and 4x4 block counters */
+    UWORD32 b8, b4;
+
+    /* Pack the 16 4x4 luma sub-blocks of the MB, 8x8 block by 8x8 block */
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        /* remember where this 8x8 block starts, to rewind if it is empty */
+        tu_sblk_coeff_data_t *ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            tu_sblk_coeff_data_t *ps_mb_coeff_data = *pv_mb_coeff_data;
+
+            /* number of non zero coefficients of this sub block */
+            UWORD32 u4_nnz = *pu1_nnz;
+
+            /* the nnz is always written */
+            ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+            if (0 == u4_nnz)
+            {
+                /* empty block: only the nnz word is sent */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+            else
+            {
+                UWORD32 u4_nnz_cnt = 0;
+                UWORD32 u4_s_map = 0;
+                UWORD32 mask = 1;
+                UWORD32 coeff_cnt = 0;
+
+                /* scan until every non zero coefficient has been written */
+                while (u4_nnz_cnt < u4_nnz)
+                {
+                    WORD16 i2_res_val = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+
+                    if (i2_res_val)
+                    {
+                        /* write residue and flag its scan position */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                        u4_s_map |= mask;
+                    }
+
+                    mask <<= 1;
+                    coeff_cnt++;
+                }
+
+                /* significant coeff map goes to the upper 16 bits */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+                /* next sub block starts right after the written residue */
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+                /* this 8x8 block carries residue */
+                u1_cbp_l |= (1 << b8);
+            }
+
+            /* move on to the next 4x4 block */
+            pu1_nnz++;
+            pi2_res_mb += MB_SIZE;
+        }
+
+        /* an 8x8 block without residue sends nothing to entropy */
+        if (0 == (u1_cbp_l & (1 << b8)))
+        {
+            *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+        }
+    }
+
+    /* copy the reconstructed mb into the luma recon buffer */
+    ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4_dummy_guard(ps_proc), ps_proc->pu1_rec_buf_luma, MB_SIZE, ps_proc->i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
+
+    return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+*  If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+*  first predicted using intra 8x8 prediction filters. The predicted data is
+*  compared with the input for error and the error is transformed. The DC
+*  coefficients of each transformed sub block are further transformed using
+*  Hadamard transform. The resulting coefficients are quantized, packed and sent
+*  for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to recon macro block (output of the inverse path below) */
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+    /* pointer to prediction macro block, selected from the intra mode below */
+    UWORD8 *pu1_pred_mb = NULL;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* chroma intra 8x8 mode */
+    UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* number of non zero coeffs per block */
+    UWORD8 au1_nnz[18] = {0};
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+    /* Control signal for inverse transform, filled in by ih264e_pack_c_mb */
+    UWORD32 u4_cntrl;
+
+    /* pointer to packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* See if we need to swap U and V planes for entropy */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    if (PLANE_CH_I8x8 == u1_intra_mode)
+    {
+        pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
+    }
+    else
+    {
+        pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
+    }
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization                                        */
+    /********************************************************/
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                               pu1_pred_mb, pi2_res_mb,
+                                               i4_src_strd, i4_pred_strd,
+                                               i4_res_strd,
+                                               ps_qp_params->pu2_scale_mat,
+                                               ps_qp_params->pu2_thres_mat,
+                                               ps_qp_params->u1_qbits,
+                                               ps_qp_params->u4_dead_zone,
+                                               au1_nnz);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /********************************************************/
+    ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+                     au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+    /********************************************************/
+    /*  inverse quantization,                               */
+    /*  inverse transform,                                  */
+    /*  reconstruction                                      */
+    /********************************************************/
+    ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
+                                                   pu1_pred_mb, pu1_ref_mb,
+                                                   i4_res_strd, i4_pred_strd,
+                                                   i4_rec_strd,
+                                                   ps_qp_params->pu2_iscale_mat,
+                                                   ps_qp_params->pu2_weigh_mat,
+                                                   ps_qp_params->u1_qp_div,
+                                                   u4_cntrl,
+                                                   ps_proc->pv_scratch_buff);
+    return (u1_cbp_c);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when  mode is inter
+*
+* @par Description:
+*  If the current mb is to be coded as inter the mb is predicted based on the
+*  sub mb partitions and corresponding motion vectors generated by ME. Then,
+*  error is computed between the input blk and the estimated blk. This error is
+*  transformed, quantized. The quantized coefficients are packed in scan order
+*  for entropy coding
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to recon macro block */
+    UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_l = 0;
+
+    /* Control signal for inverse transform */
+    UWORD32 u4_cntrl;
+
+    /* number of non zero coeffs, viewed bytewise over ps_proc->au4_nnz */
+    UWORD8  *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* pointer to packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* pseudo pred buffer; passed by address to ih264e_motion_comp_luma */
+    UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
+
+    /* pseudo pred buffer stride; passed by address to ih264e_motion_comp_luma */
+    WORD32 i4_pseudo_pred_strd = i4_pred_strd;
+
+    /* init nnz */
+    ps_proc->au4_nnz[0] = 0;
+    ps_proc->au4_nnz[1] = 0;
+    ps_proc->au4_nnz[2] = 0;
+    ps_proc->au4_nnz[3] = 0;
+    ps_proc->au4_nnz[4] = 0;
+
+    /********************************************************/
+    /*  prediction                                          */
+    /********************************************************/
+    ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization                                        */
+    /********************************************************/
+    if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
+    {
+        ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                                   pu1_pseudo_pred, pi2_res_mb,
+                                                   i4_src_strd,
+                                                   i4_pseudo_pred_strd,
+                                                   i4_res_strd,
+                                                   ps_qp_params->pu2_scale_mat,
+                                                   ps_qp_params->pu2_thres_mat,
+                                                   ps_qp_params->u1_qbits,
+                                                   ps_qp_params->u4_dead_zone,
+                                                   pu1_nnz,
+                                                   DISABLE_DC_TRANSFORM);
+
+        /********************************************************/
+        /*  pack coeff data for entropy coding                  */
+        /********************************************************/
+        ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+                         pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
+    }
+    else
+    {
+        u1_cbp_l = 0;
+        u4_cntrl = 0;
+    }
+
+    /********************************************************/
+    /*  inverse quantization,                               */
+    /*  inverse transform,                                  */
+    /*  reconstruction                                      */
+    /********************************************************/
+
+    /* If the frame is not to be used for P frame reference or dumping recon,
+     * the recon is used only for predicting intra MBs.
+     * This needs only the right and bottom edge 4x4 blocks to be reconstructed,
+     * hence we selectively enable them using the control signal (including DC)
+     */
+    if (ps_proc->u4_compute_recon != 1)
+    {
+        u4_cntrl &= 0x111F0000;
+    }
+
+    if (u4_cntrl)
+    {
+        ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+                        ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
+                        i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
+                        ps_qp_params->pu2_iscale_mat,
+                        ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+                        u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
+                        ps_proc->pv_scratch_buff);
+    }
+    else
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
+                                          i4_pseudo_pred_strd, i4_rec_strd,
+                                          MB_SIZE, MB_SIZE, NULL, 0);
+    }
+
+
+    return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for inter macro blocks
+*
+* @par Description:
+*  If the current mb is to be coded as inter predicted mb, prediction is done
+*  based on the sub mb partitions and corresponding motion vectors generated by ME.
+*  Then, error is computed between the input blk and the estimated blk.
+*  This error is transformed and quantized. The quantized coefficients
+*  are packed in scan order for
+*  entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* pointer to recon macro block */
+    UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+    /* coded block pattern */
+    UWORD8 u1_cbp_c = 0;
+
+    /* Control signal for inverse transform, filled in by ih264e_pack_c_mb */
+    UWORD32 u4_cntrl;
+
+    /* number of non zero coeffs per block */
+    UWORD8 au1_nnz[10] = {0};
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+    /* pointer to packed mb coeff data */
+    void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+    /* See if we need to swap U and V planes for entropy */
+    UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+    /********************************************************/
+    /*  prediction                                          */
+    /********************************************************/
+    ih264e_motion_comp_chroma(ps_proc);
+
+    /********************************************************/
+    /*  error estimation,                                   */
+    /*  transform                                           */
+    /*  quantization                                        */
+    /********************************************************/
+    ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+                                               pu1_pred_mb, pi2_res_mb,
+                                               i4_src_strd, i4_pred_strd,
+                                               i4_res_strd,
+                                               ps_qp_params->pu2_scale_mat,
+                                               ps_qp_params->pu2_thres_mat,
+                                               ps_qp_params->u1_qbits,
+                                               ps_qp_params->u4_dead_zone,
+                                               au1_nnz);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /********************************************************/
+    ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+                     au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+    /********************************************************/
+    /*  inverse quantization,                               */
+    /*  inverse transform,                                  */
+    /*  reconstruction                                      */
+    /********************************************************/
+
+    /* If the frame is not to be used for P frame reference or dumping recon,
+     * the recon is used only for predicting intra MBs.
+     * This needs only the right and bottom edge 4x4 blocks to be reconstructed,
+     * hence we selectively enable them using the control signal (including DC)
+     */
+    if (!ps_proc->u4_compute_recon)
+    {
+        u4_cntrl &= 0x7700C000;
+    }
+
+    if (u4_cntrl)
+    {
+        ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+                        ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
+                        i4_res_strd, i4_pred_strd, i4_rec_strd,
+                        ps_qp_params->pu2_iscale_mat,
+                        ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+                        u4_cntrl, ps_proc->pv_scratch_buff);
+    }
+    else
+    {
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
+                                          i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
+                                          NULL, 0);
+    }
+
+    return (u1_cbp_c);
+}
diff --git a/encoder/ih264e_core_coding.h b/encoder/ih264e_core_coding.h
new file mode 100755
index 0000000..1237d25
--- /dev/null
+++ b/encoder/ih264e_core_coding.h
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_core_coding.h
+*
+* @brief
+*  This file contains extern declarations of core coding routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef IH264E_CORE_CODING_H_
+#define IH264E_CORE_CODING_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      Enable/Disable Hadamard transform of DC Coeff's
+******************************************************************************
+ */
+#define DISABLE_DC_TRANSFORM 0
+#define ENABLE_DC_TRANSFORM 1
+
+/**
+*******************************************************************************
+ *  @brief block counts, 4x4 block sizes and bit masks for DC/AC control flags
+*******************************************************************************
+ */
+
+#define DC_COEFF_CNT_LUMA_MB        16
+#define NUM_4X4_BLKS_LUMA_MB_ROW    4
+#define NUM_LUMA4x4_BLOCKS_IN_MB    16
+#define NUM_CHROMA4x4_BLOCKS_IN_MB  8
+
+#define SIZE_4X4_BLK_HRZ            TRANS_SIZE_4
+#define SIZE_4X4_BLK_VERT           TRANS_SIZE_4
+
+#define CNTRL_FLAG_DC_MASK_LUMA     0x0000FFFF
+#define CNTRL_FLAG_AC_MASK_LUMA     0xFFFF0000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
+
+#define CNTRL_FLAG_AC_MASK_CHROMA   ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V )
+#define CNTRL_FLAG_DC_MASK_CHROMA   ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V )
+
+#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
+
+/**
+*******************************************************************************
+ *  @brief macros to dequeue a block id from a control word (via CLZ) and to
+ *         convert a block id to x/y offsets
+*******************************************************************************
+ */
+#define DEQUEUE_BLKID_FROM_CONTROL( u4_cntrl,  blk_lin_id)                     \
+{                                                                              \
+  blk_lin_id = CLZ(u4_cntrl);                                                  \
+  u4_cntrl &= (0x7FFFFFFF >> blk_lin_id);                                      \
+};
+#define IND2SUB_LUMA_MB(u4_blk_id,i4_offset_x,i4_offset_y)                      \
+{                                                                               \
+     i4_offset_x = (u4_blk_id % 4) << 2;                                        \
+     i4_offset_y = (u4_blk_id / 4) << 2;                                        \
+}
+
+#define IND2SUB_CHROMA_MB(u4_blk_id,i4_offset_x,i4_offset_y)                   \
+{                                                                              \
+     i4_offset_x = ((u4_blk_id & 0x1 ) << 3) + (u4_blk_id > 3);                \
+     i4_offset_y = (u4_blk_id & 0x2) << 1;                                     \
+}
+
+
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the DCT transform, then the Hadamard transform
+*  and quantization for a macroblock when the mb mode is intra 16x16 mode
+*
+* @par Description:
+*  First  cf4 is done on all 16 4x4 blocks of the 16x16 input block.
+*  Then hadamard transform is done on the DC coefficients
+*  Quantization is then performed on the 16x16 block, 4x4 wise
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*
+* @param[in] pi2_out
+*  Pointer to residual sub-block
+*  The output will be in linear format
+*  The first 16 continuous locations will contain the values of Dc block
+*  After DC block and a stride 1st AC block will follow
+*  After one more stride next AC block will follow
+*  The blocks will be in raster scan order
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] dst_strd
+*  Destination stride
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz
+*  Memory to store the non-zeros after transform
+*  The first byte will be the nnz of DC block
+*  From the next byte the AC nnzs will be stored in raster scan order
+*
+* @param u4_dc_flag
+*  Signals if Dc transform is to be done or not
+*   1 -> Dc transform will be done
+*   0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(
+                codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+                WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+                WORD32 dst_strd, const UWORD16 *pu2_scale_matrix,
+                const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+                UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the intra 16x16 inverse transform process for H264
+*  it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+*  Input data, 16x16 size
+*  First 16 mem locations will have the DC coeffs in raster scan order in linear fashion
+*  after a stride 1st AC block will be present again in raster scan order
+*  Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+*  The predicted data, 16x16 size
+*  Block by block form
+*
+* @param[in] pu1_out
+*  Output 16x16
+*  In block by block form
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  input stride for prediction buffer
+*
+* @param[in] out_strd
+*  input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+*  Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+*  weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+*  QP/6
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+*  Controls the transform path
+*  In total the last 17 bits are used
+*  the 16th bit will correspond to the DC block
+*  and 32-17 will correspond to the AC blocks in raster scan order
+*  A bit equaling zero indicates that the entire 4x4 block is zero for DC
+*  For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+*  none
+*
+* @remarks
+*  The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+                codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+                UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+                WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+                const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+                UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function performs the DCT transform, then the Hadamard transform
+*  and quantization for a chroma macroblock
+*
+* @par Description:
+*  First cf4 is done on all the 4x4 blocks of the 8x8 input block
+*  Then hadamard transform is done on the DC coefficients
+*  Quantization is then performed on the 8x8 block, 4x4 wise
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*  The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*  Prediction is in inter leaved format
+*
+* @param[in] pi2_out
+*  Pointer to residual sub-block
+*  The output will be in linear format
+*  The first 4 continuous locations will contain the values of DC block for U
+*  and then next 4 will contain for V.
+*  After DC block and a stride 1st AC block of U plane will follow
+*  After one more stride next AC block of V plane will follow
+*  The blocks will be in raster scan order
+*
+*  After all the AC blocks of U plane AC blocks of V plane will follow in exact
+*  same way
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] out_strd
+*  Destination stride
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6
+*
+* @param[in] u4_round_factor
+*  Round factor for quant
+*
+* @param[out] pu1_nnz_c
+*  Memory to store the non-zeros after transform
+*  The first byte will be the nnz of DC block for U plane
+*  From the next byte the AC nnzs will be stored in raster scan order
+*  The fifth byte will be nnz of DC block of V plane
+*  Then AC blocks will follow
+*
+* @param u4_dc_flag
+*  Signals if Dc transform is to be done or not
+*   1 -> Dc transform will be done
+*   0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(
+                codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+                WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+                WORD32 out_strd, const UWORD16 *pu2_scale_matrix,
+                const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+                UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c);
+
+/**
+*******************************************************************************
+* @brief
+*  This function performs the inverse transform with process for chroma MB of H264
+*
+* @par Description:
+*  Does inverse DC transform ,inverse quantization inverse transform
+*
+* @param[in] pi2_src
+*  Input data, 16x16 size
+*  The input is in the form of, first 4 locations will contain DC coeffs of
+*  U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
+*  in raster scan order will follow, each block as linear array in raster scan order.
+*  After a stride next AC block will follow. After all AC blocks of U plane
+*  V plane AC blocks will follow in exact same order.
+*
+* @param[in] pu1_pred
+*  The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+*  Output 8x16, U and V interleaved
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  input stride for prediction buffer
+*
+* @param[in] out_strd
+*  input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+*  Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+*  weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+*  QP/6
+*
+* @param[in] pi4_tmp
+*  Input temporary buffer
+*  needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of DC coeffs for chroma * number of planes
+*  in size
+*
+* @param[in] u4_cntrl
+*  Controls the transform path
+*  the 15th bit will correspond to DC block of U plane, 14th will indicate the V plane DC block
+*  bits 31-28 will indicate AC blocks of U plane in raster scan order
+*  bits 27-24 will indicate AC blocks of V plane in raster scan order
+*  A bit set to 1 implies that there is at least one non zero coeff in a block
+*
+* @returns
+*  none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+                codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+                UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+                WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+                const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+                WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+*
+* @brief  This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par   Description
+*  An i16 macro block contains two classes of units, dc 4x4 block and
+*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
+*  the 16 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  The first byte of each block will be nnz of the block, if it is non zero,
+*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
+*  This is repeated for 1 dc + 16 ac blocks.
+*
+* @param[in]  pi2_res_mb
+*  pointer to residue mb
+*
+* @param[in, out]  pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients
+*
+* @param[in]  i4_res_strd
+*  residual block stride
+*
+* @param[out]  u1_cbp_l
+*  coded block pattern luma
+*
+* @param[in]   pu1_nnz
+*  number of non zero coefficients in each 4x4 unit
+*
+* @param[out]  pu4_cntrl
+*  Control signal for inverse transform of 16x16 blocks
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+                          WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz,
+                          UWORD32 *pu4_cntrl);
+
+/**
+******************************************************************************
+*
+* @brief  This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par   Description
+*  An i8 chroma macro block contains two classes of units, dc 2x2 block and
+*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
+*  the 4 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  The first byte of each block will be nnz of the block, if it is non zero,
+*  a 2 byte significance map is sent. This is followed by nonzero coefficients.
+*  This is repeated for 1 dc + 4 ac blocks.
+*
+* @param[in]  pi2_res_mb
+*  pointer to residue mb
+*
+* @param[in, out]  pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients
+*
+* @param[in]  i4_res_strd
+*  residual block stride
+*
+* @param[out]  u1_cbp_c
+*  coded block pattern chroma
+*
+* @param[in]   pu1_nnz
+*  number of non zero coefficients in each 4x4 unit
+*
+* @param[out]   pu4_cntrl
+*  Control signal for inverse transform
+*
+* @param[in]   u4_swap_uv
+*  Swaps the order of U and V planes in entropy bitstream
+*
+* @return none
+*
+* @remarks none
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz,
+                      UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl,
+                      UWORD32 u4_swap_uv);
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i16x16, the mb is first
+*  predicted using one of i16x16 prediction filters, basing on the intra mode
+*  chosen. Then, error is computed between the input blk and the estimated blk.
+*  This error is transformed (hierarchical transform i.e., dct followed by
+*  hadamard), quantized. The quantized coefficients are packed in scan order for
+*  entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_16x16
+        (
+            process_ctxt_t *ps_proc
+        );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i4x4, the mb is first
+*  predicted using one of i4x4 prediction filters, basing on the intra mode
+*  chosen. Then, error is computed between the input blk and the estimated blk.
+*  This error is dct transformed and quantized. The quantized coefficients are
+*  packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4
+        (
+            process_ctxt_t *ps_proc
+        );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4 (rdopt_on variant)
+*
+* @par Description:
+*  If the current mb is to be coded as intra of mb type i4x4, the mb is first
+*  predicted using one of i4x4 prediction filters, basing on the intra mode
+*  chosen. Then, error is computed between the input blk and the estimated blk.
+*  This error is dct transformed and quantized. The quantized coefficients are
+*  packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on
+        (
+            process_ctxt_t *ps_proc
+        );
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+*  If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+*  first predicted using intra 8x8 prediction filters. The predicted data is
+*  compared with the input for error and the error is transformed. The DC
+*  coefficients of each transformed sub blocks are further transformed using
+*  Hadamard transform. The resulting coefficients are quantized, packed and sent
+*  for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks
+*  The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+*  mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8
+        (
+            process_ctxt_t *ps_proc
+        );
+
+/**
+*******************************************************************************
+* @brief performs luma core coding when  mode is inter
+*
+* @par Description:
+*  If the current mb is to be coded as inter predicted mb,based on the sub mb
+*  partitions and corresponding motion vectors generated by ME, prediction is done.
+*  Then, error is computed between the input blk and the estimated blk.
+*  This error is transformed (dct, without hadamard) and quantized. The
+*  quantized coefficients are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_l
+*  coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_inter_macroblock_16x16
+        (
+            process_ctxt_t *ps_proc
+        );
+
+/**
+*******************************************************************************
+* @brief performs chroma core coding for inter macro blocks
+*
+* @par Description:
+*  If the current mb is to be coded as inter predicted mb, based on the sub mb
+*  partitions and corresponding motion vectors generated by ME, prediction is done.
+*  Then, error is computed between the input blk and the estimated blk.
+*  This error is transformed, quantized. The quantized coefficients
+*  are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+*  pointer to the current macro block context
+*
+* @returns u1_cbp_c
+*  coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8
+        (
+            process_ctxt_t *ps_proc
+        );
+
+#endif /* IH264E_CORE_CODING_H_ */
diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c
new file mode 100755
index 0000000..8a11bdb
--- /dev/null
+++ b/encoder/ih264e_deblk.c
@@ -0,0 +1,854 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_deblk.c
+ *
+ * @brief
+ *  This file contains functions that are associated with deblocking
+ *
+ * @author
+ *  ittiam
+ *
+ * @par List of Functions:
+ *  - ih264e_fill_bs_1mv_1ref_non_mbaff
+ *  - ih264e_calculate_csbp
+ *  - ih264e_compute_bs
+ *  - ih264e_filter_top_edge
+ *  - ih264e_filter_left_edge
+ *  - ih264e_deblock_mb
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264_trans_data.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_deblk_tables.h"
+#include "ih264e_deblk.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions                                                 */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief  BS Table Lookup
+* input  : [row][4-bit pattern]; bit k of the pattern selects byte (3 - k), byte 0 being the least significant
+* output : word holding 0x02 in every selected byte; the other bytes are 0x00 in row 0 and 0x01 in row 1
+* @remarks in this file the row index is either 0 or the !! of an mv difference flag
+******************************************************************************
+*/
+static const UWORD32 gu4_bs_table[][16] =
+{
+    {
+        0x00000000, 0x02000000, 0x00020000, 0x02020000,
+        0x00000200, 0x02000200, 0x00020200, 0x02020200,
+        0x00000002, 0x02000002, 0x00020002, 0x02020002,
+        0x00000202, 0x02000202, 0x00020202, 0x02020202
+    },
+    {
+        0x01010101, 0x02010101, 0x01020101, 0x02020101,
+        0x01010201, 0x02010201, 0x01020201, 0x02020201,
+        0x01010102, 0x02010102, 0x01020102, 0x02020102,
+        0x01010202, 0x02010202, 0x01020202, 0x02020202
+    }
+};
+
+/**
+******************************************************************************
+* @brief  Transpose Matrix used in BS
+* input  : 4-bit value
+* output : 16-bit value in which bit k of the input is placed at bit (4 * k)
+* @remarks none
+******************************************************************************
+*/
+static const UWORD16  ih264e_gu2_4x4_v2h_reorder[16] =
+{
+    0x0000, 0x0001, 0x0010, 0x0011,
+    0x0100, 0x0101, 0x0110, 0x0111,
+    0x1000, 0x1001, 0x1010, 0x1011,
+    0x1100, 0x1101, 0x1110, 0x1111
+};
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Fill BS value for all the edges of an mb
+*
+* @par Description:
+*  Fill BS value for all the edges of an mb
+*
+* @param[out] pu4_horz_bs
+*  Base pointer of horizontal BS table
+*
+* @param[out] pu4_vert_bs
+*  Base pointer of vertical BS table
+*
+* @param[in] u4_left_mb_csbp
+*  coded sub block pattern of left mb
+*
+* @param[in] u4_top_mb_csbp
+*  coded sub block pattern of top mb
+*
+* @param[in] u4_cur_mb_csbp
+*  coded sub block pattern of curr mb
+*
+* @param[in] ps_leftMvPred
+*  MV of left mb
+*
+* @param[in] ps_topMvPred
+*  MV of top mb
+*
+* @param[in] ps_curMvPred
+*  MV of curr mb
+*
+* @param[in] u1_left_intra
+*  is left intra
+*
+* @param[in] u1_top_intra
+*  is top intra
+* @returns  none
+* @remarks  none
+*******************************************************************************
+*/
+static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs,
+                                              UWORD32 *pu4_vert_bs,
+                                              UWORD32 u4_left_mb_csbp,
+                                              UWORD32 u4_top_mb_csbp,
+                                              UWORD32 u4_cur_mb_csbp,
+                                              mv_t *ps_leftMvPred,
+                                              mv_t *ps_topMvPred,
+                                              mv_t *ps_curMvPred,
+                                              UWORD8 u1_left_intra,
+                                              UWORD8 u1_top_intra)
+{
+    /* motion vectors of blks p & q */
+    WORD16   i16_qMv0, i16_qMv1, i16_pMv0, i16_pMv1;
+
+    /* temp var */
+    UWORD32  u4_lft_flag, u4_top_flag;
+    const UWORD32  *bs_map;
+    UWORD32  u4_reordered_vert_bs_enc, u4_temp;
+
+    /* Coded Pattern for Horizontal Edge */
+    /*-----------------------------------------------------------------------*/
+    /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_nbr_horz_csbp        = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
+    UWORD32 u4_horz_bs_enc          = u4_cur_mb_csbp | u4_nbr_horz_csbp;
+
+    /* Coded Pattern for Vertical Edge */
+    /*-----------------------------------------------------------------------*/
+    /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0         */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp  & CSBP_RIGHT_BLOCK_MASK;
+
+    /*-----------------------------------------------------------------------*/
+    /*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_cur_mb_masked_csbp =(u4_cur_mb_csbp<<1)&(~CSBP_LEFT_BLOCK_MASK);
+
+    /*-----------------------------------------------------------------------*/
+    /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */
+    /*-----------------------------------------------------------------------*/
+    UWORD32 u4_nbr_vert_csbp    = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3);
+    UWORD32 u4_vert_bs_enc      = u4_cur_mb_csbp | u4_nbr_vert_csbp;
+
+    /* BS Calculation for the internal edges of the MB (entries 1, 2, 3) */
+
+    /* BS calculation for 1 2 3 horizontal boundary */
+    bs_map  = gu4_bs_table[0];
+    pu4_horz_bs[1] = bs_map[(u4_horz_bs_enc >> 4) & 0xF];
+    pu4_horz_bs[2] = bs_map[(u4_horz_bs_enc >> 8) & 0xF];
+    pu4_horz_bs[3] = bs_map[(u4_horz_bs_enc >> 12) & 0xF];
+
+    /* BS calculation for 5 6 7 vertical boundary */
+    /* Do 4x4 transpose of u4_vert_bs_enc by using look up table for reorder */
+    u4_reordered_vert_bs_enc    = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF];
+
+    u4_temp                     = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF];
+    u4_reordered_vert_bs_enc   |= (u4_temp << 1);
+
+    u4_temp                     = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF];
+    u4_reordered_vert_bs_enc   |= (u4_temp << 2);
+
+    u4_temp                     = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF];
+    u4_reordered_vert_bs_enc   |= (u4_temp << 3);
+
+    pu4_vert_bs[1] = bs_map[(u4_reordered_vert_bs_enc >> 4) & 0xF];
+    pu4_vert_bs[2] = bs_map[(u4_reordered_vert_bs_enc >> 8) & 0xF];
+    pu4_vert_bs[3] = bs_map[(u4_reordered_vert_bs_enc >> 12) & 0xF];
+
+
+    /* BS Calculation for MB Boundary Edges */
+    i16_qMv0  = ps_curMvPred->i2_mvx;
+    i16_qMv1  = ps_curMvPred->i2_mvy;
+
+    if (u1_top_intra)
+    {
+        pu4_horz_bs[0] = 0x04040404;
+    }
+    else
+    {
+        i16_pMv0  = ps_topMvPred->i2_mvx;
+        i16_pMv1  = ps_topMvPred->i2_mvy;
+        /* flag is set when either mv component of top and curr mb differs by 4 or more */
+        u4_top_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
+                        (ABS((i16_pMv1 - i16_qMv1)) >= 4);
+
+        bs_map  = gu4_bs_table[!!u4_top_flag];
+        pu4_horz_bs[0] = bs_map[u4_horz_bs_enc & 0xF];
+    }
+
+    if (u1_left_intra)
+    {
+        pu4_vert_bs[0] = 0x04040404;
+    }
+    else
+    {
+        i16_pMv0  = ps_leftMvPred->i2_mvx;
+        i16_pMv1  = ps_leftMvPred->i2_mvy;
+
+        /* flag is set when either mv component of left and curr mb differs by 4 or more */
+        u4_lft_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
+                        (ABS((i16_pMv1 - i16_qMv1)) >= 4);
+
+        bs_map  = gu4_bs_table[!!u4_lft_flag];
+        pu4_vert_bs[0] = bs_map[u4_reordered_vert_bs_enc & 0xF];
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief calculate coded subblock pattern from nnz
+*
+* @par Description:
+*  Builds a 16-bit pattern; bit i is set when nnz byte (i + 1) of
+*  ps_proc->au4_nnz is non zero
+* @param[in] ps_proc
+*  process context
+*
+* @returns  csbp
+*
+* @remarks  none
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc)
+{
+    /* number of non zeros for each tx blk */
+    UWORD8 *pu1_curr_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+    /* csbp */
+    UWORD32 u4_csbp = 0;
+
+    /* temp var */
+    WORD32  i4_i;
+    /* skip the first nnz byte; bits 0..15 of csbp are taken from bytes 1..16 */
+    pu1_curr_nnz += 1;
+
+    /* Creating Subblock pattern for current MB */
+    /* 15C|14C|13C|12C|11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C  */
+    for (i4_i = 0; i4_i < 16; i4_i++ )
+    {
+        u4_csbp |= ((!!*(pu1_curr_nnz + i4_i))<< i4_i);
+    }
+
+    return u4_csbp;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+*  This function computes blocking strength for an mb
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns  none
+*
+* @remarks In this module it is assumed that there is only a single reference
+* frame and it is always the most recently used anchor frame
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc)
+{
+    /* deblk bs context */
+    bs_ctxt_t *ps_bs = &(ps_proc->s_deblk_ctxt.s_bs_ctxt);
+
+    /* vertical blocking strength */
+    UWORD32 *pu4_pic_vert_bs;
+
+    /* horizontal blocking strength */
+    UWORD32 *pu4_pic_horz_bs;
+
+    /* mb indices */
+    WORD32 i4_mb_x, i4_mb_y;
+
+    /* is intra */
+    WORD32 i4_intra;
+
+    /* temp var */
+    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /* init indices */
+    i4_mb_x = ps_bs->i4_mb_x;
+    i4_mb_y = ps_bs->i4_mb_y;
+
+    /* init pointers : four bs words per mb, mbs stored in raster order */
+    pu4_pic_vert_bs = ps_bs->pu4_pic_vert_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4;
+    pu4_pic_horz_bs = ps_bs->pu4_pic_horz_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4;
+
+    /* is intra? */
+    i4_intra = ps_proc->u4_is_intra;
+
+    /* compute blocking strength : intra mb gets 4 on entry 0 and 3 on entries 1..3 */
+    if (i4_intra)
+    {
+        pu4_pic_vert_bs[0] = 0x04040404;
+        pu4_pic_vert_bs[1] = pu4_pic_vert_bs[2] = pu4_pic_vert_bs[3] = 0x03030303;
+
+        pu4_pic_horz_bs[0] = 0x04040404;
+        pu4_pic_horz_bs[1] = pu4_pic_horz_bs[2] = pu4_pic_horz_bs[3] = 0x03030303;
+    }
+    else
+    {
+        /* left mb syntax info */
+        mb_info_t *ps_left_mb_syntax_ele = &ps_proc->s_left_mb_syntax_ele;
+
+        /* top mb syntax info */
+        mb_info_t *ps_top_mb_syntax_ele = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
+
+        /* top row motion vector info */
+        enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
+
+        /* csbp for curr mb */
+        ps_proc->u4_csbp = ih264e_calculate_csbp(ps_proc);
+
+        /* csbp for ngbrs : in the first column / first row the left / top entries are overwritten (csbp 0, not intra, mv = curr mv) */
+        if (i4_mb_x == 0)
+        {
+            ps_left_mb_syntax_ele->u4_csbp = 0;
+            ps_left_mb_syntax_ele->u2_is_intra = 0;
+            ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+        }
+        if (i4_mb_y == 0)
+        {
+            ps_top_mb_syntax_ele->u4_csbp = 0;
+            ps_top_mb_syntax_ele->u2_is_intra = 0;
+            ps_top_row_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+        }
+
+        ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_pic_horz_bs,
+                                          pu4_pic_vert_bs,
+                                          ps_left_mb_syntax_ele->u4_csbp,
+                                          ps_top_mb_syntax_ele->u4_csbp,
+                                          ps_proc->u4_csbp,
+                                          &ps_proc->s_left_mb_pu.s_l0_mv,
+                                          &ps_top_row_pu->s_l0_mv,
+                                          &ps_proc->ps_pu->s_l0_mv,
+                                          ps_left_mb_syntax_ele->u2_is_intra,
+                                          ps_top_mb_syntax_ele->u2_is_intra);
+    }
+
+    return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of top horizontal edge
+*
+* @par Description:
+*  This function performs deblocking of top horizontal edge
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @param[in] ps_proc
+*  pointer to proc context
+*
+* @param[in] pu1_mb_qp
+*  pointer to qp of curr mb; the entry at -i4_wd_mbs is read as the qp of the top mb
+*
+* @param[in] pu1_cur_pic_luma
+*  pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+*  pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_horz_bs
+*  pointer to horizontal blocking strength
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_top_edge(codec_t *ps_codec,
+                                   process_ctxt_t *ps_proc,
+                                   UWORD8 *pu1_mb_qp,
+                                   UWORD8 *pu1_cur_pic_luma,
+                                   UWORD8 *pu1_cur_pic_chroma,
+                                   UWORD32 *pu4_pic_horz_bs)
+{
+    /* strd */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* deblk params */
+    UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+    UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+    /* collect qp of top & curr mb */
+    u4_qp_p = pu1_mb_qp[-ps_proc->i4_wd_mbs];
+    u4_qp_q = pu1_mb_qp[0];
+
+    /********/
+    /* luma */
+    /********/
+    u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1;
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+    u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+    u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+    /**********/
+    /* chroma */
+    /**********/
+    u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1;
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+    u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+    u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+    /* deblk edge */
+    /* top horizontal edge - are all four bs bytes equal to 4 ? */
+    if (pu4_pic_horz_bs[0] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+        ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma,
+                                               u4_beta_luma, pu4_pic_horz_bs[0],
+                                               gu1_ih264_clip_table[u4_idx_A_luma]);
+
+        ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+                                             u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[0],
+                                             gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of left vertical edge
+*
+* @par Description:
+*  This function performs deblocking of left vertical edge
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @param[in] ps_proc
+*  pointer to proc context
+*
+* @param[in] pu1_mb_qp
+*  pointer to qp of curr mb; the entry at index -1 is read as the qp of the left mb
+*
+* @param[in] pu1_cur_pic_luma
+*  pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+*  pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_vert_bs
+*  pointer to vertical blocking strength
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_left_edge(codec_t *ps_codec,
+                                    process_ctxt_t *ps_proc,
+                                    UWORD8 *pu1_mb_qp,
+                                    UWORD8 *pu1_cur_pic_luma,
+                                    UWORD8 *pu1_cur_pic_chroma,
+                                    UWORD32 *pu4_pic_vert_bs)
+{
+    /* strd */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* deblk params */
+    UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+    UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+    /* collect qp of left & curr mb */
+    u4_qp_p = pu1_mb_qp[-1];
+    u4_qp_q = pu1_mb_qp[0];
+
+    /********/
+    /* luma */
+    /********/
+    u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1;
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+    u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+    u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+    /**********/
+    /* chroma */
+    /**********/
+    u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1;
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+    u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+    u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+    /* deblk edge : strong filter only when all four bs bytes are equal to 4 */
+    if (pu4_pic_vert_bs[0] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+        ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma, i4_rec_strd,
+                                           u4_alpha_luma, u4_beta_luma,
+                                           pu4_pic_vert_bs[0],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+
+        ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+                                             u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[0],
+                                             gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+*  This function performs deblocking on an mb
+*
+* @param[in] ps_proc
+*  process context corresponding to the job
+*
+* @param[in] ps_deblk
+*  pointer to deblock context
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk)
+{
+    /* codec ctxt */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* ngbr availability */
+    UWORD8  u1_mb_a, u1_mb_b;
+
+    /* mb indices */
+    WORD32  i4_mb_x = ps_deblk->i4_mb_x, i4_mb_y = ps_deblk->i4_mb_y;
+
+    /* pic qp ptr */
+    UWORD8  *pu1_pic_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp;
+
+    /* vertical blocking strength */
+    UWORD32 *pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_pic_vert_bs;
+
+    /* horizontal blocking strength */
+    UWORD32 *pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_pic_horz_bs;
+
+    /* src buffers luma */
+    UWORD8  *pu1_cur_pic_luma = ps_deblk->pu1_cur_pic_luma;
+
+    /* src buffers chroma */
+    UWORD8  *pu1_cur_pic_chroma = ps_deblk->pu1_cur_pic_chroma;
+
+    /* strd */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* deblk params */
+    UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma;
+    UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+    /* temp var */
+    UWORD32 push_ptr = (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x;
+
+    /* derive neighbor availability */
+    /* In slice mode the edges of mbs that lie on the slice boundary are not deblocked */
+    /* deblocking filter idc '2' */
+    if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+    {
+        /* slice index */
+        UWORD8  *pu1_slice_idx = ps_deblk->pu1_slice_idx;
+
+        pu1_slice_idx += (i4_mb_y * ps_proc->i4_wd_mbs);
+        /* left macroblock availability */
+        u1_mb_a = (i4_mb_x == 0 ||
+                        (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+        /* top macroblock availability */
+        u1_mb_b = (i4_mb_y == 0 ||
+                        (pu1_slice_idx[i4_mb_x-ps_proc->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+    }
+    else
+    {
+        /* left macroblock availability */
+        u1_mb_a = (i4_mb_x == 0)? 0 : 1;
+        /* top macroblock availability */
+        u1_mb_b = (i4_mb_y == 0)? 0 : 1;
+    }
+
+    pu1_pic_qp += push_ptr;
+    pu4_pic_vert_bs += push_ptr * 4;
+    pu4_pic_horz_bs += push_ptr * 4;
+
+    /********/
+    /* luma */
+    /********/
+    u4_qp_luma = pu1_pic_qp[0];
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+    u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+    u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+    /**********/
+    /* chroma */
+    /**********/
+    u4_qp_chroma = gu1_qpc_fqpi[u4_qp_luma];
+
+    /* filter offset A and filter offset B have to be received from slice header */
+    /* TODO : for now lets set these offsets as zero */
+
+
+    u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+    u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+    /* alpha, beta computation */
+    u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+    u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+    /* Deblock vertical edges */
+    /* left vertical edge 0 - allowed to be deblocked ? */
+    if (u1_mb_a)
+    {
+        ih264e_filter_left_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_vert_bs);
+    }
+
+    /* vertical edge 1 */
+    if (pu4_pic_vert_bs[1] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 4, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 4, i4_rec_strd,
+                                           u4_alpha_luma, u4_beta_luma,
+                                           pu4_pic_vert_bs[1],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+    }
+
+    /* vertical edge 2 */
+    if (pu4_pic_vert_bs[2] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+        ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma,
+                                           u4_beta_luma, pu4_pic_vert_bs[2],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+
+        ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma,
+                                             u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[2],
+                                             gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+    }
+
+    /* vertical edge 3 */
+    if (pu4_pic_vert_bs[3] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma,
+                                           u4_beta_luma, pu4_pic_vert_bs[3],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+    }
+
+    /* Deblock Horizontal edges */
+    /* Horizontal edge 0 */
+    if (u1_mb_b)
+    {
+        ih264e_filter_top_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_horz_bs);
+    }
+
+    /* horizontal edge 1 */
+    if (pu4_pic_horz_bs[1] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+                                           u4_beta_luma, pu4_pic_horz_bs[1],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+    }
+
+    /* horizontal edge 2 */
+    if (pu4_pic_horz_bs[2] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+        ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+                                           u4_beta_luma, pu4_pic_horz_bs[2],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+
+        ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma,
+                                             u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[2],
+                                             gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+    }
+
+    /* horizontal edge 3 */
+    if (pu4_pic_horz_bs[3] == 0x04040404)
+    {
+        /* strong filter */
+        ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+    }
+    else
+    {
+        /* normal filter */
+        ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+                                           u4_beta_luma, pu4_pic_horz_bs[3],
+                                           gu1_ih264_clip_table[u4_idx_A_luma]);
+    }
+
+    return ;
+}
diff --git a/encoder/ih264e_deblk.h b/encoder/ih264e_deblk.h
new file mode 100755
index 0000000..9b3b67b
--- /dev/null
+++ b/encoder/ih264e_deblk.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_deblk.h
+*
+* @brief
+*  This file contains extern declarations of deblocking routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBLK_H_
+#define IH264E_DEBLK_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief  masks to extract csbp
+******************************************************************************
+ */
+#define CSBP_LEFT_BLOCK_MASK  0x1111
+#define CSBP_RIGHT_BLOCK_MASK 0x8888
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+*  This function computes blocking strength for an mb
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns  none
+*
+* @remarks In this module it is assumed that there is only a single reference
+* frame, which is always the most recently used anchor frame
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+*  This function performs deblocking on an mb
+*
+* @param[in] ps_proc
+*  process context corresponding to the job
+*
+* @param[in] ps_deblk
+*  pointer to deblock context
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk);
+
+#endif /* IH264E_DEBLK_H_ */
diff --git a/encoder/ih264e_debug.h b/encoder/ih264e_debug.h
new file mode 100755
index 0000000..5cb0434
--- /dev/null
+++ b/encoder/ih264e_debug.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_debug.h
+*
+* @brief
+*  This file contains extern declarations of routines that could be helpful
+*  for debugging purposes.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBUG_H_
+#define IH264E_DEBUG_H_
+
+#if DEBUG_RC
+/* DEBUG_RC build: each macro forwards to its dump routine (trailing ';' is part of the macro) */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) \
+    ih264e_debug_dump_qp(pic_cnt, qp, num_cores);
+
+#define DEBUG_DUMP_RC(ps_rc) ih264e_debug_print_rc(ps_rc);
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ih264e_debug_dump_cost_sad_pu(ps_proc);
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) \
+                ih264e_debug_dump_inp_to_post_enc(ps_frame_info, pic_cnt, num_cores);
+
+#else
+/* Non-debug build: expand to a valid no-op statement; a bare "(void);" does not compile */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) ((void)0);
+
+#define DEBUG_DUMP_RC(ps_rc) ((void)0);
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ((void)0);
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) ((void)0);
+
+#endif
+
+#endif /* IH264E_DEBUG_H_ */
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
new file mode 100755
index 0000000..76929ef
--- /dev/null
+++ b/encoder/ih264e_defs.h
@@ -0,0 +1,538 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_defs.h
+*
+* @brief
+*  Definitions used in the encoder
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_DEFS_H_
+#define IH264E_DEFS_H_
+
+
+/*****************************************************************************/
+/* Width and height restrictions                                             */
+/*****************************************************************************/
+/**
+ * Minimum width supported by codec
+ */
+#define MIN_WD   16
+
+/**
+ * Maximum width supported by codec
+ */
+
+#define MAX_WD   1920
+
+/**
+ * Minimum height supported by codec
+ */
+#define MIN_HT   16
+
+/**
+ * Maximum height supported by codec
+ */
+
+#define MAX_HT   1920
+
+/*****************************************************************************/
+/* Padding sizes                                                             */
+/*****************************************************************************/
+/**
+ * Padding used for top of the frame
+ */
+#define PAD_TOP     32
+
+/**
+ * Padding used for bottom of the frame
+ */
+#define PAD_BOT     32
+
+/**
+ * Padding used at left of the frame
+ */
+#define PAD_LEFT    32
+
+/**
+ * Padding used at right of the frame
+ */
+#define PAD_RIGHT   32
+/**
+ * Padding for width
+ */
+#define PAD_WD      (PAD_LEFT + PAD_RIGHT)
+/**
+ * Padding for height
+ */
+#define PAD_HT      (PAD_TOP  + PAD_BOT)
+
+/*
+ * buffer width and height for half pel buffers
+ */
+#define HP_BUFF_WD  24
+#define HP_BUFF_HT  18
+
+/*****************************************************************************/
+/* Number of frame restrictions                                              */
+/*****************************************************************************/
+/**
+ *  Maximum number of reference buffers in DPB manager
+ */
+#define MAX_REF_CNT  32
+
+/*****************************************************************************/
+/* Num cores related defs                                                    */
+/*****************************************************************************/
+/**
+ *  Maximum number of cores
+ */
+#define MAX_NUM_CORES       8
+
+/**
+ *  Maximum number of threads for pixel processing
+ */
+#define MAX_PROCESS_THREADS MAX_NUM_CORES
+
+/**
+ * Maximum process context sets
+ * Used to stagger encoding of MAX_CTXT_SETS in parallel
+ */
+#define MAX_CTXT_SETS   2
+/**
+ * Maximum number of contexts
+ * Kept as MAX_CTXT_SETS (two) contexts per thread, to make it easier to initialize
+ * the contexts from master thread; parenthesized so it is safe inside expressions
+ */
+#define MAX_PROCESS_CTXT    (MAX_NUM_CORES * MAX_CTXT_SETS)
+
+/*****************************************************************************/
+/* Profile and level restrictions                                            */
+/*****************************************************************************/
+/**
+ * Max level supported by the codec
+ */
+#define MAX_LEVEL  IH264_LEVEL_51
+
+/**
+ * Min level supported by the codec
+ */
+#define MIN_LEVEL  IH264_LEVEL_10
+
+/**
+ * Maximum number of slice headers that are held in memory simultaneously
+ * For single core implementation only 1 slice header is enough.
+ * But for multi-core parsing thread needs to ensure that slice headers are
+ * stored till the last CB in a slice is decoded.
+ * Parsing thread has to wait till last CB of a slice is consumed before reusing
+ * or overwriting the slice header
+ * MAX_SLICE_HDR_CNT is assumed to be a power of 2
+ */
+
+#define LOG2_MAX_SLICE_HDR_CNT 8
+#define MAX_SLICE_HDR_CNT (1 << LOG2_MAX_SLICE_HDR_CNT)
+
+/* Generic declarations */
+#define DEFAULT_MAX_LEVEL               40
+#define DEFAULT_RECON_ENABLE            0
+#define DEFAULT_RC                      IVE_RC_STORAGE
+#define DEFAULT_MAX_FRAMERATE           120000
+#define DEFAULT_MAX_BITRATE             20000000
+#define DEFAULT_MAX_SRCH_RANGE_X        256
+#define DEFAULT_MAX_SRCH_RANGE_Y        256
+#define DEFAULT_SLICE_PARAM             256
+#define DEFAULT_SRC_FRAME_RATE          30000
+#define DEFAULT_TGT_FRAME_RATE          30000
+#define DEFAULT_BITRATE                 6000000
+#define DEFAULT_QP_MIN                  10
+#define DEFAULT_QP_MAX                  51
+#define DEFAULT_I_QP                    25
+#define DEFAULT_P_QP                    28
+#define DEFAULT_B_QP                    28
+#define DEFAULT_AIR_MODE                IVE_AIR_MODE_NONE
+#define DEFAULT_AIR_REFRESH_PERIOD      30
+#define DEFAULT_VBV_DELAY               1000
+#define DEFAULT_VBV_SIZE                16800000 /* level 3.1 */
+#define DEFAULT_NUM_CORES               1
+#define DEFAULT_ME_SPEED_PRESET         100
+#define DEFAULT_HPEL                    1
+#define DEFAULT_QPEL                    1
+#define DEFAULT_I4                      1
+#define DEFAULT_I8                      0
+#define DEFAULT_I16                     1
+#define DEFAULT_ENABLE_FAST_SAD         0
+#define DEFAULT_ENABLE_SATQD            1
+#define DEFAULT_MIN_SAD_ENABLE          0
+#define DEFAULT_MIN_SAD_DISABLE         -1
+#define DEFAULT_SRCH_RNG_X              64
+#define DEFAULT_SRCH_RNG_Y              48
+#define DEFAULT_I_INTERVAL              30
+#define DEFAULT_IDR_INTERVAL            1000
+#define DEFAULT_B_FRAMES                0
+#define DEFAULT_DISABLE_DEBLK_LEVEL     0
+#define DEFAULT_PROFILE                 IV_PROFILE_BASE
+#define DEFAULT_MIN_INTRA_FRAME_RATE    1
+#define DEFAULT_MAX_INTRA_FRAME_RATE    2147483647
+#define DEFAULT_MIN_BUFFER_DELAY        30
+#define DEFAULT_MAX_BUFFER_DELAY        20000
+#define DEFAULT_STRIDE                  0
+#define DEFAULT_ENC_SPEED_PRESET        IVE_USER_DEFINED
+#define DEFAULT_PRE_ENC_ME              0
+#define DEFAULT_PRE_ENC_IPE             0
+
+/** Maximum number of entries in input buffer list */
+#define MAX_INP_BUF_LIST_ENTRIES         32
+
+/** Maximum number of entries in output buffer list */
+#define MAX_OUT_BUF_LIST_ENTRIES         32
+
+/** Maximum number of entries in recon buffer list used within the encoder */
+#define MAX_REC_LIST_ENTRIES             16
+
+/** Number of buffers created to hold half-pel planes for every reference buffer */
+    #define HPEL_PLANES_CNT                 1
+
+/**
+ *****************************************************************************
+ * Macro to compute total size required to hold one set of scaling matrices.
+ * Expands to a brace block of assignments to m_scaling_mat_size (an lvalue).
+ */
+#define SCALING_MAT_SIZE(m_scaling_mat_size)                                 \
+{                                                                            \
+    m_scaling_mat_size = 6 * TRANS_SIZE_4 * TRANS_SIZE_4;                    \
+    m_scaling_mat_size += 6 * TRANS_SIZE_8 * TRANS_SIZE_8;                   \
+    m_scaling_mat_size += 6 * TRANS_SIZE_16 * TRANS_SIZE_16;                 \
+    m_scaling_mat_size += 2 * TRANS_SIZE_32 * TRANS_SIZE_32;                 \
+}
+
+/**
+ *  @brief Macros to get raster scan position of a block[8x8] / sub block[4x4]
+ *  X = index & 1, Y = index >> 1; arguments are parenthesized so that compound
+ *  expressions (e.g. a + b, a | b) are evaluated before the mask / shift
+ */
+#define GET_BLK_RASTER_POS_X(x)     ((x) & 0x01)
+#define GET_BLK_RASTER_POS_Y(y)     ((y) >> 1)
+#define GET_SUB_BLK_RASTER_POS_X(x) ((x) & 0x01)
+#define GET_SUB_BLK_RASTER_POS_Y(y) ((y) >> 1)
+
+#define NUM_RC_MEMTABS 17
+
+/**
+ ***************************************************************************
+ * Enum to hold various mem records being request
+ ****************************************************************************
+ */
+enum
+{
+    /**
+     * Codec Object at API level
+     */
+    MEM_REC_IV_OBJ,
+
+    /**
+     * Codec context
+     */
+    MEM_REC_CODEC,
+
+    /**
+     * entropy context
+     */
+    MEM_REC_ENTROPY,
+
+    /**
+     * Buffer to hold coeff data
+     */
+    MEM_REC_MB_COEFF_DATA,
+
+    /**
+     * Buffer to hold mb header data
+     */
+    MEM_REC_MB_HEADER_DATA,
+
+    /**
+     * Motion vector bank
+     */
+    MEM_REC_MVBANK,
+
+    /**
+     * Motion vector bits
+     */
+    MEM_REC_MVBITS,
+
+    /**
+     * Holds mem records passed to the codec.
+     */
+    MEM_REC_BACKUP,
+
+    /**
+     * Holds SPS
+     */
+    MEM_REC_SPS,
+
+    /**
+     * Holds PPS
+     */
+    MEM_REC_PPS,
+
+    /**
+     * Holds Slice Headers
+     */
+    MEM_REC_SLICE_HDR,
+
+    /**
+     * Contains map indicating slice index per MB basis
+     */
+    MEM_REC_SLICE_MAP,
+
+    /**
+     * Holds thread handles
+     */
+    MEM_REC_THREAD_HANDLE,
+
+    /**
+     * Holds control call mutex
+     */
+    MEM_REC_CTL_MUTEX,
+
+    /**
+     * Holds entropy call mutex
+     */
+    MEM_REC_ENTROPY_MUTEX,
+
+    /**
+     * Holds memory for Process JOB Queue
+     */
+    MEM_REC_PROC_JOBQ,
+
+    /**
+     * Holds memory for Entropy JOB Queue
+     */
+    MEM_REC_ENTROPY_JOBQ,
+
+    /**
+     * Contains status map indicating processing status per MB basis
+     */
+    MEM_REC_PROC_MAP,
+
+    /**
+     * Contains status map indicating deblocking status per MB basis
+     */
+    MEM_REC_DBLK_MAP,
+
+    /*
+     * Contains AIR map and mask
+     */
+    MEM_REC_AIR_MAP,
+
+    /**
+     * Contains status map indicating ME status per MB basis
+     */
+    MEM_REC_ME_MAP,
+
+    /**
+     * Holds dpb manager context
+     */
+    MEM_REC_DPB_MGR,
+
+    /**
+     * Holds intermediate buffers needed during processing stage
+     * Memory for process contexts is allocated in this memtab
+     */
+    MEM_REC_PROC_SCRATCH,
+
+    /**
+     * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+     */
+    MEM_REC_QUANT_PARAM,
+
+    /**
+     * Holds top row syntax information
+     */
+    MEM_REC_TOP_ROW_SYN_INFO,
+
+    /**
+     * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+     */
+    MEM_REC_BS_QP,
+
+    /**
+     * Holds input buffer manager context
+     */
+    MEM_REC_INP_PIC,
+
+    /**
+     * Holds output buffer manager context
+     */
+    MEM_REC_OUT,
+
+    /**
+     * Holds picture buffer manager context and array of pic_buf_ts
+     * Also holds reference picture buffers in non-shared mode
+     */
+    MEM_REC_REF_PIC,
+
+    /*
+     * Mem record for color space conversion
+     */
+    MEM_REC_CSC,
+
+    /**
+     * NMB info struct
+     */
+    MEM_REC_MB_INFO_NMB,
+
+    /**
+     * Memory records for rate control.
+     */
+    MEM_REC_RC,
+
+    /**
+     * Place holder to compute number of memory records.
+     */
+    MEM_REC_CNT = MEM_REC_RC + NUM_RC_MEMTABS,
+
+    /*
+     * Do not add anything below
+     */
+};
+
+#define DISABLE_DEBLOCK_INTERVAL 8
+
+/**
+ ****************************************************************************
+ * Disable deblock levels
+ * Level 0 enables deblocking completely and level 4 disables completely
+ * Other levels are intermediate values to control deblocking level
+ ****************************************************************************
+ */
+enum
+{
+    /**
+     * Enable deblocking completely
+     */
+    DISABLE_DEBLK_LEVEL_0,
+
+    /**
+     * Disable only within MB edges - Not supported currently
+     */
+    DISABLE_DEBLK_LEVEL_1,
+
+    /**
+     * Enable deblocking once in DISABLE_DEBLOCK_INTERVAL number of pictures
+     * and for I slices
+     */
+    DISABLE_DEBLK_LEVEL_2,
+
+    /**
+     * Enable deblocking only for I slices
+     */
+    DISABLE_DEBLK_LEVEL_3,
+
+    /**
+     * Disable deblocking completely
+     */
+    DISABLE_DEBLK_LEVEL_4
+};
+
+/**
+ ****************************************************************************
+ * Number of buffers for I/O based on format
+ ****************************************************************************
+ */
+
+/** Minimum number of input buffers */
+#define MIN_INP_BUFS                 2
+
+/** Minimum number of output buffers */
+#define MIN_OUT_BUFS                1
+
+/** Minimum number of components in bitstream buffer */
+#define MIN_BITS_BUFS_COMP           1
+
+/** Minimum number of components in raw buffer */
+#define MIN_RAW_BUFS_420_COMP        3
+#define MIN_RAW_BUFS_422ILE_COMP     1
+#define MIN_RAW_BUFS_RGB565_COMP     1
+#define MIN_RAW_BUFS_RGBA8888_COMP   1
+#define MIN_RAW_BUFS_420SP_COMP      2
+
+#define MAX_NMB 120
+
+/** Maximum number of active config parameter sets */
+#define MAX_ACTIVE_CONFIG_PARAMS 32
+
+/**
+******************************************************************************
+ *  @brief Thresholds for luma & chroma to determine if the 8x8 subblock needs
+ *  to be encoded or skipped
+******************************************************************************
+*/
+#define LUMA_SUB_BLOCK_SKIP_THRESHOLD 4
+#define LUMA_BLOCK_SKIP_THRESHOLD 5
+#define CHROMA_BLOCK_SKIP_THRESHOLD 4
+
+/**
+******************************************************************************
+ *  @brief      defines the first byte of a NAL unit
+ *  forbidden zero bit - nal_ref_idc - nal_unit_type
+******************************************************************************
+*/
+/* [0 - 11 - 00111] */
+#define NAL_SPS_FIRST_BYTE 0x67
+
+/* [0 - 11 - 01000] */
+#define NAL_PPS_FIRST_BYTE 0x68
+
+/* [0 - 11 - 00001] */
+#define NAL_SLICE_FIRST_BYTE 0x61
+
+/* [0 - 00 - 00001] */
+#define NAL_NON_REF_SLICE_FIRST_BYTE 0x01
+
+/* [0 - 11 - 00101] */
+#define NAL_IDR_SLICE_FIRST_BYTE 0x65
+
+/* [0 - 00 - 01100] */
+#define NAL_FILLER_FIRST_BYTE 0x0C
+
+/* [0 - 00 - 00110] */
+#define NAL_SEI_FIRST_BYTE 0x06
+
+#define H264_ALLOC_INTER_FRM_INTV        1
+
+#define H264_MPEG_QP_MAP    191
+
+#define MPEG2_QP_ELEM       (H264_MPEG_QP_MAP + 1)
+#define H264_QP_ELEM        (MAX_H264_QP + 1)
+
+#define H264_INIT_QUANT_I                26
+#define H264_INIT_QUANT_P                34
+
+#endif /*IH264E_DEFS_H_*/
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
new file mode 100755
index 0000000..ffc6fb7
--- /dev/null
+++ b/encoder/ih264e_encode.c
@@ -0,0 +1,580 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_encode.c
+*
+* @brief
+*  This file contains functions for encoding the input yuv frame in synchronous
+*  api mode
+*
+* @author
+*  ittiam
+*
+* List of Functions
+*  - ih264e_join_threads()
+*  - ih264e_wait_for_thread()
+*  - ih264e_encode()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264_padding.h"
+#include "ih264e_bitstream.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264e_process.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_debug.h"
+#ifdef LOGO_EN
+#include "ih264e_ittiam_logo.h"
+#endif
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  Joins all the spawned process threads and marks them as not created
+*
+* @par   Description
+*  Slots without a created thread are skipped; the thread count is then zeroed
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @returns  none
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec)
+{
+    /* loop index and join status */
+    WORD32 i;
+    WORD32 ret;
+
+    /* visit every slot; the index advances even when no thread was created there */
+    for (i = 0; i < ps_codec->i4_proc_thread_cnt; i++)
+    {
+        if (ps_codec->ai4_process_thread_created[i])
+        {
+            ret = ithread_join(ps_codec->apv_proc_thread_handle[i], NULL);
+            if (ret != 0)
+            {
+                printf("pthread Join Failed");
+                assert(0);
+            }
+            /* joined: mark the slot as no longer holding a thread */
+            ps_codec->ai4_process_thread_created[i] = 0;
+        }
+    }
+
+    ps_codec->i4_proc_thread_cnt = 0;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function puts the current thread to sleep for a duration
+*  of sleep_us
+*
+* @par Description
+*  ithread_yield() method causes the calling thread to yield execution to another
+*  thread that is ready to run on the current processor. The operating system
+*  selects the thread to yield to. ithread_usleep blocks the current thread for
+*  the requested duration (sleep_us). In other words, yield just says,
+*  end my timeslice prematurely, look around for other threads to run. If there
+*  is nothing better than me, continue. Sleep says I don't want to run for the
+*  requested duration. Even if no other thread wants to run, don't make me run.
+*
+* @param[in] sleep_us
+*  thread sleep duration (presumably microseconds, per the _us suffix - confirm)
+*
+* @returns error_status (always IH264E_SUCCESS)
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us)
+{
+    /* yield thread */
+    ithread_yield();
+
+    /* put thread to sleep */
+    ithread_usleep(sleep_us);
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Encodes in synchronous api mode
+*
+* @par Description
+*  This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns  Status
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+    /* error status, OR-ed together from the helper calls made below */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+    /* codec ctxt, taken from the handle stored in the api level object */
+    codec_t *ps_codec = (codec_t *)ps_codec_obj->pv_codec_handle;
+
+    /* input frame to encode (pv_api_ip viewed as the encode input args) */
+    ih264e_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
+
+    /* output buffer to write stream (pv_api_op viewed as the encode output args) */
+    ih264e_video_encode_op_t *ps_video_encode_op = pv_api_op;
+
+    /* local copies of the i/o buffer descriptors */
+    inp_buf_t s_inp_buf;
+    out_buf_t s_out_buf;
+
+    /* ctxt_sel : context set used by this call (0 or 1), i : loop counter */
+    WORD32 ctxt_sel = 0, i;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+    /* reset output structure : no error, nothing to output, frame type not known */
+    ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+    ps_video_encode_op->s_ive_op.output_present  = 0;
+    ps_video_encode_op->s_ive_op.dump_recon = 0;
+    ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+
+    /* copy input info. to internal structure */
+    s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
+    s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+    s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+    s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+    s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info;
+    s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type;
+    s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info;
+    s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type;
+
+    /* copy output info. to internal structure (last flag and timestamps mirror the input) */
+    s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf;
+    s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+    s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+    s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+
+    /* api call cnt; the '== 0' checks below treat 0 as the first call (presumably starts at -1) */
+    ps_codec->i4_encode_api_call_cnt += 1;
+
+    /* curr pic cnt */
+    ps_codec->i4_pic_cnt += 1;
+
+    /* codec context selector : least significant bit of the api call cnt */
+    ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* reset status flags of this context set; pic cnt -1 means no picture is held in it */
+    ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
+    ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
+    ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
+
+    /* pass output buffer to codec */
+    ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
+
+    /* initialize codec ctxt with default params for the first encode api call */
+    if (ps_codec->i4_encode_api_call_cnt == 0)
+    {
+        ih264e_codec_init(ps_codec);
+    }
+
+    /* parse configuration params : apply the queued entries that are due for this input */
+    for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+    {
+        cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
+        /* an entry is due when both timestamp halves match the input, or either half is -1 */
+        if (1 == ps_cfg->u4_is_valid)
+        {
+            if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) &&
+                            (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) ||
+                            ((WORD32)ps_cfg->u4_timestamp_high == -1) ||
+                            ((WORD32)ps_cfg->u4_timestamp_low == -1) )
+            {
+                error_status |= ih264e_codec_update_config(ps_codec, ps_cfg);
+                SET_ERROR_ON_RETURN(error_status,
+                                    IVE_UNSUPPORTEDPARAM,
+                                    ps_video_encode_op->s_ive_op.u4_error_code,
+                                    IV_FAIL);
+                /* mark the entry as consumed so that it is not applied again */
+                ps_cfg->u4_is_valid = 0;
+            }
+        }
+    }
+
+    /******************************************************************
+     * INSERT LOGO
+     *****************************************************************/
+#ifdef LOGO_EN
+    if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL &&
+                    ps_codec->i4_header_mode != 1)
+    {
+        ih264e_insert_logo(s_inp_buf.s_raw_buf.apv_bufs[0],
+                           s_inp_buf.s_raw_buf.apv_bufs[1],
+                           s_inp_buf.s_raw_buf.apv_bufs[2],
+                           s_inp_buf.s_raw_buf.au4_strd[0],
+                           0,
+                           0,
+                           ps_codec->s_cfg.e_inp_color_fmt,
+                           ps_codec->s_cfg.u4_disp_wd,
+                           ps_codec->s_cfg.u4_disp_ht);
+    }
+#endif /*LOGO_EN*/
+
+    if (ps_codec->i4_encode_api_call_cnt == 0)
+    {
+        /********************************************************************/
+        /*   number of mv/ref bank buffers used by the codec,               */
+        /*      1 to handle curr frame                                      */
+        /*      1 to store information of ref frame                         */
+        /*      1 more additional because of the codec employs 2 ctxt sets  */
+        /*        to assist asynchronous API                                */
+        /********************************************************************/
+
+        /* initialize mv bank buffer manager */
+        error_status |= ih264e_mv_buf_mgr_add_bufs(ps_codec);
+        SET_ERROR_ON_RETURN(error_status,
+                            IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code,
+                            IV_FAIL);
+
+        /* initialize ref bank buffer manager */
+        error_status |= ih264e_pic_buf_mgr_add_bufs(ps_codec);
+        SET_ERROR_ON_RETURN(error_status,
+                            IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code,
+                            IV_FAIL);
+
+        /* for the first frame, generate header when not requested explicitly */
+        if (ps_codec->i4_header_mode == 0 &&
+                        ps_codec->u4_header_generated == 0)
+        {
+            ps_codec->i4_gen_header = 1;
+        }
+    }
+
+    /* generate header and return when encoder is operated in header mode */
+    if (ps_codec->i4_header_mode == 1)
+    {
+        /* whenever the header is generated, this implies a start of sequence
+         * and a sequence needs to be started with IDR
+         */
+        ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+        /* generate header */
+        error_status |= ih264e_generate_sps_pps(ps_codec);
+
+        /* api call cnt : undo the increment done at the top, no frame is coded in this call */
+        ps_codec->i4_encode_api_call_cnt --;
+
+        /* curr pic cnt : undo the increment done at the top as well */
+        ps_codec->i4_pic_cnt --;
+
+        /* header mode tag is not sticky */
+        ps_codec->i4_header_mode = 0;
+
+        /* send the input to app */
+        ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+        /* send the output to app */
+        ps_video_encode_op->s_ive_op.output_present  = 1;
+        ps_video_encode_op->s_ive_op.dump_recon = 0;
+        ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].s_bits_buf;
+
+        /* error status of the header generation, checked only after the op fields are filled */
+        SET_ERROR_ON_RETURN(error_status,
+                            IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code,
+                            IV_FAIL);
+
+        /* indicates that header has been generated previously */
+        ps_codec->u4_header_generated = 1;
+
+        return IV_SUCCESS;
+    }
+
+    /* encode a frame only when the application has supplied an input buffer */
+    if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL)
+    {
+        /* array giving pic cnt that is being processed in curr context set */
+        ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
+
+        /* initialize all relevant process ctxts */
+        error_status |= ih264e_pic_init(ps_codec, &s_inp_buf);
+        SET_ERROR_ON_RETURN(error_status,
+                            IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code,
+                            IV_FAIL);
+
+        if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+        {
+            /* proc ctxt base idx : first process ctxt belonging to this context set */
+            WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+            /* proc ctxt driven by the calling thread */
+            process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+            WORD32 ret = 0;
+
+            /* number of addl. threads to be created : one per configured core except this one */
+            WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
+            /* NOTE(review): workers get as_process[i + 1], not offset by proc_ctxt_select - confirm */
+            for (i = 0; i < num_thread_cnt; i++)
+            {
+                ret = ithread_create(ps_codec->apv_proc_thread_handle[i],
+                                     NULL,
+                                     (void*)ih264e_process_thread,
+                                     &ps_codec->as_process[i + 1]);
+                if (ret != 0)
+                {
+                    printf("pthread Create Failed");
+                    assert(0);
+                }
+                /* NOTE(review): with NDEBUG assert(0) is a no-op, a failed create is still marked created */
+                ps_codec->ai4_process_thread_created[i] = 1;
+
+                ps_codec->i4_proc_thread_cnt++;
+            }
+
+
+            /* launch job : the calling thread runs the same routine on its own process ctxt */
+            ih264e_process_thread(ps_proc);
+
+            /* Join threads at the end of encoding a frame */
+            ih264e_join_threads(ps_codec);
+            /* reset the process and the entropy job queues after the join */
+            ih264_list_reset(ps_codec->pv_proc_jobq);
+
+            ih264_list_reset(ps_codec->pv_entropy_jobq);
+        }
+    }
+    /* a picture was assigned to this context set in this call : hand the results back */
+    if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel])
+    {
+        /* proc ctxt base idx */
+        WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+        /* proc ctxt */
+        process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+        /* receive output back from codec */
+        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+        /* send the output to app */
+        ps_video_encode_op->s_ive_op.output_present  = 1;
+        ps_video_encode_op->s_ive_op.dump_recon = 1;
+        ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf;
+        ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+        /* receive input back from codec (overwrites the local copy made from the api args) */
+        s_inp_buf = ps_proc->s_inp_buf;
+
+        /* send the input to app */
+        ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+        /* recon is handed back only when enabled and the frame was not skipped before encoding */
+        if (ps_codec->s_cfg.u4_enable_recon &&
+                        ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+        {
+            /* error status */
+            IH264_ERROR_T ret = IH264_SUCCESS;
+
+            /* recon buffer of this context set */
+            rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel];
+
+            ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
+
+            /* copy/convert the recon buffer and return */
+            ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf,
+                            ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
+                            ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
+                            ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
+                            ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
+                            ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1],
+                            0,
+                            ps_codec->s_cfg.u4_disp_ht);
+            /* release the recon picture from the ref buf mgr with the BUF_MGR_IO flag */
+            ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO);
+            if (IH264_SUCCESS != ret)
+            {
+                SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+                                    IVE_FATALERROR,
+                                    ps_video_encode_op->s_ive_op.u4_error_code,
+                                    IV_FAIL);
+            }
+        }
+
+        /* release buffers from ref list when the frame got skipped after encoding */
+        if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1)
+        {
+            /* pic info */
+            pic_buf_t *ps_cur_pic;
+
+            /* mv info */
+            mv_buf_t *ps_cur_mv_buf;
+
+            /* error status */
+            IH264_ERROR_T ret = IH264_SUCCESS;
+
+            /* Decrement coded pic count */
+            ps_codec->i4_coded_pic_cnt--;
+
+            /* loop through the ref set to find the entry whose pic cnt equals the current pic cnt */
+            /* since the skipped frame may not be on reference list, we may not have an MV bank
+             * hence free only if we have allocated */
+            for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+            {
+                if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
+                {
+                    ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+                    ps_codec->as_ref_set[i].i4_poc = -1;
+
+                    ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+
+                    ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf;
+
+                    /* release this frame from reference list : mv buffer first, then pic buffer */
+                    ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF);
+                    SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+                                        IVE_FATALERROR,
+                                        ps_video_encode_op->s_ive_op.u4_error_code,
+                                        IV_FAIL);
+
+                    ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF);
+                    SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+                                        IVE_FATALERROR,
+                                        ps_video_encode_op->s_ive_op.u4_error_code,
+                                        IV_FAIL);
+                    break;
+                }
+            }
+        }
+        /* skipped frames (pre or post encode) give no recon; otherwise the frame type is reported */
+        if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) ||
+                        (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1))
+        {
+            ps_video_encode_op->s_ive_op.dump_recon = 0;
+        }
+        else
+        {
+            /* set output pic type : P, non IDR I, and IDR for everything else */
+            if (ps_codec->i4_slice_type == PSLICE)
+            {
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
+            }
+            else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1)
+            {
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
+            }
+            else
+            {
+                ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
+            }
+        }
+        /* NOTE(review): the index below is ctxt_sel + i, not proc_ctxt_select + i - confirm intent */
+        /* loop through to get the error status : OR the error codes of one process ctxt per core */
+        for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+        {
+            error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code;
+        }
+        SET_ERROR_ON_RETURN(error_status,
+                            IVE_FATALERROR,
+                            ps_video_encode_op->s_ive_op.u4_error_code,
+                            IV_FAIL);
+    }
+    /* last buffer (flag read from s_inp_buf, possibly reloaded above) : no output, no recon */
+    if (1 == s_inp_buf.u4_is_last)
+    {
+        ps_video_encode_op->s_ive_op.output_present = 0;
+        ps_video_encode_op->s_ive_op.dump_recon = 0;
+    }
+
+    return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
new file mode 100755
index 0000000..67e5409
--- /dev/null
+++ b/encoder/ih264e_encode_header.c
@@ -0,0 +1,1187 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_encode_header.c
+*
+* @brief
+*  This file contains function definitions related to header encoding.
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_generate_nal_unit_header()
+*  - ih264e_generate_sps()
+*  - ih264e_generate_pps()
+*  - ih264e_generate_slice_header()
+*  - ih264e_get_level()
+*  - ih264e_populate_sps()
+*  - ih264e_populate_pps()
+*  - ih264e_populate_slice_header()
+*  - ih264e_add_filler_nal_unit()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264_typedefs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_common_tables.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generate nal unit header in the stream as per section 7.4.1
+*
+* @par   Description
+*  Inserts Nal unit header syntax as per section 7.4.1
+*
+* @param[inout]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   nal_unit_type
+*  nal type to be inserted
+*
+* @param[in]   nal_ref_idc
+*  nal ref idc to be inserted
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+static WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm,
+                                              WORD32 nal_unit_type,
+                                              WORD32 nal_ref_idc)
+{
+    /* status of the bitstream write */
+    WORD32 status = IH264E_SUCCESS;
+
+    /* header byte, msb first : forbidden_zero_bit(1) nal_ref_idc(2) nal_unit_type(5) */
+    WORD32 nal_hdr_byte;
+
+    /* nal_unit_type has to be non zero and has to fit in its 5 bit field */
+    ASSERT((nal_unit_type > 0) && (nal_unit_type < 32));
+
+    nal_hdr_byte = (nal_ref_idc << 5) + nal_unit_type;
+    PUT_BITS(ps_bitstrm, nal_hdr_byte, (1+2+5), status, "nal_unit_header");
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates SPS (Sequence Parameter Set)
+*
+* @par   Description
+*  This function generates Sequence Parameter Set header as per the spec
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps)
+{
+    /* status of all the bitstream writes, OR-ed together */
+    WORD32 status = IH264E_SUCCESS;
+    /* index over the per ref frame offsets (used for poc type 1 only) */
+    WORD32 idx;
+
+    /* start code prefix */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* nal unit header : nal_unit_type = 7, nal_ref_idc = 3 */
+    status |= ih264e_generate_nal_unit_header(ps_bitstrm, 7, 3);
+
+    /* profile indicator, 8 bits */
+    PUT_BITS(ps_bitstrm, ps_sps->u1_profile_idc, 8, status, "profile_idc");
+
+    /* constraint set flags 0 to 3, one bit each */
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set0_flag, 1, status, "constrained_set0_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set1_flag, 1, status, "constrained_set1_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set2_flag, 1, status, "constrained_set2_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set3_flag, 1, status, "constrained_set3_flag");
+
+    /* four reserved bits, written as zero */
+    PUT_BITS(ps_bitstrm, 0, 4, status, "reserved_zero_four_bits");
+
+    /* level indicator, 8 bits */
+    PUT_BITS(ps_bitstrm, ps_sps->u1_level_idc, 8, status, "level_idc");
+
+    /* id of this sps */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_sps_id, status, "seq_parameter_set_id");
+
+    if (IH264_PROFILE_HIGH <= ps_sps->u1_profile_idc)
+    {
+        /* chroma format */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_chroma_format_idc, status, "chroma_format_idc");
+
+        if (CHROMA_FMT_IDC_YUV444 == ps_sps->u1_chroma_format_idc)
+        {
+            /* colour transform flag, coded for yuv 444 only */
+            PUT_BITS(ps_bitstrm, ps_sps->i1_residual_colour_transform_flag, 1, status, "i1_residual_colour_transform_flag");
+        }
+
+        /* luma bit depth, coded as an offset from 8 */
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_luma - 8), status, "bit_depth_luma_minus8");
+
+        /* chroma bit depth, coded as an offset from 8 */
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_chroma - 8), status, "bit_depth_chroma_minus8");
+
+        /* transform bypass flag */
+        PUT_BITS(ps_bitstrm, ps_sps->i1_qpprime_y_zero_transform_bypass_flag, 1, status, "qpprime_y_zero_transform_bypass_flag");
+
+        /* scaling matrix present flag */
+        PUT_BITS(ps_bitstrm, ps_sps->i1_seq_scaling_matrix_present_flag, 1, status, "seq_scaling_matrix_present_flag");
+
+        /* scaling lists : nothing is written even when the flag above is set */
+        if (0 != ps_sps->i1_seq_scaling_matrix_present_flag)
+        {
+            /* TODO_LATER: Will be enabled once scaling list support is added */
+        }
+    }
+
+    /* log2 of max frame num, coded as an offset from 4 */
+    PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_frame_num - 4), status, "log2_max_frame_num_minus4");
+
+    /* picture order count type */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_pic_order_cnt_type, status, "pic_order_cnt_type");
+
+    if (1 == ps_sps->i1_pic_order_cnt_type)
+    {
+        /* poc type 1 : flag telling that the delta pic order is always zero */
+        PUT_BITS(ps_bitstrm, ps_sps->i1_delta_pic_order_always_zero_flag, 1, status, "delta_pic_order_always_zero_flag");
+
+        /* poc offset of non reference pictures */
+        PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_non_ref_pic, status, "offset_for_non_ref_pic");
+
+        /* poc offset between top and bottom field */
+        PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_top_to_bottom_field, status, "offset_for_top_to_bottom_field");
+
+        /* number of entries in the poc cycle */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle, status, "num_ref_frames_in_pic_order_cnt_cycle");
+
+        /* one signed offset per entry of the poc cycle */
+        for (idx = 0; idx < ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle; idx++)
+        {
+            /* offset of ref frame idx */
+            PUT_BITS_SEV(ps_bitstrm, ps_sps->ai4_offset_for_ref_frame[idx], status, "offset_for_ref_frame");
+        }
+    }
+    else if (0 == ps_sps->i1_pic_order_cnt_type)
+    {
+        /* poc type 0 : log2 of max poc lsb, coded as an offset from 4 */
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_pic_order_cnt_lsb - 4), status, "log2_max_pic_order_cnt_lsb_minus4");
+    }
+
+    /* max number of reference frames */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_max_num_ref_frames, status, "num_ref_frames");
+
+    /* gaps in frame num allowed flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_gaps_in_frame_num_value_allowed_flag, 1, status, "gaps_in_frame_num_value_allowed_flag");
+
+    /* picture width in mbs, minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_mbs_minus1, status, "pic_width_in_mbs_minus1");
+
+    /* picture height in map units, minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_map_units_minus1, status, "pic_height_in_map_units_minus1");
+
+    /* frame mbs only flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_mbs_only_flag, 1, status, "frame_mbs_only_flag");
+
+    if (0 == ps_sps->i1_frame_mbs_only_flag)
+    {
+        /* mbaff flag, coded only when the stream is not frame mbs only */
+        PUT_BITS(ps_bitstrm, ps_sps->i1_mb_adaptive_frame_field_flag, 1, status, "mb_adaptive_frame_field_flag");
+    }
+
+    /* direct 8x8 inference flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_direct_8x8_inference_flag, 1, status, "direct_8x8_inference_flag");
+
+    /* cropping flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_cropping_flag, 1, status, "frame_cropping_flag");
+
+    if (0 != ps_sps->i1_frame_cropping_flag)
+    {
+        /* crop offset, left */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_left_offset, status, "frame_crop_left_offset");
+
+        /* crop offset, right */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_right_offset, status, "frame_crop_right_offset");
+
+        /* crop offset, top */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_top_offset, status, "frame_crop_top_offset");
+
+        /* crop offset, bottom */
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_bottom_offset, status, "frame_crop_bottom_offset");
+    }
+
+    /* vui present flag */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1, status, "vui_parameters_present_flag");
+
+    if (0 != ps_sps->i1_vui_parameters_present_flag)
+    {
+        /* placeholder : vui parameters are not written to the bitstream here */
+    }
+
+    /* close the rbsp */
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par   Description
+*  Generate Picture Parameter Set as per Section 7.3.2.2
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+* @param[in]   ps_pps
+*  pointer to structure containing PPS data
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data (its u1_profile_idc is read)
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps)
+{
+    WORD32 status = IH264E_SUCCESS;
+
+    /* start code prefix */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* nal unit header, written as one precomputed byte */
+    PUT_BITS(ps_bitstrm, NAL_PPS_FIRST_BYTE, 8, status, "pps_header");
+
+    /* id of this pps */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_pps_id, status, "pic_parameter_set_id");
+
+    /* id of the sps that this pps refers to */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_sps_id, status, "seq_parameter_set_id");
+
+    /* entropy coding mode flag */
+    PUT_BITS(ps_bitstrm, ps_pps->u1_entropy_coding_mode_flag, 1, status, "Entropy coding : 0-VLC; 1 - CABAC");
+
+    /* pic order present flag */
+    PUT_BITS(ps_bitstrm, ps_pps->u1_pic_order_present_flag, 1, status, "Pic order present flag");
+
+    /* number of slice groups, coded minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_num_slice_groups - 1, status, "Number of slice groups");
+
+    if (1 < ps_pps->u1_num_slice_groups)
+    {
+        /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+         * If this is not the case, we have to add Slice group map type to the bit stream*/
+    }
+
+    /* default number of active l0 references, coded minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1, status, "num_ref_idx_l0_default_active_minus1");
+
+    /* default number of active l1 references, coded minus 1 */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1, status, "num_ref_idx_l1_default_active_minus1");
+
+    /* weighted prediction flag */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1, status, "weighted_pred_flag");
+
+    /* weighted bi-prediction idc (an idc, not a flag : 2 bits) */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_idc, 2, status, "weighted_bipred_idc");
+
+    /* initial qp, coded as an offset from 26 */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26, status, "pic_init_qp_minus26");
+
+    /* initial qs, coded as an offset from 26 */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qs - 26, status, "pic_init_qs_minus26");
+
+    /* chroma qp index offset */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_chroma_qp_index_offset, status, "chroma_qp_index_offset");
+
+    /* deblocking filter control present flag */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1, status, "deblocking_filter_control_present_flag");
+
+    /* constrained intra prediction flag */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1, status, "constrained_intra_pred_flag");
+
+    /* redundant pic cnt present flag */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_redundant_pic_cnt_present_flag, 1, status, "redundant_pic_cnt_present_flag");
+
+    if (IH264_PROFILE_HIGH <= ps_sps->u1_profile_idc)
+    {
+        /* 8x8 transform mode flag */
+        PUT_BITS(ps_bitstrm, ps_pps->i1_transform_8x8_mode_flag, 1, status, "transform_8x8_mode_flag");
+
+        /* picture level scaling matrix present flag */
+        PUT_BITS(ps_bitstrm, ps_pps->i1_pic_scaling_matrix_present_flag, 1, status, "pic_scaling_matrix_present_flag");
+
+        if (0 != ps_pps->i1_pic_scaling_matrix_present_flag)
+        {
+            /* TODO_LATER: Will be enabled once scaling list support is added */
+        }
+
+        /* qp index offset of the second chroma component */
+        PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_second_chroma_qp_index_offset, status, "Second chroma QP offset");
+    }
+
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par   Description
+*  Generate Slice Header as per Section 7.3.5.1
+*
+* @param[inout]   ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in]   ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in]   ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by slice
+*
+* @remarks
+*  The function takes only the four parameters documented above; the
+*  signature has no ps_dup_bit_strm_ent_offset or
+*  pu4_first_slice_start_offset argument, so nothing is returned through
+*  such pointers.
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+    /*
+     * Syntax elements are written in slice header order. The status of every
+     * write is OR-ed into return_status and writing continues after a failed
+     * write, so the caller has to check the value returned at the end.
+     */
+    WORD32 return_status = IH264E_SUCCESS;
+
+    /* Insert start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* Insert Nal Unit Header */
+    return_status |= ih264e_generate_nal_unit_header(ps_bitstrm, ps_slice_hdr->i1_nal_unit_type, ps_slice_hdr->i1_nal_unit_idc);
+
+    /* first_mb_in_slice */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, "first_mb_in_slice");
+
+    /* slice_type */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "slice_type");
+
+    /* pic_parameter_set_id */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "pic_parameter_set_id");
+
+    /* frame_num, coded with log2_max_frame_num bits */
+    PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, return_status, "frame_num");
+
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        /* field_pic_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, "field_pic_flag");
+
+        if(ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* bottom_field_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, "bottom_field_flag");
+        }
+    }
+
+    /* idr_pic_id is present only in IDR slices (nal_unit_type 5) */
+    if (ps_slice_hdr->i1_nal_unit_type == 5)
+    {
+        /* u2_idr_pic_id */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "u2_idr_pic_id");
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 0)
+    {
+        /* pic_order_cnt_lsb */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "pic_order_cnt_lsb");
+
+        if(ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt_bottom */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, "delta_pic_order_cnt_bottom");
+        }
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag)
+    {
+        /* delta_pic_order_cnt[0] */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, "delta_pic_order_cnt[0]");
+
+        if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt[1] */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, "delta_pic_order_cnt[1]");
+        }
+    }
+
+    if (ps_pps->i1_redundant_pic_cnt_present_flag)
+    {
+        /* redundant_pic_cnt */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, "redundant_pic_cnt");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* direct_spatial_mv_pred_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, "direct_spatial_mv_pred_flag");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* num_ref_idx_active_override_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, return_status, "num_ref_idx_active_override_flag");
+
+        if (ps_slice_hdr->u1_num_ref_idx_active_override_flag)
+        {
+            /* num_ref_idx_l0_active_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1");
+
+            /*
+             * The list 1 count belongs to the override as well: it is only
+             * present when the override flag is set and the slice is a B
+             * slice. Writing it without the flag desynchronises the decoder.
+             */
+            if (ps_slice_hdr->u1_slice_type == BSLICE)
+            {
+                /* num_ref_idx_l1_active_minus1 */
+                PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1");
+            }
+        }
+    }
+
+    /* ref_idx_reordering */
+    /* TODO: ref_idx_reordering. Only the list 0 flag is written here; the
+     * reordering commands themselves are not generated, and B slices would
+     * additionally need ref_pic_list_reordering_flag_l1 */
+    if ((ps_slice_hdr->u1_slice_type != ISLICE) && (ps_slice_hdr->u1_slice_type != SISLICE))
+    {
+        /* ref_pic_list_reordering_flag_l0 */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l0, 1, return_status, "ref_pic_list_reordering_flag_l0");
+
+        if (ps_slice_hdr->u1_ref_idx_reordering_flag_l0)
+        {
+
+        }
+    }
+
+    if ((ps_pps->i1_weighted_pred_flag &&
+                    (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) ||
+                    (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE))
+    {
+        /* TODO_LATER: Currently there is no support for weighted prediction.
+         This needs to be updated when the support is added */
+    }
+
+    /* reference picture marking is present only when nal_ref_idc is non-zero */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (ps_slice_hdr->i1_nal_unit_type == 5)
+        {
+            /* no_output_of_prior_pics_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag , 1, return_status, "no_output_of_prior_pics_flag ");
+
+            /* long_term_reference_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag , 1, return_status, "long_term_reference_flag ");
+        }
+        else
+        {
+            /* adaptive_ref_pic_marking_mode_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag , 1, return_status, "adaptive_ref_pic_marking_mode_flag ");
+
+            if (ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag)
+            {
+                /* TODO: if the reference picture marking mode is adaptive
+                 add these fields in the bit-stream */
+            }
+        }
+    }
+
+    if (ps_slice_hdr->u1_entropy_coding_mode_flag && ps_slice_hdr->u1_slice_type != ISLICE &&
+                    ps_slice_hdr->u1_slice_type != SISLICE)
+    {
+        /* cabac_init_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, "cabac_init_idc");
+    }
+
+    /* slice_qp_delta, relative to the qp signalled in the pps */
+    PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, "slice_qp_delta");
+
+    if (ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == SISLICE)
+    {
+        if (ps_slice_hdr->u1_slice_type == SPSLICE)
+        {
+            /* sp_for_switch_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_sp_for_switch_flag , 1, return_status, "sp_for_switch_flag");
+        }
+        /* slice_qs_delta */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->u1_slice_qs - ps_pps->i1_pic_init_qs, return_status, "slice_qs_delta");
+    }
+
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        /* disable_deblocking_filter_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, return_status, "disable_deblocking_filter_idc");
+
+        /* the filter offsets are sent unless the filter is switched off (idc 1) */
+        if(ps_slice_hdr->u1_disable_deblocking_filter_idc != 1)
+        {
+            /* slice_alpha_c0_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, "slice_alpha_c0_offset_div2");
+
+            /* slice_beta_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, "slice_beta_offset_div2");
+        }
+    }
+
+    if (ps_slice_hdr->u1_num_slice_groups_minus1 > 0 &&
+                    ps_pps->u1_slice_group_map_type >= 3 &&
+                    ps_pps->u1_slice_group_map_type <= 5)
+    {
+        /* slice_group_change_cycle */
+        /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+         * If this is not the case, we have to add Slice group map type to the bit stream */
+    }
+
+    return return_status;
+}
+
+
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par   Description
+*  Populates sps structure for its use in header generation. Values come from
+*  the active configuration (ps_codec->s_cfg) or are fixed by what the encoder
+*  currently supports.
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @return      IH264E_SUCCESS (there is currently no failure path)
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
+{
+    /* active config parameters */
+    cfg_params_t    *ps_cfg = &(ps_codec->s_cfg);
+
+    /* profile */
+    /*
+     * Baseline profile supports, 8 bits per sample, 4:2:0 format, CAVLC.
+     * B frames are not allowed. Further, Flexible mb ordering, Redundant slices, Arbitrary slice ordering are supported.
+     * The constrained baseline profile is baseline profile minus ASO, FMO and redundant slices.
+     * To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames,
+     * support for weighted prediction and introduce CABAC entropy coding then we have Main Profile.
+     */
+    if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) ||
+         (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction))
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_MAIN;
+    }
+    else
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_BASELINE;
+    }
+
+    /* level */
+    /* TODO: the level is taken straight from the configured maximum; deriving
+     * it from the stream parameters (ih264e_get_level) is not enabled yet */
+    ps_sps->u1_level_idc = ps_cfg->u4_max_level;
+
+    /* constrained flags */
+    /*
+     * baseline profile automatically implies set 0 flag
+     */
+    ps_sps->u1_constraint_set0_flag = (ps_sps->u1_profile_idc == IH264_PROFILE_BASELINE);
+    /*
+     * main profile automatically implies set 1 flag
+     * Although the encoder says it supports Baseline profile it actually supports constrained
+     * baseline profile as ASO, FMO and redundant slices are not supported
+     */
+    ps_sps->u1_constraint_set1_flag = (ps_sps->u1_profile_idc <= IH264_PROFILE_MAIN);
+    /*
+     * extended profile is not supported
+     */
+    ps_sps->u1_constraint_set2_flag = 0x00;
+    /*
+     * level 1b or level 11
+     * For the Baseline and Main profiles chosen above, level 1b is signalled
+     * as level_idc 11 together with constraint_set3_flag equal to 1. With the
+     * flag cleared the stream would be declared as level 1.1 instead.
+     */
+    if (ps_sps->u1_level_idc == IH264_LEVEL_1B)
+    {
+        ps_sps->u1_constraint_set3_flag = 1;
+        ps_sps->u1_level_idc = IH264_LEVEL_11;
+    }
+    else
+    {
+        ps_sps->u1_constraint_set3_flag = 0;
+    }
+
+    /* active sps id */
+    ps_sps->u1_sps_id = ps_codec->i4_sps_id;
+
+    /* these fields only exist in the sps for High and above; the profile
+     * selection above never picks such a profile at present */
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        /* chroma format idc */
+        ps_sps->u1_chroma_format_idc = CHROMA_FMT_IDC_YUV420;
+
+        /* residual_colour_transform_flag */
+        ps_sps->i1_residual_colour_transform_flag = 0;
+
+        /* luma bit depth 8 */
+        ps_sps->i1_bit_depth_luma = 8;
+
+        /* chroma bit depth 8 */
+        ps_sps->i1_bit_depth_chroma = 8;
+
+        /* qpprime_y_zero_transform_bypass_flag */
+        ps_sps->i1_qpprime_y_zero_transform_bypass_flag = 0;
+
+        /* seq_scaling_matrix_present_flag */
+        /* TODO_LATER: Will be enabled once scaling list support is added */
+        ps_sps->i1_seq_scaling_matrix_present_flag = 0;
+    }
+
+    /* log2_max_frame_num: the value itself, not the "_minus4" syntax element.
+     * The slice header writes frame_num with this many bits */
+    ps_sps->i1_log2_max_frame_num = 16;
+
+    /* pic_order_cnt_type: 2 by default, 0 when alternate reference is enabled */
+    ps_sps->i1_pic_order_cnt_type = 2;
+
+    if(ps_cfg->u4_enable_alt_ref)
+        ps_sps->i1_pic_order_cnt_type = 0;
+
+    /* log2_max_pic_order_cnt_lsb: the value itself, not the "_minus4" syntax
+     * element. The slice header writes pic_order_cnt_lsb with this many bits */
+    ps_sps->i1_log2_max_pic_order_cnt_lsb = 8;
+
+    /* TODO : add support for other poc types. Nothing further is populated
+     * for poc type 0 or 1 at present */
+
+    /* num_ref_frames */
+    /* FIXME : Fix this hard coding */
+    ps_sps->u1_max_num_ref_frames = 1;
+
+    /* gaps_in_frame_num_value_allowed_flag */
+    ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0;
+
+    /* pic width in mb - 1 */
+    ps_sps->i2_pic_width_in_mbs_minus1 = ps_cfg->i4_wd_mbs - 1;
+
+    /* pic height in mb - 1 */
+    ps_sps->i2_pic_height_in_map_units_minus1 = ps_cfg->i4_ht_mbs - 1;
+
+    /* frame_mbs_only_flag, no support for interlace encoding */
+    ps_sps->i1_frame_mbs_only_flag = 1;
+
+    /* mb_adaptive_frame_field_flag */
+    if (ps_sps->i1_frame_mbs_only_flag == 0)
+    {
+        ps_sps->i1_mb_adaptive_frame_field_flag = 0;
+    }
+
+    /* direct_8x8_inference_flag */
+    ps_sps->i1_direct_8x8_inference_flag = 0;
+
+    /* cropping params */
+    /*NOTE : Cropping values depend on the chroma format
+     * For our case ,decoder interprets the cropping values as 2*num pixels
+     * Hence the difference in the disp width and width must be halved before sending
+     * to get the expected results
+     */
+    ps_sps->i1_frame_cropping_flag      = 0;
+    ps_sps->i2_frame_crop_left_offset   = 0;
+    ps_sps->i2_frame_crop_right_offset  = (ps_cfg->u4_wd - ps_cfg->u4_disp_wd) >> 1;
+    ps_sps->i2_frame_crop_top_offset    = 0;
+    ps_sps->i2_frame_crop_bottom_offset = (ps_cfg->u4_ht - ps_cfg->u4_disp_ht) >> 1;
+
+    /* cropping is signalled only when at least one offset is non-zero */
+    if (ps_sps->i2_frame_crop_left_offset    ||
+                    ps_sps->i2_frame_crop_right_offset   ||
+                    ps_sps->i2_frame_crop_top_offset     ||
+                    ps_sps->i2_frame_crop_bottom_offset)
+    {
+        ps_sps->i1_frame_cropping_flag      = 1;
+    }
+
+    /* vui params */
+    /* TODO: vui params are not populated, so they are signalled as absent */
+    ps_sps->i1_vui_parameters_present_flag = 0;
+
+    /*
+     * Nothing above can fail. The status used to be initialised to IH264E_FAIL
+     * and was never updated, so every call reported failure.
+     */
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Fills in the picture parameter set structure
+*
+* @par   Description
+*  Sets every pps field used for header generation. The ids come from the
+*  encoder context, the entropy coding mode and constrained intra prediction
+*  come from the active configuration, and all remaining fields are constants.
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_pps
+*  pointer to the pps params to populate
+*
+* @return      IH264E_SUCCESS always
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps)
+{
+    /* ---- identifiers taken from the encoder context ---- */
+
+    /* pic_parameter_set_id */
+    ps_pps->u1_pps_id = ps_codec->i4_pps_id;
+
+    /* seq_parameter_set_id */
+    ps_pps->u1_sps_id = ps_codec->i4_sps_id;
+
+    /* ---- values taken from the active configuration ---- */
+
+    /* entropy_coding_mode */
+    ps_pps->u1_entropy_coding_mode_flag = ps_codec->s_cfg.u4_entropy_coding_mode;
+
+    /* constrained intra prediction */
+    ps_pps->i1_constrained_intra_pred_flag = ps_codec->s_cfg.u4_constrained_intra_pred;
+
+    /* ---- fixed values ---- */
+
+    /* pic_order_present_flag is unset for POC type 2 */
+    ps_pps->u1_pic_order_present_flag = 0;
+
+    /* A single slice group is supported, so the map type is left at 0.
+     * TODO_LATER: with more than one slice group the slice group map type
+     * has to be added to the bit stream */
+    ps_pps->u1_num_slice_groups = 1;
+    ps_pps->u1_slice_group_map_type = 0;
+
+    /* default number of active reference frames for list 0 and list 1 */
+    /* FIXME : fix this hard coded value */
+    ps_pps->i1_num_ref_idx_l0_default_active = 1;
+    ps_pps->i1_num_ref_idx_l1_default_active = 1;
+
+    /* weighted prediction is disabled for now */
+    ps_pps->i1_weighted_bipred_idc = 0;
+    ps_pps->i1_weighted_pred_flag = 0;
+
+    /* qp, qs and the chroma qp offset are not signalled through the pps;
+     * the qp is sent in the slice headers instead */
+    ps_pps->i1_pic_init_qp = 0;
+    ps_pps->i1_pic_init_qs = 0;
+    ps_pps->i1_chroma_qp_index_offset = 0;
+
+    /* deblocking filter flags are present in the slice header */
+    ps_pps->i1_deblocking_filter_control_present_flag = 1;
+
+    /* redundant slices are not sent for now */
+    ps_pps->i1_redundant_pic_cnt_present_flag = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Fills in the slice header structure
+*
+* @par   Description
+*  Sets the slice header fields that header generation later reads. Values
+*  come from the process context, its entropy context and the codec context.
+*  As a side effect the codec's coded picture count is incremented when the
+*  slice starts at macroblock 0.
+*
+* @param[in]  ps_proc
+*  pointer to proc context
+*
+* @param[out]  ps_slice_hdr
+*  pointer to the slice header structure to populate
+*
+* @param[in]  ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by the pps
+*
+* @return      IH264E_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* entropy context */
+    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+    /* nal_ref_idc: 3 when the current frame is a reference frame, else 0 */
+    ps_slice_hdr->i1_nal_unit_idc = (ps_codec->u4_is_curr_frm_ref) ? 3 : 0;
+
+    /* start mb address */
+    ps_slice_hdr->u2_first_mb_in_slice = ps_entropy->i4_mb_start_add;
+
+    /* slice type */
+    ps_slice_hdr->u1_slice_type = ps_proc->i4_slice_type;
+
+    /* pic_parameter_set_id */
+    ps_slice_hdr->u1_pps_id = ps_pps->u1_pps_id;
+
+    /* Separate color plane flag is 0, so color_plane_id is not part of the
+     * header */
+
+    /* frame num */
+    ps_slice_hdr->i4_frame_num = ps_proc->i4_frame_num;
+
+    /* Interlace encoding is not supported: when the sps allows field
+     * pictures the slice is still coded as a frame, so bottom_field_flag is
+     * never needed */
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        ps_slice_hdr->i1_field_pic_flag = 0;
+    }
+
+    /* nal unit type 5 with an idr pic id for IDR pictures, type 1 otherwise */
+    ps_slice_hdr->i1_nal_unit_type = 1;
+    if (ps_proc->u4_is_idr)
+    {
+        ps_slice_hdr->i1_nal_unit_type = 5;
+        ps_slice_hdr->u2_idr_pic_id = ps_proc->u4_idr_pic_id;
+    }
+
+    /* pic_order_cnt_lsb: the coded picture count wrapped to the lsb range.
+     * Nothing is populated for poc type 1 */
+    if (0 == ps_sps->i1_pic_order_cnt_type)
+    {
+        WORD32 i4_poc = ps_codec->i4_coded_pic_cnt;
+
+        ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc % (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb);
+    }
+
+    /* the first slice of a picture advances the coded picture count */
+    if (ps_slice_hdr->u2_first_mb_in_slice == 0)
+    {
+        ps_codec->i4_coded_pic_cnt++;
+    }
+
+    /* Redundant slices are not supported, so redundant_pic_cnt is not
+     * initialized. direct_spatial_mv_pred_flag for B slices is not populated
+     * either */
+
+    /* num_ref_idx_active_override_flag: the pps defaults are never
+     * overridden, so the l0/l1 active counts are not populated */
+    if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE)
+    {
+        ps_slice_hdr->u1_num_ref_idx_active_override_flag = 0;
+    }
+
+    /* ref_pic_list_reordering_flag_l0: no reordering for any slice type that
+     * uses reference lists */
+    /* TODO: ref_idx_reordering */
+    if (!(ps_proc->i4_slice_type == ISLICE || ps_proc->i4_slice_type == SISLICE))
+    {
+        ps_slice_hdr->u1_ref_idx_reordering_flag_l0 = 0;
+    }
+
+    /* TODO_LATER: Currently there is no support for weighted prediction, so
+     * no prediction weight fields are populated. This needs to be updated
+     * when the support is added */
+
+    /* reference picture marking, only relevant when nal_ref_idc is non-zero */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (ps_slice_hdr->i1_nal_unit_type != 5)
+        {
+            /* adaptive_ref_pic_marking_mode_flag */
+            /* TODO: if the reference picture marking mode is adaptive
+             * add these fields in the bit-stream */
+            ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag = 0;
+        }
+        else
+        {
+            /* no_output_of_prior_pics_flag */
+            ps_slice_hdr->u1_no_output_of_prior_pics_flag = 0;
+
+            /* long_term_reference_flag */
+            ps_slice_hdr->u1_long_term_reference_flag = 0;
+        }
+    }
+
+    /* entropy coding mode flag; cabac_init_idc is not populated here */
+    ps_slice_hdr->u1_entropy_coding_mode_flag = ps_entropy->u1_entropy_coding_mode_flag;
+
+    /* slice qp; sp_for_switch_flag and the slice qs of SP/SI slices are not
+     * populated here */
+    ps_slice_hdr->i1_slice_qp = ps_proc->u4_frame_qp;
+
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        /* disable_deblocking_filter_idc */
+        ps_slice_hdr->u1_disable_deblocking_filter_idc = ps_proc->u4_disable_deblock_level;
+
+        /* the alpha and beta offsets are zeroed unless the idc is 1 */
+        if (1 != ps_slice_hdr->u1_disable_deblocking_filter_idc)
+        {
+            /* slice_beta_offset_div2 */
+            ps_slice_hdr->i1_slice_beta_offset_div2 = 0;
+
+            /* slice_alpha_c0_offset_div2 */
+            ps_slice_hdr->i1_slice_alpha_c0_offset_div2 = 0;
+        }
+    }
+
+    /* slice_group_change_cycle */
+    /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+     * If this is not the case, we have to add Slice group map type to the bit stream */
+    ps_slice_hdr->u1_num_slice_groups_minus1 = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Inserts a FILLER nal unit.
+*
+* @par   Description
+*  In constant bit rate rc mode, when the bits generated by the codec fall
+*  short of the target bit rate, the encoder library inserts a filler nal
+*  unit. The unit is a start code, the filler header byte, three 0xFF bytes,
+*  then (insert_fill_bytes / 4) - 1 further 32 bit words of 0xFF, closed by
+*  the rbsp trailing bits.
+*
+* @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]    insert_fill_bytes
+*  Number of fill bytes to be inserted; floored to whole 4 byte words
+*
+* @return      success or failure error code;
+*              IH264E_BITSTREAM_BUFFER_OVERFLOW when the requested fill does
+*              not fit in the stream buffer
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm,
+                                          WORD32 insert_fill_bytes)
+{
+    IH264E_ERROR_T return_status = IH264E_SUCCESS;
+
+    /* 32 bit fill words still to be written */
+    WORD32 i4_words_left;
+
+    /* Insert the NAL start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* give up if the fill would reach the end of the stream buffer */
+    if (ps_bitstrm->u4_strm_buf_offset + insert_fill_bytes >= ps_bitstrm->u4_max_strm_size)
+    {
+        return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* Nal unit header byte followed by three fill bytes: the first word */
+    PUT_BITS(ps_bitstrm, NAL_FILLER_FIRST_BYTE, 8, return_status, "filler_header");
+
+    PUT_BITS(ps_bitstrm, 0xFFFFFF, 24, return_status, "fill bytes");
+
+    /*
+     * The byte count is floored to words and the word written above is
+     * deducted. A request of less than 8 bytes therefore adds no further
+     * fill words.
+     */
+    for (i4_words_left = (insert_fill_bytes >> 2) - 1; i4_words_left > 0; i4_words_left--)
+    {
+        /* one word of fill bytes */
+        PUT_BITS(ps_bitstrm, 0xFFFFFFFF, 32, return_status, "fill bytes");
+    }
+
+    return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
diff --git a/encoder/ih264e_encode_header.h b/encoder/ih264e_encode_header.h
new file mode 100755
index 0000000..acae5b6
--- /dev/null
+++ b/encoder/ih264e_encode_header.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_encode_header.h
+*
+* @brief
+*  This file contains structures and interface prototypes for h264 bitstream
+*  header encoding
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ENCODE_HEADER_H_
+#define IH264E_ENCODE_HEADER_H_
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief   Macro to put a code with specified number of bits into the
+ *           bitstream
+******************************************************************************
+ */
+#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
+         ENTROPY_TRACE(syntax_string, code_val);\
+        ret_val |= ih264e_put_bits((ps_bitstrm), (code_val), (code_len))
+
+/**
+******************************************************************************
+ *  @brief   Macro to put a code with specified number of bits into the
+ *           bitstream using 0th order exponential Golomb encoding for
+ *           signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+        ENTROPY_TRACE(syntax_string, code_val);\
+        ret_val |= ih264e_put_uev((ps_bitstrm), (code_val))
+
+/**
+******************************************************************************
+ *  @brief   Macro to put a code with specified number of bits into the
+ *           bitstream using 0th order exponential Golomb encoding for
+ *           signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+        ENTROPY_TRACE(syntax_string, code_val);\
+        ret_val |= ih264e_put_sev((ps_bitstrm), (code_val))
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates SPS (Sequence Parameter Set)
+*
+* @par   Description
+*  This function generates Sequence Parameter Set header as per the spec
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32      ih264e_generate_sps
+    (
+        bitstrm_t   *ps_bitstrm,
+        sps_t       *ps_sps
+    );
+
+/**
+******************************************************************************
+*
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par   Description
+*  Generate Picture Parameter Set as per Section 7.3.2.2
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   ps_pps
+*  pointer to structure containing PPS data
+*
+* @param[in]   ps_sps
+*  pointer to structure containing SPS data
+*  NOTE(review): presumably the SPS that this PPS refers to; confirm against
+*  the definition
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32      ih264e_generate_pps
+    (
+        bitstrm_t   *ps_bitstrm,
+        pps_t       *ps_pps,
+        sps_t       *ps_sps
+    );
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par   Description
+*  Generate Slice Header as per Section 7.3.3 (slice header syntax)
+*
+* @param[inout]   ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in]   ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in]   ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by slice
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32      ih264e_generate_slice_header
+    (
+        bitstrm_t       *ps_bitstrm,
+        slice_header_t  *ps_slice_hdr,
+        pps_t           *ps_pps,
+        sps_t           *ps_sps
+    );
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par   Description
+*  Populates sps structure for its use in header generation
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T      ih264e_populate_sps
+        (
+            codec_t *ps_codec,
+            sps_t   *ps_sps
+        );
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par   Description
+*  Populates pps structure for its use in header generation
+*
+* @param[in]   ps_codec
+*  pointer to encoder context
+*
+* @param[out]  ps_pps
+*  pointer to pps params that needs to be populated
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps
+        (
+            codec_t *ps_codec,
+            pps_t *ps_pps
+        );
+
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par   Description
+*  Populates slice header structure for its use in header generation
+*
+* @param[in]  ps_proc
+*  pointer to proc context
+*
+* @param[out]  ps_slice_hdr
+*  pointer to slice header structure that needs to be populated
+*
+* @param[in]  ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in]   ps_sps
+*  pointer to sps params referred by the pps
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header
+        (
+            process_ctxt_t *ps_proc,
+            slice_header_t *ps_slice_hdr,
+            pps_t *ps_pps,
+            sps_t *ps_sps
+        );
+
+
+/**
+******************************************************************************
+*
+* @brief Inserts a FILLER nal unit.
+*
+* @par   Description
+*  In constant bit rate rc mode, when the bits generated by the codec fall
+*  short of the target bit rate, the encoder library inserts a filler nal
+*  unit.
+*
+* @param[in]    ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]    insert_fill_bytes
+*  Number of fill bytes to be inserted. The definition floors this count to
+*  whole 4 byte words; the filler header byte and the first three fill bytes
+*  make up the first word.
+*
+* @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit
+        (
+            bitstrm_t   *ps_bitstrm,
+            WORD32      insert_fill_bytes
+        );
+
+
+#endif //IH264E_ENCODE_HEADER_H_
diff --git a/encoder/ih264e_error.h b/encoder/ih264e_error.h
new file mode 100755
index 0000000..8fe9dac
--- /dev/null
+++ b/encoder/ih264e_error.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_error.h
+*
+* @brief
+*  Definitions related to error handling
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ERROR_H_
+#define IH264E_ERROR_H_
+
+/**
+******************************************************************************
+*  @brief   On error, sets out_status to (1 << severity) | error; returns ret_code
+******************************************************************************
+*/
+#define SET_ERROR_ON_RETURN(error, severity, out_status, ret_code) \
+    if ((error) != IH264E_SUCCESS) \
+    {\
+        (out_status) = ((1 << (severity)) | (error));\
+        return (ret_code);\
+    }
+
+
+/**
+******************************************************************************
+ *  @brief   Extended error code for each error in H264 encoder
+******************************************************************************
+ */
+typedef enum
+{
+    /* NOTE: the ive error codes end at 0x80 */
+    IVE_ERR_CODEC_EXTENSIONS                                        = 0x80,
+
+    /* bit stream error start */
+    IH264E_BITSTREAM_ERROR_START                                    = IVE_ERR_CODEC_EXTENSIONS,
+
+    /* codec error start */
+    IH264E_CODEC_ERROR_START                                        = IH264E_BITSTREAM_ERROR_START + 0x10,
+
+    /** no error */
+    IH264E_SUCCESS                                                  = 0,
+
+    /** bitstream init failure, buffer ptr not aligned to WORD (32bits)     */
+    IH264E_BITSTREAM_BUFPTR_ALIGN_FAIL                              = IH264E_BITSTREAM_ERROR_START + 0x01,
+
+    /** bitstream init failure, buf size not multiple of WORD size (32bits) */
+    IH264E_BITSTREAM_BUFSIZE_ALIGN_FAIL                             = IH264E_BITSTREAM_ERROR_START + 0x02,
+
+    /** bitstream runtime failure, buf size limit exceeded during encode    */
+    IH264E_BITSTREAM_BUFFER_OVERFLOW                                = IH264E_BITSTREAM_ERROR_START + 0x03,
+
+    /**width not set within supported limit */
+    IH264E_WIDTH_NOT_SUPPORTED                                      = IH264E_CODEC_ERROR_START + 0x01,
+
+    /**height not set within supported limit */
+    IH264E_HEIGHT_NOT_SUPPORTED                                     = IH264E_CODEC_ERROR_START + 0x02,
+
+    /**Unsupported number of reference pictures passed as an argument */
+    IH264E_NUM_REF_UNSUPPORTED                                      = IH264E_CODEC_ERROR_START + 0x03,
+
+    /**Unsupported num reorder value passed as an argument */
+    IH264E_NUM_REORDER_UNSUPPORTED                                  = IH264E_CODEC_ERROR_START + 0x04,
+
+    /**codec level not supported */
+    IH264E_CODEC_LEVEL_NOT_SUPPORTED                                = IH264E_CODEC_ERROR_START + 0x05,
+
+    /**input chroma format not supported */
+    IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED                        = IH264E_CODEC_ERROR_START + 0x06,
+
+    /**recon chroma format not supported */
+    IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED                        = IH264E_CODEC_ERROR_START + 0x07,
+
+    /**rate control option configured is not supported */
+    IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED                          = IH264E_CODEC_ERROR_START + 0x08,
+
+    /**frame rate configured is not supported */
+    IH264E_FRAME_RATE_NOT_SUPPORTED                                 = IH264E_CODEC_ERROR_START + 0x09,
+
+    /**bit rate configured is not supported */
+    IH264E_BITRATE_NOT_SUPPORTED                                    = IH264E_CODEC_ERROR_START + 0x0A,
+
+    /**bframes configuration not supported */
+    IH264E_BFRAMES_NOT_SUPPORTED                                    = IH264E_CODEC_ERROR_START + 0x0B,
+
+    /**content type not supported */
+    IH264E_CONTENT_TYPE_NOT_SUPPORTED                               = IH264E_CODEC_ERROR_START + 0x0C,
+
+    /**unsupported horizontal search range */
+    IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED                    = IH264E_CODEC_ERROR_START + 0x0D,
+
+    /**unsupported vertical search range */
+    IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED                      = IH264E_CODEC_ERROR_START + 0x0E,
+
+    /**Unsupported slice type input */
+    IH264E_SLICE_TYPE_INPUT_INVALID                                 = IH264E_CODEC_ERROR_START + 0x0F,
+
+    /**unsupported architecture type */
+    IH264E_ARCH_TYPE_NOT_SUPPORTED                                  = IH264E_CODEC_ERROR_START + 0x10,
+
+    /**unsupported soc type */
+    IH264E_SOC_TYPE_NOT_SUPPORTED                                   = IH264E_CODEC_ERROR_START + 0x11,
+
+    /**target frame rate exceeds source frame rate */
+    IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE                    = IH264E_CODEC_ERROR_START + 0x12,
+
+    /**invalid force frame input */
+    IH264E_INVALID_FORCE_FRAME_INPUT                                = IH264E_CODEC_ERROR_START + 0x13,
+
+    /**invalid me speed preset */
+    IH264E_INVALID_ME_SPEED_PRESET                                  = IH264E_CODEC_ERROR_START + 0x14,
+
+    /**invalid encoder speed preset */
+    IH264E_INVALID_ENC_SPEED_PRESET                                 = IH264E_CODEC_ERROR_START + 0x15,
+
+    /**invalid deblocking param */
+    IH264E_INVALID_DEBLOCKING_TYPE_INPUT                            = IH264E_CODEC_ERROR_START + 0x16,
+
+    /**invalid max qp */
+    IH264E_INVALID_MAX_FRAME_QP                                     = IH264E_CODEC_ERROR_START + 0x17,
+
+    /**invalid min qp */
+    IH264E_INVALID_MIN_FRAME_QP                                     = IH264E_CODEC_ERROR_START + 0x18,
+
+    /**invalid init qp */
+    IH264E_INVALID_INIT_QP                                          = IH264E_CODEC_ERROR_START + 0x19,
+
+    /**version buffer size is insufficient */
+    IH264E_CXA_VERS_BUF_INSUFFICIENT                                = IH264E_CODEC_ERROR_START + 0x1A,
+
+    /**init not done */
+    IH264E_INIT_NOT_DONE                                            = IH264E_CODEC_ERROR_START + 0x1B,
+
+    /**invalid air mode (refresh type) input */
+    IH264E_INVALID_AIR_MODE                                         = IH264E_CODEC_ERROR_START + 0x1C,
+
+    /** invalid air refresh period */
+    IH264E_INVALID_AIR_REFRESH_PERIOD                               = IH264E_CODEC_ERROR_START + 0x1D,
+
+    /**Insufficient memory allocated for MV Bank */
+    IH264E_INSUFFICIENT_MEM_MVBANK                                  = IH264E_CODEC_ERROR_START + 0x1E,
+
+    /**Insufficient memory allocated for picture buffers */
+    IH264E_INSUFFICIENT_MEM_PICBUF                                  = IH264E_CODEC_ERROR_START + 0x1F,
+
+    /**Buffer manager error */
+    IH264E_BUF_MGR_ERROR                                            = IH264E_CODEC_ERROR_START + 0x20,
+
+    /**No free MV Bank buffer available to store current pic */
+    IH264E_NO_FREE_MVBANK                                           = IH264E_CODEC_ERROR_START + 0x21,
+
+    /**No free picture buffer available to store current pic */
+    IH264E_NO_FREE_PICBUF                                           = IH264E_CODEC_ERROR_START + 0x22,
+
+    /**Invalid encoder operation mode */
+    IH264E_INVALID_ENC_OPERATION_MODE                               = IH264E_CODEC_ERROR_START + 0x23,
+
+    /**Invalid half pel option */
+    IH264E_INVALID_HALFPEL_OPTION                                   = IH264E_CODEC_ERROR_START + 0x24,
+
+    /**Invalid quarter pel option */
+    IH264E_INVALID_QPEL_OPTION                                      = IH264E_CODEC_ERROR_START + 0x25,
+
+    /**Invalid fast sad option */
+    IH264E_INVALID_FAST_SAD_OPTION                                  = IH264E_CODEC_ERROR_START + 0x26,
+
+    /**Invalid intra 4x4 option */
+    IH264E_INVALID_INTRA4x4_OPTION                                  = IH264E_CODEC_ERROR_START + 0x27,
+
+    /**Invalid intra frame interval */
+    IH264E_INVALID_INTRA_FRAME_INTERVAL                             = IH264E_CODEC_ERROR_START + 0x28,
+
+    /**Invalid idr frame interval */
+    IH264E_INVALID_IDR_FRAME_INTERVAL                               = IH264E_CODEC_ERROR_START + 0x29,
+
+    /**Invalid buffer delay */
+    IH264E_INVALID_BUFFER_DELAY                                     = IH264E_CODEC_ERROR_START + 0x2A,
+
+    /**Invalid num cores */
+    IH264E_INVALID_NUM_CORES                                        = IH264E_CODEC_ERROR_START + 0x2B,
+
+    /**profile not supported */
+    IH264E_PROFILE_NOT_SUPPORTED                                    = IH264E_CODEC_ERROR_START + 0x2C,
+
+    /**Invalid slice param input */
+    IH264E_SLICE_PARAM_INPUT_INVALID                                = IH264E_CODEC_ERROR_START + 0x2D,
+
+    /**Invalid alt ref option */
+    IH264E_INVALID_ALT_REF_OPTION                                   = IH264E_CODEC_ERROR_START + 0x2E,
+
+    /**No free picture buffer available to store recon pic */
+    IH264E_NO_FREE_RECONBUF                                           = IH264E_CODEC_ERROR_START + 0x2F,
+
+    /**max failure error code to ensure enum is 32 bits wide */
+    IH264E_FAIL                                                     = -1,
+
+}IH264E_ERROR_T;
+
+
+#endif /* IH264E_ERROR_H_ */
diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c
new file mode 100755
index 0000000..393d6ca
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.c
@@ -0,0 +1,864 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_fmt_conv.c
+*
+* @brief
+*  Contains functions for format conversion or frame copy of output buffer
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_fmt_conv_420sp_to_rgb565()
+*  - ih264e_fmt_conv_420sp_to_rgba8888()
+*  - ih264e_fmt_conv_420sp_to_420sp()
+*  - ih264e_fmt_conv_420sp_to_420sp_swap_uv()
+*  - ih264e_fmt_conv_420sp_to_420p()
+*  - ih264e_fmt_conv_420p_to_420sp()
+*  - ih264e_fmt_conv_422i_to_420sp()
+*  - ih264e_fmt_conv()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_fmt_conv.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to RGB565
+*
+* @par   Description
+*  Works on 2x2 luma quads. The U/V pair shared by a quad yields the B, G
+*  and R offsets (COEFF1..COEFF4, >> 13). Each luma sample plus offset is
+*  clipped with CLIP_U8 and packed as (R >> 3) << 11 | (G >> 2) << 5 | B >> 3
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (interleaved, order selected by is_u_first)
+*
+* @param[out] pu2_rgb_dst
+*  Output RGB565 pointer
+*
+* @param[in] wd, ht
+*  Width and height
+*
+* @param[in] src_y_strd, src_uv_strd
+*  Input Y and UV strides
+*
+* @param[in] dst_strd
+*  Output stride, added to the UWORD16 output pointer
+*
+* @param[in] is_u_first
+*  Non zero when U is the first byte of every interleaved chroma pair
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
+                                     UWORD8 *pu1_uv_src,
+                                     UWORD16 *pu2_rgb_dst,
+                                     WORD32 wd,
+                                     WORD32 ht,
+                                     WORD32 src_y_strd,
+                                     WORD32 src_uv_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 is_u_first)
+{
+    /* chroma derived offsets shared by the four pixels of a 2x2 quad */
+    WORD16 i2_red, i2_grn, i2_blu;
+    /* clipped colour components of the pixel being packed */
+    UWORD32 u4_red, u4_grn, u4_blu;
+    WORD16 i2_row, i2_quad;
+    WORD32 k;
+
+    /* two luma rows and two output rows are handled in every outer pass */
+    UWORD8 *pu1_y_top = pu1_y_src;
+    UWORD8 *pu1_y_bot = pu1_y_src + src_y_strd;
+    UWORD16 *pu2_top = pu2_rgb_dst;
+    UWORD16 *pu2_bot = pu2_rgb_dst + dst_strd;
+
+    /* is_u_first selects which byte of a chroma pair is treated as U */
+    UWORD8 *pu1_u = is_u_first ? pu1_uv_src : pu1_uv_src + 1;
+    UWORD8 *pu1_v = is_u_first ? pu1_uv_src + 1 : pu1_uv_src;
+
+    for (i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        for (i2_quad = (wd >> 1); i2_quad > 0; i2_quad--)
+        {
+            i2_blu = ((*pu1_u - 128) * COEFF4 >> 13);
+            i2_grn = ((*pu1_u - 128) * COEFF2 + (*pu1_v - 128) * COEFF3)
+                            >> 13;
+            i2_red = ((*pu1_v - 128) * COEFF1) >> 13;
+
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1 : upper row of the quad */
+            for (k = 0; k < 2; k++)
+            {
+                u4_blu = CLIP_U8(*pu1_y_top + i2_blu);
+                u4_grn = CLIP_U8(*pu1_y_top + i2_grn);
+                u4_red = CLIP_U8(*pu1_y_top + i2_red);
+                pu1_y_top++;
+                /* keep the top 5 bits of R, 6 bits of G and 5 bits of B */
+                *pu2_top++ = (((u4_red >> 3) << 11) |
+                              ((u4_grn >> 2) << 5) | (u4_blu >> 3));
+            }
+
+            /* pixels 2 and 3 : lower row of the quad */
+            for (k = 0; k < 2; k++)
+            {
+                u4_blu = CLIP_U8(*pu1_y_bot + i2_blu);
+                u4_grn = CLIP_U8(*pu1_y_bot + i2_grn);
+                u4_red = CLIP_U8(*pu1_y_bot + i2_red);
+                pu1_y_bot++;
+
+                *pu2_bot++ = (((u4_red >> 3) << 11) |
+                              ((u4_grn >> 2) << 5) | (u4_blu >> 3));
+            }
+        }
+
+        /* step the chroma pointers on by one chroma stride less wd */
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+
+        /* each luma pointer skips the row its partner has just consumed */
+        pu1_y_top += (src_y_strd << 1) - wd;
+        pu1_y_bot += (src_y_strd << 1) - wd;
+
+        /* upper output row restarts one stride below the lower one */
+        pu2_top = pu2_bot + (dst_strd - wd);
+        pu2_bot += (dst_strd << 1) - wd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to RGBA8888
+*
+* @par   Description
+*  Works on 2x2 luma quads. The U/V pair shared by a quad yields the B, G
+*  and R offsets (COEFF1..COEFF4, >> 13). Each luma sample plus offset is
+*  clipped with CLIP_U8 and stored as (R << 16) | (G << 8) | B
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (interleaved, order selected by is_u_first)
+*
+* @param[out] pu4_rgba_dst
+*  Output pointer, one UWORD32 per pixel
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd, src_uv_strd
+*  Input Y and UV strides
+*
+* @param[in] dst_strd
+*  Output stride, added to the UWORD32 output pointer
+*
+* @param[in] is_u_first
+*  Non zero when U is the first byte of every interleaved chroma pair
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
+                                       UWORD8 *pu1_uv_src,
+                                       UWORD32 *pu4_rgba_dst,
+                                       WORD32 wd,
+                                       WORD32 ht,
+                                       WORD32 src_y_strd,
+                                       WORD32 src_uv_strd,
+                                       WORD32 dst_strd,
+                                       WORD32 is_u_first)
+{
+    WORD16 i2_red, i2_grn, i2_blu;
+    UWORD32 u4_red, u4_grn, u4_blu;
+    WORD16 i2_row, i2_quad;
+    WORD32 k;
+
+    UWORD8 *pu1_y_top = pu1_y_src;
+    UWORD8 *pu1_y_bot = pu1_y_src + src_y_strd;
+    UWORD32 *pu4_top = pu4_rgba_dst;
+    UWORD32 *pu4_bot = pu4_rgba_dst + dst_strd;
+    UWORD8 *pu1_u = is_u_first ? pu1_uv_src : pu1_uv_src + 1;
+    UWORD8 *pu1_v = is_u_first ? pu1_uv_src + 1 : pu1_uv_src;
+
+    for (i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        for (i2_quad = (wd >> 1); i2_quad > 0; i2_quad--)
+        {
+            /* colour offsets shared by the four pixels of this quad */
+            i2_blu = ((*pu1_u - 128) * COEFF4 >> 13);
+            i2_grn = ((*pu1_u - 128) * COEFF2 + (*pu1_v - 128) * COEFF3)
+                            >> 13;
+            i2_red = ((*pu1_v - 128) * COEFF1) >> 13;
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1 : upper row of the quad */
+            for (k = 0; k < 2; k++)
+            {
+                u4_blu = CLIP_U8(*pu1_y_top + i2_blu);
+                u4_grn = CLIP_U8(*pu1_y_top + i2_grn);
+                u4_red = CLIP_U8(*pu1_y_top + i2_red);
+                pu1_y_top++;
+                *pu4_top++ = ((u4_red << 16) | (u4_grn << 8) | (u4_blu << 0));
+            }
+
+            /* pixels 2 and 3 : lower row of the quad */
+            for (k = 0; k < 2; k++)
+            {
+                u4_blu = CLIP_U8(*pu1_y_bot + i2_blu);
+                u4_grn = CLIP_U8(*pu1_y_bot + i2_grn);
+                u4_red = CLIP_U8(*pu1_y_bot + i2_red);
+                pu1_y_bot++;
+                *pu4_bot++ = ((u4_red << 16) | (u4_grn << 8) | (u4_blu << 0));
+            }
+        }
+
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+        pu1_y_top += (src_y_strd << 1) - wd;
+        pu1_y_bot += (src_y_strd << 1) - wd;
+
+        pu4_top = pu4_bot + (dst_strd - wd);
+        pu4_bot += (dst_strd << 1) - wd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer
+*
+* @par   Description
+*  Copies the luma plane (ht rows) and the interleaved chroma plane
+*  (ht / 2 rows) row by row. wd bytes are copied from every row, and the
+*  source and destination rows are stepped by their own strides
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer (UV is interleaved in the same format as that of input)
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @returns None
+*
+* @remarks A partial frame copy can be done by passing appropriate source
+* and destination pointers together with appropriate values for wd and ht
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
+                                    UWORD8 *pu1_uv_src,
+                                    UWORD8 *pu1_y_dst,
+                                    UWORD8 *pu1_uv_dst,
+                                    WORD32 wd,
+                                    WORD32 ht,
+                                    WORD32 src_y_strd,
+                                    WORD32 src_uv_strd,
+                                    WORD32 dst_y_strd,
+                                    WORD32 dst_uv_strd)
+{
+    /* read and write cursors of the plane being copied */
+    UWORD8 *pu1_rd, *pu1_wr;
+    /* rows that remain to be copied in the current plane */
+    WORD32 rows_left;
+
+    /*
+     * luma plane : ht rows, wd bytes are copied from each row
+     */
+    pu1_rd = pu1_y_src;
+    pu1_wr = pu1_y_dst;
+    rows_left = ht;
+
+    while (rows_left > 0)
+    {
+        memcpy(pu1_wr, pu1_rd, wd);
+        pu1_wr += dst_y_strd;
+        pu1_rd += src_y_strd;
+        rows_left--;
+    }
+
+    /*
+     * chroma plane : U and V are interleaved, so a chroma row is also wd
+     * bytes wide; only the number of rows is halved
+     */
+    pu1_rd = pu1_uv_src;
+    pu1_wr = pu1_uv_dst;
+    rows_left = ht >> 1;
+
+    while (rows_left > 0)
+    {
+        memcpy(pu1_wr, pu1_rd, wd);
+        pu1_wr += dst_uv_strd;
+        pu1_rd += src_uv_strd;
+        rows_left--;
+    }
+
+    return;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer with U and V swapped
+*
+* @par   Description
+*  Luma rows are copied as they are. In every chroma row the two bytes of
+*  each interleaved pair are exchanged, turning UV order into VU or vice versa
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
+                                            UWORD8 *pu1_uv_src,
+                                            UWORD8 *pu1_y_dst,
+                                            UWORD8 *pu1_uv_dst,
+                                            WORD32 wd,
+                                            WORD32 ht,
+                                            WORD32 src_y_strd,
+                                            WORD32 src_uv_strd,
+                                            WORD32 dst_y_strd,
+                                            WORD32 dst_uv_strd)
+{
+    UWORD8 *pu1_rd, *pu1_wr;
+    WORD32 rows_left;
+
+    /* luma plane : plain row by row copy of wd bytes */
+    pu1_rd = pu1_y_src;
+    pu1_wr = pu1_y_dst;
+
+    for (rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_wr, pu1_rd, wd);
+        pu1_wr += dst_y_strd;
+        pu1_rd += src_y_strd;
+    }
+
+    /* chroma plane : ht / 2 rows of wd bytes, walked one byte pair */
+    /* at a time so that the two bytes can be written in swapped order */
+    pu1_rd = pu1_uv_src;
+    pu1_wr = pu1_uv_dst;
+
+    for (rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        WORD32 col;
+
+        for (col = 0; col < wd; col += 2)
+        {
+            pu1_wr[col] = pu1_rd[col + 1];
+            pu1_wr[col + 1] = pu1_rd[col];
+        }
+
+        pu1_wr += dst_uv_strd;
+        pu1_rd += src_uv_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to 420P
+*
+* @par   Description
+*  Copies luma row by row unless disable_luma_copy is set, then splits the
+*  interleaved chroma plane into separate U and V planes. is_u_first tells
+*  whether U or V is the first byte of each chroma pair. dst_uv_strd is used
+*  for both the U and the V output plane
+*
+* @returns None
+*
+* @remarks wd >> 1 chroma samples per row and ht >> 1 chroma rows are written
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_uv_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_u_dst,
+                                   UWORD8 *pu1_v_dst,
+                                   WORD32 wd,
+                                   WORD32 ht,
+                                   WORD32 src_y_strd,
+                                   WORD32 src_uv_strd,
+                                   WORD32 dst_y_strd,
+                                   WORD32 dst_uv_strd,
+                                   WORD32 is_u_first,
+                                   WORD32 disable_luma_copy)
+{
+    UWORD8 *pu1_u_rd, *pu1_v_rd;
+    WORD32 row, col;
+    WORD32 chroma_rows = ht >> 1;
+    WORD32 chroma_cols = wd >> 1;
+
+    /* luma is copied unless the caller asked for it to be skipped */
+    if (0 == disable_luma_copy)
+    {
+        UWORD8 *pu1_rd = pu1_y_src;
+        UWORD8 *pu1_wr = pu1_y_dst;
+
+        for (row = 0; row < ht; row++)
+        {
+            memcpy(pu1_wr, pu1_rd, wd);
+            pu1_wr += dst_y_strd;
+            pu1_rd += src_y_strd;
+        }
+    }
+
+    /* the first byte of each chroma pair is U when is_u_first is set */
+    pu1_u_rd = is_u_first ? pu1_uv_src : pu1_uv_src + 1;
+    pu1_v_rd = is_u_first ? pu1_uv_src + 1 : pu1_uv_src;
+
+    /* de-interleave : every second source byte goes to the same plane */
+    for (row = 0; row < chroma_rows; row++)
+    {
+        for (col = 0; col < chroma_cols; col++)
+        {
+            pu1_u_dst[col] = pu1_u_rd[col * 2];
+            pu1_v_dst[col] = pu1_v_rd[col * 2];
+        }
+
+        pu1_u_dst += dst_uv_strd;
+        pu1_v_dst += dst_uv_strd;
+        pu1_u_rd += src_uv_strd;
+        pu1_v_rd += src_uv_strd;
+    }
+
+    return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to perform color space conversion from 420P to 420SP
+*
+* @par   Description
+*  Copies luma (unless convert_uv_only is set) and interleaves U and V samples
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_u_src
+*  Input U pointer
+*
+* @param[in] pu1_v_src
+*  Input V pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer
+*
+* @param[in] u2_height
+*  Height
+*
+* @param[in] u2_width
+*  Width
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_u_strd
+*  Input U stride
+*
+* @param[in] src_v_strd
+*  Input V stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @param[in] convert_uv_only
+*  Flag to indicate if only UV copy needs to be done
+*
+* @returns none
+*
+* @remarks In case there is a need to perform partial frame copy then
+* by passing appropriate source and destination pointers and appropriate
+* values for u2_width and u2_height it can be done
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_u_src,
+                                   UWORD8 *pu1_v_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_uv_dst,
+                                   UWORD16 u2_height,
+                                   UWORD16 u2_width,
+                                   UWORD16 src_y_strd,
+                                   UWORD16 src_u_strd,
+                                   UWORD16 src_v_strd,
+                                   UWORD16 dst_y_strd,
+                                   UWORD16 dst_uv_strd,
+                                   UWORD32 convert_uv_only)
+{
+    UWORD8 *pu1_src, *pu1_dst;
+    UWORD8 *pu1_src_u, *pu1_src_v;
+    UWORD16 i;
+    UWORD32 u2_width_uv;
+    UWORD32 dest_inc_Y = 0, dest_inc_UV = 0;
+
+    dest_inc_UV = dst_uv_strd;
+
+    if (0 == convert_uv_only)
+    {
+
+        /* Copy Y buffer */
+        pu1_dst = (UWORD8 *) pu1_y_dst;
+        pu1_src = (UWORD8 *) pu1_y_src;
+
+        dest_inc_Y = dst_y_strd;
+
+        for (i = 0; i < u2_height; i++)
+        {
+            memcpy((void *) pu1_dst, (void *) pu1_src, u2_width);
+            pu1_dst += dest_inc_Y;
+            pu1_src += src_y_strd;
+        }
+    }
+
+    /* Interleave Cb and Cr buffers */
+    pu1_src_u = pu1_u_src;
+    pu1_src_v = pu1_v_src;
+    pu1_dst = pu1_uv_dst;
+    /* chroma dimensions are rounded up, so odd sizes keep their last sample */
+    u2_height = (u2_height + 1) >> 1;
+    u2_width_uv = (u2_width + 1) >> 1;
+    for (i = 0; i < u2_height; i++)
+    {
+        UWORD32 j;
+        for (j = 0; j < u2_width_uv; j++)
+        {
+            *pu1_dst++ = *pu1_src_u++;
+            *pu1_dst++ = *pu1_src_v++;
+        }
+        /* 2 * u2_width_uv bytes were written; this exceeds u2_width if odd */
+        pu1_dst += dest_inc_UV - (u2_width_uv << 1);
+        pu1_src_u += src_u_strd - u2_width_uv;
+        pu1_src_v += src_v_strd - u2_width_uv;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert 422 interleaved to 420sp
+*
+* @par   Description
+*  Every 4 input bytes are read as Cb, Y, Cr, Y. Luma is written for both
+*  rows of a row pair, chroma is taken from the even row only
+*
+* @param[out] pu1_y_buf
+*  Output Y pointer
+*
+* @param[out] pu1_u_buf
+*  Output u pointer
+*
+* @param[out] pu1_v_buf
+*  Output V pointer
+*
+* @param[in] pu1_422i_buf
+*  Input 422i pointer
+*
+* @param[in] u4_y_width
+*  Width of Y component
+*
+* @param[in] u4_y_height
+*  Height of Y component
+*
+* @param[in] u4_y_stride
+*  Stride of pu1_y_buf
+*
+* @param[in] u4_u_stride
+*  Stride of pu1_u_buf
+*
+* @param[in] u4_v_stride
+*  Stride of pu1_v_buf
+*
+* @param[in] u4_422i_stride
+*  Stride of pu1_422i_buf
+*
+* @returns None
+*
+* @remarks For conversion pu1_v_buf = pu1_u_buf+1 and
+* u4_u_stride = u4_v_stride
+*
+* The extra parameters are for maintaining API with assembly function
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf,
+                                   UWORD8 *pu1_u_buf,
+                                   UWORD8 *pu1_v_buf,
+                                   UWORD8 *pu1_422i_buf,
+                                   WORD32 u4_y_width,
+                                   WORD32 u4_y_height,
+                                   WORD32 u4_y_stride,
+                                   WORD32 u4_u_stride,
+                                   WORD32 u4_v_stride,
+                                   WORD32 u4_422i_stride)
+{
+    WORD32 y, x;
+    /* consecutive 422i rows are (u4_422i_stride << 1) bytes apart */
+    UWORD8 *pu1_src_even = pu1_422i_buf;
+    UWORD8 *pu1_src_odd = pu1_422i_buf + (u4_422i_stride << 1);
+    UWORD8 *pu1_luma_even = pu1_y_buf;
+    UWORD8 *pu1_luma_odd = pu1_y_buf + u4_y_stride;
+    UWORD8 *pu1_cb = pu1_u_buf;
+    UWORD8 *pu1_cr = pu1_v_buf;
+
+    for (y = 0; y < u4_y_height; y += 2)
+    {
+        /* x counts luma samples; each pair of them uses 4 source bytes */
+        for (x = 0; x < u4_y_width; x += 2)
+        {
+            UWORD8 *pu1_even = pu1_src_even + (x << 1);
+            UWORD8 *pu1_odd = pu1_src_odd + (x << 1);
+            UWORD8 u1_cb = pu1_even[0];
+            UWORD8 u1_cr = pu1_even[2];
+
+            /* chroma of the odd row is dropped */
+            pu1_cb[x] = u1_cb;
+            pu1_cr[x] = u1_cr;
+
+            pu1_luma_even[x] = pu1_even[1];
+            pu1_luma_even[x + 1] = pu1_even[3];
+            pu1_luma_odd[x] = pu1_odd[1];
+            pu1_luma_odd[x + 1] = pu1_odd[3];
+        }
+
+        pu1_src_even += (u4_422i_stride << 2);
+        pu1_src_odd += (u4_422i_stride << 2);
+        pu1_luma_even += (u4_y_stride << 1);
+        pu1_luma_odd += (u4_y_stride << 1);
+        pu1_cb += u4_u_stride;
+        pu1_cr += u4_v_stride;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for format conversion or frame copy
+*
+* @par   Description
+* Copies or converts num_rows rows of a reconstructed picture, starting at
+* cur_row, into the caller's output buffers (non shared mode)
+*
+* @param[in] ps_codec
+*  Codec context; display width, recon stride and colour formats are read from it
+*
+* @param[in] ps_pic
+*  Source picture buffer; its pu1_luma and pu1_chroma planes are read
+*
+* @param[in] pu1_y_dst
+*  Output Y pointer
+*
+* @param[in] pu1_u_dst
+*  Output U/UV pointer ( UV is interleaved in the same format as that of input)
+*
+* @param[in] pu1_v_dst
+*  Output V pointer ( used in 420P output case)
+*
+* @param[in] u4_dst_y_strd
+*  Stride of destination Y buffer
+*
+* @param[in] u4_dst_uv_strd
+*  Stride of destination  U/V buffer
+*
+* @param[in] cur_row
+*  First luma row to process; the chroma row offset used is cur_row / 2
+*
+* @param[in] num_rows
+*  Number of luma rows to process; when 0 the function returns immediately
+*
+* @returns error status (IH264E_SUCCESS on every path of this implementation)
+*
+* @remarks
+* Assumes that the stride of U and V buffers are same.
+* This is correct in most cases
+* If a case comes where this is not true we need to modify the fmt conversion
+* functions called inside also
+* Recon formats other than 420SP_UV, 420SP_VU and 420P are not converted and
+* no error is reported for them
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
+                               pic_buf_t *ps_pic,
+                               UWORD8 *pu1_y_dst,
+                               UWORD8 *pu1_u_dst,
+                               UWORD8 *pu1_v_dst,
+                               UWORD32 u4_dst_y_strd,
+                               UWORD32 u4_dst_uv_strd,
+                               WORD32 cur_row,
+                               WORD32 num_rows)
+{
+    IH264E_ERROR_T ret = IH264E_SUCCESS;
+    UWORD8 *pu1_y_src, *pu1_uv_src;
+    UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
+    UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
+    WORD32 is_u_first;
+    UWORD8 *pu1_luma;
+    UWORD8 *pu1_chroma;
+    WORD32 wd;
+
+    /* Nothing to convert */
+    if (0 == num_rows)
+        return ret;
+
+    pu1_luma = ps_pic->pu1_luma;
+    pu1_chroma = ps_pic->pu1_chroma;
+
+    /* The converters are given the display width, not the coded width */
+    wd = ps_codec->s_cfg.u4_disp_wd;
+    is_u_first = (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
+
+    /* Luma is always copied here: disable_luma_copy is passed as 0 below */
+    {
+        pu1_y_src = pu1_luma + cur_row * ps_codec->i4_rec_strd;
+        pu1_uv_src = pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd;
+
+        pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
+        pu1_uv_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
+        pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
+        pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+        /* Dispatch on the configured recon (output) colour format */
+        /* In non-shared mode, reference buffers are in 420SP UV format,
+         * if output also is in 420SP_UV, then just copy
+         * NOTE(review): 420SP_VU takes the same call, which has no U/V order
+         * argument - confirm whether a U/V swap is expected for it */
+        if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
+                        (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
+        {
+            ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                           pu1_uv_dst_tmp, wd, num_rows,
+                                           ps_codec->i4_rec_strd,
+                                           ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                           u4_dst_uv_strd);
+        }
+        else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
+        {
+            ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                          pu1_u_dst_tmp, pu1_v_dst_tmp, wd,
+                                          num_rows, ps_codec->i4_rec_strd,
+                                          ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                          u4_dst_uv_strd, is_u_first, 0);
+        }
+    }
+    return(ret);
+}
+
diff --git a/encoder/ih264e_fmt_conv.h b/encoder/ih264e_fmt_conv.h
new file mode 100755
index 0000000..6b33bf0
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.h
@@ -0,0 +1,142 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_fmt_conv.h
+*
+* @brief
+*  The file contains extern declarations of color space conversion routines
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_FMT_CONV_H_
+#define IH264E_FMT_CONV_H_
+
+#define COEFF1          13073   /* NOTE(review): COEFF1..4 look like Q13 YUV to RGB */
+#define COEFF2          -3207   /* weights (13073 / 8192 ~= 1.596) - confirm against */
+#define COEFF3          -6664   /* the rgb conversion code that uses them; negative  */
+#define COEFF4          16530   /* values are not parenthesised                      */
+
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,       /* codec context */
+                               pic_buf_t *ps_pic,       /* source picture */
+                               UWORD8 *pu1_y_dst,       /* output Y plane */
+                               UWORD8 *pu1_u_dst,       /* output U or UV plane */
+                               UWORD8 *pu1_v_dst,       /* output V plane */
+                               UWORD32 u4_dst_y_strd,   /* output Y stride */
+                               UWORD32 u4_dst_uv_strd,  /* output U/V stride */
+                               WORD32 cur_row,          /* first luma row */
+                               WORD32 num_rows);        /* luma rows to process */
+/* Signature shared by the 420SP to RGBA8888 converters */
+typedef void ih264e_fmt_conv_420sp_to_rgba8888_ft(UWORD8 *pu1_y_src,
+                                                  UWORD8 *pu1_uv_src,
+                                                  UWORD32 *pu4_rgba_dst,
+                                                  WORD32 wd,
+                                                  WORD32 ht,
+                                                  WORD32 src_y_strd,
+                                                  WORD32 src_uv_strd,
+                                                  WORD32 dst_strd,
+                                                  WORD32 is_u_first);
+/* Signature shared by the 420SP to RGB565 converters */
+typedef void ih264e_fmt_conv_420sp_to_rgb565_ft(UWORD8 *pu1_y_src,
+                                                UWORD8 *pu1_uv_src,
+                                                UWORD16 *pu2_rgb_dst,
+                                                WORD32 wd,
+                                                WORD32 ht,
+                                                WORD32 src_y_strd,
+                                                WORD32 src_uv_strd,
+                                                WORD32 dst_strd,
+                                                WORD32 is_u_first);
+/* Signature shared by the 420SP to 420SP converters */
+typedef void ih264e_fmt_conv_420sp_to_420sp_ft(UWORD8 *pu1_y_src,
+                                               UWORD8 *pu1_uv_src,
+                                               UWORD8 *pu1_y_dst,
+                                               UWORD8 *pu1_uv_dst,
+                                               WORD32 wd,
+                                               WORD32 ht,
+                                               WORD32 src_y_strd,
+                                               WORD32 src_uv_strd,
+                                               WORD32 dst_y_strd,
+                                               WORD32 dst_uv_strd);
+/* Signature shared by the 420SP to 420P converters */
+typedef void ih264e_fmt_conv_420sp_to_420p_ft(UWORD8 *pu1_y_src,
+                                              UWORD8 *pu1_uv_src,
+                                              UWORD8 *pu1_y_dst,
+                                              UWORD8 *pu1_u_dst,
+                                              UWORD8 *pu1_v_dst,
+                                              WORD32 wd,
+                                              WORD32 ht,
+                                              WORD32 src_y_strd,
+                                              WORD32 src_uv_strd,
+                                              WORD32 dst_y_strd,
+                                              WORD32 dst_uv_strd,
+                                              WORD32 is_u_first,
+                                              WORD32 disable_luma_copy);
+/* Signature of the 420P to 420SP converters; height precedes width, UWORD16 sizes */
+typedef void ih264e_fmt_conv_420p_to_420sp_ft(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+                                              UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+                                              UWORD16 u2_height, UWORD16 u2_width, UWORD16 src_y_strd,
+                                              UWORD16 src_u_strd, UWORD16 src_v_strd,
+                                              UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+                                              UWORD32 convert_uv_only);
+/* Signature shared by the 422I to 420SP converters */
+typedef void ih264e_fmt_conv_422i_to_420sp_ft(UWORD8 *pu1_y_buf,UWORD8 *pu1_u_buf,UWORD8 *pu1_v_buf,
+                                              UWORD8 *pu1_422i_buf,
+                                              WORD32 u4_y_width,WORD32 u4_y_height,
+                                              WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride,
+                                              WORD32 u4_422i_stride);
+
+
+/* C function declarations (no architecture suffix) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft    ih264e_fmt_conv_420sp_to_rgba8888;
+ih264e_fmt_conv_420sp_to_rgb565_ft      ih264e_fmt_conv_420sp_to_rgb565;
+ih264e_fmt_conv_420sp_to_420sp_ft       ih264e_fmt_conv_420sp_to_420sp;
+ih264e_fmt_conv_420sp_to_420p_ft        ih264e_fmt_conv_420sp_to_420p;
+ih264e_fmt_conv_420p_to_420sp_ft        ih264e_fmt_conv_420p_to_420sp;
+ih264e_fmt_conv_422i_to_420sp_ft        ih264e_fmt_conv_422i_to_420sp;
+
+/* A9Q function declarations (_a9q suffix; no rgb565 variant is declared) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft    ih264e_fmt_conv_420sp_to_rgba8888_a9q;
+ih264e_fmt_conv_420sp_to_420sp_ft       ih264e_fmt_conv_420sp_to_420sp_a9q;
+ih264e_fmt_conv_420sp_to_420p_ft        ih264e_fmt_conv_420sp_to_420p_a9q;
+ih264e_fmt_conv_420p_to_420sp_ft        ih264e_fmt_conv_420p_to_420sp_a9q;
+ih264e_fmt_conv_422i_to_420sp_ft        ih264e_fmt_conv_422i_to_420sp_a9q;
+
+
+/* A9A function declarations (_a9a suffix) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9a;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9a;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9a;
+
+/* SSSE3 function declarations (the symbol suffix is spelled _ssse31) */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_ssse31;
+
+/* SSE4 function declarations (_sse42 suffix) */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_sse42;
+
+#endif /* IH264E_FMT_CONV_H_ */
diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c
new file mode 100755
index 0000000..65f943a
--- /dev/null
+++ b/encoder/ih264e_function_selector_generic.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector_generic.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr_generic
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks Installs the plain (non architecture-suffixed) implementations
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec)
+{
+    WORD32 i = 0;
+
+    /* curr proc ctxt */
+    process_ctxt_t *ps_proc = NULL;
+    me_ctxt_t *ps_me_ctxt = NULL;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;
+
+    /* Init luma forward transform fn ptr */
+    ps_codec->pf_resi_trans_quant_8x8            = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4            = ih264_resi_trans_quant_4x4;
+    ps_codec->pf_resi_trans_quant_chroma_4x4     = ih264_resi_trans_quant_chroma_4x4;
+    ps_codec->pf_hadamard_quant_4x4              = ih264_hadamard_quant_4x4;
+    ps_codec->pf_hadamard_quant_2x2_uv           = ih264_hadamard_quant_2x2_uv;
+
+    /* Init inverse transform fn ptr */
+    ps_codec->pf_iquant_itrans_recon_8x8          = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_4x4          = ih264_iquant_itrans_recon_4x4;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc       = ih264_iquant_itrans_recon_4x4_dc;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4   = ih264_iquant_itrans_recon_chroma_4x4;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc;
+
+    ps_codec->pf_ihadamard_scaling_4x4            = ih264_ihadamard_scaling_4x4;
+    ps_codec->pf_ihadamard_scaling_2x2_uv         = ih264_ihadamard_scaling_2x2_uv;
+    ps_codec->pf_interleave_copy                  = ih264_interleave_copy;
+
+    /* Init fn ptr luma core coding
+     * NOTE(review): slot [2] is not assigned here - confirm it is never used */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* Init fn ptr chroma core coding */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
+
+    /* write mb syntax layer */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_top = ih264_pad_top;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
+
+    /* Inter pred leaf level functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma;
+
+    /* sad me level functions */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
+
+    /* memory handling operations */
+    ps_codec->pf_mem_cpy = ih264_memcpy;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
+    ps_codec->pf_mem_set = ih264_memset;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8;
+
+    /* the same ME function pointers, set in every process context */
+    for (i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_proc = &ps_codec->as_process[i];
+
+        ps_me_ctxt = &ps_proc->s_me_ctxt;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
+    }
+
+    /* intra mode eval -encoder level function */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* csc */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation function - encoder level*/
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
+}
diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c
new file mode 100755
index 0000000..e2b46a4
--- /dev/null
+++ b/encoder/ih264e_globals.c
@@ -0,0 +1,261 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_globals.c
+*
+* @brief
+*  Contains definitions of global variables used across the encoder
+*
+* @author
+*  ittiam
+*
+* @par List of functions
+*
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+
+/*****************************************************************************/
+/* Extern global definitions                                                 */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief  lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input  : qp, output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.)  If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+* The entries appear to be this sqrt form, rounded (qp 51 -> 83).
+******************************************************************************
+*/
+const UWORD16 gu2_qp_lambda[52]=
+{
+       0,      0,      0,      0,      0,      0,      0,      1,
+       1,      1,      1,      1,      1,      1,      1,      1,
+       1,      2,      2,      2,      2,      3,      3,      3,
+       4,      4,      5,      5,      6,      7,      7,      8,
+       9,     10,     12,     13,     15,     17,     19,     21,
+      23,     26,     30,     33,     37,     42,     47,     53,
+      59,     66,     74,     83,
+};
+
+/**
+******************************************************************************
+* @brief  Lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input  : qp, output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6)
+* Entries for qp below 12 are 0; from qp 12 upwards they grow from 1 to 91.
+******************************************************************************
+*/
+const UWORD8 gu1_qp0[52]=
+{
+       0,      0,      0,      0,      0,      0,      0,      0,
+       0,      0,      0,      0,      1,      1,      1,      1,
+       2,      2,      2,      2,      3,      3,      3,      4,
+       4,      4,      5,      6,      6,      7,      8,      9,
+      10,     11,     13,     14,     16,     18,     20,     23,
+      25,     29,     32,     36,     40,     45,     51,     57,
+      64,     72,     81,     91,
+};
+
+/**
+******************************************************************************
+* @brief  unsigned exp. Golomb codelengths to assign cost to a coefficient of
+* mb types.
+* input  : Integer (0..31)
+* output : codelength in bits, i.e. 2 * floor(log2(input + 1)) + 1
+* @remarks Refer sec. 9-1 in h264 specification
+******************************************************************************
+*/
+const UWORD8 u1_uev_codelength[32] =
+{
+     1,      3,      3,      5,      5,      5,      5,      7,
+     7,      7,      7,      7,      7,      7,      7,      9,
+     9,      9,      9,      9,      9,      9,      9,      9,
+     9,      9,      9,      9,      9,      9,      9,      11,
+};
+
+
+/**
+******************************************************************************
+* @brief  Look up table to assign cost to a coefficient of a residual block
+* based on its surrounding coefficients
+* input  : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+const UWORD8 gu1_coeff_cost[6] =
+{
+     3, 2, 2, 1, 1, 1
+};
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for luma 4x4 block
+* input  : scan index
+* output : scan location
+* @remarks Entries follow the zig-zag order of a 4x4 block (raster positions)
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order[16] =
+{
+     0,  1,  4,  8,  5,  2,  3,  6,  9,  12, 13, 10, 7,  11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for chroma AC block
+* input  : scan index
+* output : scan location
+* @remarks Same sequence as the 4x4 zig-zag order without position 0 (DC)
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order[15] =
+{
+     1,  4,  8,  5,  2,  3,  6,  9,  12, 13, 10, 7,  11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for luma 4x4 dc block
+* input  : scan index
+* output : scan location
+* @remarks The 16 entries follow the zig-zag order of a 4x4 block
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order_dc[16] =
+{
+     0, 1,  4,  8,  5,  2,  3,  6,  9,  12, 13, 10, 7,  11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for chroma 2x2 dc block
+* input  : scan index
+* output : scan location
+* @remarks Identity mapping: scan index and scan location are equal
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order_dc[4] =
+{
+     0, 1,  2,  3
+};
+
+/**
+******************************************************************************
+* @brief  choice of motion vectors to be used during mv prediction
+* input  : formatted reference idx comparison metric
+* output : mv prediction has to be median or a simple straight forward
+* selection from neighbors.
+* @remarks If only one of the candidate blocks has a reference frame equal to
+    the current block then use the same block as the final predictor. Indices
+    1, 2 and 4 (a single bit set) map to 0, 1 and 2; all other indices give -1
+******************************************************************************
+*/
+const WORD8 gi1_mv_pred_condition[8] =
+{
+     -1,    0,    1,    -1,    2,    -1,    -1,    -1
+};
+
+/**
+******************************************************************************
+* @brief  maps the h264 quantizer to the mpeg2 quantizer scale
+* input  : h264 qp, output : equivalent mpeg 2 qp
+* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+* NOTE(review): entries look like round(2 ^ ((qp - 4) / 6)), qp 51 -> 228; confirm
+******************************************************************************
+*/
+const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] =
+{
+       1,      1,      1,      1,      1,      1,      1,      1,
+       2,      2,      2,      2,      3,      3,      3,      4,
+       4,      4,      5,      6,      6,      7,      8,      9,
+      10,     11,     13,     14,     16,     18,     20,     23,
+      25,     29,     32,     36,     40,     45,     51,     57,
+      64,     72,     81,     91,    102,    114,    128,    144,
+     161,    181,    203,    228,
+};
+
+/**
+******************************************************************************
+* @brief  maps the mpeg2 quantizer to the h264 quantizer scale
+* input  : mpeg2 qp
+* output : equivalent h264qp
+* @remarks  MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+*      k = 0 (for intra)  k = sign(QFij)
+*   H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+*   Excluding the portion of R(QP%6,i,j) that is due to
+*   the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+*   correspond to dequant values of
+*   2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+*   (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+*   Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+*   (the actual mapping seems to be to MPEG2 qscale of 2.5),
+*   and the fact that the effective h264 quantizer changes by
+*   a factor of 2 for every 6 steps, the following mapping is
+*   obtained:
+*    h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+*   NOTE(review): entries look like round(6*log2(qscale) + 4), 2 -> 10; confirm
+*   Note that the quant matrix entry assumed for the above
+*   equality is 16. Hence when the mpeg2 quant matrix entries
+*   are all 16, this lookup can be used as is (which is the
+*   default inter quant matrix in mpeg-2).
+******************************************************************************
+*/
+const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] =
+{
+       0,      4,     10,     14,     16,     18,     20,     21,     22,     23,     24,     25,     26,     26,     27,     27,
+      28,     29,     29,     29,     30,     30,     31,     31,     32,     32,     32,     33,     33,     33,     33,     34,
+      34,     34,     35,     35,     35,     35,     35,     36,     36,     36,     36,     37,     37,     37,     37,     37,
+      38,     38,     38,     38,     38,     38,     39,     39,     39,     39,     39,     39,     39,     40,     40,     40,
+      40,     40,     40,     40,     41,     41,     41,     41,     41,     41,     41,     41,     41,     42,     42,     42,
+      42,     42,     42,     42,     42,     42,     43,     43,     43,     43,     43,     43,     43,     43,     43,     43,
+      44,     44,     44,     44,     44,     44,     44,     44,     44,     44,     44,     44,     45,     45,     45,     45,
+      45,     45,     45,     45,     45,     45,     45,     45,     45,     46,     46,     46,     46,     46,     46,     46,
+      46,     46,     46,     46,     46,     46,     46,     46,     47,     47,     47,     47,     47,     47,     47,     47,
+      47,     47,     47,     47,     47,     47,     47,     47,     47,     48,     48,     48,     48,     48,     48,     48,
+      48,     48,     48,     48,     48,     48,     48,     48,     48,     48,     48,     49,     49,     49,     49,     49,
+      49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,     49,
+};
+
diff --git a/encoder/ih264e_globals.h b/encoder/ih264e_globals.h
new file mode 100755
index 0000000..4c3de23
--- /dev/null
+++ b/encoder/ih264e_globals.h
@@ -0,0 +1,192 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_globals.h
+*
+* @brief
+*  Contains declarations of global variables for H264 encoder
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_GLOBALS_H_
+#define IH264E_GLOBALS_H_
+
+
+/*****************************************************************************/
+/* Extern global declarations                                                */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief  Computes the lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input  : qp
+* output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.)  If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+******************************************************************************
+*/
+extern const UWORD16 gu2_qp_lambda[52];
+
+/**
+******************************************************************************
+* @brief  Computes the lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input  : qp
+* output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6). When Lagrangian multiplier is disabled
+* the same constant is used across mode decision and mv decisions.
+******************************************************************************
+*/
+extern const UWORD8 gu1_qp0[52];
+
+/**
+******************************************************************************
+* @brief  unsigned exp-Golomb code lengths to assign cost to a coefficient of
+* mb types.
+* input  : Integer
+* output : codelength
+* @remarks Refer sec. 9-1 in h264 specification
+******************************************************************************
+*/
+extern const UWORD8 u1_uev_codelength[32];
+
+/**
+******************************************************************************
+* @brief  Look up table to assign cost to a coefficient of a residual block
+* basing on its surrounding coefficients
+* input  : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+extern const UWORD8 gu1_coeff_cost[6];
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for luma 4x4 block
+* input  : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order[16];
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for chroma AC block
+* input  : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 32
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order[15];
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for luma 4x4 dc block
+* input  : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order_dc[16];
+
+/**
+******************************************************************************
+* @brief  Indices map to raster scan for chroma 2x2 dc block
+* input  : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order_dc[4];
+
+
+/**
+******************************************************************************
+* @brief  choice of motion vectors to be used during mv prediction
+* input  : formatted reference idx comparison metric
+* output : whether mv prediction has to be the median or a simple straight
+* forward selection from neighbors.
+* @remarks If only one of the candidate blocks has a reference frame equal to
+    the current block then use the same block as the final predictor. A simple
+    look up table to assist this mv prediction condition
+******************************************************************************
+*/
+extern const WORD8 gi1_mv_pred_condition[8];
+
+
+/**
+******************************************************************************
+* @brief  maps the h264 quantizer to the mpeg2 quantizer scale
+* input  : h264 qp
+* output : equivalent mpeg 2 qp
+* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+******************************************************************************
+*/
+extern const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM];
+
+/**
+******************************************************************************
+* @brief  maps the mpeg2 quantizer to the h264 quantizer scale
+* input  : mpeg2 qp
+* output : equivalent h264 qp
+* @remarks  MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+*      k = 0 (for intra)  k = sign(QFij)
+*   H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+*   Excluding the portion of R(QP%6,i,j) that is due to
+*   the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+*   correspond to dequant values of
+*   2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+*   (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+*   Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+*   (the actual mapping seems to be to MPEG2 qscale of 2.5),
+*   and the fact that the effective h264 quantizer changes by
+*   a factor of 2 for every 6 steps, the following mapping is
+*   obtained:
+*    h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+*
+*   Note that the quant matrix entry assumed for the above
+*   equality is 16. Hence when the mpeg2 quant matrix entries
+*   are all 16, this lookup can be used as is (which is the
+*   default inter quant matrix in mpeg-2).
+******************************************************************************
+*/
+extern const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM];
+
+
+#endif /* IH264E_GLOBALS_H_ */
diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c
new file mode 100755
index 0000000..cb475a1
--- /dev/null
+++ b/encoder/ih264e_half_pel.c
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_half_pel.c
+*
+* @brief
+*  This file contains functions that are used for computing subpixel planes
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_sixtapfilter_horz
+*  - ih264e_sixtap_filter_2dvh_vert
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Interprediction luma filter for horizontal input (Filter run for width = 17
+*  and height =16)
+*
+* @par Description:
+*  Applies a 6 tap horizontal filter .The output is  clipped to 8 bits
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd)
+{
+    /* signed counters, so that (stride - width) is never evaluated unsigned */
+    WORD32 i4_row, i4_col;
+    const WORD32 i4_wd = HP_PL_WD; /* pels filtered per row */
+    const WORD32 i4_ht = MB_SIZE;  /* rows filtered */
+
+    /* the 6 tap window of an output pel starts 2 pels to its left */
+    pu1_src -= 2;
+
+    for (i4_row = 0; i4_row < i4_ht; i4_row++)
+    {
+        for (i4_col = 0; i4_col < i4_wd; i4_col++, pu1_dst++, pu1_src++)
+        {
+            WORD16 i16_temp;
+
+            i16_temp = ih264_g_six_tap[0] * (*pu1_src + pu1_src[5])
+                            + ih264_g_six_tap[1] * (pu1_src[1] + pu1_src[4])
+                            + ih264_g_six_tap[2] * (pu1_src[2] + pu1_src[3]);
+
+            /* round to nearest and scale down by 32 */
+            i16_temp = (i16_temp + 16) >> 5;
+
+            *pu1_dst = CLIP_U8(i16_temp);
+        }
+        pu1_src += src_strd - i4_wd; /* rewind the row, step to the next one */
+        pu1_dst += dst_strd - i4_wd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function implements a two stage cascaded six tap filter. It applies
+*  the six tap filter in the vertical direction on the predictor values,
+*  followed by applying the same filter in the horizontal direction on the
+*  output of the first stage. The six tap filtering operation is described in
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+*  width = 17 and height = 17)
+*
+* @par Description:
+*  The function interpolates the predictors first in the vertical direction and
+*  then in the horizontal direction to output the (1/2,1/2). The output of the
+*  first stage of the filter is stored, unrounded, in the buffer pointed to by
+*  pi4_pred (only in C).
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+*  UWORD8 pointer to the destination (vertically filtered output)
+*
+* @param[out] pu1_dst2
+*  UWORD8 pointer to the destination (output after applying the horizontal
+*  filter to the intermediate vertical output)
+*
+* @param[in] src_strd
+*  integer source stride
+
+* @param[in] dst_strd
+*  integer destination stride of pu1_dst
+*
+* @param[in] pi4_pred
+*  Pointer to the intermediate buffer of WORD32 elements (used only in c)
+*
+* @param[in] i4_pred_strd
+*  integer stride of the intermediate buffer pi4_pred
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+                                    UWORD8 *pu1_dst1,
+                                    UWORD8 *pu1_dst2,
+                                    WORD32 src_strd,
+                                    WORD32 dst_strd,
+                                    WORD32 *pi4_pred,
+                                    WORD32 i4_pred_strd)
+{
+    WORD32 row, col, tmp;
+    WORD32 *pi4_pred_temp = pi4_pred;
+    WORD32 ht = HP_PL_HT, wd = HP_PL_WD;
+
+    /* Stage 1: vertical 6 tap filter, kept unrounded in pi4_pred. Columns  */
+    /* -2 .. wd + 2 of every row are written, as stage 2 reads all of them  */
+    for (row = 0; row < ht; row++)
+    {
+        for (col = -2; col < wd + 3; col++)
+        {
+            tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) +
+                            ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) +
+                            ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]);
+            pi4_pred_temp[col] = tmp;
+        }
+
+        pu1_src += src_strd;
+        pi4_pred_temp += i4_pred_strd;
+    }
+
+    /* Stage 2: horizontal 6 tap filter on the stage 1 output, same taps */
+    for (row = 0; row < ht; row++)
+    {
+        for (col = 0; col < wd; col++)
+        {
+            tmp = ih264_g_six_tap[0] * (pi4_pred[col - 2] + pi4_pred[col + 3]) +
+                            ih264_g_six_tap[1] * (pi4_pred[col - 1] + pi4_pred[col + 2]) +
+                            ih264_g_six_tap[2] * (pi4_pred[col] + pi4_pred[col + 1]);
+
+            tmp = (tmp + 512) >> 10; /* both filter stages are scaled out here */
+            pu1_dst2[col] = CLIP_U8(tmp); /* vertical + horizontal filtered */
+            pu1_dst1[col] = CLIP_U8((pi4_pred[col] + 16) >> 5); /* vertical only */
+        }
+        pi4_pred += i4_pred_strd;
+        pu1_dst2 += dst_strd;
+        pu1_dst1 += dst_strd;
+    }
+}
+
diff --git a/encoder/ih264e_half_pel.h b/encoder/ih264e_half_pel.h
new file mode 100755
index 0000000..92bd37f
--- /dev/null
+++ b/encoder/ih264e_half_pel.h
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_half_pel.h
+ *
+ * @brief
+ *  Contains extern declarations of subpel functions used by the encoder
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_HALF_PEL_H_
+#define IH264E_HALF_PEL_H_
+
+/*****************************************************************************/
+/* Global constants                                                          */
+/*****************************************************************************/
+/*
+ * Dimensions of subpel plane buffers (parenthesized: safe in any expression)
+ */
+#define HP_PL_WD  (MB_SIZE + 1)
+#define HP_PL_HT  (MB_SIZE + 1)
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Interprediction luma filter for horizontal input (Filter run for width = 17
+*  and height =16)
+*
+* @par Description:
+*  Applies a 6 tap horizontal filter .The output is  clipped to 8 bits
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtapfilter_horz_ft(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_dst,
+                                         WORD32 src_strd,
+                                         WORD32 dst_strd);
+
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz;
+
+/* arm assembly */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_a9q;
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_av8;
+
+/* x86 intrinsics*/
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_ssse3;
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function implements a two stage cascaded six tap filter. It applies
+*  the six tap filter in the vertical direction on the predictor values,
+*  followed by applying the same filter in the horizontal direction on the
+*  output of the first stage. The six tap filtering operation is described in
+*  sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+*  width = 17 and height = 17)
+*
+* @par Description:
+*  The function interpolates the predictors first in the vertical direction and
+*  then in the horizontal direction to output the (1/2,1/2). The output of the
+*  first stage of the filter is stored, unrounded, in the buffer pointed to by
+*  pi4_pred (only in C).
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+*  UWORD8 pointer to the destination (vertically filtered output)
+*
+* @param[out] pu1_dst2
+*  UWORD8 pointer to the destination (output after applying the horizontal
+*  filter to the intermediate vertical output)
+*
+* @param[in] src_strd
+*  integer source stride
+
+* @param[in] dst_strd
+*  integer destination stride of pu1_dst
+*
+* @param[in] pi4_pred
+*  Pointer to the intermediate buffer of WORD32 elements (used only in c)
+*
+* @param[in] i4_pred_strd
+*  integer stride of the intermediate buffer pi4_pred
+*
+* @returns
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtap_filter_2dvh_vert_ft(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst1,
+                                               UWORD8 *pu1_dst2,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 *pi4_pred,
+                                               WORD32 i4_pred_strd);
+
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert;
+
+/* assembly */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_a9q;
+
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_av8;
+
+/* x86 intrinsics */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_ssse3;
+
+#endif /* IH264E_HALF_PEL_H_ */
diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c
new file mode 100755
index 0000000..b41d717
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.c
@@ -0,0 +1,2296 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_intra_modes_eval.c
+*
+* @brief
+*  This file contains definitions of routines that perform rate distortion
+*  analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_derive_nghbr_avbl_of_mbs()
+*  - ih264e_derive_ngbr_avbl_of_mb_partitions()
+*  - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
+*  - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
+*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
+*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
+*  - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
+*  - ih264e_evaluate_intra16x16_modes()
+*  - ih264e_evaluate_intra4x4_modes()
+*  - ih264e_evaluate_intra_chroma_modes()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for macroblock availability
+*
+* @par   Description
+*  Calculates the availability of the left, top, topright and topleft macroblocks.
+*
+* @param[in] ps_proc
+*  pointer to proc context (handle)
+*
+* @remarks Based on section 6.4.5 in H264 spec
+*
+* @return  none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
+{
+    /* co-ordinates of the current mb, in mb units */
+    const WORD32 i4_col = ps_proc->i4_mb_x;
+    const WORD32 i4_row = ps_proc->i4_mb_y;
+
+    /* number of mbs in one row of the picture */
+    const WORD32 i4_mbs_per_row = ps_proc->i4_wd_mbs;
+
+    /* entry of the current mb in the raster ordered slice index map */
+    UWORD8 *pu1_cur = ps_proc->pu1_slice_idx + ((i4_row * i4_mbs_per_row) + i4_col);
+
+    /* entries of the neighbors in the same map :                         */
+    /*     D | B | C                                                      */
+    /*     --+---+--                                                      */
+    /*     A | X |            (X - current mb)                            */
+    /* none of these is dereferenced before its picture edge test passes  */
+    UWORD8 *pu1_nbr_a = pu1_cur - 1;
+    UWORD8 *pu1_nbr_b = pu1_cur - i4_mbs_per_row;
+    UWORD8 *pu1_nbr_c = pu1_nbr_b + 1;
+    UWORD8 *pu1_nbr_d = pu1_nbr_b - 1;
+
+    /* availability flags filled in by this routine */
+    block_neighbors_t *ps_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* position of the current mb with respect to the picture edges */
+    const WORD32 i4_in_first_col = (0 == i4_col);
+    const WORD32 i4_in_first_row = (0 == i4_row);
+    const WORD32 i4_in_last_col = ((i4_mbs_per_row - 1) == i4_col);
+
+    /**********************************************************************/
+    /* As per the spec, a neighbor is available unless one of the         */
+    /* following holds, in which case it is marked as not available :     */
+    /* 1. mbAddr < 0                                                      */
+    /* 2. mbAddr > CurrMbAddr                                             */
+    /* 3. the macroblock with address mbAddr belongs to a different slice */
+    /*    than the macroblock with address CurrMbAddr                     */
+    /* Below, a picture edge test and a slice index compare are the only  */
+    /* checks made for each neighbor.                                     */
+    /**********************************************************************/
+
+    /* A : left mb, missing for macroblocks along the first column */
+    if (i4_in_first_col)
+    {
+        ps_avbl->u1_mb_a = 0;
+    }
+    else
+    {
+        /* macroblocks belong to same slice? */
+        ps_avbl->u1_mb_a = (*pu1_nbr_a == *pu1_cur) ? 1 : 0;
+    }
+
+    /* B : top mb, missing for macroblocks along the first row */
+    if (i4_in_first_row)
+    {
+        ps_avbl->u1_mb_b = 0;
+    }
+    else
+    {
+        /* macroblocks belong to same slice? */
+        ps_avbl->u1_mb_b = (*pu1_nbr_b == *pu1_cur) ? 1 : 0;
+    }
+
+    /* C : top right mb, missing along the last column and the first row */
+    if (i4_in_last_col || i4_in_first_row)
+    {
+        ps_avbl->u1_mb_c = 0;
+    }
+    else
+    {
+        /* macroblocks belong to same slice? */
+        ps_avbl->u1_mb_c = (*pu1_nbr_c == *pu1_cur) ? 1 : 0;
+    }
+
+    /* D : top left mb, missing along the first column and the first row */
+    if (i4_in_first_col || i4_in_first_row)
+    {
+        ps_avbl->u1_mb_d = 0;
+    }
+    else
+    {
+        /* macroblocks belong to same slice? */
+        ps_avbl->u1_mb_d = (*pu1_nbr_d == *pu1_cur) ? 1 : 0;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for subblock/partition availability
+*
+* @par   Description
+*  Calculates the availability of the left, top, topright and topleft subblock
+*  or partitions.
+*
+* @param[in]    ps_ngbr_avbl
+*  pointer to the neighbor availability flags of the current macroblock
+*
+* @param[in]    i1_pel_pos_x
+*  column position of the pel wrt the current block
+*
+* @param[in]    i1_pel_pos_y
+*  row position of the pel in wrt current block
+*
+* @remarks     Assumptions: before calling this function it is assumed that
+*   the neighbor availability of the current macroblock is already derived.
+*   Based on table 6-3 of H264 specification
+*
+* @return      availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
+                                                WORD8 i1_pel_pos_x,
+                                                WORD8 i1_pel_pos_y)
+{
+    /* where the pel lies with respect to the 16x16 extent of the current mb */
+    const WORD32 i4_left_of_mb = (i1_pel_pos_x < 0);
+    const WORD32 i4_right_of_mb = (i1_pel_pos_x >= 16);
+    const WORD32 i4_above_mb = (i1_pel_pos_y < 0);
+    const WORD32 i4_below_mb = (i1_pel_pos_y >= 16);
+
+    /**********************************************************************/
+    /* Positions 0-15 (inclusive) along x / y address the columns / rows  */
+    /* of the current macroblock, any other position addresses a pel of   */
+    /* a neighboring macroblock. Value returned for each combination :    */
+    /*                                                                    */
+    /*               x < 0       0 <= x < 16     x >= 16                  */
+    /*  y < 0        u1_mb_d     u1_mb_b         u1_mb_c                  */
+    /*  0 <= y < 16  u1_mb_a     1               0                        */
+    /*  y >= 16      0           0               0                        */
+    /*                                                                    */
+    /**********************************************************************/
+
+    /* rows(+16) : bottom left, bottom and bottom right are never available */
+    if (i4_below_mb)
+    {
+        return 0;
+    }
+
+    /* row(-1) : pel above the current mb */
+    if (i4_above_mb)
+    {
+        if (i4_left_of_mb)
+        { /* column(-1) */
+            return ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
+        }
+
+        if (i4_right_of_mb)
+        { /* column(+16) */
+            return ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
+        }
+
+        /* all columns of a macroblock */
+        return ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
+    }
+
+    /* from here on the pel is in one of the rows 0-15 */
+    if (i4_left_of_mb)
+    { /* column(-1) */
+        return ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
+    }
+
+    if (i4_right_of_mb)
+    { /* column(+16) */
+        return 0; /* current mb right availability */
+    }
+
+    /* The pel belongs to the current mb and is reported as available in  */
+    /* all cases. The dependence of the availability on the position of   */
+    /* the partition inside the mb is not accounted for here and has to   */
+    /* be corrected elsewhere.                                            */
+    return 1;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 16x16 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]   ps_proc_ctxt
+*  pointer to process context (handle)
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+*  the SAD and cost are one and the same.
+*
+* @return     none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric) of the 16x16 macro block: current candidate and least so far */
+    WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate */
+    WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
+
+    /* intra mode */
+    UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+
+    /* pointers to the source macro block and to the same macro block in the recon buffer */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+    /* prediction buffers: one for the VERT/HORZ/DC evaluation, one for the PLANE candidate */
+    UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
+    UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* pointers to the left, top and top left neighbor pels (taken from the recon buffer) */
+    UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
+    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+    UWORD8 *pu1_mb_d = pu1_mb_b - 1;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+
+    /* lut of valid modes indexed by i4_ngbr_avbl (bit0 left, bit1 top left, bit2 top); PLANE (bit 3) needs all three */
+    const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
+
+    /* temp var */
+    UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
+
+    /* in P slices the mb type codelength lookup is shifted by 5 and the ME fast SAD setting is honored */
+    if (ps_proc->i4_slice_type == PSLICE)
+    {
+        offset = 5;
+        u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
+    }
+
+    /* locating neighbors that are available for prediction */
+    /* TODO : update the neighbor availability information basing on constrained intra pred information */
+    /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+     * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+    /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+    ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
+
+    /* gather prediction pels from the neighbors: [0..15] left (bottom to top), [16] top left,
+     * [17..32] top; a set that is not available is filled with zeros */
+    /* left pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+    {
+        for(i = 0; i < 16; i++)
+            pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
+    }
+    /* top pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+    {
+        ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
+        /*for(i = 0; i < 16; i++)
+            pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i];*/
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
+    }
+    /* topleft pels */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+        pu1_ngbr_pels_i16[16] = *pu1_mb_d;
+    else
+        pu1_ngbr_pels_i16[16] = 0;
+
+    /* set valid intra modes for evaluation */
+//    u4_valid_intra_modes = 15;
+////    ih264e_filter_intra16x16modes(pu1_mb_curr, i4_src_strd, &u4_valid_intra_modes);
+//    if (!ps_proc->ps_ngbr_avbl->u1_mb_a)
+//        u4_valid_intra_modes &= ~(1 << HORZ_I16x16);
+//    if (!ps_proc->ps_ngbr_avbl->u1_mb_b)
+//        u4_valid_intra_modes &= ~(1 << VERT_I16x16);
+////    if (!ps_proc->ps_ngbr_avbl->u1_mb_a || !ps_proc->ps_ngbr_avbl->u1_mb_b || !ps_proc->ps_ngbr_avbl->u1_mb_d)
+//    if (i4_ngbr_avbl != 7)
+//        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
+
+    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+        u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
+                                                  i4_src_strd, i4_pred_strd,
+                                                  i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
+                                                  u4_valid_intra_modes);
+
+    /* the rate term is added once the best mode is known; until then the cost is the distortion alone */
+    i4_mb_cost_least = i4_mb_distortion_least;
+
+    if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST ||
+                    ps_proc->i4_slice_type == ISLICE))
+    {
+        /* intra prediction for PLANE mode*/
+        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+        /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+        ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
+
+        /* cost = distortion + lambda*rate */
+        i4_mb_cost = i4_mb_distortion;
+
+        /* update the least cost information if necessary */
+        if(i4_mb_cost < i4_mb_distortion_least)
+        {
+            u4_intra_mode = PLANE_I16x16;
+
+            i4_mb_cost_least = i4_mb_cost;
+            i4_mb_distortion_least = i4_mb_distortion;
+        }
+    }
+
+    u4_best_intra_16x16_mode = u4_intra_mode;
+
+    DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
+
+    ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
+
+    /* cost = distortion + lambda*rate, with rate = u1_uev_codelength[offset + best mode] */
+    i4_mb_cost_least    = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
+
+
+    /* update the type of the mb if necessary */
+    if (i4_mb_cost_least < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_mb_cost_least;
+        ps_proc->i4_mb_distortion = i4_mb_distortion_least;
+        ps_proc->u4_mb_type = I16x16;
+    }
+
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 8x8 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc
+*  pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: TODO: This function needs to be tested
+*
+*  @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric) of an 8x8 partition, and the sum over the four partitions */
+    WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate; the macro block total starts at one bit (lambda) */
+    WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
+
+    /* rate cost of a partition mode: one bit when it equals the estimated mode, four bits otherwise */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+    /* intra mode */
+    UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
+
+    /* pointer to curr partition */
+    UWORD8 *pu1_mb_curr;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* neighbors left, top, top left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_d;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+    block_neighbors_t s_ngbr_avbl;
+
+    /* temp vars */
+    UWORD32  b8, u4_pix_x, u4_pix_y;
+
+    /* ngbr mb syntax information */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+
+    for(b8 = 0; b8 < 4; b8++)
+    {
+        u4_pix_x = (b8 & 0x01) << 3;
+        u4_pix_y = (b8 >> 1) << 3;
+
+        pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+        /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+        /* as opposed to using the recon pels. (open loop intra prediction) */
+        pu1_mb_a = pu1_mb_curr - 1; /* pointer to left neighbor pels */
+        pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top neighbor pels */
+        pu1_mb_d = pu1_mb_b - 1; /* pointer to top left neighbor pel */
+
+        /* locating neighbors that are available for prediction */
+        /* TODO : update the neighbor availability information basing on constrained intra pred information */
+        /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+        /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+        s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
+        s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
+        s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
+        s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
+
+        /* bit0 = blk_a, bit1 = blk_d, bit2 = blk_b, bit3 = blk_c, bit4 = blk_a again (NOTE(review): presumably a bottom left flag for the ref filtering routine - confirm) */
+        i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) +  (s_ngbr_avbl.u1_mb_c << 3) +
+                        (s_ngbr_avbl.u1_mb_a << 4);
+        /* if top partition is available and top right is not available for intra prediction, then */
+        /* padd top right samples using top sample and make top right also available */
+        /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) +  ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+        ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
+
+
+        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
+                                                     i4_src_strd, i4_ngbr_avbl);
+
+        i4_partition_cost_least = INT_MAX;
+        /* start with all nine modes valid, then drop the ones whose neighbors are missing */
+        u4_valid_intra_modes = 0x1ff;
+
+        if (!s_ngbr_avbl.u1_mb_b)
+        {
+            u4_valid_intra_modes &= ~(1 << VERT_I4x4);
+            u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
+            u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
+        }
+        if (!s_ngbr_avbl.u1_mb_a)
+        {
+            u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
+            u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
+        }
+        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
+        {
+            u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
+            u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
+            u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
+        }
+
+        /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
+        if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+        {
+            u4_estimated_intra_8x8_mode = DC_I8x8;
+        }
+        else
+        {
+            UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
+            UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
+
+            if (u4_pix_x == 0)
+            {
+                if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+                {
+                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
+                }
+                else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+                {
+                    u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+1]; /* top right 4x4 of the adjoining 8x8 */
+                }
+            }
+            else
+            {
+                u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
+            }
+
+            if (u4_pix_y == 0)
+            {
+                if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+                {
+                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
+                }
+                else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+                {
+                    u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2]; /* bottom left 4x4 of the adjoining 8x8 */
+                }
+            }
+            else
+            {
+                u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
+            }
+
+            u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
+        }
+
+        /* perform intra mode 8x8 evaluation; the valid map is consumed one bit per mode */
+        for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
+        {
+            if ( (u4_valid_intra_modes & 1) == 0)
+                continue;
+
+            /* intra prediction */
+            (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
+
+            /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+            ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
+
+            i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+            /* update the least cost information if necessary */
+            if (i4_partition_cost < i4_partition_cost_least)
+            {
+                i4_partition_cost_least = i4_partition_cost;
+                i4_partition_distortion_least = i4_partition_distortion;
+                u4_best_intra_8x8_mode = u4_intra_mode;
+            }
+        }
+        /* macroblock cost and distortion */
+        i4_total_cost += i4_partition_cost_least;
+        i4_total_distortion += i4_partition_distortion_least;
+        /* mb partition mode */
+        ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
+
+    }
+
+    /* update the type of the mb if necessary */
+    if (i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->u4_mb_type = I8x8;
+    }
+
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric) of a 4x4 block, and the sum over the sixteen blocks */
+    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate; the macro block total starts at (24 + 1) * lambda */
+    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+    /* rate cost of a block mode: one bit when it equals the estimated mode, four bits otherwise */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+    /* intra mode */
+    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+    /* pointer to curr partition */
+    UWORD8 *pu1_mb_curr;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* neighbors left, top, top right, top left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+    block_neighbors_t s_ngbr_avbl;
+
+    /* temp vars */
+    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+    /* maps the raster index of a 4x4 block (row * 4 + column) to its (b8 << 2) + b4 index */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* ngbr sub mb modes */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* valid intra modes map, and its lut indexed by the low three availability bits */
+    UWORD32 u4_valid_intra_modes;
+    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; /* bit0 left, bit1 top left, bit2 top */
+
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
+
+    for (b8 = 0; b8 < 4; b8++)
+    {
+        u4_blk_x = (b8 & 0x01) << 3;
+        u4_blk_y = (b8 >> 1) << 3;
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+            pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+            /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+            /* as opposed to using the recon pels. (open loop intra prediction) */
+            pu1_mb_a = pu1_mb_curr - 1; /* pointer to left neighbor pels */
+            pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top neighbor pels */
+            pu1_mb_c = pu1_mb_b + 4; /* pointer to top right neighbor pels */
+            pu1_mb_d = pu1_mb_b - 1; /* pointer to top left neighbor pel */
+
+            /* locating neighbors that are available for prediction */
+            /* TODO : update the neighbor availability information basing on constrained intra pred information */
+            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+            /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+            /* set valid intra modes for evaluation (top right availability does not enter the lookup) */
+            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+            /* if top partition is available and top right is not available for intra prediction, then */
+            /* padd top right samples using top sample and make top right also available */
+            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+            /* gather prediction pels: [0..3] left (bottom to top), [4] top left, [5..8] top, [9..12] top right */
+            if (s_ngbr_avbl.u1_mb_a)
+            {
+                for(i = 0; i < 4; i++)
+                    pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4, 0, 4);
+            }
+
+            if (s_ngbr_avbl.u1_mb_b)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+            }
+
+            if (s_ngbr_avbl.u1_mb_d)
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            else
+                pu1_ngbr_pels_i4[4] = 0;
+
+            if (s_ngbr_avbl.u1_mb_c)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+            }
+            else if (s_ngbr_avbl.u1_mb_b)
+            {
+                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
+            }
+
+            i4_partition_cost_least = INT_MAX;
+
+            /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
+            if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+            {
+                u4_estimated_intra_4x4_mode = DC_I4x4;
+            }
+            else
+            {
+                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
+                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
+
+                if (u4_pix_x == 0)
+                {
+                    if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
+                    }
+                    else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+                    }
+                }
+                else
+                {
+                    u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
+                }
+
+                if (u4_pix_y == 0)
+                {
+                    if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
+                    }
+                    else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+                    }
+                }
+                else
+                {
+                    u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
+                }
+
+                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+            }
+
+            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+            /* mode evaluation and prediction; outputs the best mode and its cost */
+            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+                                                         pu1_ngbr_pels_i4,
+                                                         pu1_pred_mb, i4_src_strd,
+                                                         i4_pred_strd, i4_ngbr_avbl,
+                                                         &u4_best_intra_4x4_mode,
+                                                         &i4_partition_cost_least,
+                                                         u4_valid_intra_modes,
+                                                         u4_lambda,
+                                                         u4_estimated_intra_4x4_mode);
+
+
+            i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
+
+            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+            /* macroblock distortion and cost */
+            i4_total_distortion += i4_partition_distortion_least;
+            i4_total_cost += i4_partition_cost_least;
+            /* mb partition mode */
+            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+        }
+    }
+
+    /* update the type of the mb if necessary */
+    if (i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->u4_mb_type = I4x4;
+    }
+
+    return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD(distortion metric) of an 4x4 block */
+    WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+    /* lambda */
+    UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+    /* cost = distortion + lambda*rate */
+    WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+    /* cost due to mbtype */
+    UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+    /* intra mode */
+    UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+    /* pointer to curr partition */
+    UWORD8 *pu1_mb_curr;
+    UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
+    UWORD8 *pu1_ref_mb_intra_4x4;
+
+    /* pointer to residual macro block */
+    WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_ref_strd_left, i4_ref_strd_top;
+
+    /* neighbors left, top, top right, top left */
+    UWORD8 *pu1_mb_a;
+    UWORD8 *pu1_mb_b;
+    UWORD8 *pu1_mb_c;
+    UWORD8 *pu1_mb_d;
+
+    /* number of non zero coeffs*/
+    UWORD8  *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* neighbor availability */
+    WORD32 i4_ngbr_avbl;
+    block_neighbors_t s_ngbr_avbl;
+
+    /* temp vars */
+    UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+    /* scan order inside 4x4 block */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* ngbr sub mb modes */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+    mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+    UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+    /* Dummy variable for 4x4 trans function */
+    WORD16 i2_dc_dummy;
+
+    /* compute ngbr availability for sub blks */
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
+
+    for(b8 = 0; b8 < 4; b8++)
+    {
+        u4_blk_x = (b8 & 0x01) << 3;
+        u4_blk_y = (b8 >> 1) << 3;
+        for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
+        {
+            u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+            u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+            pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
+            pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+            if (u4_pix_x == 0)
+            {
+                i4_ref_strd_left = ps_proc->i4_rec_strd;
+                pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
+            }
+            else
+            {
+                i4_ref_strd_left = i4_pred_strd;
+                pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
+            }
+            if (u4_pix_y == 0)
+            {
+                i4_ref_strd_top = ps_proc->i4_rec_strd;
+                pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
+            }
+            else
+            {
+                i4_ref_strd_top = i4_pred_strd;
+                pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
+            }
+
+            pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
+            pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
+            pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
+            if (u4_pix_y == 0)
+                pu1_mb_d = pu1_mb_b - 1;
+            else
+                pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
+
+            /* locating neighbors that are available for prediction */
+            /* TODO : update the neighbor availability information basing on constrained intra pred information */
+            /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+            /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+            i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+            s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+            s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+            s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+            s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+            /* set valid intra modes for evaluation */
+            u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+            /* if top partition is available and top right is not available for intra prediction, then */
+            /* padd top right samples using top sample and make top right also available */
+            /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+            /* gather prediction pels from the neighbors */
+            if (s_ngbr_avbl.u1_mb_a)
+            {
+                for(i = 0; i < 4; i++)
+                    pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left];
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4,0,4);
+            }
+            if(s_ngbr_avbl.u1_mb_b)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+            }
+            else
+            {
+                memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
+            }
+            if (s_ngbr_avbl.u1_mb_d)
+                pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+            else
+                pu1_ngbr_pels_i4[4] = 0;
+            if (s_ngbr_avbl.u1_mb_c)
+            {
+                memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+            }
+            else if (s_ngbr_avbl.u1_mb_b)
+            {
+                memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+                s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
+            }
+
+            i4_partition_cost_least = INT_MAX;
+
+            /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
+            if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+            {
+                u4_estimated_intra_4x4_mode = DC_I4x4;
+            }
+            else
+            {
+                UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
+                UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
+
+                if (u4_pix_x == 0)
+                {
+                    if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
+                    }
+                    else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+                    {
+                        u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+                    }
+                }
+                else
+                {
+                    u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
+                }
+
+                if (u4_pix_y == 0)
+                {
+                    if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
+                    }
+                    else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+                    {
+                        u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+                    }
+                }
+                else
+                {
+                    u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
+                }
+
+                u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+            }
+
+            ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+            /*mode evaluation and prediction*/
+            ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+                                                         pu1_ngbr_pels_i4,
+                                                         pu1_pred_mb, i4_src_strd,
+                                                         i4_pred_strd, i4_ngbr_avbl,
+                                                         &u4_best_intra_4x4_mode,
+                                                         &i4_partition_cost_least,
+                                                         u4_valid_intra_modes,
+                                                         u4_lambda,
+                                                         u4_estimated_intra_4x4_mode);
+
+
+            i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+            DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+
+            /* macroblock distortion */
+            i4_total_distortion += i4_partition_distortion_least;
+            i4_total_cost += i4_partition_cost_least;
+
+            /* mb partition mode */
+            ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+
+
+            /********************************************************/
+            /*  error estimation,                                   */
+            /*  transform                                           */
+            /*  quantization                                        */
+            /********************************************************/
+            ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
+                                              pi2_res_mb, i4_src_strd,
+                                              i4_pred_strd,
+                                              /* No op stride, this implies a buff of lenght 1x16 */
+                                              ps_qp_params->pu2_scale_mat,
+                                              ps_qp_params->pu2_thres_mat,
+                                              ps_qp_params->u1_qbits,
+                                              ps_qp_params->u4_dead_zone,
+                                              pu1_nnz, &i2_dc_dummy);
+
+            /********************************************************/
+            /*  ierror estimation,                                  */
+            /*  itransform                                          */
+            /*  iquantization                                       */
+            /********************************************************/
+            ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
+                                                 pu1_ref_mb_intra_4x4,
+                                                 i4_pred_strd, i4_pred_strd,
+                                                 ps_qp_params->pu2_iscale_mat,
+                                                 ps_qp_params->pu2_weigh_mat,
+                                                 ps_qp_params->u1_qp_div,
+                                                 ps_proc->pv_scratch_buff, 0,
+                                                 NULL);
+        }
+    }
+
+    /* update the type of the mb if necessary */
+    if (i4_total_cost < ps_proc->i4_mb_cost)
+    {
+        ps_proc->i4_mb_cost = i4_total_cost;
+        ps_proc->i4_mb_distortion = i4_total_distortion;
+        ps_proc->u4_mb_type = I4x4;
+    }
+
+    return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the valid chroma intra 8x8 modes and selects
+*  the mode with the least SAD against the source chroma block. The chosen
+*  mode is stored in ps_proc->u1_c_i8_mode.
+*
+* @param[in,out] ps_proc
+*  pointer to the process context (handle); updated with the chosen mode
+*
+* @remarks
+*  For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+    /* Codec Context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* SAD (distortion metric): plane mode candidate, and least found so far */
+    WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
+
+    /* best intra mode found so far */
+    UWORD32  u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
+
+    /* neighbor pels for intra prediction */
+    UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
+
+    /* pointers to the current (source) and reconstructed chroma macro block */
+    UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+    UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
+
+    /* pointer to prediction macro block */
+    UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
+    UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
+
+    /* strides */
+    WORD32 i4_src_strd_c = ps_proc->i4_src_strd;
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+    WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd;
+
+    /* neighbors left, top, top left (2 bytes per position, presumably interleaved Cb/Cr) */
+    UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
+    UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
+    UWORD8 *pu1_mb_d = pu1_mb_b - 2;
+
+    /* valid modes per availability (bit0 left, bit1 top left, bit2 top); PLANE needs all three */
+    const UWORD8  u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15,};
+    WORD32 i4_ngbr_avbl;
+
+    /* valid intra modes map */
+    UWORD32 u4_valid_intra_modes;
+
+    /* temp var */
+    UWORD8 i;
+
+    /* locating neighbors that are available for prediction */
+    /* TODO : update the neighbor availability information based on constrained intra pred information */
+    /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+     * based on neighbors available and hence avoid the computation of neighbor availability totally. */
+    /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+    i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
+
+    /* gather prediction pels from the neighbors */
+    /* left pels: 8 rows of 2 bytes, stored bottom row first in bytes [0, 16) */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+    {
+        for (i = 0; i < 16; i += 2)
+        {
+            pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
+            pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
+        }
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
+    }
+
+    /* top pels: 16 bytes stored at [18, 34) */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+    {
+        ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
+    }
+    else
+    {
+        ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
+    }
+
+    /* top left pels: 2 bytes at [16, 18); left untouched when not available */
+    if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+    {
+        pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
+        pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
+    }
+
+    u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
+
+    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+        u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
+
+    i4_chroma_mb_distortion = INT_MAX;
+
+    /* perform intra mode chroma  8x8 evaluation */
+    /* intra prediction; PLANE is evaluated separately below */
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb,
+                                                    pu1_ngbr_pels_c_i8x8,
+                                                    pu1_pred_mb,
+                                                    i4_src_strd_c,
+                                                    i4_pred_strd,
+                                                    i4_ngbr_avbl,
+                                                    &u4_best_chroma_intra_8x8_mode,
+                                                    &i4_chroma_mb_distortion,
+                                                    u4_valid_intra_modes);
+
+    if (u4_valid_intra_modes & (1 << PLANE_CH_I8x8)) /* if Chroma PLANE is valid */
+    {
+        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+        /* evaluate distortion(sad) */
+        ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion);
+
+        /* update the least distortion information if necessary */
+        if(i4_mb_distortion < i4_chroma_mb_distortion)
+        {
+            i4_chroma_mb_distortion = i4_mb_distortion;
+            u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
+        }
+    }
+
+    DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode);
+
+    ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
+
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+*  prediction.
+*
+* @par Description
+*  This function evaluates the first three 16x16 modes, computes the SAD of
+*  each, and writes the prediction of the best mode if it beats *pu4_sadmin.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+*  neighbouring pels: left in [0, 16) (bottom row first), top in [17, 33)
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination (written only when a better mode is found)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+*  In: SAD to beat. Out: minimum SAD, updated only when a better mode is found
+*
+* @param[in] u4_valid_intra_modes
+*  Bit mask of modes to evaluate: 1 = VERT, 2 = HORZ, 4 = DC
+*
+* @returns      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+                                      UWORD8 *pu1_ngbr_pels_i16,
+                                      UWORD8 *pu1_dst,
+                                      UWORD32 src_strd,
+                                      UWORD32 dst_strd,
+                                      WORD32 u4_n_avblty,
+                                      UWORD32 *u4_intra_mode,
+                                      WORD32 *pu4_sadmin,
+                                      UWORD32 u4_valid_intra_modes)
+{
+    UWORD8 *pu1_neighbour;
+    UWORD8 *pu1_src_temp = pu1_src;
+    UWORD8 left = 0, top = 0;
+    WORD32 u4_dcval = 0;
+    WORD32 i, j;
+    WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
+                    i4_min_sad = INT_MAX;
+    UWORD8 val;
+
+    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    /* left available: accumulate HORZ SAD and the left part of the DC sum */
+    if (left)
+    {
+        i4_sad_horz = 0;
+
+        for (i = 0; i < 16; i++)
+        {
+            val = pu1_ngbr_pels_i16[15 - i]; /* left pel of row i */
+
+            u4_dcval += val;
+
+            for (j = 0; j < 16; j++)
+            {
+                i4_sad_horz += ABS(val - pu1_src_temp[j]);
+            }
+
+            pu1_src_temp += src_strd;
+        }
+        u4_dcval += 8; /* rounding term for the final shift */
+    }
+
+    pu1_src_temp = pu1_src;
+    /* top available: accumulate VERT SAD and the top part of the DC sum */
+    if (top)
+    {
+        i4_sad_vert = 0;
+
+        for (i = 0; i < 16; i++)
+        {
+            u4_dcval += pu1_ngbr_pels_i16[17 + i];
+
+            for (j = 0; j < 16; j++)
+            {
+                i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
+            }
+            pu1_src_temp += src_strd;
+
+        }
+        u4_dcval += 8; /* rounding term for the final shift */
+    }
+
+    u4_dcval = (u4_dcval) >> (3 + left + top);
+
+    pu1_src_temp = pu1_src;
+
+    /* none available: DC value defaults to 128 */
+    u4_dcval += (left == 0) * (top == 0) * 128;
+
+    /* DC SAD is needed only when DC is a candidate; otherwise it stays INT_MAX */
+    if (u4_valid_intra_modes & 4)
+    {
+        i4_sad_dc = 0;
+        for (i = 0; i < 16; i++)
+        {
+            for (j = 0; j < 16; j++)
+            {
+                i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
+            }
+            pu1_src_temp += src_strd;
+        }
+    }
+
+    if ((u4_valid_intra_modes & 1) == 0)/* If VERT is disabled */
+        i4_sad_vert = INT_MAX;
+
+    if ((u4_valid_intra_modes & 2) == 0)/* If HORZ is disabled */
+        i4_sad_horz = INT_MAX;
+
+    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
+
+    /* Finding Minimum sad and doing corresponding prediction */
+    if (i4_min_sad < *pu4_sadmin)
+    {
+        *pu4_sadmin = i4_min_sad;
+        if (i4_min_sad == i4_sad_vert)
+        {
+            *u4_intra_mode = VERT_I16x16;
+            pu1_neighbour = pu1_ngbr_pels_i16 + 17;
+            for (j = 0; j < 16; j++)
+            {
+                memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
+                pu1_dst += dst_strd;
+            }
+        }
+        else if (i4_min_sad == i4_sad_horz)
+        {
+            *u4_intra_mode = HORZ_I16x16;
+            for (j = 0; j < 16; j++)
+            {
+                val = pu1_ngbr_pels_i16[15 - j];
+                memset(pu1_dst, val, MB_SIZE);
+                pu1_dst += dst_strd;
+            }
+        }
+        else
+        {
+            *u4_intra_mode = DC_I16x16;
+            for (j = 0; j < 16; j++)
+            {
+                memset(pu1_dst, u4_dcval, MB_SIZE);
+                pu1_dst += dst_strd;
+            }
+        }
+    }
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+*  This function evaluates  4x4 modes and compute corresponding sad
+*  and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+*  Says what all modes are valid
+*
+* @param[in] u4_lambda
+*  Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+*  Predicted mode for cost computation
+*
+* @returns      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_ngbr_pels,
+                                     UWORD8 *pu1_dst,
+                                     UWORD32 src_strd,
+                                     UWORD32 dst_strd,
+                                     WORD32 u4_n_avblty,
+                                     UWORD32 *u4_intra_mode,
+                                     WORD32 *pu4_sadmin,
+                                     UWORD32 u4_valid_intra_modes,
+                                     UWORD32  u4_lambda,
+                                     UWORD32 u4_predictd_mode)
+{
+    UWORD8 *pu1_src_temp = pu1_src;
+    UWORD8 *pu1_pred = pu1_ngbr_pels;
+    UWORD8 left = 0, top = 0;
+    UWORD8 u1_pred_val = 0;
+    UWORD8 u1_pred_vals[4] = {0};
+    UWORD8 *pu1_pred_val = NULL;
+    /* To store FILT121 operated values*/
+    UWORD8 u1_pred_vals_diag_121[15] = {0};
+    /* To store FILT11 operated values*/
+    UWORD8 u1_pred_vals_diag_11[15] = {0};
+    UWORD8 u1_pred_vals_vert_r[8] = {0};
+    UWORD8 u1_pred_vals_horz_d[10] = {0};
+    UWORD8 u1_pred_vals_horz_u[10] = {0};
+    WORD32 u4_dcval = 0;
+    WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+                               INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+
+    WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+                                INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+    WORD32 i, i4_min_cost = INT_MAX;
+
+    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    /* Computing SAD */
+
+    /* VERT mode valid */
+    if (u4_valid_intra_modes & 1)
+    {
+        pu1_pred = pu1_ngbr_pels + 5;
+        i4_sad[VERT_I4x4] = 0;
+        i4_cost[VERT_I4x4] = 0;
+
+        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+
+        i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
+                                        u4_lambda : 4 * u4_lambda);
+    }
+
+    /* HORZ mode valid */
+    if (u4_valid_intra_modes & 2)
+    {
+        i4_sad[HORZ_I4x4] = 0;
+        i4_cost[HORZ_I4x4] =0;
+        pu1_src_temp = pu1_src;
+
+        u1_pred_val = pu1_ngbr_pels[3];
+
+        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+                        + ABS(pu1_src_temp[1] - u1_pred_val)
+                        + ABS(pu1_src_temp[2] - u1_pred_val)
+                        + ABS(pu1_src_temp[3] - u1_pred_val);
+        pu1_src_temp += src_strd;
+
+        u1_pred_val = pu1_ngbr_pels[2];
+
+        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+                        + ABS(pu1_src_temp[1] - u1_pred_val)
+                        + ABS(pu1_src_temp[2] - u1_pred_val)
+                        + ABS(pu1_src_temp[3] - u1_pred_val);
+        pu1_src_temp += src_strd;
+
+        u1_pred_val = pu1_ngbr_pels[1];
+
+        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+                        + ABS(pu1_src_temp[1] - u1_pred_val)
+                        + ABS(pu1_src_temp[2] - u1_pred_val)
+                        + ABS(pu1_src_temp[3] - u1_pred_val);
+        pu1_src_temp += src_strd;
+
+        u1_pred_val = pu1_ngbr_pels[0];
+
+        i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+                        + ABS(pu1_src_temp[1] - u1_pred_val)
+                        + ABS(pu1_src_temp[2] - u1_pred_val)
+                        + ABS(pu1_src_temp[3] - u1_pred_val);
+
+        i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
+                                        u4_lambda : 4 * u4_lambda);
+    }
+
+    /* DC mode valid */
+    if (u4_valid_intra_modes & 4)
+    {
+        i4_sad[DC_I4x4] = 0;
+        i4_cost[DC_I4x4] = 0;
+        pu1_src_temp = pu1_src;
+
+        if (left)
+            u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
+                            + pu1_ngbr_pels[3] + 2;
+        if (top)
+            u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
+                            + pu1_ngbr_pels[8] + 2;
+
+        u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
+
+        /* none available */
+        memset(u1_pred_vals, u4_dcval, 4);
+        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+        pu1_src_temp += src_strd;
+        USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+        pu1_src_temp += src_strd;
+
+        i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
+                                        u4_lambda : 4 * u4_lambda);
+    }
+
+    /* if modes other than VERT, HORZ and DC are  valid */
+    if (u4_valid_intra_modes > 7)
+    {
+        pu1_pred = pu1_ngbr_pels;
+        pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
+
+        /* Performing FILT121 and FILT11 operation for all neighbour values*/
+        for (i = 0; i < 13; i++)
+        {
+            u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
+            u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
+
+            pu1_pred++;
+        }
+
+        if (u4_valid_intra_modes & 8)/* DIAG_DL */
+        {
+            i4_sad[DIAG_DL_I4x4] = 0;
+            i4_cost[DIAG_DL_I4x4] = 0;
+            pu1_src_temp = pu1_src;
+            pu1_pred_val = u1_pred_vals_diag_121 + 5;
+
+            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
+            pu1_src_temp += src_strd;
+            i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+        }
+
+        if (u4_valid_intra_modes & 16)/* DIAG_DR */
+        {
+            i4_sad[DIAG_DR_I4x4] = 0;
+            i4_cost[DIAG_DR_I4x4] = 0;
+            pu1_src_temp = pu1_src;
+            pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
+            pu1_src_temp += src_strd;
+            i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+
+        }
+
+        if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
+        {
+            i4_sad[VERT_R_I4x4] = 0;
+
+            pu1_src_temp = pu1_src;
+            u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
+            memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
+            u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
+            memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
+
+            pu1_pred_val = u1_pred_vals_diag_11 + 4;
+            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+            pu1_pred_val = u1_pred_vals_diag_121 + 3;
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
+                   i4_sad[VERT_R_I4x4]);
+
+            i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+        }
+
+        if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
+        {
+            i4_sad[HORZ_D_I4x4] = 0;
+
+            pu1_src_temp = pu1_src;
+            u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
+            memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
+            u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
+            u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
+            u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
+            u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
+            u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
+            u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
+
+            pu1_pred_val = u1_pred_vals_horz_d;
+            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
+
+            i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+        }
+
+        if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
+        {
+            i4_sad[VERT_L_I4x4] = 0;
+            pu1_src_temp = pu1_src;
+            pu1_pred_val = u1_pred_vals_diag_11 + 5;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+            pu1_src_temp += src_strd;
+            pu1_pred_val = u1_pred_vals_diag_121 + 5;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+            pu1_src_temp += src_strd;
+            pu1_pred_val = u1_pred_vals_diag_11 + 6;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+            pu1_src_temp += src_strd;
+            pu1_pred_val = u1_pred_vals_diag_121 + 6;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+
+            i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+        }
+
+        if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
+        {
+            i4_sad[HORZ_U_I4x4] = 0;
+            pu1_src_temp = pu1_src;
+            u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
+            u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
+            u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
+            u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
+            u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
+            u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
+
+            memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
+
+            pu1_pred_val = u1_pred_vals_horz_u;
+            USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
+            pu1_src_temp += src_strd;
+            USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
+
+            i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
+                                            u4_lambda : 4 * u4_lambda);
+        }
+
+        i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
+                        MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
+                        MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
+
+    }
+    else
+    {
+        /* Only first three modes valid */
+        i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
+    }
+
+    *pu4_sadmin = i4_min_cost;
+
+    if (i4_min_cost == i4_cost[0])
+    {
+        *u4_intra_mode = VERT_I4x4;
+        pu1_pred_val = pu1_ngbr_pels + 5;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+    }
+    else if (i4_min_cost == i4_cost[1])
+    {
+        *u4_intra_mode = HORZ_I4x4;
+        memset(pu1_dst, pu1_ngbr_pels[3], 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, pu1_ngbr_pels[2], 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, pu1_ngbr_pels[1], 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, pu1_ngbr_pels[0], 4);
+    }
+    else if (i4_min_cost == i4_cost[2])
+    {
+        *u4_intra_mode = DC_I4x4;
+        memset(pu1_dst, u4_dcval, 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, u4_dcval, 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, u4_dcval, 4);
+        pu1_dst += dst_strd;
+        memset(pu1_dst, u4_dcval, 4);
+    }
+
+    else if (i4_min_cost == i4_cost[3])
+    {
+        *u4_intra_mode = DIAG_DL_I4x4;
+        pu1_pred_val = u1_pred_vals_diag_121 + 5;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 1), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 3), 4);
+    }
+    else if (i4_min_cost == i4_cost[4])
+    {
+        *u4_intra_mode = DIAG_DR_I4x4;
+        pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val - 1), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val - 2), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val - 3), 4);
+    }
+
+    else if (i4_min_cost == i4_cost[5])
+    {
+        *u4_intra_mode = VERT_R_I4x4;
+        pu1_pred_val = u1_pred_vals_diag_11 + 4;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        pu1_pred_val = u1_pred_vals_diag_121 + 3;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
+    }
+    else if (i4_min_cost == i4_cost[6])
+    {
+        *u4_intra_mode = HORZ_D_I4x4;
+        pu1_pred_val = u1_pred_vals_horz_d;
+        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+    }
+    else if (i4_min_cost == i4_cost[7])
+    {
+        *u4_intra_mode = VERT_L_I4x4;
+        pu1_pred_val = u1_pred_vals_diag_11 + 5;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        pu1_pred_val = u1_pred_vals_diag_121 + 5;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        pu1_pred_val = u1_pred_vals_diag_11 + 6;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        pu1_pred_val = u1_pred_vals_diag_121 + 6;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+    }
+    else if (i4_min_cost == i4_cost[8])
+    {
+        *u4_intra_mode = HORZ_U_I4x4;
+        pu1_pred_val = u1_pred_vals_horz_u;
+        memcpy(pu1_dst, (pu1_pred_val), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+        pu1_dst += dst_strd;
+        memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+        pu1_dst += dst_strd;
+    }
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief:
+*  Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+*  This function evaluates the first three intra chroma modes (DC, HORZ, VERT), computes
+*  their SADs and, when the best SAD beats *pu4_sadmin, writes that prediction to pu1_dst.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source (8 rows of 8 interleaved U,V sample pairs)
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels: left column at [0..15] (bottom row first), top row at [18..33], U,V interleaved
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK)
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+*  Pointer to the SAD to beat on entry; receives the minimum sad when it is lower
+*
+* @param[in] u4_valid_intra_modes
+*  Bit mask of the modes that are valid: bit 0 - DC, bit 1 - HORZ, bit 2 - VERT
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+                                        UWORD8 *pu1_ngbr_pels,
+                                        UWORD8 *pu1_dst,
+                                        UWORD32 src_strd,
+                                        UWORD32 dst_strd,
+                                        WORD32 u4_n_avblty,
+                                        UWORD32 *u4_intra_mode,
+                                        WORD32 *pu4_sadmin,
+                                        UWORD32 u4_valid_intra_modes)
+{
+    UWORD8 *pu1_neighbour;
+    UWORD8 *pu1_src_temp = pu1_src;
+    UWORD8 left = 0, top = 0;
+    WORD32 u4_dcval_u_l[2] = { 0, 0 }, /* sums of left 'U' neighbours: [0] = upper four rows, [1] = lower four rows */
+           u4_dcval_u_t[2] = { 0, 0 };  /* sums of top 'U' neighbours: [0] = left four columns, [1] = right four columns */
+
+    WORD32 u4_dcval_v_l[2] = { 0, 0 }, /* sums of left 'V' neighbours, split as for 'U' */
+           u4_dcval_v_t[2] = { 0, 0 }; /* sums of top 'V' neighbours, split as for 'U' */
+
+    WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
+                    i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
+    UWORD8 val_u, val_v;
+
+    WORD32 u4_dc_val[2][2][2];/*  -----------
+                                  |    |    |  Chroma can have four
+                                  | 00 | 01 |  separate dc values...
+                                  -----------  u4_dc_val holds them, indexed as
+                                  |    |    |  u4_dc_val[row][col][0] for U and u4_dc_val[row][col][1] for V
+                                  | 10 | 11 |
+                                  -----------                */
+    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); /* used as 0/1 in the shift counts below - assumes the mask is bit 0; TODO confirm */
+    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; /* reduced to 0/1 by the shift - assumes the mask is bit 2; TODO confirm */
+
+    /* Evaluating HORZ; the left-neighbour DC partial sums are gathered in the same pass */
+    if (left)/* If left available */
+    {
+        i4_sad_horz = 0;
+
+        for (i = 0; i < 8; i++)
+        {
+            val_v = pu1_ngbr_pels[15 - 2 * i]; /* left pels are stored bottom row first, so row i is read backwards from index 15 */
+            val_u = pu1_ngbr_pels[15 - 2 * i - 1];
+            row = i / 4; /* 0 for the upper four rows, 1 for the lower four */
+            u4_dcval_u_l[row] += val_u;
+            u4_dcval_v_l[row] += val_v;
+            for (j = 0; j < 8; j++)
+            {
+                i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
+                i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
+            }
+
+            pu1_src_temp += src_strd;
+        }
+        u4_dcval_u_l[0] += 2; /* rounding offset, added once per four-pel sum before the shifts below */
+        u4_dcval_u_l[1] += 2;
+        u4_dcval_v_l[0] += 2;
+        u4_dcval_v_l[1] += 2;
+    }
+
+    /* Evaluating VERT; the top-neighbour DC partial sums are gathered in the same pass */
+    pu1_src_temp = pu1_src;
+    if (top) /* top available*/
+    {
+        i4_sad_vert = 0;
+
+        for (i = 0; i < 8; i++)
+        {
+            col = i / 4; /* here i doubles as the top-neighbour column index for the DC sums */
+
+            val_u = pu1_ngbr_pels[18 + i * 2];
+            val_v = pu1_ngbr_pels[18 + i * 2 + 1];
+            u4_dcval_u_t[col] += val_u;
+            u4_dcval_v_t[col] += val_v;
+
+            for (j = 0; j < 16; j++) /* 16 bytes = 8 interleaved U,V pairs of source row i */
+            {
+                i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
+            }
+            pu1_src_temp += src_strd;
+
+        }
+        u4_dcval_u_t[0] += 2; /* rounding offset, added once per four-pel sum before the shifts below */
+        u4_dcval_u_t[1] += 2;
+        u4_dcval_v_t[0] += 2;
+        u4_dcval_v_t[1] += 2;
+    }
+
+    /* computing DC value; the shift is 2 when one side contributed and 3 when both did */
+    /* Equation  8-128 in spec*/
+    u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
+    u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
+    u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
+    u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
+
+    if (top)
+    {
+        /* Equation  8-132 in spec*/
+        u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
+        u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
+    }
+    else
+    {
+        u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); /* top missing: fall back to the upper-left sum (zero if left is missing too) */
+        u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
+    }
+
+    if (left)
+    {
+        u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
+        u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
+    }
+    else
+    {
+        u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); /* left missing: fall back to the top-left sum (zero if top is missing too) */
+        u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
+    }
+
+    if (!(left || top))
+    {
+        /* none available: every DC value computed above is replaced by 128 */
+        u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
+        u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
+        u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
+        u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
+    }
+
+    /* Evaluating DC (always computed; discarded below if DC is disabled) */
+    pu1_src_temp = pu1_src;
+    i4_sad_dc = 0;
+    for (i = 0; i < 8; i++)
+    {
+        for (j = 0; j < 8; j++)
+        {
+            col = j / 4;
+            row = i / 4;
+            val_u = u4_dc_val[row][col][0];
+            val_v = u4_dc_val[row][col][1];
+
+            i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
+            i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
+        }
+        pu1_src_temp += src_strd;
+    }
+
+    if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
+        i4_sad_dc = INT_MAX;
+    if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
+        i4_sad_horz = INT_MAX;
+    if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
+        i4_sad_vert = INT_MAX;
+
+    i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
+
+    /* Only a strictly lower SAD updates the outputs; otherwise *pu4_sadmin, *u4_intra_mode and pu1_dst are left untouched */
+    if (i4_min_sad < *pu4_sadmin)
+    {
+        *pu4_sadmin = i4_min_sad;
+        /* On equal SADs the order of the tests below decides: DC first, then HORZ, then VERT */
+        if (i4_min_sad == i4_sad_dc)
+        {
+            *u4_intra_mode = DC_CH_I8x8;
+            for (i = 0; i < 8; i++)
+            {
+                for (j = 0; j < 8; j++)
+                {
+                    col = j / 4;
+                    row = i / 4;
+
+                    pu1_dst[2 * j] = u4_dc_val[row][col][0];
+                    pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
+                }
+                pu1_dst += dst_strd;
+            }
+        }
+        else if (i4_min_sad == i4_sad_horz)
+        {
+            *u4_intra_mode = HORZ_CH_I8x8;
+            for (j = 0; j < 8; j++) /* note: here j walks the rows and i the columns */
+            {
+                val_v = pu1_ngbr_pels[15 - 2 * j];
+                val_u = pu1_ngbr_pels[15 - 2 * j - 1];
+
+                for (i = 0; i < 8; i++)
+                {
+                    pu1_dst[2 * i] = val_u;
+                    pu1_dst[2 * i + 1] = val_v;
+
+                }
+                pu1_dst += dst_strd;
+            }
+        }
+        else
+        {
+            *u4_intra_mode = VERT_CH_I8x8;
+            pu1_neighbour = pu1_ngbr_pels + 18; /* start of the top neighbour row */
+            for (j = 0; j < 8; j++)
+            {
+                memcpy(pu1_dst, pu1_neighbour, MB_SIZE); /* presumably MB_SIZE is 16, i.e. one row of 8 U,V pairs - confirm */
+                pu1_dst += dst_strd;
+            }
+        }
+    }
+
+    return;
+}
diff --git a/encoder/ih264e_intra_modes_eval.h b/encoder/ih264e_intra_modes_eval.h
new file mode 100755
index 0000000..c8402e5
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.h
@@ -0,0 +1,418 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_intra_modes_eval.h
+*
+* @brief
+*  This file contains declarations of routines that perform rate distortion
+*  analysis on a macroblock if coded as intra.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_INTRA_MODES_EVAL_H_
+#define IH264E_INTRA_MODES_EVAL_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for macroblock availability
+*
+* @par   Description
+*  Calculates the availability of the left, top, topright and topleft macroblocks.
+*
+* @param[in] ps_proc_ctxt
+*  pointer to proc context (handle)
+*
+* @remarks Based on section 6.4.5 in H264 spec
+*
+* @return  none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  derivation process for subblock/partition availability
+*
+* @par   Description
+*  Calculates the availability of the left, top, topright and topleft subblock
+*  or partitions.
+*
+* @param[in]    s_ngbr_avbl
+*  pointer to the neighbor availability structure of the current macroblock
+*
+* @param[in]    i1_pel_pos_x
+*  column position of the pel wrt the current block
+*
+* @param[in]    i1_pel_pos_y
+*  row position of the pel wrt the current block
+*
+* @remarks     Assumptions: before calling this function it is assumed that
+*   the neighbor availability of the current macroblock is already derived.
+*   Based on table 6-3 of H264 specification
+*
+* @return      availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions
+        (
+            block_neighbors_t *s_ngbr_avbl,
+            WORD8 i1_pel_pos_x,
+            WORD8 i1_pel_pos_y
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 16x16 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]   ps_proc_ctxt
+*  pointer to process context (handle)
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+*  the SAD and cost are one and the same.
+*
+* @return     none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 8x8 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: TODO: This function needs to be tested
+*
+*  @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible intra 4x4 modes and finds the mode
+*  that best represents the macro-block (least distortion) and occupies fewer
+*  bits in the bit-stream.
+*
+* @param[in]    ps_proc_ctxt
+*  pointer to proc ctxt
+*
+* @remarks
+*  Ideally the cost of encoding a macroblock is calculated as
+*  (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+*  input block and the reconstructed block and rate is the number of bits taken
+*  to place the macroblock in the bit-stream. In this routine the rate does not
+*  exactly point to the total number of bits it takes, rather it points to header
+*  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+*  and residual bits fall in to texture bits the number of bits taken to encoding
+*  mbtype is considered as rate, we compute cost. Further we will approximate
+*  the distortion as the deviation b/w input and the predicted block as opposed
+*  to input and reconstructed block.
+*
+*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+*  24*lambda is added to the SAD before comparison with the best SAD for
+*  inter prediction. This is an empirical value to prevent using too many intra
+*  blocks.
+*
+* @return      none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+*  This function evaluates all the possible chroma intra 8x8 modes and finds
+*  the mode that best represents the macroblock (least distortion) and occupies
+*  fewer bits in the bitstream.
+*
+* @param[in] ps_proc_ctxt
+*  pointer to macroblock context (handle)
+*
+* @remarks
+*  For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
+        (
+            process_ctxt_t *ps_proc_ctxt
+        );
+
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+*  prediction.
+*
+* @par Description
+*  This function evaluates the first three 16x16 modes, computes the corresponding
+*  sads and returns the buffer predicted with the best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+*  Pointer to the variable in which minimum sad is returned (the C chroma evaluator also reads it on entry as the sad to beat)
+*
+* @param[in] u4_valid_intra_modes
+*  Says what all modes are valid
+*
+* @returns      none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_modes_ft(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_ngbr_pels_i16,
+                                            UWORD8 *pu1_dst,
+                                            UWORD32 src_strd,
+                                            UWORD32 dst_strd,
+                                            WORD32 u4_n_avblty,
+                                            UWORD32 *u4_intra_mode,
+                                            WORD32 *pu4_sadmin,
+                                            UWORD32 u4_valid_intra_modes);
+
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes;
+
+/* assembly */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_a9q;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_a9q;
+
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_av8;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_av8;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_ssse3;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_ssse3;
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+*  This function evaluates the 4x4 modes, computes the corresponding sads
+*  and returns the buffer predicted with the best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+*  Says what all modes are valid
+*
+* @param[in] u4_lambda
+*  Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+*  Predicted mode for cost computation
+*
+* @returns      none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src,
+                                                UWORD8 *pu1_ngbr_pels,
+                                                UWORD8 *pu1_dst,
+                                                UWORD32 src_strd,
+                                                UWORD32 dst_strd,
+                                                WORD32 u4_n_avblty,
+                                                UWORD32 *u4_intra_mode,
+                                                WORD32 *pu4_sadmin,
+                                                UWORD32 u4_valid_intra_modes,
+                                                UWORD32  u4_lambda,
+                                                UWORD32 u4_predictd_mode);
+
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_ssse3;
+
+/* assembly */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_a9q;
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_av8;
+
+#endif /* IH264E_INTRA_MODES_EVAL_H_ */
diff --git a/encoder/ih264e_list.h b/encoder/ih264e_list.h
new file mode 100755
index 0000000..782c007
--- /dev/null
+++ b/encoder/ih264e_list.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_list.h
+*
+* @brief
+*  The file contains declarations of functions for encoder queue management
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_LIST_H_
+#define IH264E_LIST_H_
+
+
+#endif /* IH264E_LIST_H_ */
diff --git a/encoder/ih264e_master.h b/encoder/ih264e_master.h
new file mode 100755
index 0000000..6c7505a
--- /dev/null
+++ b/encoder/ih264e_master.h
@@ -0,0 +1,132 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_master.h
+*
+* @brief
+*  Contains declarations of functions used by master thread
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MASTER_H_
+#define IH264E_MASTER_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  This function joins all the spawned threads after successful completion of
+*  their tasks
+*
+* @par   Description
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @returns  none
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec);
+
+/**
+******************************************************************************
+*
+* @brief This function puts the current thread to sleep for a duration
+*  of sleep_us
+*
+* @par Description
+*  ithread_yield() causes the calling thread to yield execution to another
+*  thread that is ready to run on the current processor. The operating system
+*  selects the thread to yield to. ithread_usleep() blocks the current thread
+*  for the specified number of microseconds. In other words, yield just says:
+*  end my timeslice prematurely, look around for other threads to run. If there
+*  is nothing better than me, continue. Sleep says: I don't want to run for x
+*  microseconds. Even if no other thread wants to run, don't make me run.
+*
+* @param[in] sleep_us
+*  thread sleep duration
+*
+* @returns error_status
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us);
+
+/**
+******************************************************************************
+*
+* @brief
+*  Encodes in synchronous api mode
+*
+* @par Description
+*  This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns  Status
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
+
+/**
+*******************************************************************************
+*
+* @brief update encoder configuration parameters
+*
+* @par Description:
+*  updates encoder configuration parameters from the given config set.
+*  Initialize/reinitialize codec parameters according to new configurations.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_cfg
+*  Pointer to config param set
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, cfg_params_t *ps_cfg);
+
+#endif /* IH264E_MASTER_H_ */
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
new file mode 100755
index 0000000..2dd0974
--- /dev/null
+++ b/encoder/ih264e_mc.c
@@ -0,0 +1,320 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_mc.c
+*
+* @brief
+*  Contains definition of functions for motion compensation
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_motion_comp_luma()
+*  - ih264e_motion_comp_chroma()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_mc.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  selects, or builds, the luma prediction of an inter mb from its l0 mv.
+*
+* @par Description
+*  For every partition the full pel reference position is derived from the
+*  mv. If the mv carries a half pel bit in x or y, the best sub pel buffer of
+*  the proc ctxt is used as source instead. When the mb has one partition
+*  only, nothing is copied: the source pointer and its stride are returned to
+*  the caller, to be used in place of the pred buffer. Otherwise the source
+*  is copied into the pred buffer of the proc ctxt.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+*  pseudo prediction buffer (written in the single partition case only)
+*
+* @param[out] pi4_pseudo_pred_strd
+*  pseudo pred buffer stride (written in the single partition case only)
+*
+* @return  none
+*
+* @remarks Presumably the best sub pel buffer is filled by ME - to confirm.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+                             UWORD8 **pu1_pseudo_pred,
+                             WORD32 *pi4_pseudo_pred_strd)
+{
+    /* codec context, owner of the luma copy function pointer */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* stride of the full pel reference plane */
+    const WORD32 i4_fpel_strd = ps_proc->i4_rec_strd;
+
+    /* stride of the pred buffer held by the proc ctxt */
+    const WORD32 i4_dst_strd = ps_proc->i4_pred_strd;
+
+    /* running partition index */
+    UWORD32 u4_part;
+
+    /************************************************************************/
+    /* A luma mv component is read below as                                 */
+    /*   bit 0       : quarter pel bit, has no effect on the choice made    */
+    /*   bit 1       : half pel bit                                         */
+    /*   bits 2 & up : full pel displacement (right shift by 2)             */
+    /*                                                                      */
+    /* The half pel bits decide which buffer the prediction comes from.     */
+    /************************************************************************/
+
+    for (u4_part = 0; u4_part < ps_proc->u4_num_sub_partitions; u4_part++)
+    {
+        /* mv, size and position of the partition being processed */
+        enc_pu_t *ps_pu = ps_proc->ps_pu + u4_part;
+
+        /* horizontal l0 mv component of the partition */
+        const WORD32 i4_mvx = ps_pu->s_l0_mv.i2_mvx;
+
+        /* vertical l0 mv component of the partition */
+        const WORD32 i4_mvy = ps_pu->s_l0_mv.i2_mvy;
+
+        /* horizontal full pel displacement */
+        const WORD32 i4_fpel_x = i4_mvx >> 2;
+
+        /* vertical full pel displacement */
+        const WORD32 i4_fpel_y = i4_mvy >> 2;
+
+        /* non zero when either component carries the half pel bit */
+        const WORD32 i4_is_hpel = (((i4_mvx | i4_mvy) & 0x2) != 0);
+
+        /* partition width in pixels : (stored value + 1) * 4 */
+        const UWORD32 u4_wd = (ps_pu->b4_wd + 1) << 2;
+
+        /* partition height in pixels : (stored value + 1) * 4 */
+        const UWORD32 u4_ht = (ps_pu->b4_ht + 1) << 2;
+
+        /* full pel reference displaced by the full pel mv */
+        UWORD8 *pu1_fpel = ps_proc->pu1_ref_buf_luma
+                         + (i4_fpel_y * i4_fpel_strd)
+                         + i4_fpel_x;
+
+        /* slot of this partition inside the pred buffer of the proc ctxt */
+        UWORD8 *pu1_dst = ps_proc->pu1_pred_mb
+                        + 4 * ps_pu->b4_pos_y * i4_dst_strd
+                        + 4 * ps_pu->b4_pos_x;
+
+        /* buffer the prediction is taken from */
+        UWORD8 *pu1_src;
+
+        /* stride of that buffer */
+        WORD32 i4_src_strd;
+
+        /********************************************************************/
+        /* Step 1 : choose the source of the prediction                     */
+        /* (best sub pel buffer for half pel mvs, full pel ref otherwise)   */
+        /********************************************************************/
+        if (i4_is_hpel)
+        {
+            /* half pel mv : best sub pel buffer of the proc ctxt */
+            pu1_src = ps_proc->pu1_best_subpel_buf;
+            i4_src_strd = ps_proc->u4_bst_spel_buf_strd;
+        }
+        else
+        {
+            /* otherwise : displaced full pel reference */
+            pu1_src = pu1_fpel;
+            i4_src_strd = i4_fpel_strd;
+        }
+
+        /********************************************************************/
+        /* Step 2 : hand the prediction over                                */
+        /*                                                                  */
+        /* With a single partition (P16x16) the 16x16 unit is not copied.   */
+        /* The source pointer and its stride are returned through the out   */
+        /* arguments and the caller uses them in place of the pred buffer.  */
+        /*                                                                  */
+        /* With several partitions each one is copied to its slot in the    */
+        /* pred buffer. As noted by the original authors, this path is not  */
+        /* taken at present since only 16x16 is supported in P mbs.         */
+        /********************************************************************/
+        if (ps_proc->u4_num_sub_partitions != 1)
+        {
+            /* several partitions : copy into the pred buffer slot */
+            ps_codec->pf_inter_pred_luma_copy(pu1_src,
+                                              pu1_dst,
+                                              i4_src_strd,
+                                              i4_dst_strd,
+                                              u4_ht,
+                                              u4_wd,
+                                              NULL,
+                                              0);
+        }
+        else
+        {
+            /* single partition : return the source itself, no copy */
+            *pu1_pseudo_pred = pu1_src;
+            *pi4_pseudo_pred_strd = i4_src_strd;
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for chroma mb
+*
+* @par   Description
+*  For every partition, calls the codec's chroma inter pred function with
+*  the reference displaced by (mv >> 3) and the fraction (mv & 7) as dx, dy
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @return  none
+*
+* @remarks NOTE(review): pred offset uses 4 * pos_y rows and 2 * pos_x bytes;
+*  harmless while pos is 0 (presumably always so today) - to be confirmed.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
+{
+    /* codec context, owner of the chroma inter pred function pointer */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* stride of the reference buffer */
+    const WORD32 i4_src_strd = ps_proc->i4_rec_strd;
+
+    /* stride of the pred buffer */
+    const WORD32 i4_dst_strd = ps_proc->i4_pred_strd;
+
+    /* running partition index */
+    UWORD32 u4_part;
+
+    /************************************************************************/
+    /* The l0 mv of a partition is split below into                         */
+    /*   bits 0 to 2 : fraction, passed on as dx / dy (0 .. 7)              */
+    /*   bits 3 & up : whole sample displacement in the reference buffer    */
+    /* Presumably the mv is in quarter luma pel units, i.e. one eighth of   */
+    /* a chroma sample - to be confirmed against the caller.                */
+    /************************************************************************/
+
+    for (u4_part = 0; u4_part < ps_proc->u4_num_sub_partitions; u4_part++)
+    {
+        /* mv, size and position of the partition being processed */
+        enc_pu_t *ps_pu = ps_proc->ps_pu + u4_part;
+
+        /* horizontal l0 mv component of the partition */
+        const WORD32 i4_mvx = ps_pu->s_l0_mv.i2_mvx;
+
+        /* vertical l0 mv component of the partition */
+        const WORD32 i4_mvy = ps_pu->s_l0_mv.i2_mvy;
+
+        /* horizontal whole sample displacement */
+        const WORD32 i4_int_x = i4_mvx >> 3;
+
+        /* vertical whole sample displacement */
+        const WORD32 i4_int_y = i4_mvy >> 3;
+
+        /* horizontal fraction : low three bits of the mv */
+        const UWORD8 u1_dx = i4_mvx & 0x7;
+
+        /* vertical fraction : low three bits of the mv */
+        const UWORD8 u1_dy = i4_mvy & 0x7;
+
+        /* partition width handed to the predictor : (stored value + 1) * 2 */
+        const UWORD32 u4_wd = (ps_pu->b4_wd + 1) << 1;
+
+        /* partition height handed to the predictor : (stored value + 1) * 2 */
+        const UWORD32 u4_ht = (ps_pu->b4_ht + 1) << 1;
+
+        /* reference displaced by the whole sample mv; x is doubled,         */
+        /* presumably because cb and cr samples are interleaved - to confirm */
+        UWORD8 *pu1_src = ps_proc->pu1_ref_buf_chroma
+                        + (i4_int_y * i4_src_strd)
+                        + (i4_int_x << 1);
+
+        /* slot of this partition inside the pred buffer of the proc ctxt */
+        /* (row offset 4 * pos_y, byte offset 2 * pos_x, kept as found)   */
+        UWORD8 *pu1_dst = ps_proc->pu1_pred_mb
+                        + 4 * ps_pu->b4_pos_y * i4_dst_strd
+                        + 2 * ps_pu->b4_pos_x;
+
+        /* predict the partition */
+        ps_codec->pf_inter_pred_chroma(pu1_src,
+                                       pu1_dst,
+                                       i4_src_strd,
+                                       i4_dst_strd,
+                                       u1_dx,
+                                       u1_dy,
+                                       u4_ht,
+                                       u4_wd);
+    }
+}
diff --git a/encoder/ih264e_mc.h b/encoder/ih264e_mc.h
new file mode 100755
index 0000000..965e1d1
--- /dev/null
+++ b/encoder/ih264e_mc.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_mc.h
+*
+* @brief
+*  This file contains declarations of routines that perform motion compensation
+*  of luma and chroma macroblocks.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MC_H_
+#define IH264E_MC_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+*  This routine performs motion compensation of an inter mb. When the inter
+*  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+*  to pred buffer. In this case the function returns pointer and stride of the
+*  ref. buffer and this info is used in place of pred buffer else where.
+*  In other cases, the pred buffer is populated via copy / filtering + copy
+*  (q pel cases) and returned.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+*  pseudo prediction buffer
+*
+* @param[out] pi4_pseudo_pred_strd
+*  pseudo pred buffer stride
+*
+* @return  none
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+                             UWORD8 **pu1_pseudo_pred,
+                             WORD32 *pi4_pseudo_pred_strd);
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for chroma mb
+*
+* @par   Description
+*  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+*  according to the motion vectors given
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @return  none
+*
+* @remarks Assumes half pel and quarter pel buffers for the entire frame are
+*  populated.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma
+        (
+            process_ctxt_t *ps_proc
+        );
+
+
+#endif // IH264E_MC_H_
diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c
new file mode 100755
index 0000000..9e8d7a3
--- /dev/null
+++ b/encoder/ih264e_me.c
@@ -0,0 +1,1153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_me.c
+ *
+ * @brief
+ *  Contains definition of functions for motion estimation
+ *
+ * @author
+ *  ittiam
+ *
+ * @par List of Functions:
+ *  - ih264e_init_mv_bits()
+ *  - ih264e_skip_analysis_chroma()
+ *  - ih264e_skip_analysis_luma()
+ *  - ih264e_analyse_skip()
+ *  - ih264e_get_search_candidates()
+ *  - ih264e_find_skip_motion_vector()
+ *  - ih264e_get_mv_predictor()
+ *  - ih264e_mv_pred()
+ *  - ih264e_mv_pred_me()
+ *  - ih264e_init_me()
+ *  - ih264e_compute_me()
+ *  - ih264e_compute_me_nmb()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264_macros.h"
+#include "ih264e_me.h"
+#include "ime.h"
+#include "ime_distortion_metrics.h"
+#include "ih264_debug.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+#include "ih264e_debug.h"
+#include "ih264e_half_pel.h"
+#include "ime_statistics.h"
+#include "ih264e_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function fills the table of codeword lengths of motion vector
+*  differences, for differences of either sign
+*
+* @param[in,out] ps_me_ctxt
+*  Pointer to me ctxt; the table is reached through its pu1_mv_bits member,
+*  which is indexed with positive and negative values
+*
+* @returns  none
+*
+* @remarks The lengths of the code words are derived from signed exponential
+* Golomb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
+{
+    /* code length, its bound, largest mv difference, loop magnitude */
+    WORD32 i4_len, i4_len_limit, i4_max_diff, i4_mag;
+
+    /* codeNum taken for the largest difference, and its range */
+    UWORD32 u4_max_code_num, u4_bits;
+
+    /* first / last ue(v) codeNum and se(v) magnitude of the current length */
+    UWORD32 u4_ue_first, u4_ue_last, u4_se_first, u4_se_last;
+
+    /* largest search range, scaled by 4 (sub pel) and by 2 (delta mv) */
+    i4_max_diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
+    i4_max_diff <<= 3;
+
+    /* twice that value is taken as the largest codeNum */
+    u4_max_code_num = (i4_max_diff << 1);
+    GETRANGE(u4_bits, u4_max_code_num);
+    i4_len_limit = 2*u4_bits - 1;
+
+    /* a zero difference costs one bit */
+    ps_me_ctxt->pu1_mv_bits[0] = 1;
+
+    /* code lengths are odd : 3, 5, 7, ... while below the limit */
+    for (i4_len = 3; i4_len < i4_len_limit; i4_len += 2)
+    {
+        /* ue(v) codeNums that share this length, and their se(v) magnitudes */
+        u4_ue_first = (1 << (i4_len >> 1));
+        u4_ue_last = 2*u4_ue_first - 1;
+
+        u4_se_first = u4_ue_first >> 1;
+        u4_se_last = u4_ue_last >> 1;
+
+        DEBUG("\n%d min, %d max %d codesize", u4_se_first, u4_se_last, i4_len);
+
+        /* the same length applies to +magnitude and -magnitude */
+        for (i4_mag = u4_se_first; i4_mag <= (WORD32)u4_se_last; i4_mag++)
+        {
+            ps_me_ctxt->pu1_mv_bits[i4_mag] = i4_len;
+            ps_me_ctxt->pu1_mv_bits[-i4_mag] = ps_me_ctxt->pu1_mv_bits[i4_mag];
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determines the valid candidates for which the initial search shall happen.
+* The best of these candidates is used to center the diamond pixel search.
+*
+* @par Description: The candidate list is filled in this order:
+*  1. the zero mv
+*  2. the left mb mv, if mb a is available
+*  3. the top mb mv, if mb b is available
+*  4. if mb b is available: the top right mb mv when mb c is available,
+*     else the top left mb mv when mb d is available
+*  5. the skip mv
+*
+* Neighbour mvs and the skip mv are converted with (mv + 2) >> 2 and
+* clipped to the search range of the me ctxt before being stored. The
+* neighbour mvs are taken from the ME copies kept in the proc ctxt
+* (s_left_mb_pu_ME, ps_top_row_pu_ME, s_top_left_mb_pu_ME).
+*
+* Before the skip mv is computed, ih264e_mv_pred_me() is called and the
+* predicted mv found in ps_proc->ps_pred_mv is copied to the mb part info of
+* the me ctxt.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt. Supplies mb x index, neighbour availability
+*  and neighbour mvs. Its skip mv is written through
+*  ih264e_find_skip_motion_vector(); ih264e_mv_pred_me() presumably updates
+*  its predicted mv - to confirm
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The search range limits are read from it.
+*  as_mv_init_search[], u4_num_candidates and s_mb_part.s_mv_pred are written
+*
+* @returns  none. The list of MVs to be used for priming the full pel search
+* and the number of such MVs are left in ps_me_ctxt
+*
+* @remarks
+*   Assumptions : 1. Assumes Single reference frame
+*                 2. Assumes Only partition of size 16x16
+*
+*   At most 5 candidates are produced (asserted at the end of the routine)
+*   NOTE(review): the diamond search itself is not visible in this routine
+*
+*******************************************************************************
+*/
+static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
+                                         me_ctxt_t *ps_me_ctxt)
+{
+    /* curr mb x index, used to address the top row mv info */
+    WORD32 i4_mb_x = ps_proc->i4_mb_x;
+
+    /* left mb motion vector */
+    mv_t *ps_left_mv;
+
+    /* top mb motion vector */
+    mv_t *ps_top_mv;
+
+    /* top left mb motion vector */
+    mv_t *ps_top_left_mv;
+
+    /* top right mb motion vector */
+    mv_t *ps_top_right_mv;
+
+    /* skip mv, filled by ih264e_find_skip_motion_vector() further below */
+    mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+
+    /* mb part info, receives the predicted mv */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* number of search candidates collected so far */
+    UWORD32 u4_num_candidates = 0;
+
+    /* candidate mv after the (mv + 2) >> 2 conversion and clipping */
+    WORD32 mvx, mvy;
+
+    /* ngbr availability */
+    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* srch range limits : north, south, east, west */
+    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+    ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv;
+    ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv;
+    ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv;
+    ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv;
+
+    /************************************************************/
+    /* Taking the Zero motion vector as one of the candidates   */
+    /************************************************************/
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0;
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0;
+
+    u4_num_candidates++;
+
+    /************************************************************/
+    /* Taking the Left MV Predictor as one of the candidates    */
+    /************************************************************/
+    if (ps_ngbr_avbl->u1_mb_a)
+    {
+        mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
+        mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
+
+        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+        u4_num_candidates ++;
+    }
+    /*else
+    {
+        ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvx = 0;
+        ps_me_ctxt->as_mv_init_search[LEFT_CAND].i2_mvy = 0;
+    }*/
+
+    /************************************************************/
+    /* Taking the Top MV Predictor as one of the candidates     */
+    /************************************************************/
+    if (ps_ngbr_avbl->u1_mb_b)
+    {
+        mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
+        mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
+
+        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+        u4_num_candidates ++;
+
+        /************************************************************/
+        /* Taking the TopRt MV Predictor as one of the candidates   */
+        /************************************************************/
+        if (ps_ngbr_avbl->u1_mb_c)
+        {
+            mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
+            mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
+
+            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+            ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+            ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+            u4_num_candidates ++;
+        }
+        /************************************************************/
+        /* Else taking the TopLt MV (only when TopRt is unavailable) */
+        /************************************************************/
+        else if (ps_ngbr_avbl->u1_mb_d)
+        {
+            mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
+            mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
+
+            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+            ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+            ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+            u4_num_candidates ++;
+        }
+        /*else
+        {
+            ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
+            ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
+        }*/
+    }
+    /*else
+    {
+        ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvx = 0;
+        ps_me_ctxt->as_mv_init_search[TOP_CAND].i2_mvy = 0;
+
+        ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvx = 0;
+        ps_me_ctxt->as_mv_init_search[TOPR_CAND].i2_mvy = 0;
+    }*/
+
+
+    /********************************************************************/
+    /* MV Prediction (ps_proc->ps_pred_mv is read right after the call) */
+    /********************************************************************/
+    ih264e_mv_pred_me(ps_proc);
+
+    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
+    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+
+    /************************************************************/
+    /* Get the skip motion vector (u4_for_me = 1 : ME copies)   */
+    /************************************************************/
+    ih264e_find_skip_motion_vector(ps_proc, 1);
+
+    /************************************************************/
+    /* Taking the Skip motion vector as one of the candidates   */
+    /************************************************************/
+    mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
+    mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
+
+    mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
+    mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
+
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = mvx;
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = mvy;
+
+    u4_num_candidates++;
+
+    ASSERT(u4_num_candidates <= 5);
+
+    ps_me_ctxt->u4_num_candidates = u4_num_candidates;
+}
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+*  Writes the skip mv of the current mb to ps_proc->ps_skip_mv: (0, 0) when
+*  mb a or mb b is unavailable, or when the left or the top mb has ref idx,
+*  mvx and mvy all zero; otherwise a copy of ps_proc->ps_pred_mv.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[in] u4_for_me
+*  1 : use the ME copies of the left / top row mb info (s_left_mb_pu_ME,
+*  ps_top_row_pu_ME); any other value : use s_left_mb_pu, ps_top_row_pu
+*
+* @returns  none
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+*   specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
+{
+    /* ngbr availability */
+    block_neighbors_t *ps_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* destination of the skip mv */
+    mv_t *ps_dst_mv = ps_proc->ps_skip_mv;
+
+    /* non zero when the ME copies of the neighbour info are to be used */
+    const WORD32 i4_use_me_copy = (u4_for_me == 1);
+
+    /* left mb info */
+    enc_pu_t *ps_left = i4_use_me_copy ? &ps_proc->s_left_mb_pu_ME
+                                       : &ps_proc->s_left_mb_pu;
+
+    /* top mb info : entry i4_mb_x of the selected top row */
+    enc_pu_t *ps_top = (i4_use_me_copy ? ps_proc->ps_top_row_pu_ME
+                                       : ps_proc->ps_top_row_pu) + ps_proc->i4_mb_x;
+
+    /* pred mv is used only if both neighbours are available and neither */
+    /* of them has ref idx, mvx and mvy all equal to zero                */
+    if (ps_avbl->u1_mb_a && ps_avbl->u1_mb_b &&
+        ((ps_left->i1_l0_ref_idx | ps_left->s_l0_mv.i2_mvx | ps_left->s_l0_mv.i2_mvy) != 0) &&
+        ((ps_top->i1_l0_ref_idx | ps_top->s_l0_mv.i2_mvx | ps_top->s_l0_mv.i2_mvy) != 0))
+    {
+        /* skip mv = predicted mv */
+        ps_dst_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
+        ps_dst_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+    }
+    else
+    {
+        /* skip mv = zero mv */
+        ps_dst_mv->i2_mvx = 0;
+        ps_dst_mv->i2_mvy = 0;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+*  The routine calculates the motion vector predictor for a given block,
+*  given the left, top and top right candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+*  pointer to left mb pu (ref idx and motion vector)
+*
+* @param[in] ps_top_row_pu
+*  pointer to top mb pu; element [1] is read as the top right candidate
+*
+* @param[out] ps_pred_mv
+*  receives the predicted motion vector for the current block
+*
+* @returns  none; the x & y components of the MV predictor go to ps_pred_mv
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+*   specification.
+*   Assumptions : 1. Assumes Single reference frame
+*                 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
+                             enc_pu_t *ps_top_row_pu,
+                             mv_t *ps_pred_mv)
+{
+    /* curr frame ref idx */
+    /* we are assuming that we are operating on single reference frame
+     * hence the current ref idx is always 0; a candidate matches only
+     * when its own ref idx is 0 as well. */
+    WORD32 u4_ref_idx = 0;
+
+    /* predictor selector (3 : median) and per candidate flags (0 : ref idx matches, -1 : it does not) */
+    WORD32 pred_algo = 3, a, b, c;
+
+    /* If exactly one of the candidate blocks has a reference frame equal to
+     * the current block then use the same block as the final predictor */
+    a = (ps_left_mb_pu->i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+    b = (ps_top_row_pu[0].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+    c = (ps_top_row_pu[1].i1_l0_ref_idx == u4_ref_idx)? 0:-1;
+
+    if (a == 0 && b == -1 && c == -1)
+        pred_algo = 0; /* LEFT */
+    else if (a == -1 && b == 0 && c == -1)
+        pred_algo = 1; /* TOP */
+    else if (a == -1 && b == -1 && c == 0)
+        pred_algo = 2; /* TOP RIGHT */
+
+    switch (pred_algo)
+    {
+        case 0:
+            /* left */
+            ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy;
+            break;
+        case 1:
+            /* top */
+            ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy;
+            break;
+        case 2:
+            /* top right */
+            ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy;
+            break;
+        case 3:
+            /* none or several candidates match : component wise median of the three */
+            MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx,
+                   ps_top_row_pu[0].s_l0_mv.i2_mvx,
+                   ps_top_row_pu[1].s_l0_mv.i2_mvx,
+                   ps_pred_mv->i2_mvx);
+            MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy,
+                   ps_top_row_pu[0].s_l0_mv.i2_mvy,
+                   ps_top_row_pu[1].s_l0_mv.i2_mvy,
+                   ps_pred_mv->i2_mvy);
+
+            break;
+        default:
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*  Resets the pu's of intra / unavailable neighbors and fills ps_proc->ps_pred_mv
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks
+*  The left and top row pu's held in ps_proc are modified in place, hence the
+*  intra inter decision of the neighbors should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred(process_ctxt_t *ps_proc)
+{
+
+    /* left mb pu (ref idx + mv) */
+    enc_pu_t *ps_left_mb_pu ;
+
+    /* top left mb pu (ref idx + mv) */
+    enc_pu_t *ps_top_left_mb_pu ;
+
+    /* top row pu's : [0] is the top mb, [1] the top right mb */
+    enc_pu_t *ps_top_row_pu;
+
+    /* predicted motion vector (output) */
+    mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
+
+    /* zero mv */
+    mv_t zero_mv = {0, 0};
+
+    /*  mb neighbor availability */
+    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* mb syntax elements of neighbors (used for their intra status) */
+    mb_info_t   *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+    mb_info_t   *ps_top_left_syn;
+    UWORD32     u4_left_is_intra;
+
+    ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
+    u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
+    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
+    ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
+
+    /* Before performing mv prediction prepare the ngbr information and
+     * reset motion vectors based on their availability and intra status */
+    if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1) )
+    {
+        /* left mv */
+        ps_left_mb_pu->i1_l0_ref_idx = -1;
+        ps_left_mb_pu->s_l0_mv = zero_mv;
+    }
+    if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra)
+    {
+        /* top mv */
+        ps_top_row_pu[0].i1_l0_ref_idx = -1;
+        ps_top_row_pu[0].s_l0_mv = zero_mv;
+    }
+    if (!ps_ngbr_avbl->u1_mb_c)
+    {
+        /* top right mv - When top right partition is not available for
+         * prediction, if top left is available and not intra use it instead,
+         * else set the ref idx to -1 and the mv to (0, 0)
+         * */
+        if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra)
+        {
+            ps_top_row_pu[1].i1_l0_ref_idx = -1;
+            ps_top_row_pu[1].s_l0_mv = zero_mv;
+        }
+        else
+        {
+            ps_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx;
+            ps_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv;
+        }
+    }
+    else if (ps_top_syn[1].u2_is_intra)
+    {
+        ps_top_row_pu[1].i1_l0_ref_idx = -1;
+        ps_top_row_pu[1].s_l0_mv = zero_mv;
+    }
+
+    ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, ps_pred_mv);
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function approximates Pred. MV
+*
+* @par Description:
+*  Fills ps_proc->ps_pred_mv using the ME copies of the neighbor pu's
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks
+*  Motion estimation happens at nmb level. For cost calculations, mv is
+*  approximated using this function
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
+{
+    /* left mb pu (ME copy) */
+    enc_pu_t *ps_left_mb_pu ;
+
+    /* top left mb pu (ME copy) */
+    enc_pu_t *ps_top_left_mb_pu ;
+
+    /* top row pu's (ME copy), followed by a local copy of its top / top right entries */
+    enc_pu_t *ps_top_row_pu;
+
+    enc_pu_t s_top_row_pu[2];
+
+    /* predicted motion vector (output) */
+    mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
+
+    /* zero mv */
+    mv_t zero_mv = {0, 0};
+
+    /*  mb neighbor availability */
+    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
+
+    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
+    ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
+
+    s_top_row_pu[0] = ps_top_row_pu[0];
+    s_top_row_pu[1] = ps_top_row_pu[1];
+
+    /* Before performing mv prediction reset the ngbr motion vectors based on
+     * availability : top / top right in the local copy, left in place */
+    if (!ps_ngbr_avbl->u1_mb_a  )
+    {
+        /* left mv */
+        ps_left_mb_pu->i1_l0_ref_idx = -1;
+        ps_left_mb_pu->s_l0_mv = zero_mv;
+    }
+    if (!ps_ngbr_avbl->u1_mb_b )
+    {
+        /* top mv */
+        s_top_row_pu[0].i1_l0_ref_idx = -1;
+        s_top_row_pu[0].s_l0_mv = zero_mv;
+    }
+    if (!ps_ngbr_avbl->u1_mb_c)
+    {
+        /* top right mv - When top right partition is not available for
+         * prediction, if top left is available use it instead, else
+         * set the ref idx to -1 and the mv to (0, 0)
+         * */
+        if (!ps_ngbr_avbl->u1_mb_d)
+        {
+            s_top_row_pu[1].i1_l0_ref_idx = -1;
+            s_top_row_pu[1].s_l0_mv = zero_mv;
+        }
+        else
+        {
+            s_top_row_pu[1].i1_l0_ref_idx = ps_top_left_mb_pu->i1_l0_ref_idx;
+            s_top_row_pu[1].s_l0_mv = ps_top_left_mb_pu->s_l0_mv;
+        }
+    }
+
+    ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]), ps_pred_mv);
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+*  Before dispatching the current job to me thread, the me context associated
+*  with the job is initialized (luma src / ref pointers and lambda).
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks u1_mb_qp of the me ctxt is read here, so it must be set beforehand
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc)
+{
+    /* me ctxt */
+    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+    /* src ptr (luma) */
+    ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
+
+    /* ref ptr (luma) */
+    ps_me_ctxt->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma;
+
+    /* lagrange param, looked up in gu1_qp0 with the mb qp held in the me ctxt */
+    ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+*  The current mb is compared with a list of mb's in the reference frame for
+*  least cost. The mb that offers least cost is chosen as predicted mb and the
+*  displacement of the predicted mb from index location of the current mb is
+*  signaled as mv. The list of the mb's that are chosen in the reference frame
+*  are dependent on the speed of the ME configured.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none; results are stored via ps_proc (s_me_ctxt, ps_pu, ps_cur_mb)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me(process_ctxt_t *ps_proc)
+{
+    /* me ctxt */
+    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+//    /* mb syntax elements of neighbors */
+//    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+//    mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+
+    /* mb part info : result of the search, and the separately costed skip candidate */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+    mb_part_ctxt skip_mb_part_info;
+
+    /* rows / columns usable around the current mb, and the modified sad flag */
+    WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad;
+
+    /* Motion vectors in full-pel units */
+    WORD16 mv_x, mv_y;
+
+    /* recon stride */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* source buffer for half pel generation functions */
+    UWORD8 *pu1_hpel_src;
+
+    /* quantization parameters */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* Sad thresholds */
+    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
+
+    /* Best half pel buffer (copy destination) and its stride */
+    UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf;
+    UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd;
+
+    /* During evaluation for motion vectors limit the search to a bounded region */
+    /* Obtain number of rows and columns that are effective for me evaluation */
+    rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+    rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+    columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+    columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+    /* init srch range */
+    /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+     * horizontally and DEFAULT_MAX_SRCH_RANGE_Y / 2 vertically.
+     */
+//    ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]);
+//    ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]);
+//    ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]);
+//    ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]);
+
+    ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+    ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+    ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+    ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+    /* shrink the range by one on every side to facilitate fast sub pel computation with minimal loads */
+    if (ps_me_ctxt->u4_enable_hpel)
+    {
+        ps_me_ctxt->i4_srch_range_w += 1;
+        ps_me_ctxt->i4_srch_range_e -= 1;
+        ps_me_ctxt->i4_srch_range_n += 1;
+        ps_me_ctxt->i4_srch_range_s -= 1;
+    }
+
+    /*Initialize the min sad option*/
+    ps_me_ctxt->u4_min_sad_reached  = 0;    /*Not yet found min sad*/
+    ps_me_ctxt->i4_min_sad          = ps_proc->ps_cur_mb->u4_min_sad;
+
+    /************************************************************/
+    /* Get the seed motion vector candidates                    */
+    /************************************************************/
+    ih264e_get_search_candidates(ps_proc, ps_me_ctxt);
+
+    /************************************************************/
+    /* Init the MB part ctxt structure                          */
+    /************************************************************/
+    ps_mb_part->s_mv_curr.i2_mvx = 0;
+    ps_mb_part->s_mv_curr.i2_mvy = 0;
+    ps_mb_part->i4_mb_cost = INT_MAX;
+    ps_mb_part->i4_mb_distortion = INT_MAX;
+
+    /* With NMB changes this logic will not work as we cannot exit NME in between*/
+    /********************************************************************/
+    /*                  Analyse skip                                    */
+    /********************************************************************/
+//    if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0
+//                    && u4_frame_level_me == 0)
+//    {
+//        if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) ||
+//                        (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) ||
+//                        (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) )
+//        {
+//            if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) )
+//            {
+//                return;
+//            }
+//        }
+//    }
+
+    /********************************************************************/
+    /*                  compute skip cost                               */
+    /********************************************************************/
+    /* See if we need to use modified sad (enabled together with satqd) */
+    u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1);
+
+    /* init the cost of skip MB; the call presumably may set u4_min_sad_reached - TODO confirm */
+    skip_mb_part_info.i4_mb_cost = INT_MAX;
+    ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad);
+
+
+    if (ps_me_ctxt->u4_min_sad_reached == 0)
+    {
+        /************************************************************/
+        /* Evaluate search candidates for initial mv pt.            */
+        /************************************************************/
+        ime_evaluate_init_srchposn_16x16(ps_me_ctxt);
+
+        /********************************************************************/
+        /*                  full pel motion estimation                      */
+        /********************************************************************/
+        ime_full_pel_motion_estimation_16x16(ps_me_ctxt);
+
+        DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
+                               (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+
+        DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
+        /********************************************************************/
+        /*                   sub pel motion estimation                      */
+        /********************************************************************/
+        if (ps_me_ctxt->u4_enable_hpel)
+        {
+            /* motion vectors in terms of full pel values */
+            mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
+            mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
+
+            /* moving src pointer to the converged motion vector location*/
+            pu1_hpel_src = ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd);
+
+            ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x;
+            ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y;
+            ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy;
+            ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD;
+
+            /* half pel search is done for both sides of full pel,
+             * hence half_x of width x height = 17x16 is created
+             * starting from left half_x of converged full pel */
+            pu1_hpel_src -= 1;
+
+            /* computing half_x */
+            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
+                                                  ps_proc->pu1_half_x,
+                                                  i4_rec_strd,
+                                                  ps_me_ctxt->u4_hp_buf_strd);
+
+            /*
+             * Halfpel search is done for both sides of full pel,
+             * hence half_y of width x height = 16x17 is created
+             * starting from top half_y of converged full pel
+             * for half_xy top_left is required
+             * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
+             */
+
+            pu1_hpel_src -= i4_rec_strd;
+
+            /* computing half_y , and half_xy*/
+            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+                            pu1_hpel_src, ps_proc->pu1_half_y,
+                            ps_proc->pu1_half_xy, i4_rec_strd,
+                            ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3,
+                            ps_me_ctxt->u4_hp_buf_strd);
+
+            ime_sub_pel_motion_estimation_16x16(ps_me_ctxt);
+        }
+    }
+
+    {
+
+        /* if skip gives a better cost than the search, take over its cost, distortion and mv */
+        if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost)
+        {
+            ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost;
+            ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion;
+            ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx;
+            ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy;
+        }
+        else
+        {
+            /*
+             * If the current MB has a sub pel component (a non NULL best
+             * half pel buffer), we need to copy that to the best subpel buffer
+             */
+            if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf)
+            {
+                ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf,
+                                                  pu1_best_subpel_buf,
+                                                  ps_me_ctxt->u4_hp_buf_strd,
+                                                  u4_bst_spel_strd, MB_SIZE,
+                                                  MB_SIZE, NULL, 0);
+            }
+        }
+    }
+
+    DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0);
+
+    /* update cost, distortion and type of the mb if ME beats the cost recorded so far */
+    if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+    {
+        /* mb cost */
+        ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost;
+
+        /* mb distortion */
+        ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion;
+
+        /* mb type */
+        ps_proc->ps_cur_mb->u4_mb_type  = P16x16;
+    }
+
+    /* number of partitions (always a single pu per mb here) */
+    ps_proc->u4_num_sub_partitions = 1;
+    *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+    /* position in-terms of PU */
+    ps_proc->ps_pu->b4_pos_x = 0;
+    ps_proc->ps_pu->b4_pos_y = 0;
+
+    /* PU size; presumably encoded as (size >> 2) - 1, i.e. 16x16 - TODO confirm */
+    ps_proc->ps_pu->b4_wd = 3;
+    ps_proc->ps_pu->b4_ht = 3;
+
+    /* ref idx */
+    ps_proc->ps_pu->i1_l0_ref_idx = 0;
+
+    /* motion vector L0 */
+    ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx;
+    ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy;
+
+    /* Propagate the min sad state from the me ctxt to the current mb */
+    if (ps_me_ctxt->u4_min_sad_reached == 1)
+    {
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Initializes input and output pointers required by the function ih264e_compute_me
+* and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+*  Number of mbs to process in this call
+*
+* @returns none; ps_pu, i4_mb_x, buffer pointers and pu4_mb_pu_cnt are restored
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
+{
+    /* pu pointer on entry, restored before returning */
+    enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
+
+    /* ME map row of the current mb row */
+    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
+
+    /* index of the mb within the nmb */
+    UWORD32 u4_i;
+
+    ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+    ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
+
+    for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
+    {
+        /* Wait for the ME map of the row above (not needed for the first mb row) */
+        if (ps_proc->i4_mb_y > 0)
+        {
+            /* Wait for top row ME. NOTE(review): i4_mb_x is bumped per mb below, so "+ u4_i" waits beyond top right - confirm */
+            UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
+
+            while (1)
+            {
+                volatile UWORD8 *pu1_buf;
+                WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
+
+                idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
+                pu1_buf =  pu1_me_map_tp_rw + idx;
+                if(*pu1_buf)
+                    break;
+                ithread_yield();
+            }
+        }
+
+        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv);
+        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
+        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv);
+
+        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
+
+        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
+        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
+
+        /* Set the best subpel buf to the correct mb so that the buffer can be copied */
+        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
+        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
+
+        /* Set the min sad conditions (overrides the values assigned a few lines above) */
+        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
+        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+        /* Derive neighbor availability for the current macroblock */
+        ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+
+        /* init me */
+        ih264e_init_me(ps_proc);
+
+        ih264e_compute_me(ps_proc);
+
+        /* update the ME copies of the top left and left neighbor info for the next mb */
+        {
+            mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+            mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+            enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+            enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
+            enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+            *ps_top_left_syn = *ps_top_syn;
+
+            *ps_top_left_mb_pu = *ps_top_mv;
+            *ps_left_mb_pu = *ps_proc->ps_pu;
+        }
+
+        ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+        /* Copy the min sad reached info. NOTE(review): ps_cur_mb points at ps_nmb_info[u4_i], so these are self copies */
+        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
+        ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
+
+        /*
+         * To make sure that the ME map is properly synced with the cache,
+         * DATA_SYNC() (presumably a data barrier - confirm) runs before the mb is marked done
+         */
+        {
+            DATA_SYNC();
+
+            pu1_me_map[ps_proc->i4_mb_x] = 1;
+        }
+        ps_proc->i4_mb_x++;
+
+        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
+        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
+
+        /* advance buffer pointers to the next mb */
+        ps_proc->pu1_src_buf_luma += MB_SIZE;
+        ps_proc->pu1_rec_buf_luma += MB_SIZE;
+        ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+        /*
+         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+         * the stride per MB is MB_SIZE
+         */
+        ps_proc->pu1_src_buf_chroma += MB_SIZE;
+        ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+        ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+        ps_proc->pu4_mb_pu_cnt += 1;
+    }
+
+
+    ps_proc->ps_pu = ps_pu_begin;
+    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
+
+    /* rewind buffer pointers to the first mb of the nmb */
+    ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count;
+
+    /*
+     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+     * the stride per MB is MB_SIZE
+     */
+    ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
+    ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count;
+
+    ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
+}
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
new file mode 100755
index 0000000..c4834a1
--- /dev/null
+++ b/encoder/ih264e_me.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_me.h
+ *
+ * @brief
+ *  Contains macros and function declarations for H264 encoder motion estimation
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_ME_H_
+#define IH264E_ME_H_
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief  Store the median of (a, b, c) in result; used for mv prediction.
+ *  Arguments may be evaluated more than once: pass side-effect free values.
+******************************************************************************
+ */
+
+#define MEDIAN(a, b, c, result) if ((a) > (b)){\
+                                    if ((b) > (c))\
+                                        (result) = (b);\
+                                    else {\
+                                        if ((a) > (c))\
+                                            (result) = (c);\
+                                        else \
+                                            (result) = (a);\
+                                    }\
+                                }\
+                                else {\
+                                    if ((c) > (b))\
+                                        (result) = (b);\
+                                    else {\
+                                        if ((c) > (a))\
+                                            (result) = (c);\
+                                        else \
+                                            (result) = (a);\
+                                    }\
+                                }
+
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function populates the length of the codewords for motion vectors in the
+*  range (-search range, search range) in pixels
+*
+* @param[in] ps_me
+*  Pointer to me ctxt
+*
+* @par Output (NOTE(review): there is no pu1_mv_bits argument - presumably held in ps_me)
+*  length of the codeword for all mv's
+*
+* @remarks The lengths of the code words are derived from signed exponential
+* Golomb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits
+    (
+        me_ctxt_t *ps_me
+    );
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+*  The function gives the skip motion vector
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_for_me
+*  NOTE(review): presumably selects the ME-time variant - confirm with definition
+*
+* @remarks NOTE(review): the left / top / top-right mb motion vector info and
+*  the skip mv output are presumably reached through ps_proc - confirm
+*
+* @returns none
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+*   specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector
+    (
+        process_ctxt_t *ps_proc,
+        UWORD32 u4_for_me
+    );
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+*  The routine calculates the motion vector predictor for a given block,
+*  given the candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+*  pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+*  pointer to top & top right mb motion vector info
+*
+* @param[out] ps_pred_mv
+*  pointer to candidate predictors for the current block
+*
+* @returns  none (void); the x & y components are presumably written to ps_pred_mv
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+*   specification.
+*   Assumptions : 1. Assumes Single reference frame
+*                 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor
+        (
+            enc_pu_t *ps_left_mb_pu,
+            enc_pu_t *ps_top_row_pu,
+            mv_t *ps_pred_mv
+        );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector for the current mb
+*
+* @par Description:
+*  This function currently does nothing except set motion vectors from external
+*  source
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+*  Before dispatching the current job to me thread, the me context associated
+*  with the job is initialized.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+*  Initializes input and output pointers required by the function ih264e_compute_me
+*  and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+*  Number of mbs processed by this call
+*
+* @returns none
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb
+    (
+        process_ctxt_t *ps_proc,
+        UWORD32 u4_nmb_count
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks none
+*  This function will update the MB availability since intra inter decision
+*  should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function approximates Pred. MV
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  none
+*
+* @remarks
+*  Motion estimation happens at nmb level. For cost calculations, the mv is
+*  approximated using this function
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me
+    (
+        process_ctxt_t *ps_proc
+    );
+
+#endif /* IH264E_ME_H_ */
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
new file mode 100755
index 0000000..bc0e873
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.c
@@ -0,0 +1,240 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_modify_frm_rate.c
+*
+* @brief
+*  Functions used to modify frame rate
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_pd_frm_rate_get_init_free_memtab()
+*  - ih264e_init_pd_frm_rate()
+*  - ih264e_update_pd_frm_rate()
+*  - ih264e_get_pd_avg_frm_rate()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+*  Function to init pull down frame rate memtab
+*
+* @param[in] pps_pd_frm_rate
+*  pull down frame rate context
+*
+* @param[in] ps_memtab
+*  Handle to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtab/ update memtab)
+*
+* @returns  Number of memtabs used
+*
+* @remarks  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    /* placeholder state: no real state memory exists yet while querying or
+     * filling memtabs, so the handle is pointed here to keep it dereferenceable */
+    static pd_frm_rate_t s_dummy_state;
+    WORD32 i4_num_memtabs = 0;
+
+    if ((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_pd_frm_rate = &s_dummy_state;
+    }
+
+    /* memtab 0: the pull down frame rate state structure */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_state_memtab = &ps_memtab[i4_num_memtabs];
+        fill_memtab(ps_state_memtab, sizeof(pd_frm_rate_t), ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_state_memtab, (void **) pps_pd_frm_rate, e_func_type);
+    }
+    return (i4_num_memtabs + 1);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+*  frame rate
+*
+* @par Description
+*  Initializes the pull down frame rate state structure based on input frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate)
+{
+    /* frames in one second; u4_input_frm_rate is in frames per 1000 sec */
+    UWORD32 u4_num_frms = u4_input_frm_rate / 1000;
+    UWORD32 i;
+
+    /* never write past the MAX_NUM_FRAME entries of u4_cur_frm_rate[] */
+    if (u4_num_frms > MAX_NUM_FRAME) u4_num_frms = MAX_NUM_FRAME;
+
+    for (i = 0; i < u4_num_frms; i++)
+        ps_pd_frm_rate->u4_cur_frm_rate[i] = u4_input_frm_rate;
+
+    ps_pd_frm_rate->u4_input_frm_rate = u4_input_frm_rate;
+    ps_pd_frm_rate->u4_frm_num = ps_pd_frm_rate->u4_tot_frm_encoded = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par   Description
+*  For each frame a run time frame rate value is sent based on whether a frame
+*  is skipped or not. If it is skipped for pull down then the current frame
+*  rate for the pull down period is signaled as 4/5th of the original frame
+*  rate. Thus when this is averaged the frame rate gradually switches from the
+*  input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+*  down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+*  Run time frame rate of the current frame
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate)
+{
+    /* averaging window: frames in one second, limited to the history size */
+    UWORD32 u4_window = ps_pd_frm_rate->u4_input_frm_rate / 1000;
+
+    if (u4_window > MAX_NUM_FRAME) u4_window = MAX_NUM_FRAME;
+
+    /* record this frame's rate in the history buffer and advance the slot */
+    ps_pd_frm_rate->u4_cur_frm_rate[ps_pd_frm_rate->u4_frm_num] = u4_cur_frm_rate;
+    ps_pd_frm_rate->u4_frm_num++;
+
+    /* count encoded frames, saturating at the window length */
+    if (ps_pd_frm_rate->u4_tot_frm_encoded < u4_window)
+        ps_pd_frm_rate->u4_tot_frm_encoded++;
+
+    /* wrap the write slot back to zero once it reaches the window length */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window) ps_pd_frm_rate->u4_frm_num = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+*  Averages the last N frame in period(1 sec) and then gives that
+*  as the current frames frame rate. Thus this averages out the sudden
+*  variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @returns average frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_t *ps_pd_frm_rate)
+{
+    UWORD32 u4_num_frms = ps_pd_frm_rate->u4_tot_frm_encoded;
+    UWORD32 u4_sum = 0;
+    UWORD32 i;
+
+    /* nothing recorded yet (the state right after init): do not divide by zero */
+    if (0 == u4_num_frms) return ps_pd_frm_rate->u4_input_frm_rate;
+
+    for (i = 0; i < u4_num_frms; i++)
+        u4_sum += ps_pd_frm_rate->u4_cur_frm_rate[i];
+
+    return u4_sum / u4_num_frms;
+}
diff --git a/encoder/ih264e_modify_frm_rate.h b/encoder/ih264e_modify_frm_rate.h
new file mode 100755
index 0000000..c301e2c
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_modify_frm_rate.h
+*
+* @brief
+*  Functions declarations used to modify frame rate
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MODIFY_FRM_RATE_H_
+#define IH264E_MODIFY_FRM_RATE_H_
+
+/*****************************************************************************/
+/* Constant Definitions                                                      */
+/*****************************************************************************/
+
+#define MAX_NUM_FRAME   120
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+typedef struct pd_frm_rate_t
+{
+    /*
+     * Input frame rate set in the encoder, in frames per 1000 sec
+     */
+    UWORD32 u4_input_frm_rate;
+
+    /*
+     * Per frame frame rate history (after pull down), MAX_NUM_FRAME entries
+     */
+    UWORD32 u4_cur_frm_rate[MAX_NUM_FRAME];
+
+    /*
+     * Index in the above buffer at which the next frame's rate is stored
+     */
+    UWORD32 u4_frm_num;
+
+    /*
+     * Number of frames encoded so far.
+     * Saturates at the input frame rate in frames per sec (rate / 1000)
+     */
+    UWORD32 u4_tot_frm_encoded;
+
+}pd_frm_rate_t;
+
+typedef struct pd_frm_rate_t *pd_frm_rate_handle;
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+*  Function to init pull down frame rate memtab
+*
+* @param[in] pps_pd_frm_rate
+*  pull down frame rate context
+*
+* @param[in] ps_memtab
+*  Handle to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtab/ update memtab)
+*
+* @returns  Number of memtabs used
+*
+* @remarks  None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type);
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+*  frame rate
+*
+* @par Description
+*  Initializes the pull down frame rate state structure based on input frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par   Description
+*  For each frame a run time frame rate value is sent based on whether a frame
+*  is skipped or not. If it is skipped for pull down then the current frame
+*  rate for the pull down period is signaled as 4/5th of the original frame
+*  rate. Thus when this is averaged the frame rate gradually switches from the
+*  input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+*  down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+*  Run time frame rate of the current frame
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+*  Averages the last N frame in period(1 sec) and then gives that
+*  as the current frames frame rate. Thus this averages out the sudden
+*  variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @returns average frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_handle ps_pd_frm_rate);
+
+#endif /* IH264E_MODIFY_FRM_RATE_H_ */
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
new file mode 100755
index 0000000..9a468e9
--- /dev/null
+++ b/encoder/ih264e_process.c
@@ -0,0 +1,2369 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_process.c
+*
+* @brief
+*  Contains functions for codec thread
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+* - ih264e_generate_sps_pps()
+* - ih264e_init_entropy_ctxt()
+* - ih264e_entropy()
+* - ih264e_pack_header_data()
+* - ih264e_update_proc_ctxt()
+* - ih264e_init_proc_ctxt()
+* - ih264e_pad_recon_buffer()
+* - ih264e_dblk_pad_hpel_processing_n_mbs()
+* - ih264e_process()
+* - ih264e_set_rc_pic_params()
+* - ih264e_update_rc_post_enc()
+* - ih264e_process_thread()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_platform_macros.h"
+#include "ih264_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_process.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_globals.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_deblk.h"
+#include "ih264e_me.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "ih264e_utils.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_padding.h"
+#include "ime_statistics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief This function generates sps, pps set on request
+*
+*  @par   Description
+*  When the encoder is set in header generation mode, the following function
+*  is called. This generates sps and pps headers and returns the control back
+*  to caller.
+*
+*  @param[in]    ps_codec
+*  pointer to codec context
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
+{
+    /* ping-pong selector: parity of the encode api call count */
+    WORD32 i4_ping_pong = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* entropy ctxt at the base of the selected process ctxt set */
+    entropy_ctxt_t *ps_ent_ctxt =
+                    &ps_codec->as_process[i4_ping_pong * MAX_PROCESS_THREADS].s_entropy;
+
+    /* bit stream handle kept in that entropy ctxt */
+    bitstrm_t *ps_strm = ps_ent_ctxt->ps_bitstrm;
+
+    /* output buffer of the selected set */
+    out_buf_t *ps_out = &ps_codec->as_out_buf[i4_ping_pong];
+
+    /* sequence parameter set to populate and emit */
+    sps_t *ps_seq_params;
+
+    /* picture parameter set to populate and emit */
+    pps_t *ps_pic_params;
+
+
+    /********************************************************************/
+    /*      hand the output bit buffer to the bit stream                */
+    /********************************************************************/
+    ih264e_bitstrm_init(ps_strm, ps_out->s_bits_buf.pv_buf,
+                        ps_out->s_bits_buf.u4_bufsize);
+
+    /********************************************************************/
+    /*                    BEGIN HEADER GENERATION                       */
+    /********************************************************************/
+
+    /* the ids are not advanced here, only reduced modulo the set counts */
+    ps_codec->i4_pps_id = ps_codec->i4_pps_id % MAX_PPS_CNT;
+    ps_codec->i4_sps_id = ps_codec->i4_sps_id % MAX_SPS_CNT;
+
+    /* populate the sps addressed by the reduced id */
+    ps_seq_params = &ps_codec->ps_sps_base[ps_codec->i4_sps_id];
+    ih264e_populate_sps(ps_codec, ps_seq_params);
+
+    /* populate the pps addressed by the reduced id */
+    ps_pic_params = &ps_codec->ps_pps_base[ps_codec->i4_pps_id];
+    ih264e_populate_pps(ps_codec, ps_pic_params);
+
+    /* start from a clean status, then OR in the result of each header */
+    ps_ent_ctxt->i4_error_code = IH264E_SUCCESS;
+    ps_ent_ctxt->i4_error_code |= ih264e_generate_sps(ps_strm, ps_seq_params);
+    ps_ent_ctxt->i4_error_code |= ih264e_generate_pps(ps_strm, ps_pic_params,
+                                                      ps_seq_params);
+
+    /* publish the bit stream buffer offset as the output byte count */
+    ps_out->s_bits_buf.u4_bytes = ps_strm->u4_strm_buf_offset;
+
+    /* combined status of both header writes */
+    return ps_ent_ctxt->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief   initialize entropy context.
+*
+* @par Description:
+*  Before invoking the call to perform to entropy coding the entropy context
+*  associated with the job needs to be initialized. This involves the start
+*  mb address, end mb address, slice index and the pointer to location at
+*  which the mb residue info and mb header info are packed.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
+{
+    /* entropy ctxt embedded in the process ctxt */
+    entropy_ctxt_t *ps_ent_ctxt = &ps_proc->s_entropy;
+
+    /* codec ctxt, source of the packed data sizes used below */
+    codec_t *ps_codec_ctxt = ps_proc->ps_codec;
+
+    /* raster scan address of the first mb of the job */
+    ps_ent_ctxt->i4_mb_start_add = ps_ent_ctxt->i4_mb_x +
+                    ps_ent_ctxt->i4_mb_y * ps_ent_ctxt->i4_wd_mbs;
+
+    /* end address: start address advanced by the mb count */
+    ps_ent_ctxt->i4_mb_end_add = ps_ent_ctxt->i4_mb_cnt + ps_ent_ctxt->i4_mb_start_add;
+
+    /* slice index recorded for the first mb */
+    ps_ent_ctxt->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_ent_ctxt->i4_mb_start_add];
+
+    /* a job that begins at mb address 0 raises the sof flag */
+    if (0 == ps_ent_ctxt->i4_mb_start_add)
+        ps_ent_ctxt->i4_sof = 1;
+
+    /* at mb_x == 0, set both packed data pointers to pic base + mb_y * size */
+    if (0 == ps_ent_ctxt->i4_mb_x)
+    {
+        UWORD8 *pu1_coeff_base = (UWORD8 *) ps_ent_ctxt->pv_pic_mb_coeff_data;
+        UWORD8 *pu1_header_base = (UWORD8 *) ps_ent_ctxt->pv_pic_mb_header_data;
+
+        ps_ent_ctxt->pv_mb_coeff_data =
+                        pu1_coeff_base + ps_ent_ctxt->i4_mb_y * ps_codec_ctxt->u4_size_coeff_data;
+
+        ps_ent_ctxt->pv_mb_header_data =
+                        pu1_header_base + ps_ent_ctxt->i4_mb_y * ps_codec_ctxt->u4_size_header_data;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+*  This function calls lower level functions to perform entropy coding for a
+*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
+*  back the control, updates the ctxt and calls lower level functions again.
+*  This process is repeated till all the rows or group of mb's (which ever is
+*  minimum) are coded
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns  error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+#define GET_NUM_BITS(ps_bitstream) ((((ps_bitstream)->u4_strm_buf_offset) << 3) + WORD_SIZE - ((ps_bitstream)->i4_bits_left_in_cw)) /* (buf offset * 8) + WORD_SIZE - bits left in cw */
+
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* entropy context */
+    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+    /* sps */
+    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
+
+    /* pps */
+    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
+
+    /* slice header */
+    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
+
+    /* slice type */
+    WORD32 i4_slice_type = ps_proc->i4_slice_type;
+
+    /* Bitstream structure */
+    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
+
+    /* output buff */
+    out_buf_t s_out_buf;
+
+    /* proc map */
+    UWORD8  *pu1_proc_map;
+
+    /* entropy map */
+    UWORD8  *pu1_entropy_map_curr;
+
+    /* proc base idx */
+    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+    /* temp var */
+    WORD32 i4_wd_mbs, i4_ht_mbs;
+    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    /* entropy encode start address */
+    u4_mb_idx = ps_entropy->i4_mb_start_add;
+
+    /* entropy encode end address */
+    u4_mb_end_idx = ps_entropy->i4_mb_end_add;
+
+    /* width in mbs */
+    i4_wd_mbs = ps_entropy->i4_wd_mbs;
+
+    /* height in mbs */
+    i4_ht_mbs = ps_entropy->i4_ht_mbs;
+
+    /* total mb cnt */
+    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
+
+    /* proc map */
+    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+    /* entropy map */
+    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+    /********************************************************************/
+    /* @ start of frame / slice,                                        */
+    /*      initialize the output buffer,                               */
+    /*      initialize the bit stream buffer,                           */
+    /*      check if sps and pps headers have to be generated,          */
+    /*      populate and generate slice header                          */
+    /********************************************************************/
+    if (ps_entropy->i4_sof)
+    {
+        /********************************************************************/
+        /*      initialize the output buffer                                */
+        /********************************************************************/
+        s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+        /* is last frame to encode */
+        s_out_buf.u4_is_last = ps_entropy->u4_is_last;
+
+        /* frame idx */
+        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
+        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
+
+        /********************************************************************/
+        /*      initialize the bit stream buffer                            */
+        /********************************************************************/
+        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
+
+        /********************************************************************/
+        /*                    BEGIN HEADER GENERATION                       */
+        /********************************************************************/
+        if (1 == ps_entropy->i4_gen_header)
+        {
+            /* generate sps */
+            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
+
+            /* generate pps */
+            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
+
+            /* reset i4_gen_header */
+            ps_entropy->i4_gen_header = 0;
+        }
+
+        /* populate slice header */
+        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+        /* generate slice header */
+        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+                                                                  ps_pps, ps_sps);
+
+        /* once start of frame / slice is done, you can reset it */
+        /* it is the responsibility of the caller to set this flag */
+        ps_entropy->i4_sof = 0;
+    }
+
+    /* begin entropy coding for the mb set */
+    while (u4_mb_idx < u4_mb_end_idx)
+    {
+        /* init ptrs/indices */
+        if (ps_entropy->i4_mb_x == i4_wd_mbs)
+        {
+            ps_entropy->i4_mb_y ++;
+            ps_entropy->i4_mb_x = 0;
+
+            /* packed mb coeff data */
+            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
+                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
+
+            /* packed mb header data */
+            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
+                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
+
+            /* proc map */
+            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y  * i4_wd_mbs;
+
+            /* entropy map */
+            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+        }
+
+        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
+        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
+        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
+
+        /* wait until the curr mb is core coded */
+        /* The wait for curr mb to be core coded is essential when entropy is launched
+         * as a separate job
+         */
+        while (1)
+        {
+            volatile UWORD8 *pu1_buf1;
+            WORD32 idx = ps_entropy->i4_mb_x;
+
+            pu1_buf1 =  pu1_proc_map + idx;
+            if(*pu1_buf1)
+                break;
+            ithread_yield();
+        }
+
+        /* write mb layer */
+        ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy);
+
+        /* set entropy map */
+        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
+
+        u4_mb_idx ++;
+        ps_entropy->i4_mb_x ++;
+
+        if (ps_entropy->i4_mb_x == i4_wd_mbs)
+        {
+            /* if slices are enabled */
+            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+            {
+                /* current slice index */
+                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
+
+                /* slice map */
+                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
+
+                /* No need to open a slice at end of frame. The current slice can be closed at the time
+                 * of signaling eof flag.
+                 */
+                if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
+                {
+                    /* mb skip run */
+                    if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+                    {
+                        if (*ps_entropy->pi4_mb_skip_run)
+                        {
+                            PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+                            *ps_entropy->pi4_mb_skip_run = 0;
+                        }
+                    }
+
+                    /* put rbsp trailing bits for the previous slice */
+                    ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+                    /* update slice header pointer */
+                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
+                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
+                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
+
+                    /* populate slice header */
+                    ps_entropy->i4_mb_start_add = u4_mb_idx;
+                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+                    /* generate slice header */
+                    ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+                                                                              ps_pps, ps_sps);
+                }
+            }
+
+            /* Dont execute any further instructions until store synchronization took place */
+            DATA_SYNC();
+        }
+    }
+
+    /* check for eof */
+    if (u4_mb_idx == u4_mb_cnt)
+    {
+        /* set end of frame flag */
+        ps_entropy->i4_eof = 1;
+    }
+
+    if (ps_entropy->i4_eof)
+    {
+        /* mb skip run */
+        if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+        {
+            if (*ps_entropy->pi4_mb_skip_run)
+            {
+                PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+                *ps_entropy->pi4_mb_skip_run = 0;
+            }
+        }
+
+        /* put rbsp trailing bits */
+        ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+        /* update current frame stats to rc library */
+        if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+        {
+            /* number of bytes to stuff */
+            WORD32 i4_stuff_bytes;
+
+            /* update */
+            i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt);
+
+            /* cbr rc - house keeping */
+            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+            {
+                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
+            }
+            else if (i4_stuff_bytes)
+            {
+                /* add filler nal units */
+                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
+            }
+        }
+
+        /********************************************************************/
+        /*      signal the output                                           */
+        /********************************************************************/
+        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;
+
+        DEBUG("entropy status %x", ps_entropy->i4_error_code);
+    }
+
+    /* allow threads to dequeue entropy jobs */
+    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
+
+    return ps_entropy->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+*  After deciding the mode info of a macroblock, the syntax elements
+*  associated with the mb are packed and stored at ps_proc->pv_mb_header_data,
+*  which is then advanced past the record just written. The entropy thread
+*  unpacks this buffer and generates the end bit stream.
+*
+*  Record layout, in bytes, per mb type:
+*   I4x4   : (chroma mode << 6) + mb type, cbp, qp delta,
+*            8 bytes of sub mb modes (two 4x4 blocks per byte)
+*   I16x16 : (chroma mode << 6) + (luma mode << 4) + mb type, cbp, qp delta
+*   P16x16 : mb type, cbp, qp delta, mvd x (WORD16), mvd y (WORD16)
+*   PSKIP  : mb type
+*  For any other mb type nothing is written.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks
+*  The records have odd sizes (1, 3, 7 or 11 bytes), so the mv differences of
+*  a P16x16 record cannot be assumed to be 2 byte aligned. They are stored
+*  with memcpy instead of through a WORD16 pointer; the bytes that end up in
+*  the buffer are the same as a native WORD16 store would produce.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
+{
+    /* curr mb type */
+    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+    /* pack mb syntax layer of curr mb (used for entropy coding) */
+    if (u4_mb_type == I4x4)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* temp var */
+        WORD32 i4, byte;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* sub mb modes: two 4x4 blocks are packed per byte. For each block, */
+        /* the lowest bit of its nibble is set when the mode equals the      */
+        /* predicted mode; otherwise the upper three bits carry the mode     */
+        /* (reduced by one when it is above the predicted mode).             */
+        for (i4 = 0; i4 < 16; i4 ++)
+        {
+            byte = 0;
+
+            /* even block : lower nibble */
+            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
+                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= 1;
+            }
+            else
+            {
+
+                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
+                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
+                {
+                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
+                }
+                else
+                {
+                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
+                }
+            }
+
+            /* odd block : upper nibble */
+            i4++;
+
+            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
+                            ps_proc->au1_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= 16;
+            }
+            else
+            {
+
+                if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
+                                ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
+                {
+                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
+                }
+                else
+                {
+                    byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
+                }
+            }
+
+            *pu1_ptr++ = byte;
+        }
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == I16x16)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == P16x16)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mv difference (curr mv - predicted mv) of one component */
+        WORD16 i2_mvd;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* pu1_ptr is now 3 bytes into the record and need not be 2 byte      */
+        /* aligned, hence the mv differences are copied byte wise rather than */
+        /* stored through a WORD16 pointer                                    */
+
+        /* mvd x */
+        i2_mvd = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx;
+        memcpy(pu1_ptr, &i2_mvd, sizeof(i2_mvd));
+        pu1_ptr += sizeof(i2_mvd);
+
+        /* mvd y */
+        i2_mvd = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy;
+        memcpy(pu1_ptr, &i2_mvd, sizeof(i2_mvd));
+        pu1_ptr += sizeof(i2_mvd);
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == PSKIP)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief   update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+*  This function performs house keeping tasks after encoding an mb.
+*  After encoding an mb, various elements of the process context needs to be
+*  updated to encode the next mb. For instance, the source, recon and reference
+*  pointers, mb indices have to be adjusted to the next mb. The slice index of
+*  the current mb needs to be updated. If mb qp modulation is enabled, then if
+*  the qp changes the quant param structure needs to be updated. Also, to encode
+*  the next mb, the current mb info is used as part of mode prediction or mv
+*  prediction. Hence the current mb info has to be preserved at top/top left/left
+*  locations.
+*
+*  In addition, the function packs the mb header for entropy coding, marks the
+*  mb as coded in the proc map and, on the last mb of a row, queues an entropy
+*  job for that row.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status : IH264_SUCCESS OR-ed with the value returned by
+*  ih264_list_queue() when an entropy job is queued
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
+{
+    /* error status */
+    WORD32 error_status = IH264_SUCCESS;
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* curr mb indices */
+    WORD32 i4_mb_x = ps_proc->i4_mb_x;
+    WORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+    /* mb syntax elements of neighbors */
+    mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
+    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
+    mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
+
+    /* curr mb type */
+    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+    /* is curr mb intra */
+    UWORD32 u4_is_intra = ps_proc->u4_is_intra;
+
+    /* width in mbs */
+    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /*height in mbs*/
+    WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+    /* proc map (row of the current mb) */
+    UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
+
+    /* deblk context */
+    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* deblk bs context */
+    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+    /* top row motion vector info */
+    enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
+
+    /* top left mb motion vector */
+    enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
+
+    /* left mb motion vector */
+    enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
+
+    /* sub mb modes (16 entries per mb in the top row array) */
+    UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
+
+//    /* zero mv */
+//    mv_t zero_mv = {0, 0};
+
+    /* Pad the MB to support non standard sizes */
+    UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
+    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
+
+    /*************************************************************/
+    /* During MV prediction, when top right mb is not available, */
+    /* top left mb info. is used for prediction. Hence the curr  */
+    /* top, which will be top left for the next mb needs to be   */
+    /* preserved before updating it with curr mb info.           */
+    /*************************************************************/
+
+    /* mb type, mb class, csbp */
+    *ps_top_left_syn = *ps_top_syn;
+
+    if (ps_proc->i4_slice_type == PSLICE)
+    {
+        /*****************************************/
+        /* update top left with top info results */
+        /*****************************************/
+
+        /* mv */
+        *ps_top_left_mb_pu = *ps_top_row_pu;
+    }
+
+    /*************************************************/
+    /* update top and left with curr mb info results */
+    /*************************************************/
+
+    /* mb type */
+    ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
+
+    /* mb class */
+    ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
+
+    /* csbp */
+    ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
+
+    /* distortion */
+    ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
+
+    if (u4_is_intra)
+    {
+        /* mb / sub mb modes : 1 entry for I16x16, 16 for I4x4, 4 for I8x8 */
+        if (I16x16 == u4_mb_type)
+        {
+            pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
+        }
+        else if (I4x4 == u4_mb_type)
+        {
+            ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+            ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+        }
+        else if (I8x8 == u4_mb_type)
+        {
+            memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+            memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+        }
+
+        if (ps_proc->i4_slice_type == PSLICE)
+        {
+            /* mv */
+            *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+
+//            /* reset ngbr mv's */
+//            ps_top_row_pu->i1_l0_ref_idx = -1;
+//            ps_top_row_pu->s_l0_mv = zero_mv;
+//
+//            *ps_left_mb_pu = *ps_top_row_pu;
+        }
+    }
+    else
+    {
+        /* mv */
+        *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+    }
+
+    /*
+     * Record whether the MB has been coded intra
+     * So that future AIRs can skip it
+     */
+    ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
+
+    /**************************************************/
+    /* pack mb header info. for entropy coding        */
+    /**************************************************/
+    ih264e_pack_header_data(ps_proc);
+
+    /* update previous mb qp */
+    ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
+
+    /* store qp */
+    ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+    /*
+     * We need to sync the cache to make sure that the nmv content of proc
+     * is updated to cache properly
+     */
+    DATA_SYNC();
+
+    /* Just before finishing the row, enqueue the job in to entropy queue.
+     * The master thread depending on its convenience shall dequeue it and
+     * performs entropy.
+     *
+     * WARN !! Placing this block post proc map update can cause queuing of
+     * entropy jobs in out of order.
+     */
+    if (i4_mb_x == i4_wd_mbs - 1)
+    {
+        /* job structures */
+        job_t s_job;
+
+        /* job class */
+        s_job.i4_cmd = CMD_ENTROPY;
+
+        /* number of mbs to be processed in the current job */
+        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+        /* job start index x */
+        s_job.i2_mb_x = 0;
+
+        /* job start index y */
+        s_job.i2_mb_y = ps_proc->i4_mb_y;
+
+        /* proc base idx */
+        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
+
+        /* queue the job; after the last row the queue is also terminated.
+         * NOTE(review): the job is queued on ps_proc->pv_entropy_jobq while
+         * the terminate call below uses ps_codec->pv_entropy_jobq - presumably
+         * both refer to the same queue; confirm */
+        error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
+
+        if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
+            ih264_list_terminate(ps_codec->pv_entropy_jobq);
+    }
+
+    /* update proc map : mark the curr mb as core coded (done only after the
+     * entropy job has been queued, see the WARN above) */
+    pu1_proc_map[i4_mb_x] = 1;
+
+    /**************************************************/
+    /* update proc ctxt elements for encoding next mb */
+    /**************************************************/
+    /* update indices */
+    i4_mb_x ++;
+    ps_proc->i4_mb_x = i4_mb_x;
+
+    if (ps_proc->i4_mb_x == i4_wd_mbs)
+    {
+        ps_proc->i4_mb_y++;
+        ps_proc->i4_mb_x = 0;
+    }
+
+    /* update slice index (of the next mb).
+     * NOTE(review): after the last mb of the last row, i4_mb_y equals
+     * i4_ht_mbs and i4_mb_x is 0, so entry i4_ht_mbs * i4_wd_mbs of the slice
+     * map is read - confirm that the map holds this extra entry */
+    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
+
+    /* update buffers pointers */
+    ps_proc->pu1_src_buf_luma += MB_SIZE;
+    ps_proc->pu1_rec_buf_luma += MB_SIZE;
+    ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+    /*
+     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+     * the stride per MB is MB_SIZE
+     */
+    ps_proc->pu1_src_buf_chroma += MB_SIZE;
+    ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+    ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+    /* pad right edge (tested against the already advanced mb_x, i.e. the
+     * padding is applied to the source of the mb that is encoded next) */
+    if (u4_pad_right_sz && (ps_proc->i4_mb_x == i4_wd_mbs - 1))
+    {
+        ih264_pad_right_luma(
+                        ps_proc->pu1_src_buf_luma + MB_SIZE - u4_pad_right_sz,
+                        ps_proc->i4_src_strd, MB_SIZE, u4_pad_right_sz);
+
+        ih264_pad_right_chroma(
+                        ps_proc->pu1_src_buf_chroma + MB_SIZE - u4_pad_right_sz,
+                        ps_proc->i4_src_strd, BLK8x8SIZE, u4_pad_right_sz);
+    }
+
+    /* pad bottom edge (only for mb_x != 0 of the last row; this function does
+     * not pad for mb_x == 0) */
+    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == i4_ht_mbs - 1) &&
+                    ps_proc->i4_mb_x != 0)
+    {
+        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
+                         ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz);
+
+        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
+                         ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2));
+    }
+
+    /* Reset cost, distortion params */
+    ps_proc->i4_mb_cost = INT_MAX;
+    ps_proc->i4_mb_distortion = SHRT_MAX;
+
+    ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+    ps_proc->pu4_mb_pu_cnt += 1;
+
+    /* deblk ctxts */
+    if (ps_proc->u4_disable_deblock_level != 1)
+    {
+        /* indices */
+        ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+        ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
+        ps_deblk->i4_mb_x ++;
+
+        ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+        /*
+         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+         * the stride per MB is MB_SIZE
+         */
+        ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+#endif
+    }
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief   initialize process context.
+*
+* @par Description:
+*  Before dispatching the current job to process thread, the process context
+*  associated with the job is initialized. Usually every job aims to encode one
+*  row of mb's. Based on the mb indices already set in the process context
+*  (ps_proc->i4_mb_x, ps_proc->i4_mb_y), the process context's buffer ptrs,
+*  slice indices and other elements that are necessary during core-coding are
+*  initialized. For planar 420 and interleaved 422 input, the source of the
+*  row is also converted to semi planar 420 here.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status (always IH264E_SUCCESS)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* nmb processing context*/
+    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+    /* indices */
+    WORD32 i4_mb_x, i4_mb_y;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_proc->i4_src_strd;
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* quant params (entry 0 of ps_qp_params; used below to derive lambda) */
+    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+    /* deblk ctxt */
+    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* deblk bs context */
+    bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+    /* Pointer to mv_buffer of current frame */
+    mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+    /* Pointers for color space conversion */
+    UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
+
+    /* Pad the MB to support non standard sizes */
+    UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    i4_mb_x = ps_proc->i4_mb_x;
+    i4_mb_y = ps_proc->i4_mb_y;
+
+    /* Number of mbs processed in one loop of process function (row width,
+     * capped at MAX_NMB) */
+    ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
+    ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
+
+    /* init buffer pointers (the chroma row offset uses BLK8x8SIZE rows per mb;
+     * the ref buffers are addressed with the rec stride) */
+    ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
+    ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * BLK8x8SIZE);
+    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+    ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
+    ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
+
+    /*
+     * Do color space conversion
+     * NOTE : We assume here that the number of MB's to process will not span multiple rows
+     * Semi planar 420 input needs no conversion; any other format that is
+     * not listed below is left untouched as well.
+     */
+    switch (ps_codec->s_cfg.e_inp_color_fmt)
+    {
+        case IV_YUV_420SP_UV:
+        case IV_YUV_420SP_VU:
+            break;
+
+        case IV_YUV_420P :
+            pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
+
+            pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
+
+            pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
+
+            ps_codec->pf_ih264e_conv_420p_to_420sp(
+                            pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
+                            ps_proc->pu1_src_buf_luma,
+                            ps_proc->pu1_src_buf_chroma, MB_SIZE,
+                            ps_proc->i4_wd_mbs * MB_SIZE,
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
+                            ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1);
+            break;
+
+        case IV_YUV_422ILE :
+            pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
+                              + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
+
+            ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
+                            ps_proc->pu1_src_buf_luma,
+                            ps_proc->pu1_src_buf_chroma,
+                            ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
+                            ps_proc->i4_wd_mbs * MB_SIZE, MB_SIZE,
+                            ps_proc->i4_src_strd, ps_proc->i4_src_strd,
+                            ps_proc->i4_src_strd,
+                            ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
+            break;
+
+        default:
+            break;
+    }
+
+    /* pad bottom edge (only for the first mb, mb_x == 0, of the last row) */
+    if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
+    {
+        ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
+                         ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz);
+
+        ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
+                         ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2));
+    }
+
+    /* packed mb coeff data */
+    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
+
+    /* packed mb header data */
+    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
+
+    /* slice index */
+    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
+
+    /*********************************************************************/
+    /* ih264e_init_quant_params() routine is called at the pic init level*/
+    /* this would have initialized the qp.                               */
+    /* TODO_LATER: currently it is assumed that quant params do not      */
+    /* change across mb's. When they do, update ps_qp_params accordingly.*/
+    /*********************************************************************/
+
+    /* init mv buffer ptr (start of the current mb row) */
+    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+
+    if (i4_mb_y == 0)
+    {
+        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
+    }
+    else
+    {
+        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+    }
+
+    ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
+
+    /* mb type */
+    ps_proc->u4_mb_type = I16x16;
+
+    /* lambda (looked up in gu1_qp0 with the mb qp) */
+    ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
+
+    /* mb distortion */
+    ps_proc->i4_mb_distortion = SHRT_MAX;
+
+    if (i4_mb_x == 0)
+    {
+        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
+
+        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
+
+        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
+
+        if (i4_mb_y == 0)
+        {
+            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
+        }
+    }
+
+    /* mb cost */
+    ps_proc->i4_mb_cost = INT_MAX;
+
+    /**********************/
+    /* init deblk context */
+    /**********************/
+    ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
+    /* deblk lags the current mb proc by 1 row */
+    /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
+    /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
+    /* For simplicity, we chose to lag deblking by 1 Row wrt to proc.
+     * NOTE(review): for i4_mb_y == 0 this yields row -1, so the deblk buffer
+     * ptrs computed below lie one mb row before the rec buffer base -
+     * presumably they are never dereferenced for that row; confirm */
+    ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
+
+    /* buffer ptrs */
+    ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
+    ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
+
+    /* init deblk bs context */
+    /* mb indices */
+    ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+    ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+    /* init n_mb_process  context (starts at mb_x 0 of the deblk row) */
+    ps_n_mb_ctxt->i4_mb_x = 0;
+    ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
+    ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to luma buffer
+*
+* @param[in] pu1_curr_pic_chroma
+*  Pointer to chroma buffer
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+*  @param[in] i4_pad_ht
+*  number of rows to be padded
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
+                                       UWORD8 *pu1_curr_pic_luma,
+                                       UWORD8 *pu1_curr_pic_chroma,
+                                       WORD32 i4_mb_x,
+                                       WORD32 i4_mb_y,
+                                       WORD32 i4_pad_ht)
+{
+    /*
+     * Pads the recon picture around the current mb when the mb touches a
+     * picture edge:
+     *   - first column : left padding of i4_pad_ht luma rows (half for chroma)
+     *   - last column  : right padding, plus bottom padding on the last mb row
+     *   - first row    : top padding over the mb, widened by the side padding
+     *                    when the mb is on the first / last column
+     *
+     * The first-column and last-column checks are deliberately independent:
+     * in a picture that is one mb wide the same mb is both, and it must get
+     * its left AND right (and bottom / widened top) padding.
+     *
+     * Always returns IH264E_SUCCESS.
+     */
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* position of the mb w.r.t. the left / right picture edges */
+    WORD32 i4_is_first_col = (i4_mb_x == 0);
+    WORD32 i4_is_last_col = (i4_mb_x == ps_proc->i4_wd_mbs - 1);
+
+    if (i4_is_first_col)
+    {
+        /* padding left luma */
+        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
+
+        /* padding left chroma */
+        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
+    }
+
+    /* not an 'else if': a single mb wide picture is first and last column */
+    if (i4_is_last_col)
+    {
+        /* padding right luma */
+        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
+
+        /* padding right chroma */
+        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
+
+        /* bottom padding is issued once, from the last mb of the last row,
+         * after the left / right padding of that row is in place */
+        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
+        {
+            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
+            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
+
+            /* padding bottom luma */
+            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+            /* padding bottom chroma */
+            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+        }
+    }
+
+    if (i4_mb_y == 0)
+    {
+        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
+        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
+        WORD32 wd = MB_SIZE;
+
+        /* extend the span to the left so that the left padding done above is
+         * replicated upwards as well */
+        if (i4_is_first_col)
+        {
+            pu1_rec_luma -= PAD_LEFT;
+            pu1_rec_chroma -= PAD_LEFT;
+
+            wd += PAD_LEFT;
+        }
+
+        /* likewise extend the span over the right padding; independent of the
+         * check above for the same single mb wide picture reason */
+        if (i4_is_last_col)
+        {
+            wd += PAD_RIGHT;
+        }
+
+        /* padding top luma */
+        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
+
+        /* padding top chroma */
+        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking, padding and halfpel generation for
+*  'n' MBs
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Current MB being processed(Luma)
+*
+* @param[in] pu1_curr_pic_chroma
+* Current MB being processed(Chroma)
+*
+* @param[in] i4_mb_x
+* Column value of current MB processed
+*
+* @param[in] i4_mb_y
+* Current row processed
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
+                                                     UWORD8 *pu1_curr_pic_luma,
+                                                     UWORD8 *pu1_curr_pic_chroma,
+                                                     WORD32 i4_mb_x,
+                                                     WORD32 i4_mb_y)
+{
+    /* Summary of the body below:
+     *  - when u4_disable_deblock_level is 1, the current MB is padded left /
+     *    right if it sits on the first / last column;
+     *  - for i4_mb_y > 0, once i4_n_mbs MBs are pending (or the last column is
+     *    reached) the deblk context is advanced over that batch, deblocking
+     *    each MB unless deblocking is disabled;
+     *  - top padding of the picture is issued while i4_mb_y == 2;
+     *  - at the last MB of the last row, the last row itself is deblocked (if
+     *    enabled), the last two MB rows are padded left / right, and the
+     *    bottom padding is done.
+     * NOTE(review): no half-pel generation is visible in this body although
+     * the function name mentions hpel - confirm where that is done.
+     *
+     * codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* n_mb processing context */
+    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+    /* deblk context */
+    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* loop variables */
+    WORD32 row, i, j, col;
+
+    /* Padding Width */
+    UWORD32 u4_pad_wd;
+
+    /* deblk_map of the row being deblocked */
+    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
+
+    /* deblk_map_previous row */
+    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
+
+    WORD32 u4_pad_top = 0; /* offset from the recon base where top padding starts; set to -PAD_LEFT below, hence signed despite the u4_ prefix */
+
+    WORD32 u4_deblk_prev_row = 0; /* AND of the previous row's deblk_map entries above, and top right of, the batch */
+
+    /* Number of mbs to be processed */
+    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
+
+    /* Number of mbs  actually processed
+     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
+    WORD32 i4_n_mb_process_count = 0;
+
+    UWORD8 *pu1_pad_bottom_src = NULL;
+
+    UWORD8 *pu1_pad_src_luma = NULL;
+    UWORD8 *pu1_pad_src_chroma = NULL;
+
+    if (ps_proc->u4_disable_deblock_level == 1) /* deblocking disabled: pad the current MB directly (the deblocking path below pads the row above the one it filters instead) */
+    {
+        /* If left most MB is processed, then pad left */
+        if (i4_mb_x == 0)
+        {
+            /* padding left luma */
+            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+            /* padding left chroma */
+            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+        }
+        /*last col*/
+        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
+        {
+            /* padding right luma */
+            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+            /* padding right chroma */
+            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+        }
+    }
+
+    if (i4_mb_y > 0)
+    {
+        /* if number of mb's to be processed are less than 'N', go back.
+         * exception to the above clause is end of row.
+         * (i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) is the count of MBs from the
+         * n_mb context's column up to and including the current MB */
+        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
+        {
+            return IH264E_SUCCESS;
+        }
+        else
+        {
+            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
+
+            u4_deblk_prev_row = 1;
+
+            /* checking whether the top rows are deblocked */
+            for (col = 0; col < i4_n_mb_process_count; col++)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
+            }
+
+            /* checking whether the top right MB is deblocked
+             * (skipped when the batch ends on the last column) */
+            if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
+            }
+
+            /* performing deblocking for required number of MBs */
+            if (ps_proc->u4_disable_deblock_level != 1)
+            {
+                /* Top or Top right MBs not deblocked: return before the deblk
+                 * and n_mb contexts are advanced, so nothing is consumed */
+                if (u4_deblk_prev_row != 1)
+                {
+                    return IH264E_SUCCESS;
+                }
+
+                for (row = 0; row < i4_n_mb_process_count; row++) /* 'row' counts MBs of the batch here, not picture rows */
+                {
+                    ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                    if (ps_deblk->i4_mb_y > 0) /* the padding below targets the MB row above the one just deblocked */
+                    {
+                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
+                        {
+                            /* padding left luma */
+                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                            /* padding left chroma */
+                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+                        }
+
+                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
+                        {
+                            /* padding right luma */
+                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                            /* padding right chroma */
+                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+                        }
+                    }
+                    ps_deblk->i4_mb_x++;
+
+                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+
+                }
+            }
+            else
+            {
+                ps_deblk->i4_mb_x += i4_n_mb_process_count; /* deblocking disabled: only step the deblk context past the batch */
+
+                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
+                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
+            }
+
+            if (i4_mb_y == 2) /* NOTE(review): never true when the picture has fewer than 3 MB rows - confirm top padding is covered elsewhere for such sizes */
+            {
+                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
+                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
+
+                if (ps_n_mb_ctxt->i4_mb_x == 0)
+                {
+                    u4_pad_wd += PAD_LEFT;
+                    u4_pad_top = -PAD_LEFT;
+                }
+
+                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+                {
+                    u4_pad_wd += PAD_RIGHT;
+                }
+
+                /* padding top luma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
+
+                /* padding top chroma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
+            }
+
+            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
+
+            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+            {
+                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
+                {
+                    /* Bottom Padding is done in one stretch for the entire width.
+                     * Before that, when deblocking is enabled, the last MB row
+                     * itself is deblocked here: the deblk and n_mb contexts are
+                     * re-pointed at column 0 of the last row */
+                    if (ps_proc->u4_disable_deblock_level != 1)
+                    {
+                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
+
+                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
+
+                        ps_n_mb_ctxt->i4_mb_x = 0;
+                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
+                        ps_deblk->i4_mb_x = 0;
+                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
+
+                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
+                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; /* MBs left over after the full batches */
+
+                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs; /* number of full batches of i4_n_mbs in a row */
+
+                        for (i = 0; i < j; i++)
+                        {
+                            for (col = 0; col < i4_n_mbs; col++)
+                            {
+                                ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                                ps_deblk->i4_mb_x++;
+                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                                ps_n_mb_ctxt->i4_mb_x++;
+                            }
+                        }
+
+                        for (col = 0; col < i4_n_mb_process_count; col++)
+                        {
+                            ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                            ps_deblk->i4_mb_x++;
+                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                            ps_n_mb_ctxt->i4_mb_x++;
+                        }
+
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
+
+                        /* padding left luma (MB row i4_ht_mbs - 2) */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma (MB row i4_ht_mbs - 2) */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding left luma (last MB row) */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma (last MB row) */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        /* padding right luma (MB row i4_ht_mbs - 2) */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma (MB row i4_ht_mbs - 2) */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding right luma (last MB row) */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma (last MB row) */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                    }
+
+                    /* padding bottom luma: the pointer is i4_ht_mbs MB rows below
+                     * the recon base, moved back by PAD_LEFT; the full stride is
+                     * passed as the width */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+                    /* padding bottom chroma (same layout, half the rows) */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+                }
+            }
+        }
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+*  The mb to be coded is taken and is evaluated over a predefined set of modes
+*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+*  is selected and using intra/inter prediction filters, prediction is carried out.
+*  The deviation between src and pred signal constitutes error signal. This error
+*  signal is transformed (hierarchical transform if necessary) and quantized. The
+*  quantized residue is packed in to entropy buffer for entropy coding. This is
+*  repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc)
+{
+    /* Per-MB flow of the loop below: fetch the ME results when P16x16 is a
+     * valid mode, derive neighbour availability, optionally evaluate the
+     * enabled intra modes, run luma and chroma core coding, check for skip,
+     * update the rate control info, run the deblock / pad stage when recon is
+     * needed, then advance the proc context. Error codes of the called
+     * routines are OR-ed into the return value.
+     *
+     * error status */
+    WORD32 error_status = IH264_SUCCESS;
+
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* cbp luma, chroma */
+    UWORD32 u4_cbp_l, u4_cbp_c;
+
+    /* width in mbs */
+    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+    /* loop var */
+    WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
+
+    /* valid modes (bit mask, one bit per mb type: 1 << I16x16, 1 << P16x16, ...) */
+    UWORD32 u4_valid_modes = 0;
+
+    /* gate threshold */
+    WORD32 i4_gate_threshold = 0;
+
+    /* is intra */
+    WORD32 luma_idx, chroma_idx, is_intra;
+
+    /* index (0 or 1, parity of the encode api call count) into as_rec_buf */
+    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+    /* list of modes for evaluation */
+    if (ps_proc->i4_slice_type == ISLICE)
+    {
+        /* enable intra 16x16 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+        /* enable intra 8x8 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+
+        /* enable intra 4x4 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+    }
+    else if (ps_proc->i4_slice_type == PSLICE)
+    {
+        /* enable intra 16x16 */
+        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+        /* enable intra 4x4 (in P slices only for the slowest preset) */
+        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+        {
+            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+        }
+
+        /* enable inter 16x16 */
+        u4_valid_modes |= (1 << P16x16);
+    }
+
+
+    /* init entropy */
+    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
+    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
+    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
+
+    /* compute recon when :
+     *   1. current frame is to be used as a reference
+     *   2. dump recon for bit stream sanity check
+     */
+    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
+                                ps_codec->s_cfg.u4_enable_recon;
+
+    /* Encode 'n' macroblocks,
+     * 'n' being the number of mbs dictated by current proc ctxt */
+    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
+    {
+        /* since we have not yet found sad, we have not yet got min sad */
+        /* we need to initialize these variables for each MB */
+        /* TODO how to get the min sad into the codec */
+        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
+        ps_proc->u4_min_sad_reached = 0;
+
+        /* mb analysis */
+        {
+            /* temp var: raster index of the current mb in the picture */
+            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
+
+            /* force intra refresh ?
+             * i4_air_enable_inter is non zero (inter allowed) when AIR is off,
+             * or the mb is already flagged in pu1_is_intra_coded, or its entry
+             * in the refresh map differs from i4_air_pic_cnt. Zero means the
+             * mb must be coded intra. */
+            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
+                            (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
+                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
+
+            /* evaluate inter 16x16 modes */
+            if (u4_valid_modes & (1 << P16x16))
+            {
+                /* compute nmb me (once per group of u4_nmb_me mbs, clipped at
+                 * the end of the row) */
+                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
+                {
+                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
+                                                       i4_wd_mbs - ps_proc->i4_mb_x));
+                }
+
+                /* set pointers to ME data appropriately for other modules to use */
+                {
+                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
+
+                    /* get the min sad condition for current mb
+                     * (the same two fields are assigned again, with the same
+                     * values, a few lines below) */
+                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+
+                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
+                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
+                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
+
+                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
+                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
+                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
+
+                    /* get the best sub pel buffer */
+                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
+                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
+                }
+                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+            }
+            else
+            {
+                /* Derive neighbor availability for the current macroblock */
+                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
+
+                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+            }
+
+            /*
+             * If air says intra, we need to force the following code path to evaluate intra
+             * The easy way is just to say that the inter cost is too much
+             */
+            if (!i4_air_enable_inter)
+            {
+                ps_proc->u4_min_sad_reached = 0;
+                ps_proc->i4_mb_cost = INT_MAX;
+                ps_proc->i4_mb_distortion = INT_MAX;
+            }
+            else if (ps_proc->u4_mb_type == PSKIP)
+            {
+                goto UPDATE_MB_INFO;
+            }
+
+            /* wait until the proc of [top + 1] mb is computed.
+             * We wait till the proc dependencies are satisfied.
+             * NOTE(review): the column polled is derived from the loop index
+             * i4_mb_idx, not from ps_proc->i4_mb_x, and is clamped with
+             * s_cfg.i4_wd_mbs - confirm a job always starts at column 0. */
+             if(ps_proc->i4_mb_y > 0)
+             {
+                /* proc map */
+                UWORD8  *pu1_proc_map_top;
+
+                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
+
+                while (1)
+                {
+                    volatile UWORD8 *pu1_buf;
+                    WORD32 idx = i4_mb_idx + 1;
+
+                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
+                    pu1_buf =  pu1_proc_map_top + idx;
+                    if(*pu1_buf)
+                        break;
+                    ithread_yield();
+                }
+            }
+
+            /* If we already have the minimum sad, there is no point in searching for sad again */
+            if (ps_proc->u4_min_sad_reached == 0)
+            {
+                /* intra gating in inter slices */
+                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
+                if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
+                {
+                    /* distortion of neighboring blocks
+                     * (left, top, top right, top left - averaged below) */
+                    WORD32 i4_distortion[4];
+
+                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
+
+                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
+
+                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
+
+                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
+
+                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
+
+                }
+
+                /* If we are going to force intra we need to evaluate intra irrespective of gating.
+                 * Otherwise intra is evaluated only when the current distortion
+                 * exceeds the gate threshold plus 16 * lambda */
+                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
+                {
+                    /* evaluate intra 4x4 modes */
+                    if (u4_valid_modes & (1 << I4x4))
+                    {
+                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+                        {
+                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
+                        }
+                        else
+                        {
+                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
+                        }
+                    }
+
+                    /* evaluate intra 16x16 modes */
+                    if (u4_valid_modes & (1 << I16x16))
+                    {
+                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
+                    }
+
+                    /* evaluate intra 8x8 modes */
+                    if (u4_valid_modes & (1 << I8x8))
+                    {
+                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+                    }
+                }
+
+            }
+        }
+
+        /* is intra */
+        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
+        {
+            luma_idx = ps_proc->u4_mb_type; /* the intra mb type value is used directly as the table index */
+            chroma_idx = 0;
+            is_intra = 1;
+
+            /* evaluate chroma blocks for intra */
+            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+        }
+        else
+        {
+            luma_idx = 3; /* presumably the inter entry of luma_energy_compaction - confirm against the table setup */
+            chroma_idx = 1;
+            is_intra = 0;
+        }
+        ps_proc->u4_is_intra = is_intra;
+
+        /* redo MV pred of neighbors in the case intra mb */
+        /* TODO : currently called unconditionally, needs to be called only in the case of intra
+         * to modify neighbors */
+        if (ps_proc->i4_slice_type != ISLICE)
+        {
+            ih264e_mv_pred(ps_proc);
+        }
+
+        /* Perform luma mb core coding */
+        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
+
+        /* Perform chroma mb core coding */
+        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
+
+        /* coded block pattern: chroma cbp above bit 4, luma cbp in the low bits */
+        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
+
+        /* mb skip: an inter mb with no coded residue whose mv equals the skip
+         * mv is re-labelled as PSKIP */
+        if (is_intra == 0)
+        {
+            if (ps_proc->u4_cbp == 0)
+            {
+                /* get skip mv */
+                UWORD32 u4_for_me = 0;
+                ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
+
+                /* skip ? */
+                if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
+                                ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
+                {
+                    ps_proc->u4_mb_type = PSKIP;
+                }
+            }
+        }
+
+UPDATE_MB_INFO: /* mbs already marked PSKIP by ME jump here, bypassing mode evaluation and core coding. NOTE(review): u4_is_intra and u4_cbp are not written on that path */
+
+        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
+        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
+
+        /**********************************************************************/
+        /* if disable deblock level is '0' this implies enable deblocking for */
+        /* all edges of all macroblocks with out any restrictions             */
+        /*                                                                    */
+        /* if disable deblock level is '1' this implies disable deblocking for*/
+        /* all edges of all macroblocks with out any restrictions             */
+        /*                                                                    */
+        /* if disable deblock level is '2' this implies enable deblocking for */
+        /* all edges of all macroblocks except edges overlapping with slice   */
+        /* boundaries. This option is not currently supported by the encoder  */
+        /* hence the slice map should be of no significance to perform debloc */
+        /* king                                                               */
+        /**********************************************************************/
+
+        if (ps_proc->u4_compute_recon)
+        {
+            /* deblk context */
+            /* src pointers */
+            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
+            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
+
+            /* src indices (held as UWORD32 here, passed on as WORD32 arguments) */
+            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
+            UWORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+            /* compute blocking strength */
+            if (ps_proc->u4_disable_deblock_level != 1)
+            {
+                ih264e_compute_bs(ps_proc);
+            }
+
+            /* nmb deblocking and hpel and padding */
+            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
+                                                  pu1_cur_pic_chroma, i4_mb_x,
+                                                  i4_mb_y);
+        }
+
+        /* update the context after for coding next mb */
+        error_status |= ih264e_update_proc_ctxt(ps_proc);
+
+        /* Once the last row is processed, mark the buffer status appropriately.
+         * The test relies on i4_mb_y having reached i4_ht_mbs at this point -
+         * presumably advanced by ih264e_update_proc_ctxt above; confirm. */
+        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
+        {
+            /* Pointer to current picture buffer structure */
+            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
+
+            /* Pointer to current picture's mv buffer structure */
+            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+            /**********************************************************************/
+            /* if disable deblock level is '0' this implies enable deblocking for */
+            /* all edges of all macroblocks with out any restrictions             */
+            /*                                                                    */
+            /* if disable deblock level is '1' this implies disable deblocking for*/
+            /* all edges of all macroblocks with out any restrictions             */
+            /*                                                                    */
+            /* if disable deblock level is '2' this implies enable deblocking for */
+            /* all edges of all macroblocks except edges overlapping with slice   */
+            /* boundaries. This option is not currently supported by the encoder  */
+            /* hence the slice map should be of no significance to perform debloc */
+            /* king                                                               */
+            /**********************************************************************/
+            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
+
+            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
+
+            if (ps_codec->s_cfg.u4_enable_recon)
+            {
+                /* pic cnt */
+                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
+
+                /* rec buffers */
+                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
+
+                /* is last? */
+                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
+
+                /* frame time stamp */
+                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
+                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
+            }
+
+        }
+    }
+
+    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
+
+    return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  function to receive frame qp and pic type before encoding
+*
+* @par Description:
+*  Before encoding the frame, this function calls the rc library for frame qp
+*  and picture type
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+*  count of the current picture; used to decide whether it must be an IDR
+*
+* @param[out] pi4_pic_type
+*  picture type chosen for the current frame
+*
+* @returns skip_src
+*  if the source frame rate and target frame rate are not identical, the encoder
+*  skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type)
+{
+    /* rate control state kept inside the codec context */
+    rate_control_ctxt_t *ps_rc = &ps_codec->s_rate_control;
+
+    /* qp written by the rc call; used below as an index into the qp map */
+    UWORD8 u1_rc_qp;
+
+    /* picture type reported to the caller; stays PIC_NA for unmapped rc types */
+    PIC_TYPE_T e_out_type = PIC_NA;
+
+    /* value returned by the rc call, handed back to the caller unchanged */
+    WORD32 i4_skip;
+
+    /* an app request for an IDR or I frame is first forwarded to the rc lib */
+    if (IV_IDR_FRAME == ps_codec->force_curr_frame_type ||
+        IV_I_FRAME == ps_codec->force_curr_frame_type)
+    {
+        irc_force_I_frame(ps_rc->pps_rate_control_api);
+    }
+
+    /* ask the rc library for the current picture type and frame qp */
+    i4_skip = ih264e_rc_pre_enc(ps_rc->pps_rate_control_api,
+                                ps_rc->pps_pd_frm_rate,
+                                ps_rc->pps_time_stamp,
+                                ps_rc->pps_frame_time,
+                                1, /* delta time stamp */
+                                (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs),
+                                &ps_rc->e_pic_type,
+                                &u1_rc_qp);
+
+    /* translate the rc picture type into the codec's picture type */
+    if (I_PIC == ps_rc->e_pic_type)
+    {
+        e_out_type = PIC_I;
+    }
+    else if (P_PIC == ps_rc->e_pic_type)
+    {
+        e_out_type = PIC_P;
+    }
+    else if (B_PIC == ps_rc->e_pic_type)
+    {
+        e_out_type = PIC_B;
+    }
+
+    /* idr is used at every idr interval boundary or when the app forces one */
+    if ((0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) ||
+                    IV_IDR_FRAME == ps_codec->force_curr_frame_type)
+    {
+        e_out_type = PIC_IDR;
+    }
+
+    /* the force request is not sticky: drop it after this frame */
+    switch (ps_codec->force_curr_frame_type)
+    {
+        case IV_IDR_FRAME:
+        case IV_I_FRAME:
+            ps_codec->force_curr_frame_type = IV_NA_FRAME;
+            break;
+
+        default:
+            break;
+    }
+
+    /* frame qp is the rc qp looked up through gau1_mpeg2_to_h264_qmap */
+    ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_rc_qp];
+
+    /* picture type for the caller */
+    *pi4_pic_type = e_out_type;
+
+    return i4_skip;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to update rc context after encoding
+*
+* @par   Description
+*  This function updates the rate control context after the frame is encoded.
+*  Number of bits consumed by the current frame, frame distortion, frame cost,
+*  number of intra/inter mb's, ... are passed on to rate control context for
+*  updating the rc model.
+*
+* @param[in] ps_codec
+*  Handle to codec context
+*
+* @param[in] ctxt_sel
+*  frame context selector
+*
+* @param[in] pic_cnt
+*  pic count
+*
+* @returns i4_stuffing_byte
+*  number of stuffing bytes (if necessary)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt)
+{
+    /* index of the first proc ctxt of the selected frame context set */
+    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+    /* first proc ctxt of that set; ps_proc[i] is read for each of the cores */
+    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
+
+    /* frame qp as stored in the codec context */
+    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
+
+    /* value returned by ih264e_rc_post_enc, handed back to the caller */
+    WORD32 i4_stuffing_byte = 0;
+
+    /* current frame stats, accumulated over all cores */
+    frame_info_t s_frame_info;
+    picture_type_e rc_pic_type = P_PIC; /* defined fallback, see default case */
+
+    /* loop counters */
+    WORD32 i, j;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    /* init frame info */
+    irc_init_frame_info(&s_frame_info);
+
+    /* get frame info */
+    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+    {
+        /*****************************************************************/
+        /* One frame can be encoded by max of u4_num_cores threads       */
+        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
+        /* u4_num_cores threads                                          */
+        /*****************************************************************/
+        for (j = 0; j< MAX_MB_TYPE; j++)
+        {
+            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
+
+            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
+
+            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
+        }
+
+        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
+
+        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
+
+        /*****************************************************************/
+        /* gather number of residue and header bits consumed by the frame*/
+        /*****************************************************************/
+        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
+    }
+
+    /* map the codec picture type to the rc library's picture type */
+    switch (ps_codec->pic_type)
+    {
+        case PIC_I:
+        case PIC_IDR:
+            rc_pic_type = I_PIC;
+            break;
+        case PIC_P:
+            rc_pic_type = P_PIC;
+            break;
+        case PIC_B:
+            rc_pic_type = B_PIC;
+            break;
+        default: /* not expected; with NDEBUG the P_PIC fallback is passed on */
+            assert(0);
+            break;
+    }
+
+    /* update rc lib with current frame stats */
+    i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
+                                          &(s_frame_info),
+                                          ps_codec->s_rate_control.pps_pd_frm_rate,
+                                          ps_codec->s_rate_control.pps_time_stamp,
+                                          ps_codec->s_rate_control.pps_frame_time,
+                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
+                                          &rc_pic_type,
+                                          pic_cnt,
+                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
+                                          u1_frame_qp,
+                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
+                                          &ps_codec->s_rate_control.i4_avg_activity);
+
+    /* in case the frame needs to be skipped, the frame num should not be incremented */
+    if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+    {
+        ps_codec->i4_frame_num --;
+    }
+
+    return i4_stuffing_byte;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  entry point of a spawned encoder thread
+*
+* @par Description:
+*  The encoder thread dequeues a proc/entropy job from the encoder queue and
+*  calls necessary routines.
+*
+* @param[in] pv_proc
+*  Process context corresponding to the thread
+*
+* @returns  error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc)
+{
+    /* ret: status of the most recent dequeue; error_status: OR of job results */
+    IH264_ERROR_T ret = IH264_SUCCESS;
+    WORD32 error_status = IH264_SUCCESS;
+
+    /* proc ctxt handed to the thread; re-pointed per job at WORKER below */
+    process_ctxt_t *ps_proc = pv_proc;
+
+    /* codec ctxt this process ctxt points to */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* job entry filled in by ih264_list_dequeue */
+    job_t s_job;
+
+    /* blocking flag for the entropy dequeue: 0 (non-blocking) until the proc
+     * queue dequeue fails for the thread with i4_id 0, which then sets it */
+    WORD32 is_blocking = 0;
+
+    /* set affinity using the process ctxt id */
+    ithread_set_affinity(ps_proc->i4_id);
+
+    while(1)
+    {
+        /* first preference: dequeue a job from the entropy queue (under mutex) */
+        {
+            int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
+
+            /* codec context selector: lsb of the encode api call count */
+            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+            /* flag set to 1 below when a thread takes an entropy job; where it is cleared is not visible here */
+            volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
+
+            /* have the lock (if locking failed, fall through to the proc queue) */
+            if (error == 0)
+            {
+                if (*pu4_buf == 0)
+                {
+                    /* no entropy threads are active, try dequeuing a job from the entropy queue */
+                    ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
+                    if (IH264_SUCCESS == ret)
+                    {
+                        *pu4_buf = 1;
+                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+                        goto WORKER;
+                    }
+                    else if(is_blocking) /* dequeue with is_blocking set failed: leave the loop */
+                    {
+                        ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+                        break;
+                    }
+                }
+                ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+            }
+        }
+
+        /* dequeue a job from the process queue, with the blocking argument set */
+        ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
+        if (IH264_SUCCESS != ret)
+        {
+            if(ps_proc->i4_id) /* threads with a non-zero id exit here */
+                break;
+            else
+            {
+                is_blocking = 1; /* id 0 retries the entropy queue with is_blocking set */
+                continue;
+            }
+        }
+
+WORKER:
+        /* choose appropriate proc context: thread id offset by the job's proc_base_idx */
+        ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
+
+        switch (s_job.i4_cmd)
+        {
+            case CMD_PROCESS:
+                ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
+                ps_proc->i4_mb_x = s_job.i2_mb_x;
+                ps_proc->i4_mb_y = s_job.i2_mb_y;
+
+                /* init process context */
+                ih264e_init_proc_ctxt(ps_proc);
+
+                /* core code all mbs enlisted under the current job */
+                error_status |= ih264e_process(ps_proc);
+                break;
+
+            case CMD_ENTROPY:
+                ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
+                ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
+                ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
+
+                /* init entropy */
+                ih264e_init_entropy_ctxt(ps_proc);
+
+                /* entropy code all mbs enlisted under the current job */
+                error_status |= ih264e_entropy(ps_proc);
+                break;
+
+            default:
+                error_status |= IH264_FAIL;
+                break;
+        }
+    }
+
+    /* send error code: stored in the ctxt selected by the last job, if any ran */
+    ps_proc->i4_error_code = error_status;
+    return ret; /* status of the dequeue attempt that ended the loop */
+}
diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h
new file mode 100755
index 0000000..9715434
--- /dev/null
+++ b/encoder/ih264e_process.h
@@ -0,0 +1,364 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_process.h
+*
+* @brief
+*  Contains functions for codec thread
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PROCESS_H_
+#define IH264E_PROCESS_H_
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief This function generates sps, pps set on request
+*
+*  @par   Description
+*  When the encoder is set in header generation mode, the following function
+*  is called. This generates sps and pps headers and returns the control back
+*  to caller.
+*
+*  @param[in]    ps_codec
+*  pointer to codec context
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T    ih264e_generate_sps_pps
+        (
+            codec_t   *ps_codec
+        );
+
+/**
+*******************************************************************************
+*
+* @brief   initialize entropy context.
+*
+* @par Description:
+*  Before invoking the call to perform entropy coding, the entropy context
+*  associated with the job needs to be initialized. This involves the start
+*  mb address, end mb address, slice index and the pointer to location at
+*  which the mb residue info and mb header info are packed.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+*  This function calls lower level functions to perform entropy coding for a
+*  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
+*  back the control, updates the ctxt and calls lower level functions again.
+*  This process is repeated till all the rows or group of mb's (which ever is
+*  minimum) are coded
+*
+* @param[in] ps_proc
+*  process context
+*
+* @returns  error status
+*
+* @remarks
+* NOTE : It is assumed that this routine is invoked at the start of a slice,
+* so the slice header is generated by default.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+*  After deciding the mode info of a macroblock, the syntax elements
+*  associated with the mb are packed and stored. The entropy thread unpacks
+*  this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief   update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+*  This function performs house keeping tasks after encoding an mb.
+*  After encoding an mb, various elements of the process context needs to be
+*  updated to encode the next mb. For instance, the source, recon and reference
+*  pointers, mb indices have to be adjusted to the next mb. The slice index of
+*  the current mb needs to be updated. If mb qp modulation is enabled, then if
+*  the qp changes the quant param structure needs to be updated. Also, to encode
+*  the next mb, the current mb info is used as part of mode prediction or mv
+*  prediction. Hence the current mb info has to be preserved at top/top left/left
+*  locations.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt
+    (
+        process_ctxt_t *ps_proc
+    );
+
+/**
+*******************************************************************************
+*
+* @brief   initialize process context.
+*
+* @par Description:
+*  Before dispatching the current job to process thread, the process context
+*  associated with the job is initialized. Usually every job aims to encode one
+*  row of mb's. Based on the row indices provided by the job, the process
+*  context's buffer ptrs, slice indices and other elements that are necessary
+*  during core-coding are initialized.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to luma buffer
+*
+* @param[in] pu1_curr_pic_chroma
+*  Pointer to chroma buffer
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+* @param[in] i4_pad_ht
+*  number of rows to be padded
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer
+    (
+        process_ctxt_t *ps_proc,
+        UWORD8 *pu1_curr_pic_luma,
+        UWORD8 *pu1_curr_pic_chroma,
+        WORD32 i4_mb_x,
+        WORD32 i4_mb_y,
+        WORD32 i4_pad_ht
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma half pel planes generation
+*
+* @par Description:
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_halfpel_generation
+    (
+        process_ctxt_t *ps_proc,
+        UWORD8 *pu1_curr_pic_luma,
+        WORD32 i4_mb_x,
+        WORD32 i4_mb_y
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+*  The mb to be coded is taken and is evaluated over a predefined set of modes
+*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+*  is selected and using intra/inter prediction filters, prediction is carried out.
+*  The deviation between src and pred signal constitutes error signal. This error
+*  signal is transformed (hierarchical transform if necessary) and quantized. The
+*  quantized residue is packed in to entropy buffer for entropy coding. This is
+*  repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns  error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  function to receive frame qp and pic type before encoding
+*
+* @par Description:
+*  Before encoding the frame, this function calls the rc library for frame qp
+*  and picture type
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+*  count of the current picture; used to decide whether it must be an IDR
+*
+* @param[out] pi4_pic_type
+*  picture type chosen for the current frame
+*
+* @returns skip_src
+*  if the source frame rate and target frame rate are not identical, the encoder
+*  skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type);
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to update rc context after encoding
+*
+* @par   Description
+*  This function updates the rate control context after the frame is encoded.
+*  Number of bits consumed by the current frame, frame distortion, frame cost,
+*  number of intra/inter mb's, ... are passed on to rate control context for
+*  updating the rc model.
+*
+* @param[in] ps_codec
+*  Handle to codec context
+*
+* @param[in] ctxt_sel
+*  frame context selector
+*
+* @param[in] pic_cnt
+*  pic count
+*
+* @returns i4_stuffing_byte
+*  number of stuffing bytes (if necessary)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  entry point of a spawned encoder thread
+*
+* @par Description:
+*  The encoder thread dequeues a proc/entropy job from the encoder queue and
+*  calls necessary routines.
+*
+* @param[in] pv_proc
+*  Process context corresponding to the thread
+*
+* @returns  error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc);
+
+#endif /* IH264E_PROCESS_H_ */
diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c
new file mode 100755
index 0000000..1e2fe4f
--- /dev/null
+++ b/encoder/ih264e_rate_control.c
@@ -0,0 +1,801 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_rate_control.c
+*
+* @brief
+*  Contains api function definitions for h264 rate control
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_rc_init()
+*  - ih264e_rc_get_picture_details()
+*  - ih264e_rc_pre_enc()
+*  - ih264e_update_rc_mb_info()
+*  - ih264e_rc_get_buffer_status()
+*  - ih264e_rc_post_enc()
+*  - ih264e_update_rc_bits_info()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "irc_trace_support.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief This function does nothing
+*
+* @par Description
+*  This function does nothing
+*
+* @param[in] format  format string, followed by variadic arguments (all ignored)
+*
+* @returns 0 always
+*
+* @remarks This function is used by the rc library for debugging purposes.
+*  However this function was not part of rc library. So this is defined here
+*  to resolve link issues.
+*
+*******************************************************************************
+*/
+int trace_printf(const WORD8 *format, ...)
+{
+    UNUSED(format); /* stub: format and the variadic arguments are ignored */
+    return 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function initializes rate control context and variables
+*
+* @par Description
+*  This function initializes rate control type, source and target frame rate,
+*  average and peak bitrate, intra-inter frame interval and initial
+*  quantization parameter
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate
+*
+* @param[in] u4_max_delay
+*  Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+*  Array of min/max qp
+*
+* @param[in] u1_profile_level
+*  Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+                    void *pv_frame_time,
+                    void *pv_time_stamp,
+                    void *pv_pd_frm_rate,
+                    UWORD32 u4_max_frm_rate,
+                    UWORD32 u4_src_frm_rate,
+                    UWORD32 u4_tgt_frm_rate,
+                    rc_type_e e_rate_control_type,
+                    UWORD32 u4_avg_bit_rate,
+                    UWORD32 u4_peak_bit_rate,
+                    UWORD32 u4_max_delay,
+                    UWORD32 u4_intra_frame_interval,
+                    UWORD8 *pu1_init_qp,
+                    WORD32 i4_max_inter_frm_int,
+                    UWORD8 *pu1_min_max_qp,
+                    UWORD8 u1_profile_level)
+{
+    /*
+     * Initialises the frame time, pull down frame rate and time stamp
+     * modules and then the rate control library itself.
+     */
+
+    /* peak bitrates handed to the rc library, indexed [I][P] */
+    UWORD32 au4_peak[2];
+
+    /* constants passed to the rc library: minimum bitrate 0, gop-closed flag 0 */
+    UWORD32 u4_min_rate = 0;
+    WORD32  i4_closed_gop = 0;
+
+    /* tick counts read back from the frame time module */
+    UWORD32 u4_ticks_src;
+    UWORD32 u4_ticks_tgt;
+
+    /* cpb size: level table entry for the given level, scaled by 1200 */
+    UWORD8  u1_lvl = ih264e_get_lvl_idx(u1_profile_level);
+    UWORD32 u4_cpb_size = 1200 * gas_ih264_lvl_tbl[u1_lvl].u4_max_cpb_size;
+
+    /* CBR_NLDRC pins both peak rates to the average bitrate; every other
+     * rate control type uses the peak bitrate supplied by the caller */
+    au4_peak[0] = (CBR_NLDRC == e_rate_control_type) ? u4_avg_bit_rate
+                                                     : u4_peak_bit_rate;
+    au4_peak[1] = au4_peak[0];
+
+    /* frame time module: source and target frame rates */
+    ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* pull down frame rate module: source frame rate */
+    ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate);
+
+    /* time stamp module: maximum and source frame rates */
+    ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, u4_src_frm_rate);
+
+    /* ticks are queried only after the frame time module is initialised */
+    u4_ticks_src = ih264e_frame_time_get_src_ticks(pv_frame_time);
+    u4_ticks_tgt = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
+
+    /* hand everything to the rc library */
+    irc_initialise_rate_control(pv_rc_api,                  /* rc handle */
+                                e_rate_control_type,        /* rc algo type */
+                                0,                          /* mb activity on/off */
+                                u4_avg_bit_rate,            /* avg bitrate */
+                                au4_peak,                   /* peak bitrate array[2]:[I][P] */
+                                u4_min_rate,                /* min bitrate */
+                                u4_src_frm_rate,            /* src frame_rate */
+                                u4_max_delay,               /* max buffer delay */
+                                u4_intra_frame_interval,    /* intra frm_interval */
+                                pu1_init_qp,                /* init qp array[3]:[I][P][B] */
+                                u4_cpb_size,                /* max vbv/cpb buffer size */
+                                i4_max_inter_frm_int,       /* max inter frm_interval */
+                                i4_closed_gop,              /* open/closed gop */
+                                pu1_min_max_qp,             /* min-max qp array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin] */
+                                0,                          /* how to calc the I-frame estimated_sad */
+                                u4_ticks_src,               /* src_ticks = LCM(src_frm_rate,tgt_frm_rate)/src_frm_rate */
+                                u4_ticks_tgt);              /* tgt_ticks = LCM(src_frm_rate,tgt_frm_rate)/tgt_frm_rate */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par   Description
+*  This function returns the Picture type(I/P/B)
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @returns
+*  Picture type
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
+{
+    /* id and display order are requested together with the type but are not returned */
+    WORD32 i4_unused_pic_id = 0, i4_unused_disp_order = 0;
+    picture_type_e e_pic_type = P_PIC;
+
+    irc_get_picture_details(pv_rc_api, &i4_unused_pic_id,
+                            &i4_unused_disp_order, &e_pic_type);
+
+    return e_pic_type;
+}
+
+/**
+*******************************************************************************
+*
+* @brief  Function to get rate control output before encoding
+*
+* @par Description
+*  This function is called before encoding the current frame and gets the qp
+*  for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+*  Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+*  Total Macro Blocks in frame
+*
+* @param[in/out] pe_vop_coding_type
+*  Picture coding type(I/P/B)
+*
+* @param[in/out] pu1_frame_qp
+*  QP for current frame
+*
+* @returns
+*  Skip or encode the current frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+                         void * ps_pd_frm_rate,
+                         void * ps_time_stamp,
+                         void * ps_frame_time,
+                         WORD32 i4_delta_time_stamp,
+                         WORD32 i4_total_mb_in_frame,
+                         picture_type_e *pe_vop_coding_type,
+                         UWORD8 *pu1_frame_qp)
+{
+    /* Both are WORD32: i4_num_app_skips is compared against the WORD32 value
+       (i4_delta_time_stamp - 1), which a WORD8 counter cannot reach past 127 */
+    WORD32 i4_skip_src = 0, i4_num_app_skips = 0;
+    UWORD32 u4_src_not_skipped_for_dts = 0;
+
+    /* Variables for the update_frm_level_info */
+    WORD32  ai4_tot_mb_in_type[MAX_MB_TYPE];
+    WORD32  ai4_tot_mb_type_qp[MAX_MB_TYPE]    = {0, 0};
+    WORD32  ai4_mb_type_sad[MAX_MB_TYPE]       = {0, 0};
+    WORD32  ai4_mb_type_tex_bits[MAX_MB_TYPE]  = {0, 0};
+    WORD32   i4_total_frame_bits               = 0;
+    WORD32   i4_total_hdr_bits                 = 0;
+    WORD32   i4_avg_mb_activity                = 0;
+    WORD32   i4_intra_frm_cost                 = 0;
+    UWORD8   u1_is_scd                         = 0;
+
+    /* Set all the MBs to Intra */
+    ai4_tot_mb_in_type[0] = i4_total_mb_in_frame;
+    ai4_tot_mb_in_type[1] = 0;
+
+    /* Run one pre-encode skip update for each of the (i4_delta_time_stamp - 1) missing frames */
+    for (i4_num_app_skips = 0; i4_num_app_skips < (i4_delta_time_stamp - 1); i4_num_app_skips++)
+    {
+        /* Update the missing frame's frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Update the time stamp */
+        ih264e_update_time_stamp(ps_time_stamp);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,        /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,    /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,      /*header bits for model updation*/
+                                    ai4_mb_type_tex_bits,   /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+                                    ai4_tot_mb_type_qp,     /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,     /* total number of mbs in each mb type */
+                                    i4_avg_mb_activity,     /* Average mb activity in frame */
+                                    u1_is_scd,              /* Is a scene change detected at the current frame */
+                                    1,                      /* If it's a pre-encode skip */
+                                    i4_intra_frm_cost,      /* Sum of Intra cost for each frame */
+                                    0);                     /* Is pic handling [irc_update_pic_handling_state] done before update */
+    }
+
+    /* Update the time stamp for the current frame */
+    ih264e_update_time_stamp(ps_time_stamp);
+
+    /* Check whether the current source frame needs to be skipped */
+    i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time,
+                                               i4_delta_time_stamp,
+                                               &u4_src_not_skipped_for_dts);
+
+    /***********************************************************************
+       Based on difference in source and target frame rate frames are skipped
+     ***********************************************************************/
+    if (i4_skip_src)
+    {
+        /* Update the skipped frame's frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,        /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,    /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,      /*header bits for model updation*/
+                                    ai4_mb_type_tex_bits,   /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+                                    ai4_tot_mb_type_qp,     /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,     /* total number of mbs in each mb type */
+                                    i4_avg_mb_activity,     /* Average mb activity in frame */
+                                    u1_is_scd,              /* Is a scene change detected at the current frame */
+                                    1,                      /* If it's a pre-encode skip */
+                                    i4_intra_frm_cost,      /* Sum of Intra cost for each frame */
+                                    0);                     /* Is pic handling [irc_update_pic_handling_state] done before update */
+
+        /* Set the current frame type to NA */
+        *pe_vop_coding_type = BUF_PIC;
+    }
+    else
+    {
+        /* Largest positive WORD32; last argument of irc_get_frame_level_qp() below */
+#define MAX_FRAME_BITS 0x7FFFFFFF
+        WORD32 i4_avg_frm_rate, i4_source_frame_rate;
+
+        i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+        /* Record the frame rate of the frame being encoded in the pull down
+           frame rate module; the value recorded is the source frame rate.
+           NOTE(review): an earlier comment here spoke of doubling the target
+           frame rate to make up for application skips; the code does not */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate,
+                                  i4_source_frame_rate);
+
+        /* Based on the update get the average frame rate */
+        i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
+
+        /* Call the RC library function to change the frame_rate to the
+           actually achieved frm_rate */
+        irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
+
+        /* --------Rate control related things.  Get pic type and frame Qp---------*/
+        /* Add picture to the stack. For IPP encoder we push the variable
+           into the stack and get back the variables by requesting RC.
+           This interface is designed for IPB encoder */
+        irc_add_picture_to_stack(ps_rate_control_api, 1);
+
+        /* Query the picture_type */
+        *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api);
+
+        /* Get current frame Qp */
+        pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api,
+                                                         (picture_type_e)(pe_vop_coding_type[0]),
+                                                         MAX_FRAME_BITS);
+    }
+
+    return(i4_skip_src);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par   Description
+*  After encoding a mb, information such as mb type, qp used, mb distortion
+*  resulted in encoding the block and so on needs to be preserved for modeling
+*  RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+*  Handle Frame info context
+*
+* @param[in] pv_proc
+*  Process context
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
+{
+    /* the opaque handle is used as the process context */
+    process_ctxt_t *ps_ctxt = (process_ctxt_t *)pv_proc;
+
+    /* slot 0 collects intra mbs, slot 1 everything else */
+    WORD32 i4_slot = (ps_ctxt->u4_is_intra) ? 0 : 1;
+
+    /* one more mb of this kind */
+    ps_frame_info->num_mbs[i4_slot] += 1;
+
+    /* accumulate its distortion */
+    ps_frame_info->tot_mb_sad[i4_slot] += ps_ctxt->i4_mb_distortion;
+
+    /* accumulate its qp after mapping through gau1_h264_to_mpeg2_qmap */
+    ps_frame_info->qp_sum[i4_slot] += gau1_h264_to_mpeg2_qmap[ps_ctxt->u4_mb_qp];
+
+    /* the mb cost is summed for intra mbs only */
+    if (0 == i4_slot)
+    {
+        ps_frame_info->intra_mb_cost_sum += ps_ctxt->i4_mb_cost;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+*  This function is used to get buffer status(underflow/overflow) by rate
+*  control module
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+*  Total frame bits
+*
+* @param[in] e_pic_type
+*  Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+*  Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+*  Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+*  Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+                                 WORD32 i4_total_frame_bits,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                 UWORD8 *pu1_is_enc_buf_overflow,
+                                 UWORD8 *pu1_is_enc_buf_underflow)
+{
+    /* VBV status returned by the RC library and the two encoder side flags
+       derived from it */
+    vbv_buf_status_e e_vbv_state;
+    UWORD8 u1_enc_underflow;
+    UWORD8 u1_enc_overflow;
+
+    /* Query the RC library, handing over the frame bits, the picture type
+       and the caller's bit count pointer */
+    e_vbv_state = irc_get_buffer_status(pv_rc_api,
+                                        i4_total_frame_bits,
+                                        e_pic_type,
+                                        pi4_num_bits_to_prevent_vbv_underflow);
+
+    /* The encoder side flags mirror the VBV status: VBV_OVERFLOW is reported
+       as encoder buffer underflow, VBV_UNDERFLOW as encoder buffer overflow,
+       and any other status clears both flags */
+    u1_enc_underflow = (VBV_OVERFLOW == e_vbv_state) ? 1 : 0;
+    u1_enc_overflow = (VBV_UNDERFLOW == e_vbv_state) ? 1 : 0;
+
+    /* Report both flags through the output pointers */
+    *pu1_is_enc_buf_underflow = u1_enc_underflow;
+    *pu1_is_enc_buf_overflow = u1_enc_overflow;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+*  This function is used to update the rate control module after the current
+*  frame encoding is done with details such as bits consumed, SAD for I/P/B,
+*  intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api context
+*
+* @param[in] ps_frame_info
+*  Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+*  Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+*  Picture coding type
+*
+* @param[in] i4_is_first_frame
+*  Is first frame
+*
+* @param[out] pi4_is_post_encode_skip
+*  Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+*  Frame qp (unused)
+*
+* @param[in/out] pi4_num_intra_in_prev_frame
+*  Number of intra mbs in previous frame
+*
+* @param[out] pi4_avg_activity
+*  Average activity
+*
+* @returns Number of stuffing bytes (bits to stuff divided by 8)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void * ps_rate_control_api,
+                          frame_info_t *ps_frame_info,
+                          void * ps_pd_frm_rate,
+                          void * ps_time_stamp,
+                          void * ps_frame_time,
+                          WORD32   i4_total_mb_in_frame,
+                          picture_type_e *pe_vop_coding_type,
+                          WORD32 i4_is_first_frame,
+                          WORD32 *pi4_is_post_encode_skip,
+                          UWORD8 u1_frame_qp,
+                          WORD32 *pi4_num_intra_in_prev_frame,
+                          WORD32 *pi4_avg_activity)
+{
+    /* Variables for the update_frm_level_info */
+    WORD32  ai4_tot_mb_in_type[MAX_MB_TYPE];
+    WORD32  ai4_tot_mb_type_qp[MAX_MB_TYPE]    = {0, 0};
+    WORD32  ai4_mb_type_sad[MAX_MB_TYPE]       = {0, 0};
+    WORD32  ai4_mb_type_tex_bits[MAX_MB_TYPE]  = {0, 0};
+    WORD32   i4_total_frame_bits               = 0;
+    WORD32   i4_total_hdr_bits                 = 0;
+    WORD32   i4_total_texturebits;
+    WORD32   i4_avg_mb_activity                = 0;
+    WORD32   i4_intra_frm_cost                 = 0;
+    UWORD8   u1_is_scd                         = 0;
+    WORD32  i4_cbr_bits_to_stuff               = 0;
+    UWORD32   u4_num_intra_in_prev_frame        = *pi4_num_intra_in_prev_frame;
+    UNUSED(ps_pd_frm_rate); /* these four parameters are accepted but not used */
+    UNUSED(ps_time_stamp);
+    UNUSED(ps_frame_time);
+    UNUSED(u1_frame_qp);
+    /* Gather the frame level statistics from ps_frame_info */
+    ai4_tot_mb_in_type[MB_TYPE_INTRA]    = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTRA);
+    ai4_tot_mb_in_type[MB_TYPE_INTER]    = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTER);
+    /* ai4_tot_mb_type_qp[MB_TYPE_INTRA]    = 0;
+    ai4_tot_mb_type_qp[MB_TYPE_INTER]    = ps_enc->pu1_h264_mpg2quant[u1_frame_qp] * i4_total_mb_in_frame;*/
+    ai4_tot_mb_type_qp[MB_TYPE_INTRA]    = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTRA);
+    ai4_tot_mb_type_qp[MB_TYPE_INTER]    = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTER);
+    ai4_mb_type_sad[MB_TYPE_INTRA]       = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTRA);
+    ai4_mb_type_sad[MB_TYPE_INTER]       = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTER);
+    i4_intra_frm_cost                    = irc_fi_get_total_intra_mb_cost(ps_frame_info);
+    i4_avg_mb_activity                   = irc_fi_get_avg_activity(ps_frame_info);
+    i4_total_hdr_bits                    = irc_fi_get_total_header_bits(ps_frame_info);
+    i4_total_texturebits                 = irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTRA);
+    i4_total_texturebits                 += irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTER);
+    i4_total_frame_bits                  = i4_total_hdr_bits + i4_total_texturebits ;
+
+    *pi4_avg_activity = i4_avg_mb_activity;
+
+
+    /* Texture bits are not split per mb type here: every bit that is not header is reported in the inter slot */
+    ai4_mb_type_tex_bits[MB_TYPE_INTRA]  = 0;
+    ai4_mb_type_tex_bits[MB_TYPE_INTER]  = i4_total_frame_bits - i4_total_hdr_bits;
+
+    /* Set post encode skip to zero */
+    pi4_is_post_encode_skip[0]= 0;
+
+    /* For NLDRC, get the buffer status for stuffing or skipping */
+    if (irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
+    {
+        WORD32 i4_get_num_bit_to_prevent_vbv_overflow;
+        UWORD8 u1_enc_buf_overflow,u1_enc_buf_underflow;
+
+        /* Getting the buffer status */
+        ih264e_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+            pe_vop_coding_type[0],  &i4_get_num_bit_to_prevent_vbv_overflow,
+            &u1_enc_buf_overflow,&u1_enc_buf_underflow);
+
+        /* We skip the frame if decoder buffer is underflowing. But we never skip first I frame */
+        /* NOTE(review): an earlier version tested i4_is_first_frame != 1; confirm what the caller passes for the first frame */
+        if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
+        {
+            irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]);
+            // i4_total_frame_bits = imp4_write_skip_frame_header(ps_enc);
+            i4_total_frame_bits = 0;
+
+            *pi4_is_post_encode_skip = 1;
+
+            /* Adjust the GOP if in case we skipped an I-frame */
+            if (*pe_vop_coding_type == I_PIC)
+                irc_force_I_frame(ps_rate_control_api);
+
+            /* The picture type is left unchanged for a skipped frame; the old override is kept below for reference */
+            // *pe_vop_coding_type = P;
+
+            /* Query the buffer status again with zero frame bits; the returned status itself is not examined */
+            irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+                (picture_type_e)pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow);
+
+        }
+
+        /* Encoder buffer underflow was reported: stuffing bits are added to the frame */
+        if (u1_enc_buf_underflow == 1)
+        {
+            /* The stuffing function is directly pulled out from split controller workspace.
+               encode_vop_data() function makes sure alignment data is dumped at the end of a
+               frame. Split controller was identifying this alignment byte, overwriting it with
+               the stuff data and then finally aligning the buffer. Here every thing is inside
+               the DSP. So, ideally encode_vop_data needn't align, and we can start stuffing directly.
+               But in that case, it'll break the logic for a normal frame.
+               Hence for simplicity, not changing this part since it is ok to align and
+               then overwrite since stuffing is not done for every frame */
+            i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0]);
+
+            /* Just add extra 32 bits to make sure we don't stuff lesser */
+            i4_cbr_bits_to_stuff += 32;
+
+            /* NOTE(review): stuffing must not exceed the outbuf size, but no such check is performed here */
+            /* Add stuffed bits to total bits */
+            i4_total_frame_bits += i4_cbr_bits_to_stuff;
+        }
+    }
+
+#define ENABLE_SCD 1
+#if ENABLE_SCD
+    /* Scene change: a P picture whose intra MB count exceeds 2/3 of all MBs and 11/10 of the previous frame's intra count */
+    if ((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
+       (*pe_vop_coding_type == P_PIC) &&
+       (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32)u4_num_intra_in_prev_frame) / 10)))
+    {
+        u1_is_scd = 1;
+    }
+#endif
+
+    /* Update num intra mbs of this frame, unless the frame was post-encode skipped */
+    if (pi4_is_post_encode_skip[0] == 0)
+    {
+        *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
+    }
+
+    /* Reset the intra count to zero on an I frame */
+    if (*pe_vop_coding_type == I_PIC)
+    {
+        *pi4_num_intra_in_prev_frame = 0;
+    }
+
+    /* Do an update of rate control after post encode */
+    irc_update_frame_level_info(ps_rate_control_api,        /* RC state */
+                                pe_vop_coding_type[0],      /* PIC type */
+                                ai4_mb_type_sad,            /* SAD for [Intra/Inter] */
+                                i4_total_frame_bits,        /* Total frame bits */
+                                i4_total_hdr_bits,          /* Header bits */
+                                ai4_mb_type_tex_bits,       /* Texture bits for MB[Intra/Inter] */
+                                ai4_tot_mb_type_qp,         /* Total qp for MB[Intra/Inter] */
+                                ai4_tot_mb_in_type,         /* Number of mbs for MB[Intra/Inter] */
+                                i4_avg_mb_activity,         /* Average mb activity in frame */
+                                u1_is_scd,                  /* Is a scene change detected */
+                                0,                          /* Pre encode skip  */
+                                (WORD32)i4_intra_frm_cost,  /* Intra cost for frame */
+                                0);                         /* Not done outside */
+
+    return (i4_cbr_bits_to_stuff >> 3); /* stuffing amount converted from bits to bytes */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+*  Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+*  Frame info context
+*
+* @param[in] pv_entropy
+*  Entropy context
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
+{
+    /* fold the entropy context's header and residue bit counts into the
+       frame level totals, intra first and then inter */
+    const WORD32 ai4_mb_types[2] = {MB_TYPE_INTRA, MB_TYPE_INTER};
+    entropy_ctxt_t *ps_ent = (entropy_ctxt_t *)pv_entropy;
+    WORD32 i;
+
+    for (i = 0; i < 2; i++)
+    {
+        ps_frame_info->mb_header_bits[ai4_mb_types[i]] += ps_ent->u4_header_bits[ai4_mb_types[i]];
+        ps_frame_info->mb_texture_bits[ai4_mb_types[i]] += ps_ent->u4_residue_bits[ai4_mb_types[i]];
+    }
+}
+
diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h
new file mode 100755
index 0000000..de9466a
--- /dev/null
+++ b/encoder/ih264e_rate_control.h
@@ -0,0 +1,351 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_rate_control.h
+*
+* @brief
+*  This file contains function declarations of api functions for h264 rate
+*  control
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_RATE_CONTROL_H_
+#define IH264E_RATE_CONTROL_H_
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  This function initializes rate control context and variables
+*
+* @par Description
+*  This function initializes rate control type, source and target frame rate,
+*  average and peak bitrate, intra-inter frame interval and initial
+*  quantization parameter
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate
+*
+* @param[in] u4_max_delay
+*  Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+*  Array of min/max qp
+*
+* @param[in] u1_profile_level
+*  Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+                    void *pv_frame_time,
+                    void *pv_time_stamp,
+                    void *pv_pd_frm_rate,
+                    UWORD32 u4_max_frm_rate,
+                    UWORD32 u4_src_frm_rate,
+                    UWORD32 u4_tgt_frm_rate,
+                    rc_type_e e_rate_control_type,
+                    UWORD32 u4_avg_bit_rate,
+                    UWORD32 u4_peak_bit_rate,
+                    UWORD32 u4_max_delay,
+                    UWORD32 u4_intra_frame_interval,
+                    UWORD8 *pu1_init_qp,
+                    WORD32 i4_max_inter_frm_int,
+                    UWORD8 *pu1_min_max_qp,
+                    UWORD8 u1_profile_level);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par   Description
+*  This function returns the Picture type(I/P/B)
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @returns
+*  Picture type
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
+
+
+/**
+*******************************************************************************
+*
+* @brief  Function to get rate control output before encoding
+*
+* @par Description
+*  This function is called before encoding the current frame and gets the qp
+*  for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+*  Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+*  Total Macro Blocks in frame
+*
+* @param[in/out] pe_vop_coding_type
+*  Picture coding type(I/P/B)
+*
+* @param[in/out] pu1_frame_qp
+*  QP for current frame
+*
+* @returns
+*  Skip or encode the current frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+                         void * ps_pd_frm_rate,
+                         void * ps_time_stamp,
+                         void * ps_frame_time,
+                         WORD32 i4_delta_time_stamp,
+                         WORD32 i4_total_mb_in_frame,
+                         picture_type_e *pe_vop_coding_type,
+                         UWORD8 *pu1_frame_qp);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par   Description
+*  After encoding a mb, information such as mb type, qp used, mb distortion
+*  resulted in encoding the block and so on needs to be preserved for modelling
+*  RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+*  Handle Frame info context
+*
+* @param[in] pv_proc
+*  Process context
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+*  This function is used to get buffer status(underflow/overflow) by rate
+*  control module
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+*  Total frame bits
+*
+* @param[in] e_pic_type
+*  Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+*  Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+*  Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+*  Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+                                 WORD32 i4_total_frame_bits,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                 UWORD8 *pu1_is_enc_buf_overflow,
+                                 UWORD8 *pu1_is_enc_buf_underflow);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+*  This function is used to update the rate control module after the current
+*  frame encoding is done with details such as bits consumed, SAD for I/P/B,
+*  intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+*  Handle to rate control api context
+*
+* @param[in] ps_frame_info
+*  Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+*  Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+*  Picture coding type
+*
+* @param[in] i4_is_first_frame
+*  Is first frame
+*
+* @param[out] pi4_is_post_encode_skip
+*  Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+*  Frame qp
+*
+* @param[in/out] pi4_num_intra_in_prev_frame
+*  Number of intra mbs in previous frame
+*
+* @param[out] pi4_avg_activity
+*  Average activity
+*
+* @returns Number of stuffing bytes (bits to stuff divided by 8)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void *ps_rate_control_api,
+                         frame_info_t *ps_frame_info,
+                         void *ps_pd_frm_rate,
+                         void *ps_time_stamp,
+                         void *ps_frame_time,
+                         WORD32 i4_total_mb_in_frame,
+                         picture_type_e *pe_vop_coding_type,
+                         WORD32 i4_is_first_frame,
+                         WORD32 *pi4_is_post_encode_skip,
+                         UWORD8 u1_frame_qp,
+                         WORD32 *pi4_num_intra_in_prev_frame,
+                         WORD32 *pi4_avg_activity);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+*  Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+*  Frame info context
+*
+* @param[in] pv_entropy
+*  Entropy context
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
+
+#endif /* IH264E_RATE_CONTROL_H_ */
+
diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c
new file mode 100755
index 0000000..e4d5781
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.c
@@ -0,0 +1,395 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_rc_mem_interface.c
+*
+* @brief
+*  This file contains api function definitions for rate control memtabs
+*
+* @author
+*  ittiam
+*
+* List of Functions
+*  - fill_memtab()
+*  - use_or_fill_base()
+*  - ih264e_map_rc_mem_recs_to_itt_api()
+*  - ih264e_map_itt_mem_rec_to_rc_mem_rec()
+*  - ih264e_get_rate_control_mem_tab()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264e_error.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_common.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par   Description
+*  Rounds the requested size up to a multiple of the alignment and stores it
+*  with the alignment, usage and region in the record; pv_base is untouched
+* @param[out] ps_mem_tab
+*  pointer to the mem record to fill
+*
+* @param[in] u4_size
+*  requested size of the record in bytes
+*
+* @param[in] i4_alignment
+*  alignment in bytes; the rounding is a bit mask, so use a power of two.
+*  Values below 2 (including 0 and negatives) leave the size unrounded
+* @param[in] e_usage
+*  usage
+*
+* @param[in] e_mem_region
+*  mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab,
+                 WORD32 u4_size,
+                 WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage,
+                 ITT_MEM_REGION_E e_mem_region)
+{
+    /* Unsigned mask; an alignment below 2 must not round the size down to 0 */
+    UWORD32 u4_mask = (i4_alignment > 1) ? ((UWORD32)i4_alignment - 1) : 0;
+
+    /* Fill the memtab; the alignment is recorded exactly as it was passed */
+    ps_mem_tab->u4_size      = ((UWORD32)u4_size + u4_mask) & ~u4_mask;
+    ps_mem_tab->i4_alignment = i4_alignment;
+    ps_mem_tab->e_usage      = e_usage;
+    ps_mem_tab->e_mem_region = e_mem_region;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function exchanges a base pointer between a mem record and the
+*  caller's handle
+*
+* @par   Description
+*  FILL_BASE copies the caller's pointer into the record's pv_base; USE_BASE
+*  copies the record's pv_base out to the caller. A null source pointer is
+*  reported as an error and nothing is written. Any other function type is a
+*  no-op.
+*
+* @param[in,out] ps_mem_tab
+*  pointer to the mem record
+*
+* @param[in,out] ptr_to_be_filled
+*  handle to the memory record storage space; only element 0 is accessed
+*
+* @param[in] e_func_type
+*  enum that dictates fill memory records or use memory records
+*
+* @return 0 on success or when there is nothing to do, -1 when the source
+*  pointer is null
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab,
+                        void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type)
+{
+    switch (e_func_type)
+    {
+        case FILL_BASE:
+            /* record the caller's pointer as the base of this record */
+            if (ptr_to_be_filled[0] == 0)
+                return (-1);
+            ps_mem_tab->pv_base = ptr_to_be_filled[0];
+            break;
+
+        case USE_BASE:
+            /* hand the recorded base back to the caller */
+            if (ps_mem_tab->pv_base == 0)
+                return (-1);
+            ptr_to_be_filled[0] = ps_mem_tab->pv_base;
+            break;
+
+        default:
+            /* every other mode has nothing to exchange */
+            break;
+    }
+
+    return (0);
+}
+
+/**
+******************************************************************************
+*
+* @brief This function maps rc mem records structure to encoder lib mem records
+*  structure
+*
+* @par   Description
+*  Copies the size and alignment of each rc mem record into the encoder lib
+*  mem record at the same index, and marks every record as external
+*  persistent cacheable memory
+*
+* @param[out]  ps_mem
+*  pointer to encoder lib mem records
+*
+* @param[in]   rc_memtab
+*  pointer to rc mem records
+*
+* @param[in]   num_mem_recs
+*  number of memory records
+*
+* @return      void
+*
+******************************************************************************
+*/
+void ih264e_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem,
+                                       itt_memtab_t *rc_memtab,
+                                       UWORD32 num_mem_recs)
+{
+    UWORD32 u4_rec;
+
+    for (u4_rec = 0; u4_rec < num_mem_recs; u4_rec++)
+    {
+        /* take the request from the rc record with the same index */
+        UWORD32 u4_bytes = rc_memtab[u4_rec].u4_size;
+        UWORD32 u4_align = rc_memtab[u4_rec].i4_alignment;
+
+        /* we always ask for external persistent cacheable memory */
+        FILL_MEMTAB(ps_mem, u4_rec, u4_bytes, u4_align,
+                    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function maps encoder lib mem records structure to RC memory
+* records structure
+*
+* @par   Description
+*  Copies base pointer, size and alignment of each encoder lib mem record
+*  into the rc mem record at the same index; region and usage are always
+*  set to DDR and PERSISTENT
+*
+* @param[in] ps_mem
+*  pointer to encoder lib mem records
+*
+* @param[out] rc_memtab
+*  pointer to rc mem records
+*
+* @param[in] num_mem_recs
+*  Number of memory records
+*
+* @returns none
+*
+* @remarks ps_mem is only read
+*******************************************************************************
+*/
+void ih264e_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem,
+                                          itt_memtab_t *rc_memtab,
+                                          UWORD32 num_mem_recs)
+{
+    UWORD32 u4_rec;
+
+    for (u4_rec = 0; u4_rec < num_mem_recs; u4_rec++)
+    {
+        const iv_mem_rec_t *ps_src = &ps_mem[u4_rec];
+        itt_memtab_t *ps_dst = &rc_memtab[u4_rec];
+
+        /* only DDR memory is available */
+        ps_dst->e_mem_region = DDR;
+        ps_dst->e_usage = PERSISTENT;
+
+        ps_dst->pv_base = ps_src->pv_base;
+        ps_dst->u4_size = ps_src->u4_mem_size;
+        ps_dst->i4_alignment = ps_src->u4_mem_alignment;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Get memtabs for rate control
+*
+* @par   Description
+*  Runs the rate control api, frame time, time stamp and frame rate modules
+*  in turn, in the requested mode, over consecutive slices of ps_mem. The
+*  scratch records are local to each call, so calls share no state.
+*
+* @param[in] pv_rate_control
+*  pointer to rate control context (handle); read for USE_BASE / FILL_BASE
+* @param[in,out] ps_mem
+*  pointer to encoder lib mem records
+*
+* @param[in] e_func_type
+*  enum that dictates fill memory records or Init memory records
+*
+* @return total number of mem records
+******************************************************************************
+*/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
+                                       iv_mem_rec_t  *ps_mem,
+                                       ITT_FUNC_TYPE_E e_func_type)
+{
+    /* scratch reseeded from ps_mem before each use; automatic so that callers do not share it */
+    itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS] = {{0}};
+    WORD32 i4_num_memtab = 0, j = 0;
+    void *refptr2[4] = {0};   /* stand-in handles for modes that do not use the context */
+    void **refptr1[4];
+    rate_control_ctxt_t *ps_rate_control = pv_rate_control;
+
+    for (j = 0; j < 4; j++)
+        refptr1[j] = &(refptr2[j]);
+    j = 0;
+
+    if (e_func_type == USE_BASE || e_func_type == FILL_BASE)
+    {   /* only these two modes read the rate control context */
+        refptr1[1] = &ps_rate_control->pps_frame_time;
+        refptr1[2] = &ps_rate_control->pps_time_stamp;
+        refptr1[3] = &ps_rate_control->pps_pd_frm_rate;
+        refptr1[0] = &ps_rate_control->pps_rate_control_api;
+    }
+
+    /* Get the total number of memtabs used by Rate Controller */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0], NULL, GET_NUM_MEMTAB);
+    /* Seed the scratch records from the application records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Rate Controller */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0],as_itt_memtab+j,e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Frame time Module */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], NULL, GET_NUM_MEMTAB);
+    /* Seed the scratch records from the application records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Frame time Module */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Time stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], NULL, GET_NUM_MEMTAB);
+    /* Seed the scratch records from the application records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Time Stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Frame rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], NULL, GET_NUM_MEMTAB);
+    /* Seed the scratch records from the application records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Frame Rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    return j; /* Total MemTabs Needed by Rate Control Module */
+}
diff --git a/encoder/ih264e_rc_mem_interface.h b/encoder/ih264e_rc_mem_interface.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_rc_mem_interface.h
+*
+* @brief
+*  This file contains function declaration and structures for rate control
+*  memtabs
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  The rate control library is a global library across various codecs. It
+*  anticipates certain structures definitions. Those definitions are to be
+*  imported from global workspace. Instead of that, the structures needed for
+*  rc library are copied in to this file and exported to rc library. If the
+*  structures / enums / ... in the global workspace change, this file also needs
+*  to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type)      \
+{   /* fills record m_j; m_pv_mem_rec and m_j are each expanded 4 times */ \
+    (m_pv_mem_rec)[(m_j)].u4_size = sizeof(iv_mem_rec_t);                \
+    (m_pv_mem_rec)[(m_j)].u4_mem_size = (m_mem_size);                    \
+    (m_pv_mem_rec)[(m_j)].u4_mem_alignment = (m_align);                  \
+    (m_pv_mem_rec)[(m_j)].e_mem_type = (m_type);                         \
+}   /* kept as a plain block so existing call sites compile unchanged */
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+typedef enum                /* memory alignment choices; values presumably in bytes - TODO confirm */
+{
+    ALIGN_BYTE = 1,
+    ALIGN_WORD16 = 2,
+    ALIGN_WORD32 = 4,
+    ALIGN_WORD64 = 8,
+    ALIGN_128_BYTE = 128    /* largest alignment listed here */
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+typedef enum                /* whether a memory record is scratch or persistent (itt_memtab_t.e_usage) */
+{
+    SCRATCH = 0,
+    PERSISTENT = 1,
+    WRITEONCE  = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+typedef enum                /* memory region a record is to be placed in (itt_memtab_t.e_mem_region) */
+{
+    L1D = 0,
+    SL2 = 1,
+    DDR = 3                 /* NOTE(review): value 2 is skipped - confirm this is intentional */
+}ITT_MEM_REGION_E;
+
+typedef enum                /* operating mode passed to the memtab helper functions */
+{
+    GET_NUM_MEMTAB = 0,
+    FILL_MEMTAB = 1,        /* shares its spelling with the FILL_MEMTAB() macro; without '(' it is this enumerator */
+    USE_BASE = 2,           /* use_or_fill_base(): copy the record's pv_base out to the caller */
+    FILL_BASE =3            /* use_or_fill_base(): store the caller's pointer in the record's pv_base */
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/* NOTE: This must be an exact replica of IALG_MemRec; any change to IALG_MemRec
+         must be replicated here. Describes one memory record of the rc library. */
+typedef struct
+{
+    /* Size in bytes (fill_memtab() stores it rounded up to the alignment) */
+    UWORD32 u4_size;
+
+    /* Alignment in bytes */
+    WORD32 i4_alignment;
+
+    /* Memory region in which the record is to be placed */
+    ITT_MEM_REGION_E e_mem_region;
+
+    /* Whether the memory is scratch or persistent */
+    ITT_MEM_USAGE_TYPE_E e_usage;
+
+    /* Base pointer of the allocated memory */
+    void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par   Description
+*  Stores the size, rounded up to the alignment, together with the alignment,
+*  usage and region in the given record
+*
+* @param[out] ps_mem_tab
+*  pointer to the mem record to fill
+*
+* @param[in] u4_size
+*  requested size of the record (signed, despite the u4 prefix)
+*
+* @param[in] i4_alignment
+*  memory alignment size; the rounding is a bit mask, so use a power of two
+* @param[in] e_usage
+*  usage
+*
+* @param[in] e_mem_region
+*  mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief This function exchanges a base pointer with a memory record
+*
+* @par   Description
+*  FILL_BASE stores the caller's pointer in the record; USE_BASE copies the
+*  record's base pointer out to the caller; other types do nothing
+* @param[in,out] ps_mem_tab
+*  pointer to the mem record
+*
+* @param[in,out] ptr_to_be_filled
+*  handle to the memory record storage space
+*
+* @param[in] e_func_type
+*  enum that dictates fill memory records or use memory records
+*
+* @return 0 on success or nothing to do, -1 if the source pointer is null
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/ih264e_statistics.h b/encoder/ih264e_statistics.h
new file mode 100755
index 0000000..0ab33ca
--- /dev/null
+++ b/encoder/ih264e_statistics.h
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_statistics.h
+*
+* @brief
+*  Contains macros for generating stats about h264 encoder
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STATISTICS_H_
+#define IH264E_STATISTICS_H_
+
+#if  CAVLC_LEVEL_STATS
+
+/*****************************************************************************/
+/* Extern global declarations                                                */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief  In cavlc encoding, a lut is used for encoding levels. It is not possible
+ * to use look up for all possible levels. The extent to which look up is generated
+ * is based on the statistics that were collected in the following global variables.
+ *
+ * gu4_cavlc_level_bin_lt_4 counts the coefficients with abs(level) < 4
+ * gu4_cavlc_level_bin_lt_16 counts the coefficients with 4 <= abs(level) < 16
+ * gu4_cavlc_level_bin_lt_32 counts the coefficients with 16 <= abs(level) < 32
+ * and so on ...
+ * ******************************************************************************
+ */
+extern UWORD32 gu4_cavlc_level_bin_lt_4;
+extern UWORD32 gu4_cavlc_level_bin_lt_16;
+extern UWORD32 gu4_cavlc_level_bin_lt_32;
+extern UWORD32 gu4_cavlc_level_bin_lt_64;
+extern UWORD32 gu4_cavlc_level_bin_lt_128;
+extern UWORD32 gu4_cavlc_level_bin_else_where;   /* abs(level) >= 128 */
+extern UWORD32 gu4_cavlc_level_lut_hit_rate;     /* incremented by GATHER_CAVLC_STATS2() */
+
+/*****************************************************************************/
+/* Extern function declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*  @brief print cavlc stats; declared only when CAVLC_LEVEL_STATS is non-zero
+******************************************************************************
+*/
+void print_cavlc_level_stats(void);
+
+#define GATHER_CAVLC_STATS1() /* bins u4_abs_level, which must be in scope at the call site */ \
+    if (u4_abs_level < 4)\
+        gu4_cavlc_level_bin_lt_4 ++; /* below 4 */ \
+    else if  (u4_abs_level < 16) \
+        gu4_cavlc_level_bin_lt_16 ++; /* 4 .. 15 */ \
+    else if  (u4_abs_level < 32) \
+        gu4_cavlc_level_bin_lt_32 ++; /* 16 .. 31 */ \
+    else if  (u4_abs_level < 64) \
+        gu4_cavlc_level_bin_lt_64 ++; /* 32 .. 63 */ \
+    else if  (u4_abs_level < 128) \
+        gu4_cavlc_level_bin_lt_128 ++; /* 64 .. 127 */ \
+    else \
+        gu4_cavlc_level_bin_else_where ++; /* 128 and above; the expansion is a bare if/else chain ending in ';' */
+
+#define GATHER_CAVLC_STATS2() /* expands to a complete statement, ';' included */ \
+                gu4_cavlc_level_lut_hit_rate ++;
+
+#else /* !CAVLC_LEVEL_STATS: both stats macros expand to nothing */
+
+#define GATHER_CAVLC_STATS1()
+
+#define GATHER_CAVLC_STATS2()
+
+#endif /* CAVLC_LEVEL_STATS */
+
+
+#if  GATING_STATS
+
+/*****************************************************************************/
+/* Extern global declarations                                                */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief  When encoding at the fastest preset, intra analysis is sometimes
+* skipped if the inter threshold is less than the predefined threshold. The
+* variable below keeps track of the number of mbs for which intra analysis is
+* not done
+* ******************************************************************************
+*/
+extern UWORD32 gu4_mb_gated_cnt;
+
+/*****************************************************************************/
+/* Extern function declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*  @brief print gating stats; declared only when GATING_STATS is non-zero
+******************************************************************************
+*/
+void print_gating_stats(void);
+
+#define GATHER_GATING_STATS() /* bumps gu4_mb_gated_cnt; the expansion already ends in ';' */ \
+        gu4_mb_gated_cnt ++;
+
+#else /* !GATING_STATS */
+
+#define GATHER_GATING_STATS() /* statistics disabled: expands to nothing */
+
+#endif /* GATING_STATS */
+
+
+#endif /* IH264E_STATISTICS_H_ */
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
new file mode 100755
index 0000000..1043a53
--- /dev/null
+++ b/encoder/ih264e_structs.h
@@ -0,0 +1,2566 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_structs.h
+*
+* @brief
+*  Structure definitions used in the encoder
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STRUCTS_H_
+#define IH264E_STRUCTS_H_
+
+/*****************************************************************************/
+/* Extern Function type definitions                                          */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      Function pointer type for leaf level intra prediction filters
+******************************************************************************
+ */
+typedef void (*pf_intra_pred)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+                              WORD32 src_strd, WORD32 dst_strd,
+                              WORD32 ui_neighboravailability);
+
+/**
+******************************************************************************
+ *  @brief      Function pointer type for leaf level inter prediction filters
+******************************************************************************
+ */
+
+typedef void (*pf_inter_pred_luma_bilinear)(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
+                                            WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
+                                            WORD32 height, WORD32 width);
+
+/**
+******************************************************************************
+ *  @brief      Function pointer types for leaf level fwd transform + quant and iquant + itrans
+******************************************************************************
+ */
+typedef void (*pf_trans_quant)(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
+                               WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_dst_stride,
+                               const UWORD16 *pu2_scale_mat, const UWORD16 *pu2_thresh_mat,
+                               UWORD32 u4_qbit, UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
+
+typedef void (*pf_iquant_itrans)(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
+                                 WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_out_stride,
+                                 const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
+                                 UWORD32 qp_div, WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+ *  @brief      Function pointer type for leaf level padding
+******************************************************************************
+ */
+typedef void (*pf_pad)(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 pad_size);
+
+/**
+******************************************************************************
+ *  @brief      Function pointer types for leaf level memory handling (memcpy / memset and their mul8 variants)
+******************************************************************************
+ */
+typedef void (*pf_memcpy)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+typedef void (*pf_memcpy_mul8)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset_mul8)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+/**
+******************************************************************************
+ *  @brief      Function pointer type for SAD computation; the result is written through pi4_mb_distortion
+******************************************************************************
+ */
+typedef void (*pf_compute_sad)(UWORD8 *pu1_src, UWORD8 *pu1_est,
+                               UWORD32 src_strd, UWORD32 est_strd,
+                               WORD32 i4_max_sad, WORD32 *pi4_mb_distortion);
+
+/**
+******************************************************************************
+ *  @brief     Function pointer types for intra mode evaluation (encoder level)
+******************************************************************************
+ */
+typedef void (*pf_evaluate_intra_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
+                                        UWORD32 src_strd, UWORD32 dst_strd,
+                                        WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+                                        WORD32 *pu4_sadmin,
+                                        UWORD32 u4_valid_intra_modes);
+
+typedef void (*pf_evaluate_intra_4x4_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
+                                            UWORD32 src_strd, UWORD32 dst_strd,
+                                            WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+                                            WORD32 *pu4_sadmin,
+                                            UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
+                                            UWORD32 u4_predictd_mode);
+
+/**
+******************************************************************************
+ *  @brief     Function pointer types for half pel generation (encoder level)
+******************************************************************************
+ */
+typedef void (*pf_sixtapfilter_horz)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+                                     WORD32 src_strd, WORD32 dst_strd);
+
+typedef void (*pf_sixtap_filter_2dvh_vert)(UWORD8 *pu1_src, UWORD8 *pu1_dst1, UWORD8 *pu1_dst2,
+                                           WORD32 src_strd, WORD32 dst_strd,
+                                           WORD32 *pi16_pred1,
+                                           WORD32 pi16_pred1_strd);
+/**
+******************************************************************************
+ *  @brief     Function pointer types for color format conversion (420p to 420sp, 422ile to 420sp)
+******************************************************************************
+ */
+typedef void (*pf_fmt_conv_420p_to_420sp)(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+                                          UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+                                          UWORD16 u2_height, UWORD16 u2_width,
+                                          UWORD16 src_y_strd, UWORD16 src_u_strd, UWORD16 src_v_strd,
+                                          UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+                                          UWORD32 convert_uv_only);
+
+typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf, UWORD8 *pu1_v_buf,
+                                            UWORD8 *pu1_422i_buf,
+                                            WORD32 u4_y_width, WORD32 u4_y_height, WORD32 u4_y_stride,
+                                            WORD32 u4_u_stride, WORD32 u4_v_stride,
+                                            WORD32 u4_422i_stride);
+
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ *  @enum  CODEC_STATE_T
+ *  @brief codec state: init done, header done or first frame done
+ ******************************************************************************
+ */
+typedef enum
+{
+    INIT_DONE,
+    HEADER_DONE,
+    FIRST_FRAME_DONE,
+} CODEC_STATE_T;
+
+
+/**
+ ******************************************************************************
+ *  @enum  JOBQ_CMD_T
+ *  @brief list of job commands (used during job instantiation): process, entropy, format conversion, ME
+ ******************************************************************************
+ */
+typedef enum
+{
+    CMD_PROCESS,
+    CMD_ENTROPY,
+    CMD_FMTCONV,
+    CMD_ME,
+}JOBQ_CMD_T;
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/**
+ * PU information: L0 motion vector, position, size and L0 reference index
+ */
+typedef struct
+{
+
+    /**
+     *  L0 Motion Vector
+     */
+    mv_t s_l0_mv;
+
+    /**
+     *  PU X position in terms of min PU (4x4) units (4-bit field)
+     */
+    UWORD32     b4_pos_x        : 4;
+
+    /**
+     *  PU Y position in terms of min PU (4x4) units (4-bit field)
+     */
+    UWORD32     b4_pos_y        : 4;
+
+    /**
+     *  PU width in pixels = (b4_wd + 1) << 2 (2-bit field despite the b4_ prefix)
+     */
+    UWORD32     b4_wd           : 2;
+
+    /**
+     *  PU height in pixels = (b4_ht + 1) << 2 (2-bit field despite the b4_ prefix)
+     */
+    UWORD32     b4_ht           : 2;
+
+    /**
+     *  L0 Ref index
+     */
+    WORD8   i1_l0_ref_idx;
+
+} enc_pu_t;
+
+typedef struct _codec_t codec_t; /* forward declaration; struct _codec_t is not defined at this point */
+
+typedef struct
+{
+    /** Descriptor of raw buffer                                     */
+    iv_raw_buf_t                            s_raw_buf;
+
+    /** Lower 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is the last buffer */
+    UWORD32                                 u4_is_last;
+
+    /** Flag to indicate if mb info is sent along with input buffer     */
+    UWORD32                                 u4_mb_info_type;
+
+    /** Size of the mb info structure                                   */
+    UWORD32                                 u4_mb_info_size;
+
+    /** Buffer containing mb info if mb_info_type is non-zero           */
+    void                                    *pv_mb_info;
+
+    /** Flag to indicate if pic info is sent along with input buffer     */
+    UWORD32                                 u4_pic_info_type;
+
+    /** Buffer containing pic info; presumably valid if pic_info_type is non-zero (text said mb_info_type) */
+    void                                    *pv_pic_info;
+
+}inp_buf_t;
+
+typedef struct
+{
+    /** Descriptor of bitstream buffer                               */
+    iv_bits_buf_t                           s_bits_buf;
+
+    /** Lower 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is the last buffer */
+    UWORD32                                 u4_is_last;
+
+}out_buf_t;
+
+typedef struct
+{
+    /** Descriptor of picture buffer                                 */
+    pic_buf_t                               s_pic_buf;
+
+    /** Lower 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of time stamp corresponding to the above buffer */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if the current buffer is the last buffer */
+    UWORD32                                 u4_is_last;
+
+    /** Picture count corresponding to the current picture */
+    WORD32                                  i4_pic_cnt;
+
+}rec_buf_t;
+
+typedef struct
+{
+    /** maximum width for which codec should request memory requirements    */
+    UWORD32                                     u4_max_wd;
+
+    /** maximum height for which codec should request memory requirements   */
+    UWORD32                                     u4_max_ht;
+
+    /** Maximum number of reference frames                                  */
+    UWORD32                                     u4_max_ref_cnt;
+
+    /** Maximum number of reorder frames                                    */
+    UWORD32                                     u4_max_reorder_cnt;
+
+    /** Maximum level supported                                             */
+    UWORD32                                     u4_max_level;
+
+    /** Input color format                                                  */
+    IV_COLOR_FORMAT_T                           e_inp_color_fmt;
+
+    /** Flag to enable/disable recon - To be used only for debugging/testing */
+    UWORD32                                     u4_enable_recon;
+
+    /** Recon color format                                                  */
+    IV_COLOR_FORMAT_T                           e_recon_color_fmt;
+
+    /** Encoder Speed preset - Value between 0 (slowest) and 100 (fastest)  */
+    IVE_SPEED_CONFIG                            u4_enc_speed_preset;
+
+    /** Rate control mode                                                   */
+    IVE_RC_MODE_T                               e_rc_mode;
+
+    /** Maximum frame rate to be supported                                  */
+    UWORD32                                     u4_max_framerate;
+
+    /** Maximum bitrate to be supported                                     */
+    UWORD32                                     u4_max_bitrate;
+
+    /** Maximum number of consecutive  B frames                             */
+    UWORD32                                     u4_max_num_bframes;
+
+    /** Content type Interlaced/Progressive                                 */
+    IV_CONTENT_TYPE_T                           e_content_type;
+
+    /** Maximum search range to be used in X direction                      */
+    UWORD32                                     u4_max_srch_rng_x;
+
+    /** Maximum search range to be used in Y direction                      */
+    UWORD32                                     u4_max_srch_rng_y;
+
+    /** Slice Mode                                                          */
+    IVE_SLICE_MODE_T                            e_slice_mode;
+
+    /** Slice parameter                                                     */
+    UWORD32                                     u4_slice_param;
+
+    /** Processor architecture                                              */
+    IV_ARCH_T                                   e_arch;
+
+    /** SOC details                                                         */
+    IV_SOC_T                                    e_soc;
+
+    /** Input width to be sent in bitstream                                 */
+    UWORD32                                     u4_disp_wd;
+
+    /** Input height to be sent in bitstream                                */
+    UWORD32                                     u4_disp_ht;
+
+    /** Input width                                                     */
+    UWORD32                                     u4_wd;
+
+    /** Input height                                                    */
+    UWORD32                                     u4_ht;
+
+    /** Input stride                                                    */
+    UWORD32                                     u4_strd;
+
+    /** Source frame rate                                               */
+    UWORD32                                     u4_src_frame_rate;
+
+    /** Target frame rate                                               */
+    UWORD32                                     u4_tgt_frame_rate;
+
+    /** Target bitrate in kilobits per second                           */
+    UWORD32                                     u4_target_bitrate;
+
+    /** Force current frame type                                        */
+    IV_PICTURE_CODING_TYPE_T                    e_frame_type;
+
+    /** Encoder mode                                                    */
+    IVE_ENC_MODE_T                              e_enc_mode;
+
+    /** Set initial Qp for I pictures                                   */
+    UWORD32                                     u4_i_qp;
+
+    /** Set initial Qp for P pictures                                   */
+    UWORD32                                     u4_p_qp;
+
+    /** Set initial Qp for B pictures                                   */
+    UWORD32                                     u4_b_qp;
+
+    /** Set minimum Qp for I pictures                                   */
+    UWORD32                                     u4_i_qp_min;
+
+    /** Set maximum Qp for I pictures                                   */
+    UWORD32                                     u4_i_qp_max;
+
+    /** Set minimum Qp for P pictures                                   */
+    UWORD32                                     u4_p_qp_min;
+
+    /** Set maximum Qp for P pictures                                   */
+    UWORD32                                     u4_p_qp_max;
+
+    /** Set minimum Qp for B pictures                                   */
+    UWORD32                                     u4_b_qp_min;
+
+    /** Set maximum Qp for B pictures                                   */
+    UWORD32                                     u4_b_qp_max;
+
+    /** Adaptive intra refresh mode                                     */
+    IVE_AIR_MODE_T                              e_air_mode;
+
+    /** Adaptive intra refresh period in frames                         */
+    UWORD32                                     u4_air_refresh_period;
+
+    /** VBV buffer delay                                                */
+    UWORD32                                     u4_vbv_buffer_delay;
+
+    /** VBV buffer size                                                 */
+    UWORD32                                     u4_vbv_buf_size;
+
+    /** Number of cores to be used                                      */
+    UWORD32                                     u4_num_cores;
+
+    /** ME speed preset - Value between 0 (slowest) and 100 (fastest)   */
+    UWORD32                                     u4_me_speed_preset;
+
+    /** Flag to enable/disable half pel motion estimation               */
+    UWORD32                                     u4_enable_hpel;
+
+    /** Flag to enable/disable quarter pel motion estimation            */
+    UWORD32                                     u4_enable_qpel;
+
+    /** Flag to enable/disable intra 4x4 analysis                       */
+    UWORD32                                     u4_enable_intra_4x4;
+
+    /** Flag to enable/disable intra 8x8 analysis                       */
+    UWORD32                                     u4_enable_intra_8x8;
+
+    /** Flag to enable/disable intra 16x16 analysis                     */
+    UWORD32                                     u4_enable_intra_16x16;
+
+    /** Flag to enable/disable fast SAD approximation                   */
+    UWORD32                                     u4_enable_fast_sad;
+
+    /** Flag to enable/disable alternate reference frames               */
+    UWORD32                                     u4_enable_alt_ref;
+
+    /** Flag to enable/disable computation of SATQD in ME               */
+    UWORD32                                     u4_enable_satqd;
+
+    /** Minimum SAD to search for                                       */
+    WORD32                                     i4_min_sad;
+
+    /** Maximum search range in X direction for farthest reference      */
+    UWORD32                                     u4_srch_rng_x;
+
+    /** Maximum search range in Y direction for farthest reference      */
+    UWORD32                                     u4_srch_rng_y;
+
+    /** I frame interval                                                */
+    UWORD32                                     u4_i_frm_interval;
+
+    /** IDR frame interval                                              */
+    UWORD32                                     u4_idr_frm_interval;
+
+    /** consecutive B frames                                            */
+    UWORD32                                     u4_num_b_frames;
+
+    /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+    UWORD32                                     u4_disable_deblock_level;
+
+    /** Profile                                                         */
+    IV_PROFILE_T                                e_profile;
+
+    /** Lower 32 bits of time stamp corresponding to input buffer,
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_low;
+
+    /** Upper 32 bits of time stamp corresponding to input buffer,
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_high;
+
+    /** Flag to say if the current config parameter set is valid.
+     * Will be zero to start with and will be set to 1, when configured.
+     * Once encoder uses the parameter set, this will be set to zero */
+    UWORD32                                     u4_is_valid;
+
+    /** Command associated with this config param set */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_cmd;
+
+    /** Input width in mbs (declared UWORD32 despite the i4_ prefix)          */
+    UWORD32                                     i4_wd_mbs;
+
+    /** Input height in mbs (declared UWORD32 despite the i4_ prefix)         */
+    UWORD32                                     i4_ht_mbs;
+
+    /** entropy coding mode flag                                              */
+    UWORD32                                     u4_entropy_coding_mode;
+
+    /** enable weighted prediction                                            */
+    UWORD32                                     u4_weighted_prediction;
+
+    /** enable constrained intra prediction                                   */
+    UWORD32                                     u4_constrained_intra_pred;
+
+    /** Pic info type */
+    UWORD32                                     u4_pic_info_type;
+    /**
+     * MB info type
+     */
+    UWORD32                                     u4_mb_info_type;
+
+}cfg_params_t;
+
+
+
+/** Structure to hold format conversion context (current row and number of rows) */
+typedef struct
+{
+    /** Current row for which format conversion should be done */
+    WORD32 i4_cur_row;
+
+    /** Number of rows for which format conversion should be done */
+    WORD32 i4_num_rows;
+
+}fmt_conv_t;
+
+
+/**
+ * Structure to represent a processing job entry: a command plus the MB range it applies to
+ */
+typedef struct
+{
+    /**
+     * Command (presumably one of JOBQ_CMD_T - confirm against the job queue code)
+     */
+    WORD32 i4_cmd;
+
+    /**
+     * MB x of the starting MB
+     */
+    WORD16 i2_mb_x;
+
+    /**
+     * MB y of the starting MB
+     */
+
+    WORD16 i2_mb_y;
+
+    /**
+     * Number of MBs that need to be processed in this job
+     */
+    WORD16 i2_mb_cnt;
+
+    /**
+     * Process contexts base index.
+     * Will toggle between 0 and MAX_PROCESS_THREADS
+     */
+    WORD16 i2_proc_base_idx;
+
+} job_t;
+
+
+/**
+ * Structure to represent a MV Bank buffer
+ */
+typedef struct
+{
+    /**
+     *  Pointer to hold the number of PUs for each MB in a picture
+     */
+    UWORD32 *pu4_mb_pu_cnt;
+
+    /**
+     * Pointer to hold enc_pu_t for each PU in a picture
+     */
+    enc_pu_t *ps_pic_pu;
+
+    /**
+     * Pointer to hold PU map for each MB in a picture
+     */
+    UWORD8 *pu1_pic_pu_map;
+
+    /**
+     * Pointer to hold the slice map (UWORD16 entries, despite the pu1_ prefix)
+     */
+    UWORD16 *pu1_pic_slice_map;
+
+    /**
+     * Absolute POC for the current MV Bank
+     */
+    WORD32 i4_abs_poc;
+
+    /**
+     * Buffer Id
+     */
+    WORD32     i4_buf_id;
+
+} mv_buf_t;
+
+
+/**
+ * Reference set containing picture count, POC and pointers to MV buf and pic buf
+ */
+typedef struct
+{
+    /** Picture count */
+    WORD32    i4_pic_cnt;
+
+    /** POC */
+    WORD32    i4_poc;
+
+    /** Pointer to picture buffer */
+    pic_buf_t *ps_pic_buf;
+
+    /** Pointer to mv buffer */
+    mv_buf_t  *ps_mv_buf;
+
+}ref_set_t;
+
+typedef struct
+{
+
+    /**
+     * Pointer to current PPS
+     */
+    pps_t *ps_pps;
+
+    /**
+     * Pointer to current SPS
+     */
+    sps_t *ps_sps;
+
+    /**
+     * Pointer to current slice header structure
+     */
+    slice_header_t *ps_slice_hdr;
+
+    /**
+     * MB's x position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position within a picture in raster scan in MB units
+     */
+
+    WORD32 i4_mb_y;
+
+    /**
+     * Current PU structure - set to MB enc_pu_t pointer at the start of MB processing and incremented
+     * for every TU
+     */
+    enc_pu_t *ps_pu;
+
+    /**
+     * Pointer to frame level enc_pu_t for the current frame being parsed
+     * where MVs and Intra pred modes will be updated
+     */
+    enc_pu_t *ps_pic_pu;
+
+    /**
+     *  Pointer to hold the number of PUs for each MB in a picture
+     */
+    UWORD32 *pu4_mb_pu_cnt;
+
+    /** PU Index map per MB. The indices in this map are w.r.t picture pu array and not
+     * w.r.t MB pu array.
+     * This will be used during mv prediction and since neighbors will have different MB pu map
+     * it will be easier if they all have indices w.r.t picture level PU array rather than MB level
+     * PU array.
+     * pu1_pic_pu_map is map w.r.t MB's enc_pu_t array
+     */
+    UWORD32 *pu4_pic_pu_idx_map;
+
+    /**
+      * Pointer to pu_map for the current frame being parsed
+      * where MVs and Intra pred modes will be updated
+      */
+     UWORD8 *pu1_pic_pu_map;
+
+     /**
+      *  PU count in current MB
+      */
+     WORD32 i4_mb_pu_cnt;
+
+     /**
+      *  Start PU index of current MB (inferred from the field name; text was a copy of the one above - confirm)
+      */
+     WORD32 i4_mb_start_pu_idx;
+
+     /**
+      *  Top availability for current MB level
+      */
+     UWORD8 u1_top_mb_avail;
+
+     /**
+      *  Top right availability for current MB level
+      */
+     UWORD8 u1_top_rt_mb_avail;
+     /**
+      *  Top left availability for current MB level
+      */
+     UWORD8 u1_top_lt_mb_avail;
+     /**
+      *  Left availability for current MB level
+      */
+     UWORD8 u1_left_mb_avail;
+
+}mv_ctxt_t;
+
+typedef struct
+{
+    /**
+     * MB's x position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_y;
+
+    /**
+     * MB's x position within a Slice in raster scan in MB units
+     */
+    WORD32 i4_mb_slice_x;
+
+    /**
+     * MB's y position within a Slice in raster scan in MB units
+     */
+    WORD32 i4_mb_slice_y;
+
+    /**
+     * Vertical boundary strength, two bits per edge.
+     * Stored in the format BS[15] | BS[14] | .. | BS[0]
+     */
+    UWORD32 *pu4_pic_vert_bs;
+
+    /**
+     * Horizontal boundary strength, two bits per edge.
+     * Stored in the format BS[15] | BS[14] | .. | BS[0]
+     */
+    UWORD32 *pu4_pic_horz_bs;
+
+    /**
+     *  Qp array stored for each mb
+     */
+    UWORD8  *pu1_pic_qp;
+
+}bs_ctxt_t;
+
+typedef struct
+{
+    /**
+     * MB's x position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_y;
+
+    /**
+     * Structure that contains the frame level BS and QP arrays
+     */
+    bs_ctxt_t s_bs_ctxt;
+
+    /**
+     * Pointer to 0th luma pixel in current pic
+     */
+    UWORD8 *pu1_cur_pic_luma;
+
+    /**
+     * Pointer to 0th chroma pixel in current pic
+     */
+    UWORD8 *pu1_cur_pic_chroma;
+
+    /**
+     *  Points to the array of slice indices which is used to identify the slice
+     *  to which each MB in a frame belongs.
+     */
+    UWORD8 *pu1_slice_idx;
+
+}deblk_ctxt_t;
+
+
+/**
+ ******************************************************************************
+ *  @brief      Structure to hold data and flags for 'n' mb processing for
+ *                deblocking, padding and half pel generation.
+ ******************************************************************************
+ */
+typedef struct
+{
+    /**
+     * MB's x position last processed + 1
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position currently being processed
+     */
+    WORD32 i4_mb_y;
+
+    /**
+     * Number of MBs processed in a stretch
+     */
+    WORD32 i4_n_mbs;
+
+}n_mb_process_ctxt_t;
+
+
+/**
+******************************************************************************
+ *  @brief      Structure to hold coefficient info for a 4x4 subblock.
+ *  The following can be used to type-cast coefficient data that is stored
+ *  per subblock. Note that though ai2_residue is shown as an array that
+ *  holds 16 coefficients, only the first few entries will be valid. The next
+ *  subblock's data starts after the valid number of coefficients. Number
+ *  of non-zero coefficients will be derived using number of non-zero bits
+ *  in sig coeff map
+******************************************************************************
+ */
+typedef struct
+{
+    /**
+     * significant coefficient map and nnz are packed into the
+     * most significant 2 bytes and least significant 2 bytes respectively
+     */
+    WORD32  i4_sig_map_nnz;
+
+    /**
+     * array of non zero residue coefficients
+     */
+    WORD16  ai2_residue[16];
+
+}tu_sblk_coeff_data_t;
+
+/**
+******************************************************************************
+ *  @brief      Structure contains few common state variables such as MB indices,
+ *  current SPS, PPS etc which are to be used in the entropy thread. By keeping
+ *  it a different structure it is being explicitly signaled that these
+ * variables are specific to entropy threads context and other threads should
+ * not update these elements
+******************************************************************************
+ */
+typedef struct
+{
+
+    /**
+     * start of frame / start of slice flag
+     */
+    WORD32 i4_sof;
+
+    /**
+     * end of frame / end of slice flag
+     */
+    WORD32 i4_eof;
+
+    /**
+     * generate header upon request
+     */
+    WORD32 i4_gen_header;
+
+    /**
+     *  seq_parameter_set_id
+     */
+    UWORD32 u4_sps_id;
+
+    /**
+     * Pointer to base of sequence parameter set structure array
+     */
+    sps_t *ps_sps_base;
+
+    /**
+     *  pic_parameter_set_id
+     */
+    UWORD32 u4_pps_id;
+
+    /**
+     * Pointer to base of Picture parameter set structure array
+     */
+    pps_t *ps_pps_base;
+
+    /**
+     * Current slice idx
+     */
+    WORD32 i4_cur_slice_idx;
+
+    /**
+     * Points to the array of slice indices which is used to identify the independent slice
+     * to which each MB in a frame belongs.
+     */
+    UWORD8 *pu1_slice_idx;
+
+    /**
+     * Pointer to base of slice header structure array
+     */
+    slice_header_t *ps_slice_hdr_base;
+
+    /**
+     * entropy status
+     */
+    UWORD8  *pu1_entropy_map;
+
+    /**
+     * MB's x position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_y;
+
+    /**
+     * MB count (NOTE(review): comment was a copy of "MB start address" - confirm exact meaning)
+     */
+    WORD32 i4_mb_cnt;
+
+    /**
+     * MB start address
+     */
+    WORD32 i4_mb_start_add;
+
+    /**
+     * MB end address
+     */
+    WORD32 i4_mb_end_add;
+
+    /**
+     * Input width in mbs
+     */
+    WORD32 i4_wd_mbs;
+
+    /**
+     * Input height in mbs
+     */
+    WORD32 i4_ht_mbs;
+
+    /**
+     * Bitstream structure
+     */
+    bitstrm_t *ps_bitstrm;
+
+    /**
+     *  transform_8x8_mode_flag
+     */
+    WORD8 i1_transform_8x8_mode_flag;
+
+    /**
+     *  entropy_coding_mode_flag (NOTE(review): u1_ prefix but declared as signed WORD8)
+     */
+    WORD8 u1_entropy_coding_mode_flag;
+
+    /**
+     * Pointer to the top row nnz for luma
+     */
+    UWORD8 (*pu1_top_nnz_luma)[4];
+
+    /**
+     * left nnz for luma
+     */
+    UWORD32 u4_left_nnz_luma;
+
+    /**
+     * Zero runs (run before) for the mb, held in place as a 16 entry array
+     */
+    UWORD8  au1_zero_run[16];
+
+    /**
+     * Pointer to the top row nnz for chroma
+     */
+    UWORD8 (*pu1_top_nnz_cbcr)[4];
+
+    /**
+     * left nnz for chroma (NOTE(review): u4_ prefix but declared as UWORD8 - confirm width)
+     */
+    UWORD8 u4_left_nnz_cbcr;
+
+    /**
+     * Pointer to frame level mb subblock coeff data
+     */
+    void *pv_pic_mb_coeff_data;
+
+    /**
+     * Pointer to mb subblock coeff data and number of subblocks and scan idx
+     * Incremented each time a coded subblock is processed
+     */
+    void *pv_mb_coeff_data;
+
+    /**
+     * Pointer to frame level mb header data
+     */
+    void *pv_pic_mb_header_data;
+
+    /**
+     * Pointer to mb header data and
+     * incremented each time a coded mb is encoded
+     */
+    void *pv_mb_header_data;
+
+    /**
+     * Error code during parse stage
+     */
+    IH264E_ERROR_T i4_error_code;
+
+    /**
+     * Void pointers to the process job queue and entropy job queue contexts
+     */
+    void *pv_proc_jobq, *pv_entropy_jobq;
+
+    /**
+     * Flag to signal end of frame
+     */
+    WORD32 i4_end_of_frame;
+
+    /**
+     * Abs POC count of the frame
+     */
+     WORD32 i4_abs_pic_order_cnt;
+
+     /**
+      * Pointer to the mb skip run
+      */
+     WORD32 *pi4_mb_skip_run;
+
+     /**
+      * Flag to signal end of sequence
+      */
+     UWORD32 u4_is_last;
+
+     /**
+      * Lower 32bits of time-stamp corresponding to the buffer being encoded
+      */
+     UWORD32 u4_timestamp_low;
+
+     /**
+      * Upper 32bits of time-stamp corresponding to the buffer being encoded
+      */
+     UWORD32 u4_timestamp_high;
+
+     /**
+      * Current Picture count - used for synchronization
+      */
+     WORD32  i4_pic_cnt;
+
+     /**
+      * Number of bits consumed by header for I and P mb types (MAX_MB_TYPE entries)
+      */
+     UWORD32 u4_header_bits[MAX_MB_TYPE];
+
+     /**
+      * Number of bits consumed by residue for I and P mb types (MAX_MB_TYPE entries)
+      */
+     UWORD32 u4_residue_bits[MAX_MB_TYPE];
+
+} entropy_ctxt_t;
+
+/**
+******************************************************************************
+*  @brief      macro block info (intra flag, mb type, csbp and distortion).
+******************************************************************************
+*/
+typedef struct
+{
+    /**
+     * is intra
+     */
+    UWORD16 u2_is_intra;
+
+    /**
+     * mb type
+     */
+    UWORD16 u2_mb_type;
+
+    /**
+     * csbp
+     */
+    UWORD32 u4_csbp;
+
+    /**
+     * mb distortion
+     */
+    WORD32 i4_mb_distortion;
+
+}mb_info_t;
+
+/**
+******************************************************************************
+*  @brief      structure representing the neighbor availability of a mb
+*  or subblk or any other partition (one byte per neighbor: a, b, c, d)
+******************************************************************************
+*/
+typedef struct
+{
+    /**
+     * left blk/subblk/partition (neighbor a)
+     */
+    UWORD8 u1_mb_a;
+
+    /**
+     * top blk/subblk/partition (neighbor b)
+     */
+    UWORD8 u1_mb_b;
+
+    /**
+     * topright blk/subblk/partition (neighbor c)
+     */
+    UWORD8 u1_mb_c;
+
+    /**
+     * topleft blk/subblk/partition (neighbor d)
+     */
+    UWORD8 u1_mb_d;
+
+}block_neighbors_t;
+
+/**
+ ******************************************************************************
+ *  @brief      MB info related variables used during NMB processing
+ ******************************************************************************
+ */
+typedef struct
+{
+    UWORD32 u4_mb_type;
+    UWORD32 u4_min_sad;
+    UWORD32 u4_min_sad_reached;
+    WORD32  i4_mb_cost;
+    WORD32  i4_mb_distortion;
+
+
+    mv_t    s_skip_mv;
+    mv_t    s_pred_mv;
+
+    block_neighbors_t s_ngbr_avbl;
+
+    /*
+     * Pointer to the buffer holding the best subpel data for each MB of NMB
+     */
+    UWORD8 *pu1_best_sub_pel_buf;
+
+    /*
+     * Stride of the best subpel buffer
+     */
+    UWORD32 u4_bst_spel_buf_strd;
+
+}mb_info_nmb_t;
+
+/**
+ ******************************************************************************
+ *  @brief      Pixel processing thread context
+ ******************************************************************************
+ */
+typedef struct
+{
+    /**
+     * entropy context
+     */
+    entropy_ctxt_t s_entropy;
+
+    /**
+     * me context
+     */
+    me_ctxt_t s_me_ctxt;
+
+    /**
+     * Pointer to codec context
+     */
+    codec_t *ps_codec;
+
+    /**
+     * N mb process context
+     */
+    n_mb_process_ctxt_t s_n_mb_ctxt;
+
+    /**
+     * Source pointer to current MB luma
+     */
+    UWORD8 *pu1_src_buf_luma;
+
+    /**
+     * Source pointer to current MB chroma
+     */
+    UWORD8 *pu1_src_buf_chroma;
+
+    /**
+     * Recon pointer to current MB luma
+     */
+    UWORD8 *pu1_rec_buf_luma;
+
+    /**
+     * Recon pointer to current MB chroma
+     */
+    UWORD8 *pu1_rec_buf_chroma;
+
+    /**
+     * Ref pointer to current MB luma
+     */
+    UWORD8 *pu1_ref_buf_luma;
+
+    /**
+     * Ref pointer to current MB chroma
+     */
+    UWORD8 *pu1_ref_buf_chroma;
+
+    /**
+     * pointer to luma plane of input buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_src_buf_luma_base;
+
+    /**
+     * pointer to luma plane of reconstructed buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_rec_buf_luma_base;
+
+    /**
+     * pointer to luma plane of ref buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_ref_buf_luma_base;
+
+    /**
+     * pointer to  chroma plane of input buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_src_buf_chroma_base;
+
+    /*
+     * Buffer for color space conversion of luma
+     */
+    UWORD8 *pu1_y_csc_buf;
+
+    /*
+     * Buffer for color space conversion of chroma
+     */
+
+    UWORD8 *pu1_uv_csc_buf;
+
+    /**
+     * pointer to  chroma plane of reconstructed buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_rec_buf_chroma_base;
+
+    /**
+     * pointer to  chroma plane of ref buffer (base :: mb (0,0))
+     */
+    UWORD8 *pu1_ref_buf_chroma_base;
+
+    /**
+     * Pointer to ME NMB info
+     */
+    mb_info_nmb_t *ps_nmb_info;
+
+    mb_info_nmb_t *ps_cur_mb;
+
+    /**
+     * source stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_src_strd;
+
+    /**
+     * recon stride & ref stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_rec_strd;
+
+    /**
+     * Offset for half pel x plane from the pic buf
+     */
+    UWORD32 u4_half_x_offset;
+
+    /**
+     * Offset for half pel y plane from half x plane
+     */
+    UWORD32 u4_half_y_offset;
+
+    /**
+     * Offset for half pel xy plane from half y plane
+     */
+    UWORD32 u4_half_xy_offset;
+
+    /**
+     * pred buffer pointer (temp buffer 1)
+     */
+    UWORD8 *pu1_pred_mb;
+
+    /**
+     * pred buffer pointer (prediction buffer for intra 16x16)
+     */
+    UWORD8 *pu1_pred_mb_intra_16x16;
+
+    /**
+     * pred buffer pointer (prediction buffer for intra 16x16_plane)
+     */
+    UWORD8 *pu1_pred_mb_intra_16x16_plane;
+
+    /**
+     * pred buffer pointer (prediction buffer for intra chroma)
+     */
+    UWORD8 *pu1_pred_mb_intra_chroma;
+
+    /**
+     * pred buffer pointer (prediction buffer for intra chroma plane)
+     */
+    UWORD8 *pu1_pred_mb_intra_chroma_plane;
+
+    /**
+     * temp. reference buffer ptr for intra 4x4 when rdopt is on
+     */
+    UWORD8 *pu1_ref_mb_intra_4x4;
+
+    /**
+     * prediction buffer stride
+     */
+    WORD32 i4_pred_strd;
+
+    /**
+     * transform buffer pointer (temp buffer 2)
+     */
+    WORD16 *pi2_res_buf;
+
+    /**
+     * temp. transform buffer ptr for intra 4x4 when rdopt is on
+     */
+    WORD16 *pi2_res_buf_intra_4x4;
+
+    /**
+     * transform buffer stride
+     */
+    WORD32 i4_res_strd;
+
+    /**
+     * scratch buffer for inverse transform (temp buffer 3)
+     */
+    void *pv_scratch_buff;
+
+    /**
+     * frame num
+     */
+    WORD32 i4_frame_num;
+
+    /**
+     * start address of frame / sub-frame
+     */
+    WORD32 i4_frame_strt_add;
+
+    /**
+     *  IDR pic
+     */
+    UWORD32 u4_is_idr;
+
+    /**
+     *  idr_pic_id
+     */
+    UWORD32 u4_idr_pic_id;
+
+    /**
+     * Input width in mbs
+     */
+    WORD32 i4_wd_mbs;
+
+    /**
+     * Input height in mbs
+     */
+    WORD32 i4_ht_mbs;
+
+    /**
+     *  slice_type
+     */
+    WORD32  i4_slice_type;
+
+    /**
+     * Current slice idx
+     */
+    WORD32 i4_cur_slice_idx;
+
+    /**
+     * MB's x position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_x;
+
+    /**
+     * MB's y position within a picture in raster scan in MB units
+     */
+    WORD32 i4_mb_y;
+
+    /**
+     * MB's x position within a Slice in raster scan in MB units
+     */
+    WORD32 i4_mb_slice_x;
+
+    /**
+     * MB's y position within a Slice in raster scan in MB units
+     */
+    WORD32 i4_mb_slice_y;
+
+    /**
+     *  mb type
+     */
+    UWORD32 u4_mb_type;
+
+    /**
+     *  is intra
+     */
+    UWORD32 u4_is_intra;
+
+    /**
+     * mb neighbor availability pointer
+     */
+    block_neighbors_t *ps_ngbr_avbl;
+
+    /**
+     * lambda (lagrange multiplier for cost computation)
+     */
+    UWORD32 u4_lambda;
+
+    /**
+     * mb distortion
+     */
+    WORD32 i4_mb_distortion;
+
+    /**
+     * mb cost
+     */
+    WORD32 i4_mb_cost;
+
+    /********************************************************************/
+    /* i4_ngbr_avbl_16x16_mb - ngbr avbl of curr mb                     */
+    /* ai4_neighbor_avail_8x8_subblks - ngbr avbl of all 8x8 sub blks   */
+    /* au1_ngbr_avbl_4x4_subblks - ngbr avbl of all 4x4 sub blks        */
+    /* i4_chroma_neighbor_avail_8x8_mb - chroma ngbr avbl of curr mb    */
+    /********************************************************************/
+    WORD32  i4_ngbr_avbl_16x16_mb;
+    WORD32  ai4_neighbor_avail_8x8_subblks[4];
+    UWORD8  au1_ngbr_avbl_4x4_subblks[16];
+    WORD32  i4_chroma_neighbor_avail_8x8_mb;
+
+    /**
+     * array to store the mode of mb sub blocks
+     */
+    UWORD8  au1_intra_luma_mb_4x4_modes[16];
+
+    /**
+     * array to store the predicted mode of mb sub blks
+     */
+    UWORD8  au1_predicted_intra_luma_mb_4x4_modes[16];
+
+    /**
+     * macro block intra 16x16 mode
+     */
+    UWORD8  u1_l_i16_mode;
+
+    /**
+     * array to store the modes of the 4 intra 8x8 blocks of the macro block
+     */
+    UWORD8  au1_intra_luma_mb_8x8_modes[4];
+
+    /**
+     * intra chroma mb mode
+     */
+    UWORD8  u1_c_i8_mode;
+
+    /********************************************************************/
+    /* array to store pixels from the neighborhood for intra prediction */
+    /* i16 - 16 left pels + 1 top left pel + 16 top pels = 33 pels      */
+    /* i8 - 8 lpels + 1 tlpels + 8 tpels + 8 tr pels = 25 pels          */
+    /* i4 - 4 lpels + 1 tlpels + 4 tpels + 4 tr pels = 13 pels          */
+    /* ic - (8 left pels + 1 top left pel + 8 top pels) * 2 = 34 pels   */
+    /********************************************************************/
+    UWORD8 au1_ngbr_pels[34];
+
+    /**
+     * array for 8x8 intra pels filtering (temp buff 4)
+     */
+    UWORD8 au1_neighbor_pels_i8x8_unfiltered[25];
+
+    /**
+     * Number of sub partitions in the inter pred MB
+     */
+    UWORD32 u4_num_sub_partitions;
+
+    /**
+     *  Pointer to the number of PUs of each MB in a picture
+     */
+    UWORD32 *pu4_mb_pu_cnt;
+
+    /**
+     * Pointer to the array of structures having motion vectors, size
+     *  and position of sub partitions
+     */
+    enc_pu_t *ps_pu;
+
+    /**
+     * Pointer to the predicted motion vector
+     */
+    mv_t *ps_pred_mv;
+
+    /**
+     * top row mb syntax information base
+     * In normal working scenarios, for a given context set,
+     * the mb syntax info pointer is identical across all process threads.
+     * But when the hard bound on slices is enabled, in multi core, the frame
+     * is partitioned in to sections equal to set number of cores and each
+     * partition is run independently. In this scenario, a ctxt set alone will
+     * appear to run multiple frames at a time. For this to occur, the common
+     * pointers across the proc ctxt should disappear.
+     *
+     * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+     * across individual ctxts when byte bnd per slice is enabled.
+     */
+    mb_info_t *ps_top_row_mb_syntax_ele_base;
+
+    /**
+     * top row mb syntax information
+     */
+    mb_info_t *ps_top_row_mb_syntax_ele;
+
+    /**
+     * left mb syntax information
+     */
+    mb_info_t s_left_mb_syntax_ele;
+
+    /**
+     * top left mb syntax information
+     */
+    mb_info_t s_top_left_mb_syntax_ele;
+
+    /**
+     * top left mb syntax information (ME variant, going by the field name - TODO confirm)
+     */
+
+    mb_info_t s_top_left_mb_syntax_ME;
+
+    /**
+     * left mb motion vector (ME variant, going by the field name - TODO confirm)
+     */
+    enc_pu_t s_left_mb_pu_ME;
+
+    /**
+     * top left mb motion vector (ME variant, going by the field name - TODO confirm)
+     */
+    enc_pu_t s_top_left_mb_pu_ME;
+
+
+    /**
+     * mb neighbor availability (structure held by value, not a pointer)
+     */
+    block_neighbors_t s_ngbr_avbl;
+
+    /**
+     * In case the macroblock type is intra, the intra modes of all
+     * partitions for the left mb are stored in the array below
+     */
+    UWORD8 au1_left_mb_intra_modes[16];
+
+    /**
+     * In case the macroblock type is intra, the intra modes of all
+     * partitions for the top mb are stored in the buffer pointed to below
+     *
+     * In normal working scenarios, for a given context set,
+     * the top mb intra modes pointer is identical across all process threads.
+     * But when the hard bound on slices is enabled, in multi core, the frame
+     * is partitioned in to sections equal to set number of cores and each
+     * partition is run independently. In this scenario, a ctxt set alone will
+     * appear to run multiple frames at a time. For this to occur, the common
+     * pointers across the proc ctxt should disappear.
+     *
+     * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+     * across individual ctxts when byte bnd per slice is enabled.
+     */
+    UWORD8 *pu1_top_mb_intra_modes_base;
+
+    /**
+     * In case the macroblock type is intra, the intra modes of all
+     * partitions for the top mb are stored in the buffer pointed to below
+     */
+    UWORD8 *pu1_top_mb_intra_modes;
+
+    /**
+     * Pointer to skip motion vector info
+     */
+    mv_t *ps_skip_mv;
+
+    /**
+     * left mb motion vector
+     */
+    enc_pu_t s_left_mb_pu;
+
+    /**
+     * top left mb motion vector
+     */
+    enc_pu_t s_top_left_mb_pu;
+
+    /**
+     * top row motion vector info base
+     *
+     * In normal working scenarios, for a given context set,
+     * the top row pu pointer is identical across all process threads.
+     * But when the hard bound on slices is enabled, in multi core, the frame
+     * is partitioned in to sections equal to set number of cores and each
+     * partition is run independently. In this scenario, a ctxt set alone will
+     * appear to run multiple frames at a time. For this to occur, the common
+     * pointers across the proc ctxt should disappear.
+     *
+     * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+     * across individual ctxts when byte bnd per slice is enabled.
+     */
+    enc_pu_t *ps_top_row_pu_base;
+
+    /**
+     * top row motion vector info
+     */
+    enc_pu_t *ps_top_row_pu;
+
+    enc_pu_t *ps_top_row_pu_ME;
+
+    /**
+     * coded block pattern
+     */
+    UWORD32 u4_cbp;
+
+    /**
+     * csbp
+     */
+    UWORD32 u4_csbp;
+
+    /**
+     *  number of non zero coeffs
+     */
+    UWORD32 au4_nnz[5];
+
+    /**
+     *  number of non zero coeffs for intra 4x4 when rdopt is on
+     */
+    UWORD32 au4_nnz_intra_4x4[4];
+
+    /**
+     * frame qp & mb qp
+     */
+    UWORD32 u4_frame_qp, u4_mb_qp;
+
+    /**
+     * mb qp previous
+     */
+    UWORD32 u4_mb_qp_prev;
+
+    /**
+     * pointers to quantization parameters for luma & chroma planes (3 entries)
+     */
+    quant_params_t *ps_qp_params[3];
+
+    /**
+     * Pointer to frame level mb subblock coeff data
+     */
+    void *pv_pic_mb_coeff_data;
+
+    /**
+     * Pointer to mb subblock coeff data and number of subblocks and scan idx
+     * Incremented each time a coded subblock is processed
+     */
+    void *pv_mb_coeff_data;
+
+    /**
+     * Pointer to frame level mb header data
+     */
+    void *pv_pic_mb_header_data;
+
+    /**
+     * Pointer to mb header data and
+     * incremented each time a coded mb is encoded
+     */
+    void *pv_mb_header_data;
+
+    /**
+     * Signal that pic_init is called first time
+     */
+    WORD32 i4_first_pic_init;
+
+    /**
+     * Current MV Bank's buffer ID
+     */
+    WORD32 i4_cur_mv_bank_buf_id;
+
+    /**
+     * Void pointers to the process job queue and entropy job queue contexts
+     */
+    void *pv_proc_jobq, *pv_entropy_jobq;
+
+    /**
+     * Number of MBs to be processed in the current Job
+     */
+    WORD32 i4_mb_cnt;
+
+    /**
+     * ID for the current context - Used for debugging
+     */
+    WORD32 i4_id;
+
+    /**
+     * Pointer to current picture buffer structure
+     */
+    pic_buf_t *ps_cur_pic;
+
+    /**
+     * Pointer to current picture's mv buffer structure
+     */
+    mv_buf_t *ps_cur_mv_buf;
+
+    /**
+     * Flag to indicate if ps_proc was initialized at least once in a frame.
+     * This is needed to handle cases where a core starts to handle format
+     * conversion jobs directly
+     */
+    WORD32 i4_init_done;
+
+    /**
+     * Process status: one byte per MB
+     */
+    UWORD8 *pu1_proc_map;
+
+    /**
+     * Deblk status: one byte per MB
+     */
+    UWORD8 *pu1_deblk_map;
+
+    /**
+     * ME status: one byte per MB (NOTE(review): comment was a copy of "Process status" - confirm)
+     */
+    UWORD8 *pu1_me_map;
+
+    /*
+     * Intra refresh mask.
+     * Indicates if an Mb is coded in intra mode within the current AIR interval
+     * NOTE Refreshes after each AIR period
+     * NOTE The map is shared between process contexts
+     */
+    UWORD8 *pu1_is_intra_coded;
+
+    /**
+     * Disable deblock level (0: Enable completely, 3: Disable completely)
+     */
+    UWORD32 u4_disable_deblock_level;
+
+    /**
+     * Deblock context (structure held by value, not a pointer)
+     */
+    deblk_ctxt_t s_deblk_ctxt;
+
+    /**
+     * Points to the array of slice indices which is used to identify the independent
+     * slice to which each MB in a frame belongs.
+     */
+    UWORD8 *pu1_slice_idx;
+
+    /**
+     * Pointer to base of slice header structure array
+     */
+    slice_header_t *ps_slice_hdr_base;
+
+    /**
+     * Number of mb's to process in one loop (entropy, going by the field name - TODO confirm)
+     */
+    WORD32 i4_nmb_ntrpy;
+
+    /**
+     * Number of mb's to process in one loop (ME, going by the field name - TODO confirm)
+     */
+    UWORD32 u4_nmb_me;
+
+    /**
+     * Structure for current input buffer
+     */
+    inp_buf_t s_inp_buf;
+
+    /**
+     * api call cnt
+     */
+    WORD32 i4_encode_api_call_cnt;
+
+    /**
+     * Current Picture count - used for synchronization
+     */
+    WORD32 i4_pic_cnt;
+
+    /**
+      * Intermediate buffer for interpred leaf level functions (NOTE(review): ai16_ prefix but WORD32 elements)
+      */
+    WORD32 ai16_pred1[HP_BUFF_WD * HP_BUFF_HT];
+
+    /**
+     * Reference picture for the current picture
+     * TODO: Only 1 reference assumed currently
+     */
+    pic_buf_t *ps_ref_pic;
+
+    /**
+     * frame info used by RC
+     */
+    frame_info_t s_frame_info;
+
+    /*
+     * NOTE NOT PERSISTENT INSIDE FUNCTIONS
+     * Min sad for current MB
+     * will be populated initially
+     * Once a sad less than or equal to u4_min_sad is reached, the value will be copied to the variable
+     */
+    UWORD32  u4_min_sad;
+
+    /*
+     * indicates whether we have reached the minimum sad or not
+     */
+    UWORD32 u4_min_sad_reached;
+
+    /**
+     * Current error code
+     */
+    WORD32 i4_error_code;
+
+    /*
+     * Enables or disables computation of recon
+     */
+    UWORD32 u4_compute_recon;
+
+   /*
+    * Buffer for holding half_x (1/2,1 - interpolated)
+    * values when halfpel generation
+    *  for the entire plane is not enabled
+    */
+    UWORD8 *pu1_half_x;
+
+    /*
+     * Buffer for holding half_y (1,1/2 - interpolated)
+     * values when halfpel generation
+     *  for the entire plane is not enabled
+     */
+    UWORD8 *pu1_half_y;
+
+    /*
+     * Buffer for holding half_xy (1/2,1/2 - interpolated)
+     * values when halfpel generation
+     *  for the entire plane is not enabled
+     *
+     */
+    UWORD8 *pu1_half_xy;
+
+    /*
+     * Buffer holding best sub pel values
+     */
+    UWORD8 *pu1_best_subpel_buf;
+
+    /*
+     * Stride for buffer holding best sub pel
+     */
+    UWORD32 u4_bst_spel_buf_strd;
+
+} process_ctxt_t;
+
+/**
+ ******************************************************************************
+ *  @brief      Rate control related variables
+ ******************************************************************************
+ */
+typedef struct
+{
+    void *pps_rate_control_api;
+
+    void *pps_frame_time;
+
+    void *pps_time_stamp;
+
+    void *pps_pd_frm_rate;
+
+    /**
+     * frame rate pull down (one entry per context set)
+     */
+    WORD32 pre_encode_skip[MAX_CTXT_SETS];
+
+    /**
+     * skip frame (cbr) (one entry per context set)
+     */
+    WORD32 post_encode_skip[MAX_CTXT_SETS];
+
+    /**
+     * rate control type
+     */
+    rc_type_e e_rc_type;
+
+    /**
+     * pic type
+     */
+    picture_type_e e_pic_type;
+
+    /**
+     * intra cnt in previous frame
+     */
+    WORD32 num_intra_in_prev_frame;
+
+    /**
+     * avg activity of prev frame
+     */
+    WORD32 i4_avg_activity;
+
+}rate_control_ctxt_t;
+
+/**
+ * Codec context
+ */
+struct _codec_t
+{
+    /**
+     * Number of coded pictures
+     */
+    WORD32 i4_coded_pic_cnt;
+
+    /**
+     * Number of encode frame API calls made
+     */
+    WORD32 i4_encode_api_call_cnt;
+
+    /**
+     * Number of pictures encoded
+     */
+    WORD32 i4_pic_cnt;
+
+    /**
+     * Number of threads created
+     */
+    WORD32 i4_proc_thread_cnt;
+
+    /**
+     * Mutex used to keep the control calls thread-safe
+     */
+    void *pv_ctl_mutex;
+
+    /**
+     * Current active config parameters
+     */
+    cfg_params_t s_cfg;
+
+    /**
+     * Array containing the config parameter sets
+     */
+    cfg_params_t as_cfg[MAX_ACTIVE_CONFIG_PARAMS];
+
+    /**
+     * Color format used by encoder internally
+     */
+    IV_COLOR_FORMAT_T e_codec_color_format;
+
+    /**
+     * source stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_src_strd;
+
+    /**
+     * recon stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_rec_strd;
+
+    /**
+     * Flag to enable/disable deblocking of a frame
+     */
+    WORD32 i4_disable_deblk_pic;
+
+    /**
+     * Number of continuous frames where deblocking was disabled
+     */
+    WORD32 i4_disable_deblk_pic_cnt;
+
+    /**
+     * frame type
+     */
+    PIC_TYPE_T pic_type;
+
+    /**
+     * frame qp
+     */
+    UWORD32 u4_frame_qp;
+
+    /**
+     * frame num
+     */
+    WORD32 i4_frame_num;
+
+    /**
+     *  slice_type
+     */
+    WORD32  i4_slice_type;
+
+    /*
+     * Force current frame to specific type
+     */
+    IV_PICTURE_CODING_TYPE_T force_curr_frame_type;
+
+    /**
+     *  IDR pic
+     */
+    UWORD32 u4_is_idr;
+
+    /**
+     *  idr_pic_id
+     */
+    WORD32 i4_idr_pic_id;
+
+    /**
+     * Flush mode
+     */
+    WORD32 i4_flush_mode;
+
+    /**
+     * Encode header mode
+     */
+    WORD32 i4_header_mode;
+
+    /**
+     * Flag to indicate if header has already
+     * been generated when i4_api_call_cnt 0
+     */
+    UWORD32 u4_header_generated;
+
+    /**
+     * Encode generate header
+     */
+    WORD32 i4_gen_header;
+
+    /**
+     * To signal successful completion of init
+     */
+    WORD32 i4_init_done;
+
+    /**
+     * To signal that at least one picture was decoded
+     */
+    WORD32 i4_first_pic_done;
+
+    /**
+     * Reset flag - Codec is reset if this flag is set
+     */
+    WORD32 i4_reset_flag;
+
+    /**
+     * Current error code
+     */
+    WORD32 i4_error_code;
+
+    /**
+     * threshold residue
+     */
+    WORD32 u4_thres_resi;
+
+    /**
+     * disable intra inter gating
+     */
+    UWORD32 u4_inter_gate;
+
+    /**
+     * Holds mem records passed during init.
+     * This will be used to return the mem records during retrieve call
+     */
+    iv_mem_rec_t *ps_mem_rec_backup;
+
+    /**
+     * Flag to determine if the entropy thread is active
+     */
+    volatile UWORD32 au4_entropy_thread_active[MAX_CTXT_SETS];
+
+    /**
+     * Mutex used to keep the entropy calls thread-safe
+     */
+    void *pv_entropy_mutex;
+
+    /**
+     * Job queue buffer base
+     */
+    void *pv_proc_jobq_buf, *pv_entropy_jobq_buf;
+
+    /**
+     * Job Queue mem tab size
+     */
+    WORD32 i4_proc_jobq_buf_size, i4_entropy_jobq_buf_size;
+
+    /**
+     * Memory for MV Bank buffer manager
+     */
+    void *pv_mv_buf_mgr_base;
+
+    /**
+     * MV Bank buffer manager
+     */
+    void *pv_mv_buf_mgr;
+
+    /**
+     * Pointer to MV Buf structure array
+     */
+    void *ps_mv_buf;
+
+    /**
+     * Base address for Motion Vector bank buffer
+     */
+    void *pv_mv_bank_buf_base;
+
+    /**
+     * MV Bank size allocated
+     */
+    WORD32 i4_total_mv_bank_size;
+
+    /**
+     * Memory for Picture buffer manager for reference pictures
+     */
+    void *pv_ref_buf_mgr_base;
+
+    /**
+     * Picture buffer manager for reference pictures
+     */
+    void *pv_ref_buf_mgr;
+
+    /**
+     * Number of reference buffers added to the buffer manager
+     */
+    WORD32 i4_ref_buf_cnt;
+
+    /**
+     * Pointer to Pic Buf structure array
+     */
+    void *ps_pic_buf;
+
+    /**
+     * Base address for Picture buffer
+     */
+    void *pv_pic_buf_base;
+
+    /**
+     * Total pic buffer size allocated
+     */
+    WORD32 i4_total_pic_buf_size;
+
+    /**
+     * Memory for Buffer manager for output buffers
+     */
+     void *pv_out_buf_mgr_base;
+
+    /**
+     * Buffer manager for output buffers
+     */
+     void *pv_out_buf_mgr;
+
+    /**
+     * Current output buffer's buffer ID
+     */
+    WORD32 i4_out_buf_id;
+
+    /**
+     * Number of output buffers added to the buffer manager
+     */
+    WORD32 i4_out_buf_cnt;
+
+    /**
+     * Memory for Picture buffer manager for input buffers
+     */
+     void *pv_inp_buf_mgr_base;
+
+    /**
+     * Picture buffer manager for input buffers
+     */
+     void *pv_inp_buf_mgr;
+
+    /**
+     * Current input buffer's buffer ID
+     */
+    WORD32 i4_inp_buf_id;
+
+    /**
+     * Number of input buffers added to the buffer manager
+     */
+    WORD32 i4_inp_buf_cnt;
+
+    /**
+     * Current input buffer
+     */
+    pic_buf_t *ps_inp_buf;
+
+    /**
+     * Pointer to dpb manager structure
+     */
+    void *pv_dpb_mgr;
+
+    /**
+     * Pointer to base of Sequence parameter set structure array
+     */
+    sps_t *ps_sps_base;
+
+    /**
+     * Pointer to base of Picture parameter set structure array
+     */
+    pps_t *ps_pps_base;
+
+    /**
+     *  seq_parameter_set_id
+     */
+    WORD32 i4_sps_id;
+
+    /**
+     *  pic_parameter_set_id
+     */
+    WORD32 i4_pps_id;
+
+    /**
+     * Pointer to base of slice header structure array
+     */
+    slice_header_t *ps_slice_hdr_base;
+
+    /**
+     * packed residue coeff data size for 1 row of mbs
+     */
+    UWORD32 u4_size_coeff_data;
+
+    /**
+     * packed header data size for 1 row of mbs
+     */
+    UWORD32 u4_size_header_data;
+
+    /**
+     * Processing context - One for each processing thread
+     * Create two sets, each set used for alternate frames
+     */
+    process_ctxt_t as_process[MAX_PROCESS_CTXT];
+
+    /**
+     * Thread handle for each of the processing threads
+     */
+    void *apv_proc_thread_handle[MAX_PROCESS_THREADS];
+
+    /**
+     * Thread created flag for each of the processing threads
+     */
+    WORD32 ai4_process_thread_created[MAX_PROCESS_THREADS];
+
+    /**
+     * Void pointer to process job context
+     */
+    void *pv_proc_jobq, *pv_entropy_jobq;
+
+    /**
+     * Number of MBs processed together for better instruction cache handling
+     */
+    WORD32 i4_proc_nmb;
+
+    /**
+     * Previous POC lsb
+     */
+    WORD32 i4_prev_poc_lsb;
+
+    /**
+     * Previous POC msb
+     */
+    WORD32 i4_prev_poc_msb;
+
+    /**
+     * Max POC lsb that has arrived till now
+     */
+    WORD32 i4_max_prev_poc_lsb;
+
+    /**
+     * Context for format conversion
+     */
+    fmt_conv_t s_fmt_conv;
+
+    /**
+     * Absolute pic order count
+     */
+    WORD32 i4_abs_pic_order_cnt;
+
+    /**
+     *  Pic order count of lsb
+     */
+    WORD32 i4_pic_order_cnt_lsb;
+
+    /**
+     * Array giving current picture being processed in each context set
+     */
+    WORD32 ai4_pic_cnt[MAX_CTXT_SETS];
+
+    /*
+     * Min sad to search for
+     */
+    UWORD32 u4_min_sad;
+
+    /**
+     * Reference picture set
+     */
+    ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS];
+
+    /*
+     * Air pic cnt
+     * Contains the number of pictures that have been encoded with air
+     * This value is moudulo air refresh period
+     */
+    WORD32 i4_air_pic_cnt;
+
+    /*
+     * Intra refresh map
+     * Stores the frames at which intra refresh should occur for a MB
+     */
+    UWORD16 *pu2_intr_rfrsh_map;
+
+    /*
+     * Alternate reference frames
+     * Indicates if the current frame is used as a reference frame
+     */
+    UWORD32 u4_is_curr_frm_ref;
+
+    /*
+     * Memory for color space conversion for luma plane
+     */
+    UWORD8 *pu1_y_csc_buf_base;
+
+    /*
+     * Memory for color space conversion foe chroma plane
+     */
+    UWORD8 *pu1_uv_csc_buf_base;
+
+    /**
+     * Function pointers for intra pred leaf level functions luma
+     */
+    pf_intra_pred apf_intra_pred_16_l[MAX_I16x16];
+    pf_intra_pred apf_intra_pred_8_l[MAX_I8x8];
+    pf_intra_pred apf_intra_pred_4_l[MAX_I4x4];
+
+    /**
+     * Function pointers for intra pred leaf level functions chroma
+     */
+    pf_intra_pred apf_intra_pred_c[MAX_CH_I8x8];
+
+    /**
+     * luma core coding function pointer
+     */
+    UWORD8 (*luma_energy_compaction[4])(process_ctxt_t *ps_proc);
+
+    /**
+     * chroma core coding function pointer
+     */
+    UWORD8 (*chroma_energy_compaction[2])(process_ctxt_t *ps_proc);
+
+    /**
+     * forward transform for intra blk of mb type 16x16
+     */
+    ih264_luma_16x16_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_16x16;
+
+    /**
+     * inverse transform for intra blk of mb type 16x16
+     */
+    ih264_luma_16x16_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_16x16;
+
+    /**
+     * forward transform for 4x4 blk luma
+     */
+    ih264_resi_trans_quant_ft *pf_resi_trans_quant_4x4;
+
+    /**
+     * forward transform for 4x4 blk luma
+     */
+    ih264_resi_trans_quant_ft *pf_resi_trans_quant_chroma_4x4;
+
+    /*
+     * hadamard transform and quant for a 4x4 block
+     */
+    ih264_hadamard_quant_ft *pf_hadamard_quant_4x4;
+
+    /*
+     *  hadamard transform and quant for a 4x4 block
+     */
+    ih264_hadamard_quant_ft *pf_hadamard_quant_2x2_uv;
+
+    /**
+     * inverse transform for 4x4 blk
+     */
+    ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4;
+
+    /**
+     * inverse transform for chroma 4x4 blk
+     */
+    ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
+
+    /**
+     * inverse transform for 4x4 blk with only single dc coeff
+     */
+    ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4_dc;
+
+    /**
+     * inverse transform for chroma 4x4 blk with only single dc coeff
+     */
+    ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
+
+    /*
+     * Inverse hadamard transform and iquant for a 4x4 block
+     */
+    ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
+
+    /*
+     * Inverse hadamard transform and iquant for a 2x2 chroma (uv) block
+     */
+    ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_2x2_uv;
+
+    /*
+     * Function for interleave copy
+     */
+    ih264_interleave_copy_ft *pf_interleave_copy;
+
+    /**
+     * forward transform for 8x8 blk
+     */
+    ih264_resi_trans_quant_ft *pf_resi_trans_quant_8x8;
+
+    /**
+     * inverse transform for 8x8 blk
+     *
+     * counterpart of the forward 8x8 transform pointer
+     * pf_resi_trans_quant_8x8 declared above
+     */
+    ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_8x8;
+
+    /**
+     * forward transform for chroma MB
+     */
+    ih264_chroma_8x8_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_8x8_chroma;
+
+    /**
+     * inverse transform for chroma MB
+     */
+    ih264_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_8x8_chroma;
+
+    /**
+     * deblock vertical luma edge with blocking strength 4
+     */
+    ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
+
+    /**
+     * deblock vertical chroma edge with blocking strength 4
+     */
+    ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
+
+    /**
+     * deblock vertical luma edge with blocking strength less than 4
+     */
+    ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
+
+    /**
+     * deblock vertical chroma edge with blocking strength less than 4
+     */
+    ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
+
+    /**
+     * deblock horizontal luma edge with blocking strength 4
+     */
+    ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
+
+    /**
+     * deblock horizontal chroma edge with blocking strength 4
+     */
+    ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
+
+    /**
+     * deblock horizontal luma edge with blocking strength less than 4
+     */
+    ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
+
+    /**
+     * deblock horizontal chroma edge with blocking strength less than 4
+     */
+    ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
+
+
+    /**
+     * functions for padding
+     */
+    pf_pad pf_pad_top;
+    pf_pad pf_pad_bottom;
+    pf_pad pf_pad_left_luma;
+    pf_pad pf_pad_left_chroma;
+    pf_pad pf_pad_right_luma;
+    pf_pad pf_pad_right_chroma;
+
+    /**
+     * Inter pred leaf level functions
+     */
+    ih264_inter_pred_luma_ft    *pf_inter_pred_luma_copy;
+    ih264_inter_pred_luma_ft    *pf_inter_pred_luma_horz;
+    ih264_inter_pred_luma_ft    *pf_inter_pred_luma_vert;
+    pf_inter_pred_luma_bilinear  pf_inter_pred_luma_bilinear;
+    ih264_inter_pred_chroma_ft  *pf_inter_pred_chroma;
+
+    /**
+     * fn ptrs for compute sad routines
+     */
+    ime_compute_sad_ft *apf_compute_sad_16x16[2];
+    ime_compute_sad_ft *pf_compute_sad_16x8;
+
+    /**
+     * fn ptrs for memory handling operations
+     */
+    pf_memcpy pf_mem_cpy;
+    pf_memset pf_mem_set;
+    pf_memcpy_mul8 pf_mem_cpy_mul8;
+    pf_memset_mul8 pf_mem_set_mul8;
+
+    /**
+     * intra mode eval -encoder level function
+     */
+    pf_evaluate_intra_modes pf_ih264e_evaluate_intra16x16_modes;
+    pf_evaluate_intra_modes pf_ih264e_evaluate_intra_chroma_modes;
+    pf_evaluate_intra_4x4_modes pf_ih264e_evaluate_intra_4x4_modes;
+
+    /* Half pel generation function - encoder level
+     *
+     */
+    pf_sixtapfilter_horz pf_ih264e_sixtapfilter_horz;
+    pf_sixtap_filter_2dvh_vert pf_ih264e_sixtap_filter_2dvh_vert;
+
+    /**
+     * color space conversion from YUV 420P to YUV 420SP
+     */
+    pf_fmt_conv_420p_to_420sp pf_ih264e_conv_420p_to_420sp;
+
+
+    /**
+     * color space conversion from YUV 422 interleaved to YUV 420SP
+     */
+    pf_fmt_conv_422ile_to_420sp pf_ih264e_fmt_conv_422i_to_420sp;
+
+    /**
+     * write mb layer for a given slice I, P, B
+     */
+    IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt );
+
+
+    /**
+     * Output buffer
+     */
+    out_buf_t as_out_buf[MAX_CTXT_SETS];
+
+    /**
+     * recon buffer
+     */
+    rec_buf_t as_rec_buf[MAX_CTXT_SETS];
+
+    /**
+     * rate control context
+     */
+    rate_control_ctxt_t s_rate_control;
+};
+#endif /* IH264E_STRUCTS_H_ */
diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c
new file mode 100755
index 0000000..a6a7f3c
--- /dev/null
+++ b/encoder/ih264e_time_stamp.c
@@ -0,0 +1,748 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_time_stamp.c
+*
+* @brief
+*  This file contains functions used for source and target time stamp management
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - gcd()
+*  - ih264e_get_range()
+*  - ih264e_frame_time_get_init_free_memtab()
+*  - ih264e_init_frame_time()
+*  - ih264e_should_src_be_skipped()
+*  - ih264e_time_stamp_get_init_free_memtab()
+*  - ih264e_init_time_stamp()
+*  - ih264e_update_time_stamp()
+*  - ih264e_frame_time_get_src_frame_rate()
+*  - ih264e_frame_time_get_tgt_frame_rate()
+*  - ih264e_frame_time_get_src_ticks()
+*  - ih264e_frame_time_get_tgt_ticks()
+*  - ih264e_frame_time_get_src_time()
+*  - ih264e_frame_time_get_tgt_time()
+*  - ih264e_frame_time_update_src_frame_rate()
+*  - ih264e_frame_time_update_tgt_frame_rate()
+*  - ih264_time_stamp_update_frame_rate()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* user include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_rate_control_api.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to compute gcd of two numbers
+*
+* @par   Description
+*  Function to compute gcd of two numbers
+*
+* @param[in] i4_x
+*  value 1
+*
+* @param[in] i4_y
+*  value 2
+*
+* @returns
+*  GCD(value 1, value 2)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 gcd(WORD32 i4_x, WORD32 i4_y)
+{
+    WORD32 i4_rem;
+    /* order the pair (i4_x <= i4_y) through a temporary; an add/subtract
+     * swap can overflow for large signed operands */
+    if (i4_x > i4_y)
+    {
+        i4_rem = i4_x; i4_x = i4_y; i4_y = i4_rem;
+    }
+    /* Euclid's algorithm: (x, y) -> (y, x mod y) until the remainder is 0 */
+    while (i4_y != 0)
+    {
+        i4_rem = i4_x % i4_y;
+        i4_x = i4_y;
+        i4_y = i4_rem;
+    }
+    return (i4_x);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to determine number of bits required to represent a given
+*  value
+*
+* @par   Description
+*  This function determines the number of bits required to represent the given
+*  value. It is used to find out number of bits to read when the data size is
+*  not fixed (e.g. vop_time_increment_resolution).
+*
+* @param[in] u4_value
+*  Value for which the number of bits required to represent is to be determined
+*
+* @param[in] u1_no_of_bits
+*  Represents the value's word type = 8/16/32
+*
+* @returns
+*  The number of bits required to represent the given number
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static UWORD8 ih264e_get_range(UWORD32 u4_value, UWORD8 u1_no_of_bits)
+{
+    UWORD8 count;
+    UWORD32 temp;
+    const UWORD8 u1_half_bits = (UWORD8) (u1_no_of_bits >> 1);
+
+    if (u4_value > (UWORD32) ((1 << u1_half_bits) - 1))
+    {
+        /* Value spills into the upper half of the word: probe that half
+         * from the MSB down. The mask is built from an unsigned one since
+         * shifting a signed 1 into bit 31 overflows int */
+        temp = ((UWORD32) 1 << (u1_no_of_bits - 1));
+        for (count = 0; count < u1_half_bits; count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) (u1_no_of_bits - count);
+            }
+            temp >>= 1;
+        }
+        return 0;
+    }
+    else
+    {
+        /* Value fits in the lower half: probe from the top bit of that half.
+         * Bit 0 is never tested, so both 0 and 1 report a range of 1 */
+        temp = ((UWORD32) 1 << (u1_half_bits - 1));
+        for (count = 0; count < (u1_half_bits - 1); count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) (u1_half_bits - count);
+            }
+            temp >>= 1;
+        }
+        return 1;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to init frame time memtabs
+*
+* @par Description
+*  Function to init frame time memtabs
+*
+* @param[in] pps_frame_time
+*  Pointer to frame time contexts
+*
+* @param[in] ps_memtab
+*  Pointer to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtabs/init memtabs)
+*
+* @returns
+*  number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    /* stand-in context used while the caller has no state memory yet */
+    static frame_time_t s_dummy_frame_time;
+    WORD32 i4_num_memtabs = 0;
+
+    /* In the GET_NUM_MEMTAB and FILL_MEMTAB passes the handle is pointed at
+     * the stand-in so that dereferencing it cannot cause issues */
+    if (GET_NUM_MEMTAB == e_func_type || FILL_MEMTAB == e_func_type)
+    {
+        *pps_frame_time = &s_dummy_frame_time;
+    }
+    /* the one frame time memtab is handled in every pass but GET_NUM_MEMTAB */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[i4_num_memtabs], sizeof(frame_time_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**) pps_frame_time, e_func_type);
+    }
+    return (i4_num_memtabs + 1);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to init frame time context
+*
+* @par Description
+*  Frame time structure stores the time of the source and the target frames to
+*  be encoded. Based on the time we decide whether or not to encode the source
+*  frame
+*
+* @param[in] ps_frame_time
+*  Pointer Frame time context
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+                            UWORD32 u4_src_frm_rate,
+                            UWORD32 u4_tgt_frm_rate)
+{
+    /* The common time base is the LCM of the two rates. Divide by the GCD
+     * before multiplying: the GCD divides the source rate exactly, and the
+     * full product of two rates can wrap around 32 bits */
+    UWORD32 u4_gcd = (UWORD32) gcd(u4_src_frm_rate, u4_tgt_frm_rate);
+    ps_frame_time->common_time_base = (u4_src_frm_rate / u4_gcd)
+                    * u4_tgt_frm_rate;
+
+    /* Ticks of the common time base elapsed per source / target frame */
+    ps_frame_time->u4_src_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_src_frm_rate;
+    ps_frame_time->u4_tgt_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_tgt_frm_rate;
+
+    /* Initialise the source and target times to 0 (RESET) */
+    ps_frame_time->u4_src_frm_time = 0;
+    ps_frame_time->u4_tgt_frm_time = 0;
+
+    /* Initialize the number of frms not to be skipped to 0 */
+    ps_frame_time->u4_num_frms_dont_skip = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to check if frame can be skipped
+*
+* @par Description
+*  Based on the source and target frame time and the delta time stamp
+*  we decide whether to code the source or not.
+*  This is based on the assumption
+*  that the source frame rate is greater than the target frame rate.
+*  Updates the time_stamp structure
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+*  Time stamp difference between frames
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+*  Flag to indicate if frame is already skipped by application
+*
+* @returns
+*  Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+                                    UWORD32 u4_delta_time_stamp,
+                                    UWORD32 *pu4_frm_not_skipped_for_dts)
+{
+    UWORD8 u1_skip;
+
+    /* The source frame is dropped when the target clock is at least one
+     * whole source increment ahead of the source clock */
+    u1_skip = (UWORD8) ((ps_frame_time->u4_tgt_frm_time
+                    > ps_frame_time->u4_src_frm_time)
+                    && (ps_frame_time->u4_tgt_frm_time
+                    >= (ps_frame_time->u4_src_frm_time
+                    + ps_frame_time->u4_src_frm_time_incr)));
+
+    /* the source clock ticks on every call ... */
+    ps_frame_time->u4_src_frm_time += ps_frame_time->u4_src_frm_time_incr;
+
+    /* ... while the target clock ticks only for frames that get coded */
+    if (0 == u1_skip)
+    {
+        ps_frame_time->u4_tgt_frm_time += ps_frame_time->u4_tgt_frm_time_incr;
+    }
+
+    /* With consistent increments both clocks land on the common time base
+     * together; rewind them to zero at that point */
+    if (((WORD32) ps_frame_time->u4_src_frm_time == ps_frame_time->common_time_base)
+         && ((WORD32) ps_frame_time->u4_tgt_frm_time == ps_frame_time->common_time_base))
+    {
+        ps_frame_time->u4_src_frm_time = 0;
+        ps_frame_time->u4_tgt_frm_time = 0;
+    }
+
+    /* Running count of frames that need not be skipped, kept to take care
+     * of the delta time stamp (frames already skipped by the application) */
+    ps_frame_time->u4_num_frms_dont_skip += (u4_delta_time_stamp - 1);
+
+    if (!u1_skip || !ps_frame_time->u4_num_frms_dont_skip)
+    {
+        /* no skip decision was overridden for this frame */
+        *pu4_frm_not_skipped_for_dts = 0;
+    }
+    else
+    {
+        /* a frame was already skipped outside: code this one although the
+         * target frame rate asked for a skip, and report that to the caller */
+        u1_skip = 0;
+        *pu4_frm_not_skipped_for_dts = 1;
+        ps_frame_time->u4_num_frms_dont_skip -= 1;
+    }
+
+    return (u1_skip);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize time stamp memtabs
+*
+* @par Description
+*  Function to initialize time stamp memtabs
+*
+* @param[in] pps_time_stamp
+*  Pointer to time stamp context
+*
+* @param[in] ps_memtab
+*  Pointer to memtab
+*
+* @param[in] e_func_type
+*  Function type (Get memtab/ init memtab)
+*
+* @returns
+*   number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    /* stand-in context used while the caller has no state memory yet */
+    static time_stamp_t s_dummy_time_stamp;
+    WORD32 i4_num_memtabs = 0;
+
+    /* In the GET_NUM_MEMTAB and FILL_MEMTAB passes the handle is pointed at
+     * the stand-in so that dereferencing it cannot cause issues */
+    if (GET_NUM_MEMTAB == e_func_type || FILL_MEMTAB == e_func_type)
+    {
+        *pps_time_stamp = &s_dummy_time_stamp;
+    }
+    /* the one time stamp memtab is handled in every pass but GET_NUM_MEMTAB */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[i4_num_memtabs], sizeof(time_stamp_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**) pps_time_stamp, e_func_type);
+    }
+    return (i4_num_memtabs + 1);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize time stamp context
+*
+* @par Description
+*  Time stamp structure stores the time stamp data that
+*  needs to be sent in to the header of MPEG4. Based on the
+*  max target frame rate the vop_time increment resolution is set
+*  so as to support all the frame rates below max frame rate.
+*  A support till the third decimal point is assumed.
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_t *ps_time_stamp,
+                            UWORD32 u4_max_frm_rate,
+                            UWORD32 u4_src_frm_rate)
+{
+    /* The max frame rate is expected to be at most 60000; a larger value
+     * is halved (divided by two) and the scaling is recorded in the context */
+    ps_time_stamp->is_max_frame_rate_scaled = (u4_max_frm_rate > 60000) ? 1 : 0;
+    if (ps_time_stamp->is_max_frame_rate_scaled)
+    {
+        u4_max_frm_rate >>= 1;
+    }
+
+    /* Resolution is the (possibly halved) max rate; range is the number of
+     * bits ih264e_get_range() reports for that resolution */
+    ps_time_stamp->u4_vop_time_incr_res = u4_max_frm_rate;
+    ps_time_stamp->u4_vop_time_incr_range = ih264e_get_range(u4_max_frm_rate, 32);
+
+    /* Per-frame increment; scaled by 1000 since frm rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr = (ps_time_stamp->u4_vop_time_incr_res * 1000) / u4_src_frm_rate;
+    ps_time_stamp->u4_prev_tgt_vop_time = 0;
+    ps_time_stamp->u4_cur_tgt_vop_time = 0;
+    ps_time_stamp->u4_vop_time = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+*  Vop time is incremented by increment value. When vop time goes
+*  more than the vop time resolution set the modulo time base to
+*  1 and reduce the vop time by vop time resolution so that the
+*  excess value is present in vop time and get accumulated over time
+*  so that the corresponding frame rate is achieved at a average of
+*  1000 seconds
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_t *ps_time_stamp)
+{
+    /* Keep a copy of the vop time as it was before this update, since get
+     * time stamp is called only after the update has been applied */
+    ps_time_stamp->u4_cur_tgt_vop_time = ps_time_stamp->u4_vop_time;
+    /* step by one increment, wrapping at the vop time increment resolution */
+    ps_time_stamp->u4_vop_time += ps_time_stamp->u4_vop_time_incr;
+    if (!(ps_time_stamp->u4_vop_time < ps_time_stamp->u4_vop_time_incr_res))
+    {
+        ps_time_stamp->u4_vop_time -= ps_time_stamp->u4_vop_time_incr_res;
+    }
+}
+
+/****************************************************************************
+                       Run-Time Modifying functions
+****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+*  The rate is derived from the context as
+*  common time base / source frame time increment
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  source frame rate
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time)
+{
+    WORD32 i4_src_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_src_frm_time_incr;
+    return (i4_src_frm_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+*  The rate is derived from the context as
+*  common time base / target frame time increment
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  target frame rate
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time)
+{
+    WORD32 i4_tgt_frm_rate = ps_frame_time->common_time_base / ps_frame_time->u4_tgt_frm_time_incr;
+    return (i4_tgt_frm_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+*  Reads u4_src_frm_time_incr, the number of ticks between two source
+*  frames, from the frame time context
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  source time increment
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time)
+{
+    WORD32 i4_src_ticks = ps_frame_time->u4_src_frm_time_incr;
+    return (i4_src_ticks);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+*  Reads u4_tgt_frm_time_incr, the number of ticks between two target
+*  frames, from the frame time context
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  target time increment
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time)
+{
+    WORD32 i4_tgt_ticks = ps_frame_time->u4_tgt_frm_time_incr;
+    return (i4_tgt_ticks);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+*  Reads the source frame time (u4_src_frm_time) currently held in the
+*  frame time context
+*
+* @param[in] frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  src frame time
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time)
+{
+    WORD32 i4_src_time = frame_time->u4_src_frm_time;
+    return (i4_src_time);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+*  Reads the target frame time (u4_tgt_frm_time) currently held in the
+*  frame time context
+*
+* @param[in] frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  tgt frame time
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time)
+{
+    WORD32 i4_tgt_time = frame_time->u4_tgt_frm_time;
+    return (i4_tgt_time);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+*  Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @param[in] src_frm_rate
+*  source frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 src_frm_rate)
+{
+    /* Only the source rate changes: read the tgt_frm rate in effect back
+     * from the context (common_time_base / target increment) first */
+    const WORD32 i4_cur_tgt_rate = ps_frame_time->common_time_base
+                    / ps_frame_time->u4_tgt_frm_time_incr;
+
+    /* re-initialise frame_time for the new source / old target rate pair */
+    ih264e_init_frame_time(ps_frame_time, src_frm_rate, i4_cur_tgt_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update target frame time with a new target frame rate
+*
+* @par Description
+*  Function to update target frame time with a new target frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+*  target frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 tgt_frm_rate)
+{
+    /* Only the target rate changes: read the src_frm rate in effect back
+     * from the context (common_time_base / source increment) first */
+    const WORD32 i4_cur_src_rate = ps_frame_time->common_time_base
+                    / ps_frame_time->u4_src_frm_time_incr;
+
+    /* re-initialise frame_time for the old source / new target rate pair */
+    ih264e_init_frame_time(ps_frame_time, i4_cur_src_rate, tgt_frm_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update the time stamp increment for a new source rate
+*
+* @par Description
+*  When the frame rate changes the time increment is modified by appropriate
+*  ticks: increment = (vop time increment resolution * 1000) / source rate
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+*  source frame rate
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp,
+                                        UWORD32 src_frm_rate)
+{
+    /* resolution is scaled by 1000 since frm rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr = (ps_time_stamp->u4_vop_time_incr_res * 1000) / src_frm_rate;
+}
diff --git a/encoder/ih264e_time_stamp.h b/encoder/ih264e_time_stamp.h
new file mode 100755
index 0000000..1ee559d
--- /dev/null
+++ b/encoder/ih264e_time_stamp.h
@@ -0,0 +1,498 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_time_stamp.h
+*
+* @brief
+*  This file contains function declarations used for managing input and output
+*  frame time stamps
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TIME_STAMP_H_
+#define IH264E_TIME_STAMP_H_
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/**
+ * Parameters for Src/Tgt frames that are encoded.
+ * All times are counted in ticks of a time base common to the source and
+ * the target frame rate.
+ */
+typedef struct frame_time_t
+{
+    /* common time base(=LCM) between source and target frame rate (in ticks)*/
+    WORD32 common_time_base;
+
+    /* number of ticks between two source frames */
+    UWORD32 u4_src_frm_time_incr;
+
+    /* number of ticks between two target frames */
+    UWORD32 u4_tgt_frm_time_incr;
+
+    /* Source frame time - measured as modulo of common time base
+     and incremented by src_frm_time_incr */
+    UWORD32 u4_src_frm_time;
+
+    /* Target frame time - measured as modulo of common time base
+     and incremented by tgt_frm_time_incr */
+    UWORD32 u4_tgt_frm_time;
+
+    /* Number of frames not to be skipped while maintaining
+     tgt_frm_rate due to delta_time_stamp  */
+    UWORD32 u4_num_frms_dont_skip;
+}frame_time_t;
+
+typedef struct frame_time_t *frame_time_handle;
+
+/**
+ *  Parameters that go in the bitstream based on tgt_frm_rate
+ *   1) Initialize the vop_time_incr_res with the max_frame_rate (in frames per 1000 bits)
+ *      - To represent all kinds of frame rates
+ *      NOTE(review): "per 1000 bits" is probably meant to read "per 1000 seconds" - confirm
+ *   2) Decide the vop_time_incr based on the source frame rate
+ *      - The decoder would like to know which source frame is encoded i.e. the source time
+ *    id of the target frame encoded and thereby adjusting its time of delay
+ *   3) vop_time increments every source frame and whenever a frame is encoded (target frame),
+ *      the encoder queries the vop time of the source frame and sends it in the bit stream.
+ *   4) Since the Source frame skip logic is taken care by the frame_time module, whenever the
+ *      encoder queries the time stamp module (which gets updated outside the encoder) the
+ *      time stamp module would have the source time
+ */
+typedef struct time_stamp_t
+{
+    /* vop_time_incr_res is an integer that indicates
+     the number of evenly spaced subintervals, called ticks,
+     within one modulo time. */
+    UWORD32 u4_vop_time_incr_res;
+
+    /* number of bits to represent vop_time_incr_res */
+    UWORD32 u4_vop_time_incr_range;
+
+    /* The number of ticks elapsed between two source vops */
+    UWORD32 u4_vop_time_incr;
+
+    /* incremented by vop_time_incr for every source frame.
+     Represents the time offset after a modulo_time_base = 1 is sent
+     in bit stream*/
+    UWORD32 u4_vop_time;
+
+    /* Temporary copy of the vop time (and modulo time base). It is kept
+     because update is called before query (get time stamp), so the value
+     to be reported has to be saved in this extra variable */
+    UWORD32 u4_cur_tgt_vop_time;
+
+    /* NOTE(review): not documented by the author; presumably the
+     u4_cur_tgt_vop_time of the previous target frame - confirm */
+    UWORD32 u4_prev_tgt_vop_time;
+
+    /* This variable is set to 1 if we scale max frame rate by a factor of 2.
+     For mpeg4 standard, we just have 16bits and we can't accommodate more than 60000 as frame rate.
+     So we scale it and work with it */
+    WORD32 is_max_frame_rate_scaled;
+} time_stamp_t;
+
+typedef struct time_stamp_t *time_stamp_handle;
+
+/*****************************************************************************/
+/* Extern function declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to init frame time context
+*
+* @par Description
+*  Frame time structure stores the time of the source and the target frames to
+*  be encoded. Based on the time we decide whether or not to encode the source
+*  frame
+*
+* @param[in] ps_frame_time
+*  Pointer Frame time context
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+                            UWORD32 u4_src_frm_rate,
+                            UWORD32 u4_tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to check if frame can be skipped
+*
+* @par Description
+*  Based on the source and target frame time and the delta time stamp
+*  we decide whether to code the source or not.
+*  This is based on the assumption
+*  that the source frame rate is greater than target frame rate.
+*  Updates the time_stamp structure
+*
+* @param[in] ps_frame_time
+*  Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+*  Time stamp difference between frames
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+*  Flag to indicate if frame is already skipped by application
+*
+* @returns
+*  Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+                                    UWORD32 u4_delta_time_stamp,
+                                    UWORD32 *pu4_frm_not_skipped_for_dts);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize time stamp context
+*
+* @par Description
+*  Time stamp structure stores the time stamp data that
+*  needs to be sent in to the header of MPEG4. Based on the
+*  max target frame rate the vop_time increment resolution is set
+*  so as to support all the frame rates below max frame rate.
+*  A support till the third decimal point is assumed.
+*
+* @param[in] time_stamp
+*  Handle to time stamp structure
+*
+* @param[in] max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] src_frm_rate
+*  Source frame rate
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_handle time_stamp,
+                            UWORD32 max_frm_rate,
+                            UWORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+*  Vop time is incremented by increment value. When vop time goes
+*  more than the vop time resolution set the modulo time base to
+*  1 and reduce the vop time by vop time resolution so that the
+*  excess value is present in vop time and get accumulated over time
+*  so that the corresponding frame rate is achieved at a average of
+*  1000 seconds
+*
+* @param[in] time_stamp
+*  Pointer to time stamp structure
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_handle time_stamp);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to init frame time memtabs
+*
+* @par Description
+*  Function to init frame time memtabs
+*
+* @param[in] pps_frame_time
+*  Pointer to frame time contexts
+*
+* @param[in] ps_memtab
+*  Pointer to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtabs/init memtabs)
+*
+* @returns
+*  none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize time stamp memtabs
+*
+* @par Description
+*  Function to initialize time stamp memtabs
+*
+* @param[in] pps_time_stamp
+*  Pointer to time stamp context
+*
+* @param[in] ps_memtab
+*  Pointer to memtab
+*
+* @param[in] e_func_type
+*  Function type (Get memtab/ init memtab)
+*
+* @returns
+*   number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type);
+
+/****************************************************************************
+                       Run-Time Modifying functions
+****************************************************************************/
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+*  Function to get source frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  source frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+*  Function to get target frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*   target frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+*  Function to get source time increment
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  source time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+*  Function to get target time increment
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  target time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+*  Function to get src frame time
+*
+* @param[in] frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  src frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+*  Function to get tgt frame time
+*
+* @param[in] frame_time
+*  Pointer to frame time context
+*
+* @returns
+*  tgt frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+*  Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @param[in] src_frm_rate
+*  source frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time, WORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update target frame time with a new target frame rate
+*
+* @par Description
+*  Function to update target frame time with a new target frame rate
+*
+* @param[in] ps_frame_time
+*  Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+*  target frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time, WORD32 tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context with a new source frame rate
+*
+* @par Description
+*  When the frame rate changes the time increment is modified by appropriate ticks
+*
+* @param[in] ps_time_stamp
+*  Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+*  source frame rate
+*
+* @returns
+*  None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp, UWORD32 src_frm_rate);
+
+#endif /*IH264E_TIME_STAMP_H_*/
+
diff --git a/encoder/ih264e_trace.h b/encoder/ih264e_trace.h
new file mode 100755
index 0000000..8134524
--- /dev/null
+++ b/encoder/ih264e_trace.h
@@ -0,0 +1,161 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  ih264e_trace.h
+*
+* @brief
+*  This file contains extern declarations of routines that could be helpful
+*  for debugging purposes.
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TRACE_H_
+#define IH264E_TRACE_H_
+
+#if ENABLE_TRACE
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      Data for the trace functionality
+******************************************************************************
+ */
+typedef struct
+{
+    /**
+     * File handle the trace macros (ENTROPY_TRACE / AEV_TRACE) print to.
+     * The macros print nothing while this is NULL
+     */
+    FILE    *fp;
+}enc_trace_t;
+
+/*****************************************************************************/
+/* Extern variable declarations                                              */
+/*****************************************************************************/
+extern enc_trace_t g_enc_trace;
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      defines flag used for enabling trace
+******************************************************************************
+ */
+
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief   Macro to print trace messages
+ *
+ *  Prints syntax_string (left aligned in 40 columns) followed by value
+ *  (formatted with %d) to g_enc_trace.fp and flushes the file. Nothing is
+ *  printed while g_enc_trace.fp is NULL.
+******************************************************************************
+ */
+
+#define ENTROPY_TRACE(syntax_string, value)                                           \
+    {                                                                                 \
+        if(g_enc_trace.fp)                                                            \
+        {                                                                             \
+            fprintf( g_enc_trace.fp, "%-40s : %d\n", syntax_string, value );          \
+            fflush ( g_enc_trace.fp);                                                 \
+        }                                                                             \
+    }
+
+
+/**
+******************************************************************************
+ *  @brief   Macro to print CABAC trace messages
+ *
+ *  Prints string, value and range to g_enc_trace.fp and flushes the file.
+ *  Nothing is printed when range is zero or g_enc_trace.fp is NULL.
+ *
+ *  The body is wrapped in braces (as ENTROPY_TRACE is) so that the inner
+ *  'if' cannot capture an 'else' that follows the macro at the call site,
+ *  and range is parenthesised so that an expression argument is tested as a
+ *  whole. Note that range is evaluated twice.
+******************************************************************************
+ */
+
+#define AEV_TRACE(string, value, range)                                      \
+    {                                                                        \
+        if((range) && g_enc_trace.fp)                                        \
+        {                                                                    \
+            fprintf( g_enc_trace.fp, "%-40s:%8d R:%d\n", string, value, range);  \
+            fflush ( g_enc_trace.fp);                                        \
+        }                                                                    \
+    }
+
+#else
+
+/* Dummy macros when trace is disabled: both expand to nothing, so their
+ * arguments are not evaluated */
+#define ENTROPY_TRACE(syntax_string, value)
+
+#define AEV_TRACE(string, value, range)
+
+#endif
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+
+/**
+******************************************************************************
+*
+*  @brief Dummy trace init when trace is disabled in encoder
+*
+*  @par   Description
+*  This routine needs to be called at start of trace
+*
+*  @param[in]   pu1_file_name
+*  Name of file where trace outputs need to be stored (handle)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32    ih264e_trace_init
+        (
+            const char        *pu1_file_name
+        );
+
+/**
+******************************************************************************
+*
+*  @brief Dummy trace de-init function when trace is disabled
+*
+*  @par   Description
+*  This routine needs to be called at end of trace
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32    ih264e_trace_deinit
+        (
+            void
+        );
+
+#endif // IH264E_TRACE_H_
diff --git a/encoder/ih264e_trace_support.h b/encoder/ih264e_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/ih264e_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_trace_support.h
+*
+* @brief
+*  This file contains extern declarations of routines that could be helpful
+*  for debugging purposes.
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/* Buffer descriptor used by the trace support routines.
+ * NOTE(review): the field descriptions below are inferred from the field
+ * names only - confirm against the implementation of init_trace_support()
+ * and trace_printf() */
+typedef struct
+{
+    /* presumably the start of the buffer that trace text is written to */
+    WORD8 * pu1_buf;
+    /* presumably the current write position inside pu1_buf */
+    WORD32 i4_offset;
+    /* presumably the capacity of pu1_buf */
+    WORD32 i4_max_size;
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations                                              */
+/*****************************************************************************/
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+
+int trace_printf(const WORD8 *format, ...);
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
new file mode 100755
index 0000000..f0086cb
--- /dev/null
+++ b/encoder/ih264e_utils.c
@@ -0,0 +1,1804 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_utils.c
+*
+* @brief
+*  Contains miscellaneous utility functions used by the encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+*  - ih264e_get_min_level()
+*  - ih264e_get_lvl_idx()
+*  - ih264e_get_dpb_size()
+*  - ih264e_get_total_pic_buf_size()
+*  - ih264e_get_pic_mv_bank_size()
+*  - ih264e_pic_buf_mgr_add_bufs()
+*  - ih264e_mv_buf_mgr_add_bufs()
+*  - ih264e_init_quant_params()
+*  - ih264e_init_air_map()
+*  - ih264e_codec_init()
+*  - ih264e_pic_init()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_common_tables.h"
+#include "ih264_debug.h"
+#include "ih264_trans_data.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264_list.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_me.h"
+#include "ime_defs.h"
+#include "ime.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "irc_rate_control_api.h"
+#include "ime_statistics.h"
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get the minimum level for a given picture size
+*
+* @par Description:
+*  Scans gai4_ih264_max_luma_pic_size in index order for the first entry that
+*  is not smaller than pic_size and returns the level stored at the same index
+*  of gai4_ih264_levels. When no entry is large enough the level stored at
+*  index MAX_LEVEL is returned.
+*
+* @param[in] pic_size
+*  Picture size that is compared against the maximum luma picture sizes
+*
+* @returns  Level (entry of gai4_ih264_levels) for the given picture size
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size)
+{
+    WORD32 idx;
+
+    /* the first entry (in table order) that is big enough wins */
+    for (idx = 0; idx < MAX_LEVEL; idx++)
+    {
+        if (gai4_ih264_max_luma_pic_size[idx] >= pic_size)
+        {
+            return gai4_ih264_levels[idx];
+        }
+    }
+
+    /* nothing fits, fall back to the entry at index MAX_LEVEL */
+    return gai4_ih264_levels[MAX_LEVEL];
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get level index for a given level
+*
+* @par Description:
+*  Converts a level (compared against the IH264_LEVEL_xx constants) to an
+*  index that can be used as a lookup. A value that lies between two known
+*  levels maps to the index of the lower one, a value below IH264_LEVEL_11
+*  maps to index 0.
+*
+* @param[in] level
+*  Level of the stream
+*
+* @returns  Level index for a given level
+*
+* @remarks
+*  Levels from IH264_LEVEL_50 upwards map to index 13 and levels from
+*  IH264_LEVEL_51 upwards to index 14. Without these two cases such levels
+*  matched no comparison and ended up with index 0, i.e. with the limits of
+*  the smallest level.
+*  NOTE(review): assumes the tables indexed with the returned value have
+*  entries at index 13 and 14 - confirm against the table definitions.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level)
+{
+    /* ai4_next_level[i] is the first level that no longer maps to index i */
+    static const WORD32 ai4_next_level[] =
+    {
+        IH264_LEVEL_11, IH264_LEVEL_12, IH264_LEVEL_13, IH264_LEVEL_20,
+        IH264_LEVEL_21, IH264_LEVEL_22, IH264_LEVEL_30, IH264_LEVEL_31,
+        IH264_LEVEL_32, IH264_LEVEL_40, IH264_LEVEL_41, IH264_LEVEL_42,
+        IH264_LEVEL_50, IH264_LEVEL_51
+    };
+    const WORD32 num_bounds =
+                    (WORD32)(sizeof(ai4_next_level) / sizeof(ai4_next_level[0]));
+    WORD32 lvl_idx;
+
+    /* stop at the first bound that lies above the level */
+    for (lvl_idx = 0; lvl_idx < num_bounds; lvl_idx++)
+    {
+        if (level < ai4_next_level[lvl_idx])
+        {
+            break;
+        }
+    }
+
+    /* lvl_idx equals num_bounds here when level >= IH264_LEVEL_51 */
+    return (lvl_idx);
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns maximum number of pictures allowed in dpb for a given level
+*
+* @par Description:
+*  Looks up the maximum dpb size of the level in gas_ih264_lvl_tbl and converts
+*  it to a number of pictures of the given size, as per Annex A.3.1. The
+*  result is limited to MAX_DPB_SIZE.
+*
+* @param[in] level
+*  level of the bit-stream, matched against u4_level_idc of the table entries
+*
+* @param[in] pic_size
+*  width * height
+*
+* @returns  Number of buffers in DPB. 0 when the level is not found in the
+*  table
+*
+* @remarks
+*  From annexure A.3.1 of H264 specification,
+*  max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+*  Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+*  MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+*  presented in the look up table gas_ih264_lvl_tbl is in units of 512
+*  bytes. Hence the expression is modified accordingly.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size)
+{
+    /* max dpb size of the level as stored in the table, 0 if not found */
+    WORD32 lvl_max_dpb = 0;
+
+    /* dpb size expressed in pictures */
+    WORD32 num_frames;
+
+    /* table index */
+    WORD32 idx = 0;
+
+    /* look the level up; every entry is visited, the last match is kept */
+    while (idx < 16)
+    {
+        if ((WORD32)gas_ih264_lvl_tbl[idx].u4_level_idc == level)
+        {
+            lvl_max_dpb = gas_ih264_lvl_tbl[idx].u4_max_dpb_size;
+        }
+        idx++;
+    }
+
+    /* from Annexure A.3.1 h264 specification */
+    num_frames = MIN( 1024 * lvl_max_dpb / ( pic_size * 3 ), MAX_DPB_SIZE );
+
+    return num_frames;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get reference picture buffer size for a given level and
+*  padding used
+*
+* @par Description:
+*  Computes the memory needed for all picture buffers at a given level. Every
+*  picture is padded on all four sides. The picture dimensions are not taken
+*  from the caller but from the per level tables, looked up with the index
+*  returned by ih264e_get_lvl_idx()
+*
+* @param[in] pic_size
+*  Number of luma samples (Width * Height). Not used
+*
+* @param[in] level
+*  Level
+*
+* @param[in] horz_pad
+*  Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+*  Total padding used in vertical direction
+*
+* @param[in] num_ref_frames
+*  Number of reference frames
+*
+* @param[in] num_reorder_frames
+*  Number of reorder frames
+*
+* @returns  Total picture buffer size
+*
+* @remarks
+*  Number of buffers = num_ref_frames + num_reorder_frames + MAX_CTXT_SETS
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
+                                     WORD32 level,
+                                     WORD32 horz_pad,
+                                     WORD32 vert_pad,
+                                     WORD32 num_ref_frames,
+                                     WORD32 num_reorder_frames)
+{
+    /* index of the level in the per level tables */
+    const WORD32 tbl_idx = ih264e_get_lvl_idx(level);
+
+    /* one pad value, the larger of the two, is used for both directions */
+    const WORD32 border = MAX(horz_pad, vert_pad);
+
+    /* number of picture buffers to account for */
+    const WORD32 buf_cnt = (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS);
+
+    /* maximum luma samples of a picture at this level, plus chroma */
+    const WORD32 samples_per_pic = gai4_ih264_max_luma_pic_size[tbl_idx] * 3 / 2;
+
+    /* maximum width and minimum height of a picture at this level */
+    const WORD32 widest = gai4_ih264_max_wd_ht[tbl_idx];
+    const WORD32 shortest = gai4_ih264_min_wd_ht[tbl_idx];
+
+    /*
+     * A padded picture needs (Wd + pad) * (Ht + pad) samples, i.e.
+     * (Wd * Ht) + (pad * pad) + pad * (Wd + Ht)
+     *
+     * (Wd * Ht) is covered by samples_per_pic. For the remainder Wd + Ht is
+     * worst when one dimension is at its maximum and the other one at its
+     * minimum, hence max width and min height are used
+     */
+    const WORD32 pad_per_pic = (border * border) + border * (widest + shortest);
+
+    UNUSED(pic_size);
+
+    /* picture area of all buffers followed by padding area of all buffers */
+    return (samples_per_pic * buf_cnt) + (pad_per_pic * buf_cnt);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+*  Computes the size of one MV bank for the given number of luma samples.
+*  A bank is made of three parts: one WORD32 per MB, one byte of pu_map per
+*  min PU and one enc_pu_t per min PU
+*
+* @param[in] num_luma_samples
+*  Max number of luma pixels in the frame
+*
+* @returns  Total MV Bank size
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
+{
+    /* number of mbs in the picture */
+    WORD32 mb_cnt = num_luma_samples / (MB_SIZE * MB_SIZE);
+
+    /* number of min sized PUs (sub mb partitions) in the picture */
+    WORD32 pu_cnt = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+
+    /* part 1: enc_pu_t start index of each MB, one WORD32 per MB.
+     * NOTE(review): the earlier comment here asked for one extra entry to
+     * compute the number of PUs in the last MB, but exactly mb_cnt entries
+     * are reserved - confirm which one is intended */
+    WORD32 bank_size = mb_cnt * sizeof(WORD32);
+
+    /* part 2: pu_map, one byte per PU */
+    bank_size += pu_cnt;
+
+    /* part 3: one enc_pu_t per PU */
+    bank_size += pu_cnt * sizeof(enc_pu_t);
+
+    return bank_size;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize ps_pic_buf structs and add pic buffers to
+*  buffer manager in case of non-shared mode
+*
+* @par Description:
+*  Splits the picture buffer memory of the codec into reference picture
+*  buffers, fills one pic_buf_t per buffer and adds it to the reference
+*  buffer manager.
+*  To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  IH264E_SUCCESS when i4_ref_buf_cnt buffers were added,
+*  IH264E_INSUFFICIENT_MEM_PICBUF when the memory cannot hold that many
+*  buffers, IH264E_BUF_MGR_ERROR when the buffer manager reports a failure.
+*  Both errors are also stored in ps_codec->i4_error_code
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+    /* error status */
+    IH264E_ERROR_T ret = IH264E_SUCCESS;
+
+    /* max ref buffer cnt */
+    WORD32 max_num_bufs = ps_codec->i4_ref_buf_cnt;
+
+    /* memory left for picture data once the pic_buf_t array kept at the
+     * start of the allocation is set aside */
+    WORD32 pic_buf_size_allocated = ps_codec->i4_total_pic_buf_size
+                    - BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+    /* temp var */
+    UWORD8 *pu1_buf = (UWORD8 *) ps_codec->ps_pic_buf;
+    pic_buf_t *ps_pic_buf = (pic_buf_t *) ps_codec->ps_pic_buf;
+    WORD32 i;
+
+    /* picture data starts right after the pic_buf_t array */
+    pu1_buf += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+    /* In case of non-shared mode, add picture buffers to buffer manager
+     * In case of shared mode, buffers are added in the run-time
+     */
+    {
+        WORD32 buf_ret;
+
+        /* one luma plane: recon stride times the padded height */
+        WORD32 luma_samples = (ps_codec->i4_rec_strd)
+                        * (ps_codec->s_cfg.u4_ht + PAD_HT);
+
+        /* chroma is half the size of luma */
+        WORD32 chroma_samples = luma_samples >> 1;
+
+        /* memory taken by one buffer. pu1_buf moves ahead by exactly this
+         * amount in every iteration (luma and chroma for each of the
+         * HPEL_PLANES_CNT planes), so the same amount has to be charged
+         * against the memory that is left */
+        WORD32 buf_size = HPEL_PLANES_CNT * (luma_samples + chroma_samples);
+
+        /* Try and add as many buffers as possible for the memory that is allocated */
+        /* If the number of buffers that can be added is less than max_num_bufs
+         * return with an error */
+        for (i = 0; i < max_num_bufs; i++)
+        {
+            pic_buf_size_allocated -= buf_size;
+
+            if (pic_buf_size_allocated < 0)
+            {
+                ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF;
+                return IH264E_INSUFFICIENT_MEM_PICBUF;
+            }
+
+            /* luma pointer skips PAD_TOP rows and PAD_LEFT columns */
+            ps_pic_buf->pu1_luma = pu1_buf + ps_codec->i4_rec_strd * PAD_TOP
+                            + PAD_LEFT;
+            pu1_buf += luma_samples;
+
+            /* chroma pointer skips PAD_TOP / 2 rows and PAD_LEFT columns */
+            ps_pic_buf->pu1_chroma = pu1_buf
+                            + ps_codec->i4_rec_strd * (PAD_TOP / 2)+ PAD_LEFT;
+            pu1_buf += chroma_samples;
+
+            buf_ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+                                        ps_pic_buf, i);
+
+            if (0 != buf_ret)
+            {
+                ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+                return IH264E_BUF_MGR_ERROR;
+            }
+
+            /* step over the remaining planes of this buffer */
+            pu1_buf += (HPEL_PLANES_CNT - 1) * (chroma_samples + luma_samples);
+            ps_pic_buf++;
+        }
+    }
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to add buffers to MV Bank buffer manager
+*
+* @par Description:
+*  Splits the MV bank memory of the codec into one bank per reference
+*  buffer, fills one mv_buf_t per bank and adds it to the MV buffer manager.
+*  To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  IH264E_SUCCESS when i4_ref_buf_cnt banks were added,
+*  IH264E_INSUFFICIENT_MEM_MVBANK when the memory cannot hold that many
+*  banks, IH264E_BUF_MGR_ERROR when the buffer manager reports a failure.
+*  Both errors are also stored in ps_codec->i4_error_code
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+    /* status reported by the buffer manager */
+    IH264_ERROR_T buf_mgr_status;
+
+    /* luma samples of a picture, both dimensions passed through ALIGN16 */
+    WORD32 luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
+                    * ALIGN16(ps_codec->s_cfg.u4_ht);
+
+    /* mbs and min sized PUs in one picture */
+    WORD32 mb_cnt = luma_samples / (MB_SIZE * MB_SIZE);
+    WORD32 pu_cnt = luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+
+    /* bytes taken by the mv bank of one picture */
+    WORD32 bank_size = ih264e_get_pic_mv_bank_size(luma_samples);
+
+    /* number of mv banks to add, same as the reference buffer count */
+    WORD32 bank_cnt = ps_codec->i4_ref_buf_cnt;
+
+    /* memory left for bank data once the mv_buf_t array kept at the base of
+     * the allocation is set aside */
+    WORD32 bytes_left = ps_codec->i4_total_mv_bank_size
+                    - (BUF_MGR_MAX_CNT * sizeof(mv_buf_t));
+
+    /* entry of the mv_buf_t array that is being filled */
+    mv_buf_t *ps_bank = ps_codec->pv_mv_bank_buf_base;
+
+    /* start of the data of the bank that is being filled */
+    UWORD8 *pu1_bank_data = ps_codec->pv_mv_bank_buf_base;
+
+    /* pu_map of the bank that is being filled */
+    UWORD8 *pu1_pu_map;
+
+    WORD32 idx;
+
+    /* the mv buffer array lives at the base, bank data follows it */
+    ps_codec->ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
+    pu1_bank_data += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+    for (idx = 0; idx < bank_cnt; idx++)
+    {
+        /* stop as soon as the memory cannot hold one more bank */
+        bytes_left -= bank_size;
+        if (bytes_left < 0)
+        {
+            ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_MVBANK;
+            return IH264E_INSUFFICIENT_MEM_MVBANK;
+        }
+
+        /* bank layout: one WORD32 per MB, then pu_cnt bytes of pu_map,
+         * then the enc_pu_t entries */
+        pu1_pu_map = (pu1_bank_data + mb_cnt * sizeof(WORD32));
+
+        ps_bank->pu4_mb_pu_cnt = (UWORD32 *) pu1_bank_data;
+        ps_bank->pu1_pic_pu_map = pu1_pu_map;
+        ps_bank->ps_pic_pu = (enc_pu_t *) (pu1_pu_map + pu_cnt);
+
+        buf_mgr_status = ih264_buf_mgr_add(
+                        (buf_mgr_t *) ps_codec->pv_mv_buf_mgr, ps_bank, idx);
+        if (IH264_SUCCESS != buf_mgr_status)
+        {
+            ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+            return IH264E_BUF_MGR_ERROR;
+        }
+
+        /* move on to the next bank */
+        pu1_bank_data += bank_size;
+        ps_bank++;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to initialize quant params structure
+*
+* @par Description:
+*  Fills the quant params of the planes Y, U and V of a process context:
+*  qp, qp / 6, qp % 6, qbits, forward scale matrix, forward threshold matrix,
+*  weight list, dead zone, inverse scale matrix and, when satqd is enabled in
+*  the configuration, the SATQD thresholds.
+*  Luma uses qp as is, both chroma planes use gu1_qpc_fqpi[qp]
+*
+* @param[in] ps_proc
+*  pointer to process context
+*
+* @param[in] qp
+*  quantization parameter
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp)
+{
+    /* quant params of the plane that is being initialized */
+    quant_params_t *ps_params;
+
+    /* forward scale matrices, 16 entries for each value of qp % 6 */
+    const UWORD16 *pu2_fwd_scale = gu2_quant_scale_matrix_4x4;
+
+    /* inverse scale matrices, 16 entries for each value of qp % 6 */
+    const UWORD16 *pu2_inv_scale = gau2_ih264_iquant_scale_matrix_4x4;
+
+    /* forward threshold matrix selected by qp % 6 */
+    const UWORD16 *pu2_thres_src = NULL;
+
+    /* forward scale matrix of the current plane, read for the satqd thresholds */
+    const UWORD16 *pu2_smat;
+
+    /* qp of each plane, indexed by Y, U and V */
+    UWORD32 au4_plane_qp[3];
+
+    /* qp / 6 and qp % 6 of the current plane */
+    UWORD32 u4_div6, u4_mod6;
+
+    /* numerator shared by all satqd thresholds of a plane */
+    UWORD32 u4_satqd_num;
+
+    COMPONENT_TYPE plane;
+    WORD32 i;
+
+    /* luma qp */
+    au4_plane_qp[Y] = qp;
+
+    /* chroma qp
+     * TODO_LATER : just in case if the chroma planes use different qp's this
+     * needs to be corrected accordingly.
+     */
+    au4_plane_qp[U] = gu1_qpc_fqpi[qp];
+    au4_plane_qp[V] = gu1_qpc_fqpi[qp];
+
+    /********************************************************************/
+    /* init quant params for all planes Y, U and V                      */
+    /********************************************************************/
+    for (plane = Y; plane <= V; plane += 1)
+    {
+        ps_params = ps_proc->ps_qp_params[plane];
+
+        u4_div6 = (au4_plane_qp[plane] / 6);
+        u4_mod6 = (au4_plane_qp[plane] % 6);
+
+        /* mb qp, mb qp / 6, mb qp % 6 and QP bits */
+        ps_params->u1_mb_qp = au4_plane_qp[plane];
+        ps_params->u1_qp_div = u4_div6;
+        ps_params->u1_qp_rem = u4_mod6;
+        ps_params->u1_qbits = QP_BITS_h264_4x4 + u4_div6;
+
+        /* forward scale matrix */
+        ps_params->pu2_scale_mat = pu2_fwd_scale + (u4_mod6 * 16);
+
+        /* threshold matrix, scaled down by (8 - qp / 6) bits, and a flat
+         * weight of 16 for quantization */
+        pu2_thres_src = gu2_forward_quant_threshold_4x4 + (u4_mod6 * 16);
+        for (i = 0; i < 16; i++)
+        {
+            ps_params->pu2_thres_mat[i] = pu2_thres_src[i]
+                            >> (8 - u4_div6);
+            ps_params->pu2_weigh_mat[i] = 16;
+        }
+
+        /* qp dependent rounding constant */
+        ps_params->u4_dead_zone =
+                        gu4_forward_quant_round_factor_4x4[u4_div6];
+
+        /* slice dependent rounding constant: halved for every slice type
+         * other than I and SI */
+        if (!(ps_proc->i4_slice_type == ISLICE
+                        || ps_proc->i4_slice_type == SISLICE))
+        {
+            ps_params->u4_dead_zone >>= 1;
+        }
+
+        /* SATQD threshold for zero block prediction */
+        if (ps_proc->ps_codec->s_cfg.u4_enable_satqd)
+        {
+            pu2_smat = ps_params->pu2_scale_mat;
+
+            u4_satqd_num = ((1 << (ps_params->u1_qbits)) - ps_params->u4_dead_zone);
+
+            /* each threshold divides the numerator by the largest scale
+             * matrix entry of the coefficients it covers */
+            ps_params->pu2_sad_thrsh[0] = u4_satqd_num / MAX(pu2_smat[3], pu2_smat[11]);
+            ps_params->pu2_sad_thrsh[1] = u4_satqd_num / MAX(pu2_smat[1], pu2_smat[9]);
+            ps_params->pu2_sad_thrsh[2] = u4_satqd_num / pu2_smat[15];
+            ps_params->pu2_sad_thrsh[3] = u4_satqd_num / pu2_smat[7];
+            ps_params->pu2_sad_thrsh[4] = u4_satqd_num / MAX(pu2_smat[12], pu2_smat[14]);
+            ps_params->pu2_sad_thrsh[5] = u4_satqd_num / MAX(pu2_smat[4], pu2_smat[6]);
+            ps_params->pu2_sad_thrsh[6] = u4_satqd_num / pu2_smat[13];
+            ps_params->pu2_sad_thrsh[7] = u4_satqd_num / pu2_smat[5];
+            ps_params->pu2_sad_thrsh[8] = u4_satqd_num / MAX(MAX3(pu2_smat[0], pu2_smat[2], pu2_smat[8]), pu2_smat[10]);
+        }
+
+        /* inverse scale matrix */
+        ps_params->pu2_iscale_mat = pu2_inv_scale + (u4_mod6 * 16);
+    }
+    return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize AIR mb frame Map
+*
+* @par Description:
+*  Initialize AIR mb frame map
+*  MB frame map indicates which frame an Mb should be coded as intra according to AIR
+*  In cyclic mode entry n of the map is n % refresh period. In random mode
+*  each entry is the next value of a fixed seed pseudo random sequence
+*  modulo the refresh period, so the map is the same on every call.
+*  For any other mode, or for a refresh period of 0, the map is left as is
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error_status (always IH264E_SUCCESS)
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec)
+{
+    /* intra refresh map */
+    UWORD16 *pu2_intr_rfrsh_map = ps_codec->pu2_intr_rfrsh_map;
+
+    /* air mode */
+    IVE_AIR_MODE_T air_mode = ps_codec->s_cfg.e_air_mode;
+
+    /* refresh period */
+    UWORD32 air_period = ps_codec->s_cfg.u4_air_refresh_period;
+
+    /* mb cnt */
+    UWORD32 u4_mb_cnt = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs;
+
+    /* temp var */
+    UWORD32 curr_mb, seed_rand = 1;
+
+    /* the refresh period is the divisor of the modulo operations below. With
+     * a period of 0 no refresh schedule can be derived, so leave the map
+     * untouched instead of dividing by zero */
+    if (0 == air_period)
+    {
+        return IH264E_SUCCESS;
+    }
+
+    switch (air_mode)
+    {
+        case IVE_AIR_MODE_CYCLIC:
+
+            /* mbs are assigned to the frames of a period in raster order */
+            for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
+            {
+                pu2_intr_rfrsh_map[curr_mb] = curr_mb % air_period;
+            }
+            break;
+
+        case IVE_AIR_MODE_RANDOM:
+
+            /* mbs are assigned to the frames of a period using a pseudo
+             * random sequence that always starts from the same seed */
+            for (curr_mb = 0; curr_mb < u4_mb_cnt; curr_mb++)
+            {
+                seed_rand = (seed_rand * 32719 + 3) % 32749;
+                pu2_intr_rfrsh_map[curr_mb] = seed_rand % air_period;
+            }
+            break;
+
+        default:
+
+            break;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec level initializations
+*
+* @par Description:
+*  Initializes the codec with parameters that needs to be set before encoding
+*  first frame: applies the encoder speed preset to the configuration,
+*  initializes the AIR map when AIR is in use, initializes rate control and
+*  derives the strides and the reference buffer count
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error_status (always IH264E_SUCCESS)
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
+{
+    /********************************************************************
+     *                     INITIALIZE CODEC CONTEXT                     *
+     ********************************************************************/
+    /* encoder presets: every preset other than IVE_CONFIG overrides the
+     * tool selection held in s_cfg, IVE_CONFIG (and any preset not listed
+     * here) leaves s_cfg untouched */
+    switch (ps_codec->s_cfg.u4_enc_speed_preset)
+    {
+        case IVE_SLOWEST:
+            /* high quality */
+            /* diamond search, fast sad off */
+            ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+            ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+            /* intra 4x4 on, coded with the rdopt on variant */
+            ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+            ps_codec->luma_energy_compaction[1] =
+                            ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+            /* half pel on */
+            ps_codec->s_cfg.u4_enable_hpel = 1;
+
+            /* deblock disable level 0 */
+            ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* inter gate flag cleared */
+            ps_codec->u4_inter_gate = 0;
+            break;
+
+        case IVE_NORMAL:
+            /* normal */
+            /* diamond search, fast sad off */
+            ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+            ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+            /* intra 4x4 on */
+            ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+
+            /* half pel on */
+            ps_codec->s_cfg.u4_enable_hpel = 1;
+
+            /* deblock disable level 0 */
+            ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* inter gate flag cleared */
+            ps_codec->u4_inter_gate = 0;
+            break;
+
+        case IVE_FAST:
+            /* fast */
+            /* diamond search, fast sad off */
+            ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+            ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+            /* intra 4x4 off */
+            ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+            /* half pel on */
+            ps_codec->s_cfg.u4_enable_hpel = 1;
+
+            /* deblock disable level 0 */
+            ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+            /* inter gate flag set */
+            ps_codec->u4_inter_gate = 1;
+            break;
+
+        case IVE_HIGH_SPEED:
+            /* high speed */
+            /* diamond search, fast sad off */
+            ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+            ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+            /* intra 4x4 off */
+            ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+            /* half pel off */
+            ps_codec->s_cfg.u4_enable_hpel = 0;
+
+            /* deblock disable level 4 */
+            ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+            /* inter gate flag cleared */
+            ps_codec->u4_inter_gate = 0;
+            break;
+
+        case IVE_FASTEST:
+            /* fastest */
+            /* diamond search; u4_enable_fast_sad is not changed here */
+            ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+
+            /* intra 4x4 off */
+            ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+            /* half pel off */
+            ps_codec->s_cfg.u4_enable_hpel = 0;
+
+            /* deblock disable level 4 */
+            ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+            /* inter gate flag set */
+            ps_codec->u4_inter_gate = 1;
+            break;
+
+        default:
+            break;
+    }
+
+    /*****************************************************************
+     * Initialize AIR inside codec
+     *****************************************************************/
+    if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+    {
+        ih264e_init_air_map(ps_codec);
+
+        ps_codec->i4_air_pic_cnt = -1;
+    }
+
+    /****************************************************/
+    /*           INITIALIZE RATE CONTROL                */
+    /****************************************************/
+    {
+        /* init qp of the i, p and b pictures */
+        UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+        /* min and max qp, stored as a pair per picture type */
+        UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+        /* all qp values handed to rate control are first mapped through
+         * gau1_h264_to_mpeg2_qmap */
+        au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+        au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+        au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+        /* i pictures */
+        au1_min_max_qp[2 * I_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+        au1_min_max_qp[2 * I_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+        /* p pictures */
+        au1_min_max_qp[2 * P_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+        au1_min_max_qp[2 * P_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+        /* b pictures */
+        au1_min_max_qp[2 * B_PIC] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+        au1_min_max_qp[2 * B_PIC + 1] =
+                        gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+        /* translate the configured rc mode to the rate control type; an
+         * unlisted mode leaves e_rc_type as it is */
+        switch (ps_codec->s_cfg.e_rc_mode)
+        {
+            case IVE_RC_STORAGE:
+                ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+                break;
+
+            case IVE_RC_CBR_NON_LOW_DELAY:
+                ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+                break;
+
+            case IVE_RC_CBR_LOW_DELAY:
+                ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+                break;
+
+            case IVE_RC_NONE:
+                ps_codec->s_rate_control.e_rc_type = CONST_QP;
+                break;
+
+            default:
+                break;
+        }
+
+        /* init rate control */
+        ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+                       ps_codec->s_rate_control.pps_frame_time,
+                       ps_codec->s_rate_control.pps_time_stamp,
+                       ps_codec->s_rate_control.pps_pd_frm_rate,
+                       ps_codec->s_cfg.u4_max_framerate,
+                       ps_codec->s_cfg.u4_src_frame_rate,
+                       ps_codec->s_cfg.u4_tgt_frame_rate,
+                       ps_codec->s_rate_control.e_rc_type,
+                       ps_codec->s_cfg.u4_target_bitrate,
+                       ps_codec->s_cfg.u4_max_bitrate,
+                       ps_codec->s_cfg.u4_vbv_buffer_delay,
+                       ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+                       H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+                       ps_codec->s_cfg.u4_max_level);
+    }
+
+    /* src stride */
+    ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+
+    /* recon stride: max width passed through ALIGN16, plus the padding */
+    ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD;
+
+    /* reference buffer count: max ref cnt + max reorder cnt + ctxt sets */
+    ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt
+                    + ps_codec->s_cfg.u4_max_reorder_cnt + MAX_CTXT_SETS;
+
+    DEBUG_HISTOGRAM_INIT();
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Picture level initializations
+*
+* @par Description:
+*  Before beginning to encode the frame, the current function initializes all
+*  the ctxts (proc, entropy, me, ...) basing on the input configured params.
+*  It locates space for storing recon in the encoder picture buffer set, fetches
+*  reference frame from encoder picture buffer set. Calls RC pre-enc to get
+*  qp and pic type for the current frame. Queues proc jobs so that
+*  the other threads can begin encoding. In brief, this function sets up the
+*  tone for the entire encoder.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_inp_buf
+*  Pointer to input buffer context
+*
+* @returns  error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
+{
+    /* error status */
+    IH264E_ERROR_T error_status = IH264E_SUCCESS;
+    IH264_ERROR_T ret = IH264_SUCCESS;
+
+    /* mv buff bank */
+    mv_buf_t *ps_mv_buf = NULL;
+    WORD32 cur_mv_bank_buf_id;
+
+    /* recon buffer set */
+    pic_buf_t *ps_cur_pic;
+    WORD32 cur_pic_buf_id;
+    UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma;
+
+    /* ref buffer set */
+    pic_buf_t *ps_ref_pic;
+    WORD32 ref_set_id;
+
+    /* pic time stamp */
+    UWORD32 u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+    UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+    /* indices to access curr/prev frame info */
+    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* curr pic type */
+    PIC_TYPE_T *pic_type = &ps_codec->pic_type;
+
+    /* should src be skipped */
+    WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel];
+
+    /* Diamond search Iteration Max Cnt */
+    UWORD32 u4_num_layers =
+                    (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ?
+                                    (NUM_LAYERS >> 2) : NUM_LAYERS;
+
+    /* enable fast sad */
+    UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad;
+
+    /********************************************************************/
+    /*                     INITIALIZE CODEC CONTEXT                     */
+    /********************************************************************/
+
+    /* pre enc rc call */
+    *skip_src = ih264e_set_rc_pic_params(ps_codec,
+                                         ps_codec->i4_encode_api_call_cnt,
+                                         (WORD32 *) pic_type);
+    if (*skip_src == 1)
+    {
+        ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf =
+                        *ps_inp_buf;
+
+        /* inform output bytes generated as zero */
+        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0;
+
+        return error_status;
+    }
+
+    /********************************************************************/
+    /*                     Alternate reference frame                    */
+    /********************************************************************/
+    if (ps_codec->s_cfg.u4_enable_alt_ref)
+    {
+        if (PIC_IDR == *pic_type || PIC_I == *pic_type)
+        {
+            ps_codec->u4_is_curr_frm_ref = 1;
+        }
+        else
+        {
+            ps_codec->u4_is_curr_frm_ref = 1;
+                if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1))
+                    ps_codec->u4_is_curr_frm_ref = 0;
+            }
+
+        if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0))
+        {
+            ps_codec->i4_frame_num++;
+        }
+    }
+    else
+    {
+        ps_codec->u4_is_curr_frm_ref = 1;
+
+        ps_codec->i4_frame_num++;
+    }
+
+    /* slice_type */
+    ps_codec->i4_slice_type = PSLICE;
+
+    if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
+    {
+        ps_codec->i4_slice_type = ISLICE;
+    }
+    else if (PIC_P == *pic_type)
+    {
+        ps_codec->i4_slice_type = PSLICE;
+    }
+
+    /* is this an IDR pic */
+    ps_codec->u4_is_idr = 0;
+
+    if (PIC_IDR == *pic_type)
+    {
+        /* set idr flag */
+        ps_codec->u4_is_idr = 1;
+
+        /* reset frame num */
+        ps_codec->i4_frame_num = 0;
+
+        /* idr_pic_id */
+        ps_codec->i4_idr_pic_id++;
+    }
+
+    /* set deblock disable flags based on disable deblock level */
+    ps_codec->i4_disable_deblk_pic = 1;
+
+    if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0)
+    {
+        /* enable deblocking */
+        ps_codec->i4_disable_deblk_pic = 0;
+    }
+    else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2)
+    {
+        /* enable deblocking after a period of frames */
+        if (ps_codec->i4_disable_deblk_pic_cnt == DISABLE_DEBLOCK_INTERVAL
+                        || ps_codec->i4_slice_type == ISLICE)
+        {
+            ps_codec->i4_disable_deblk_pic = 0;
+        }
+    }
+    else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3)
+    {
+        if (ps_codec->i4_slice_type == ISLICE)
+        {
+            ps_codec->i4_disable_deblk_pic = 0;
+        }
+    }
+
+    if (ps_codec->i4_disable_deblk_pic)
+    {
+        ps_codec->i4_disable_deblk_pic_cnt++;
+    }
+    else
+    {
+        ps_codec->i4_disable_deblk_pic_cnt = 0;
+    }
+
+    /* In slice mode - lets not deblk mb edges that lie along slice boundaries */
+    if (ps_codec->i4_disable_deblk_pic == 0)
+    {
+        if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+        {
+            ps_codec->i4_disable_deblk_pic = 2;
+        }
+    }
+
+    /* error status */
+    ps_codec->i4_error_code = IH264E_SUCCESS;
+
+    /* populate header */
+    if (ps_codec->i4_gen_header)
+    {
+        /* sps */
+        sps_t *ps_sps = NULL;
+
+        /* pps */
+        pps_t *ps_pps = NULL;
+
+        /*ps_codec->i4_pps_id ++;*/
+        ps_codec->i4_pps_id %= MAX_PPS_CNT;
+
+        /*ps_codec->i4_sps_id ++;*/
+        ps_codec->i4_sps_id %= MAX_SPS_CNT;
+
+        /* populate sps header */
+        ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
+        ih264e_populate_sps(ps_codec, ps_sps);
+
+        /* populate pps header */
+        ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
+        ih264e_populate_pps(ps_codec, ps_pps);
+    }
+
+    /* Reference and MV bank Buffer Manager */
+    {
+        /* min pic cnt among the list of pics stored in ref list */
+        WORD32 min_pic_cnt;
+
+        /* max pic cnt among the list of pics stored in ref list */
+        WORD32 max_pic_cnt;
+
+        /* temp var */
+        WORD32 i;
+
+        ps_ref_pic = NULL;
+
+        /* get reference picture when necessary */
+        /* Only nearest picture encoded (max pic cnt) is used as reference */
+        if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+        {
+            max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+            ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf;
+
+            /* loop through to get the max pic cnt among the list of pics stored in ref list */
+            for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+            {
+                if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt)
+                {
+                    max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+                    ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+                }
+            }
+        }
+
+        /* get a location at which the curr pic info can be stored for future reference */
+        ref_set_id = -1;
+
+        for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+        {
+            if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt)
+            {
+                ref_set_id = i;
+                break;
+            }
+        }
+
+        /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */
+        if (ref_set_id == -1)
+        {
+            /* pic info */
+            pic_buf_t *ps_cur_pic;
+
+            /* mv info */
+            mv_buf_t *ps_cur_mv_buf;
+
+            ref_set_id = 0;
+            min_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+            /* loop through to get the min pic cnt among the list of pics stored in ref list */
+            for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+            {
+                if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt)
+                {
+                    min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+                    ref_set_id = i;
+                }
+            }
+
+            ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf;
+
+            ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf;
+
+            /* release this frame from reference list */
+            ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
+                                  ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
+
+            ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
+                                  ps_cur_pic->i4_buf_id, BUF_MGR_REF);
+        }
+
+        if (ps_codec->s_cfg.u4_enable_recon)
+        {
+            ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr);
+
+            if (ret != IH264_SUCCESS)
+            {
+                return IH264E_NO_FREE_RECONBUF;
+            }
+        }
+    }
+
+    {
+        /*****************************************************************/
+        /* Get free MV Bank to hold current picture's motion vector data */
+        /* If there are no free buffers then return with an error code.  */
+        /* If the buffer is to be freed by another thread, change the    */
+        /* following to call thread yield and wait for buffer to be freed*/
+        /*****************************************************************/
+        ps_mv_buf = (mv_buf_t *) ih264_buf_mgr_get_next_free(
+                        (buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
+                        &cur_mv_bank_buf_id);
+
+        if (NULL == ps_mv_buf)
+        {
+            ps_codec->i4_error_code = IH264E_NO_FREE_MVBANK;
+            return IH264E_NO_FREE_MVBANK;
+        }
+
+        /* mark the buffer as needed for reference if the curr pic is available for ref */
+        if (ps_codec->u4_is_curr_frm_ref)
+        {
+            ih264_buf_mgr_set_status(ps_codec->pv_mv_buf_mgr,
+                                     cur_mv_bank_buf_id, BUF_MGR_REF);
+        }
+
+        /* Set current ABS poc to ps_mv_buf, so that while freeing a reference buffer
+         * corresponding mv buffer can be found by looping through ps_codec->ps_mv_buf array
+         * and getting a buffer id to free
+         */
+        ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+
+        ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id;
+    }
+
+    {
+        /*****************************************************************/
+        /* Get free pic buf to hold current picture's recon data         */
+        /* If there are no free buffers then return with an error code.  */
+        /* If the buffer is to be freed by another thread, change the    */
+        /* following to call thread yield and wait for buffer to be freed*/
+        /*****************************************************************/
+        ps_cur_pic = (pic_buf_t *) ih264_buf_mgr_get_next_free(
+                        (buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+                        &cur_pic_buf_id);
+
+        if (NULL == ps_cur_pic)
+        {
+            ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF;
+            return IH264E_NO_FREE_PICBUF;
+        }
+
+        /* mark the buffer as needed for reference if the curr pic is available for ref */
+        if (1 == ps_codec->u4_is_curr_frm_ref)
+        {
+            ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+                                     BUF_MGR_REF);
+        }
+
+        /* Mark the current buffer as needed for IO if recon is enabled */
+        if (1 == ps_codec->s_cfg.u4_enable_recon)
+        {
+            ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+                                     BUF_MGR_IO);
+        }
+
+        /* Associate input timestamp with current buffer */
+        ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+        ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+        ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+        ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb;
+
+        ps_cur_pic->i4_buf_id = cur_pic_buf_id;
+
+        pu1_cur_pic_luma = ps_cur_pic->pu1_luma;
+        pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma;
+    }
+
+    /* in case the current picture is used for reference then add it to the reference set */
+    if (ps_codec->u4_is_curr_frm_ref
+                    && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I)
+                                    || (*pic_type == PIC_P)))
+    {
+        ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+        /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */
+        ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt;
+
+        ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf;
+
+        ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic;
+    }
+
+    /********************************************************************/
+    /*                     INITIALIZE PROCESS CONTEXT                   */
+    /********************************************************************/
+    {
+        /* temp var */
+        WORD32 i, j = 0;
+
+        /* curr proc ctxt */
+        process_ctxt_t *ps_proc = NULL;
+
+        j = ctxt_sel * MAX_PROCESS_THREADS;
+
+        /* begin init */
+        for (i = j; i < (j + MAX_PROCESS_THREADS); i++)
+        {
+            ps_proc = &ps_codec->as_process[i];
+
+            /* luma src buffer */
+            if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
+            {
+                ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
+            }
+            else
+            {
+                ps_proc->pu1_src_buf_luma_base =
+                                ps_inp_buf->s_raw_buf.apv_bufs[0];
+            }
+
+            /* chroma src buffer */
+            if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE
+                            || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P)
+            {
+                ps_proc->pu1_src_buf_chroma_base =
+                                ps_codec->pu1_uv_csc_buf_base;
+            }
+            else
+            {
+                ps_proc->pu1_src_buf_chroma_base =
+                                ps_inp_buf->s_raw_buf.apv_bufs[1];
+            }
+
+            /* luma rec buffer */
+            ps_proc->pu1_rec_buf_luma_base = pu1_cur_pic_luma;
+
+            /* chroma rec buffer */
+            ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma;
+
+            /* src stride */
+            ps_proc->i4_src_strd = ps_codec->i4_src_strd;
+
+            /* rec stride */
+            ps_proc->i4_rec_strd = ps_codec->i4_rec_strd;
+
+            /* frame num */
+            ps_proc->i4_frame_num = ps_codec->i4_frame_num;
+
+            /* is idr */
+            ps_proc->u4_is_idr = ps_codec->u4_is_idr;
+
+            /* idr pic id */
+            ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id;
+
+            /* slice_type */
+            ps_proc->i4_slice_type = ps_codec->i4_slice_type;
+
+            /* Input width in mbs */
+            ps_proc->i4_wd_mbs = ps_codec->s_cfg.i4_wd_mbs;
+
+            /* Input height in mbs */
+            ps_proc->i4_ht_mbs = ps_codec->s_cfg.i4_ht_mbs;
+
+            /* Half x plane offset from pic buf */
+            ps_proc->u4_half_x_offset = 0;
+
+            /* Half y plane offset from half x plane */
+            ps_proc->u4_half_y_offset = 0;
+
+            /* Half x plane offset from half y plane */
+            ps_proc->u4_half_xy_offset = 0;
+
+            /* top row syntax elements */
+            ps_proc->ps_top_row_mb_syntax_ele =
+                            ps_proc->ps_top_row_mb_syntax_ele_base;
+
+            ps_proc->pu1_top_mb_intra_modes =
+                            ps_proc->pu1_top_mb_intra_modes_base;
+
+            ps_proc->ps_top_row_pu = ps_proc->ps_top_row_pu_base;
+
+            /* initialize quant params */
+            ps_proc->u4_frame_qp = ps_codec->u4_frame_qp;
+            ps_proc->u4_mb_qp = ps_codec->u4_frame_qp;
+            ih264e_init_quant_params(ps_proc, ps_proc->u4_frame_qp);
+
+            /* previous mb qp*/
+            ps_proc->u4_mb_qp_prev = ps_proc->u4_frame_qp;
+
+            /* Reset frame info */
+            memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t));
+
+            /* initialize proc, deblk and ME map */
+            if (i == j)
+            {
+                /* row '-1' */
+                memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+                /* row 0 to ht in mbs */
+                memset(ps_proc->pu1_proc_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+                /* row '-1' */
+                memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+                /* row 0 to ht in mbs */
+                memset(ps_proc->pu1_deblk_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+                /* row '-1' */
+                memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+                /* row 0 to ht in mbs */
+                memset(ps_proc->pu1_me_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+                /* at the start of air refresh period, reset intra coded map */
+                if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+                {
+                    ps_codec->i4_air_pic_cnt = (ps_codec->i4_air_pic_cnt + 1)
+                                    % ps_codec->s_cfg.u4_air_refresh_period;
+
+                    if (!ps_codec->i4_air_pic_cnt)
+                    {
+                        memset(ps_proc->pu1_is_intra_coded, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+                    }
+                }
+            }
+
+            /* deblock level */
+            ps_proc->u4_disable_deblock_level = ps_codec->i4_disable_deblk_pic;
+
+            /* slice index map */
+            /* no slice */
+            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE)
+            {
+                memset(ps_proc->pu1_slice_idx, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+            }
+            /* generate slices for every 'n' rows, 'n' is given through slice param */
+            else if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+            {
+                /* slice idx map */
+                UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+                /* temp var */
+                WORD32 i4_mb_y = 0, slice_idx = 0, cnt;
+
+                while (i4_mb_y < ps_proc->i4_ht_mbs)
+                {
+                    if (i4_mb_y +(WORD32)ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs)
+                    {
+                        cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs;
+                        i4_mb_y += ps_codec->s_cfg.u4_slice_param;
+                    }
+                    else
+                    {
+                        cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs;
+                        i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y);
+                    }
+                    memset(pu1_slice_idx, slice_idx, cnt);
+                    slice_idx++;
+                    pu1_slice_idx += cnt;
+                }
+            }
+
+            /* Current MV Bank's buffer ID */
+            ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id;
+
+            /* Pointer to current picture buffer structure */
+            ps_proc->ps_cur_pic = ps_cur_pic;
+
+            /* Pointer to current pictures mv buffers */
+            ps_proc->ps_cur_mv_buf = ps_mv_buf;
+
+            /* pointer to ref picture */
+            ps_proc->ps_ref_pic = ps_ref_pic;
+
+            if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+            {
+                /* ref pointer luma */
+                ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma;
+
+                /* ref pointer chroma */
+                ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma;
+            }
+
+            /* Structure for current input buffer */
+            ps_proc->s_inp_buf = *ps_inp_buf;
+
+            /* Number of encode frame API calls made */
+            ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt;
+
+            /* Current Picture count */
+            ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+            /* error status */
+            ps_proc->i4_error_code = 0;
+
+            /********************************************************************/
+            /*                     INITIALIZE ENTROPY CONTEXT                   */
+            /********************************************************************/
+            {
+                entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+                /* start of frame */
+                ps_entropy->i4_sof = 0;
+
+                /* end of frame */
+                ps_entropy->i4_eof = 0;
+
+                /* generate header */
+                ps_entropy->i4_gen_header = ps_codec->i4_gen_header;
+
+                /* sps ref_set_id */
+                ps_entropy->u4_sps_id = ps_codec->i4_sps_id;
+
+                /* sps base */
+                ps_entropy->ps_sps_base = ps_codec->ps_sps_base;
+
+                /* sps id */
+                ps_entropy->u4_pps_id = ps_codec->i4_pps_id;
+
+                /* sps base */
+                ps_entropy->ps_pps_base = ps_codec->ps_pps_base;
+
+                /* slice map */
+                ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+                /* slice hdr base */
+                ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base;
+
+                /* initialize entropy map */
+                if (i == j)
+                {
+                    /* row '-1' */
+                    memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+                    /* row 0 to ht in mbs */
+                    memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+                }
+
+                /* wd in mbs */
+                ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+                /* ht in mbs */
+                ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+                /* transform_8x8_mode_flag */
+                ps_entropy->i1_transform_8x8_mode_flag = 0;
+
+                /* entropy_coding_mode_flag */
+                ps_entropy->u1_entropy_coding_mode_flag =
+                                ps_codec->s_cfg.u4_entropy_coding_mode;
+
+                /* error code */
+                ps_entropy->i4_error_code = IH264E_SUCCESS;
+
+                /* mb skip run */
+                *(ps_proc->s_entropy.pi4_mb_skip_run) = 0;
+
+                /* last frame to encode */
+                ps_proc->s_entropy.u4_is_last = ps_inp_buf->u4_is_last;
+
+                /* Current Picture count */
+                ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+                /* time stamps */
+                ps_entropy->u4_timestamp_low = u4_timestamp_low;
+                ps_entropy->u4_timestamp_high = u4_timestamp_high;
+
+                /* init frame statistics */
+                ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0;
+                ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0;
+                ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0;
+                ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0;
+            }
+
+            /********************************************************************/
+            /*                     INITIALIZE DEBLOCK CONTEXT                   */
+            /********************************************************************/
+            {
+                /* deblk ctxt */
+                deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+                /* slice idx map */
+                ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx;
+            }
+
+            /********************************************************************/
+            /*                     INITIALIZE ME CONTEXT                        */
+            /********************************************************************/
+            {
+                /* me ctxt */
+                me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+                /* srch range x */
+                ps_me_ctxt->ai2_srch_boundaries[0] =
+                                ps_codec->s_cfg.u4_srch_rng_x;
+
+                /* srch range y */
+                ps_me_ctxt->ai2_srch_boundaries[1] =
+                                ps_codec->s_cfg.u4_srch_rng_y;
+
+                /* src stride */
+                ps_me_ctxt->i4_src_strd = ps_codec->i4_src_strd;
+
+                /* rec stride */
+                ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd;
+
+                /* Half x plane offset from pic buf */
+                ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset;
+
+                /* Half y plane offset from half x plane */
+                ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset;
+
+                /* Half x plane offset from half y plane */
+                ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset;
+
+                /* enable fast sad */
+                ps_me_ctxt->u4_enable_fast_sad = u4_enable_fast_sad;
+
+                /* half pel */
+                ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel;
+
+                /* Diamond search Iteration Max Cnt */
+                ps_me_ctxt->u4_num_layers = u4_num_layers;
+
+                /* me speed preset */
+                ps_me_ctxt->u4_me_speed_preset =
+                                ps_codec->s_cfg.u4_me_speed_preset;
+
+                /* qp */
+                ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp;
+
+                if ((i == 0) && (0 == ps_codec->i4_pic_cnt))
+                {
+                    /* init mv bits tables */
+                    ih264e_init_mv_bits(ps_me_ctxt);
+                }
+            }
+
+            ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl);
+
+        }
+
+        /* reset encoder header */
+        ps_codec->i4_gen_header = 0;
+    }
+
+    /********************************************************************/
+    /*                       ADD JOBS TO THE QUEUE                      */
+    /********************************************************************/
+    {
+        /* job structures */
+        job_t s_job;
+
+        /* temp var */
+        WORD32 i;
+
+        /* job class */
+        s_job.i4_cmd = CMD_PROCESS;
+
+        /* number of mbs to be processed in the current job */
+        s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+        /* job start index x */
+        s_job.i2_mb_x = 0;
+
+        /* proc base idx */
+        s_job.i2_proc_base_idx = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+        for (i = 0; i < (WORD32)ps_codec->s_cfg.i4_ht_mbs; i++)
+        {
+            /* job start index y */
+            s_job.i2_mb_y = i;
+
+            /* queue the job */
+            ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1);
+            if (ret != IH264_SUCCESS)
+            {
+                ps_codec->i4_error_code = ret;
+                return IH264E_FAIL;
+            }
+        }
+
+        /* Once all the jobs are queued, terminate the queue */
+        /* Since the threads are created and deleted in each call, terminating
+        here is not an issue */
+        ih264_list_terminate(ps_codec->pv_proc_jobq);
+    }
+
+    return error_status;
+}
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
new file mode 100755
index 0000000..651dad9
--- /dev/null
+++ b/encoder/ih264e_utils.h
@@ -0,0 +1,327 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_utils.h
+*
+* @brief
+*  Contains declarations of miscellaneous utility functions used by the encoder
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+*  -ih264e_get_min_level()
+*  -ih264e_get_lvl_idx()
+*  -ih264e_get_dpb_size()
+*  -ih264e_get_total_pic_buf_size()
+*  -ih264e_get_pic_mv_bank_size()
+*  -ih264e_pic_buf_mgr_add_bufs()
+*  -ih264e_mv_buf_mgr_add_bufs()
+*  -ih264e_init_quant_params()
+*  -ih264e_init_air_map()
+*  -ih264e_codec_init()
+*  -ih264e_pic_init()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_UTILS_H_
+#define IH264E_UTILS_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get the minimum level for a given picture size
+*
+* @par Description:
+*  Gets the minimum level index and then gets the corresponding level.
+*  Also used to ignore invalid levels like 2.3, 3.3 etc
+*
+* @param[in] pic_size
+*  Picture size (presumably width * height in luma samples - confirm)
+*
+* @returns  Minimum level for the given picture size (per the description)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get level index for a given level
+*
+* @par Description:
+*  Converts level_idc (ten times the level number in H.264) to an index usable
+*  as a lookup. Also used to ignore invalid levels like 2.2, 3.2 etc
+*
+* @param[in] level
+*  Level of the stream
+*
+* @returns  Level index for a given level
+*
+* @remarks
+*  NOTE(review): 2.2 and 3.2 are defined H.264 levels (Table A-1) - confirm
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level);
+
+/**
+*******************************************************************************
+*
+* @brief Returns maximum number of pictures allowed in DPB for a given level
+*
+* @par Description:
+*  For a given picture size (width * height) and level, the number of pictures
+*  allowed in the decoded picture buffer is computed as per Annex A.3.1
+*
+* @param[in] level
+*  Level of the bit-stream
+*
+* @param[in] pic_size
+*  width * height
+*
+* @returns  Number of buffers in DPB
+*
+* @remarks
+*  From Annex A.3.1 of the H.264 specification,
+*  max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+*  Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+*  MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+*  presented in the look up table gas_ih264_lvl_tbl is in units of 512
+*  bytes. Hence the expression is modified accordingly.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Used to get reference picture buffer size for a given level and padding used
+*
+* @par Description:
+*  Used to get reference picture buffer size for a given level and padding used
+*  Each picture is padded on all four sides
+*
+* @param[in] pic_size
+*  Number of luma samples (Width * Height)
+*
+* @param[in] level
+*  Level
+*
+* @param[in] horz_pad
+*  Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+*  Total padding used in vertical direction
+*
+* @param[in] num_ref_frames  Presumably the number of reference frames - confirm
+*
+* @param[in] num_reorder_frames  Presumably the number of reorder frames - confirm
+*
+* @returns  Total picture buffer size
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, WORD32 level,
+                                     WORD32 horz_pad, WORD32 vert_pad,
+                                     WORD32 num_ref_frames,
+                                     WORD32 num_reorder_frames);
+
+/**
+*******************************************************************************
+*
+* @brief Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+*  For a given number of luma samples, the size of one MV bank is computed.
+*  Each MV bank includes pu_map and enc_pu_t for all the min PUs (4x4) in a picture
+*
+* @param[in] num_luma_samples
+*  Max number of luma pixels in the frame
+*
+* @returns  Total MV Bank size
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to initialize ps_pic_buf structs and add pic buffers to the
+*  buffer manager in case of non-shared mode
+*
+* @par Description:
+*  Function to initialize ps_pic_buf structs and add pic buffers to the
+*  buffer manager in case of non-shared mode.
+*  To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Function to add buffers to the MV Bank buffer manager
+*
+* @par Description:
+*  Function to add buffers to the MV Bank buffer manager. To be called once
+*  per stream or for every reset
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error status (IH264E_ERROR_T)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Function to initialize quant params structure
+*
+* @par Description:
+*  The forward quantization modules depend on qp/6, qp mod 6, forward scale
+*  matrix, forward threshold matrix, weight list. The inverse quantization
+*  modules depend on qp/6, qp mod 6, inverse scale matrix, weight list.
+*  These params are initialized in this function.
+*
+* @param[in] ps_proc
+*  pointer to process context
+*
+* @param[in] qp
+*  quantization parameter
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize AIR MB frame map
+*
+* @par Description:
+*  Initialize AIR MB frame map. The map indicates in which frame an MB should
+*  be coded as intra according to AIR
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec level initializations
+*
+* @par Description:
+*  Initializes the codec with parameters that need to be set before encoding
+*  the first frame
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns  error_status
+*
+* @remarks
+*  Unlike ih264e_pic_init(), this function takes only the codec context; it
+*  has no input buffer argument.
+*
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Picture level initializations
+*
+* @par Description:
+*  Before beginning to encode the frame, the current function initializes all
+*  the ctxts (proc, entropy, me, ...) based on the configured input params.
+*  It locates space for storing recon in the encoder picture buffer set, fetches
+*  reference frame from encoder picture buffer set. Calls RC pre-enc to get
+*  qp and pic type for the current frame. Queues proc jobs so that
+*  the other threads can begin encoding. In brief, this function sets
+*  the tone for the entire encoder.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @param[in] ps_inp_buf
+*  Pointer to input buffer context
+*
+* @returns  error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf);
+
+#endif /* IH264E_UTILS_H_ */
diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c
new file mode 100755
index 0000000..3dcba8d
--- /dev/null
+++ b/encoder/ih264e_version.c
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_version.c
+*
+* @brief
+*  Contains version info for H264 encoder
+*
+* @author
+*  ittiam
+*
+* @par List of Functions:
+* - ih264e_get_version()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+ * Name of the codec and target platform (All Cortex A processors in this case)
+ */
+#define CODEC_NAME              "H264ENC"
+/**
+ * Codec release type, production or evaluation
+ */
+#define CODEC_RELEASE_TYPE      "production"
+/**
+ * Version string. First two digits signify major version and last two minor
+ */
+#define CODEC_RELEASE_VER       "01.00"
+/**
+ * Vendor name
+ */
+#define CODEC_VENDOR            "ITTIAM"
+
+#define MAX_STRLEN              511
+/**
+*******************************************************************************
+* Formats the version string
+*  "@(#)Id:<name>_<type> Ver:<ver> Released by <vendor> Build: <date> @ <time>"
+* into version_string, which must have room for at least MAX_STRLEN bytes.
+* snprintf() bounds the write by the destination size and always terminates
+* it, whereas the count given to strncat() only limits the bytes taken from
+* the source. The do/while(0) wrapper makes the macro a single statement.
+*******************************************************************************
+*/
+#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor)    \
+    do                                                                                              \
+    {                                                                                               \
+        snprintf(version_string, MAX_STRLEN,                                                        \
+                 "@(#)Id:%s_%s Ver:%s Released by %s Build: %s @ %s",                               \
+                 codec_name, codec_release_type, codec_release_ver, codec_vendor,                   \
+                 __DATE__, __TIME__);                                                               \
+    } while (0)
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the version info in the given char pointer
+*
+* @par Description:
+*  Builds the version string in a local buffer and copies it, including the
+*  terminating NUL, to pc_version when the caller's buffer can hold it
+*
+* @param[out] pc_version
+*  Pointer to hold version info
+*
+* @param[in] u4_version_bufsize
+*  Size of the buffer passed
+*
+* @returns IV_SUCCESS on copy; IV_FAIL if pc_version is NULL or too small
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize)
+{
+    CHAR ac_version_tmp[MAX_STRLEN];
+    size_t version_size; /* bytes to copy, including the terminating NUL */
+
+    VERSION(ac_version_tmp, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    version_size = strnlen(ac_version_tmp, MAX_STRLEN) + 1;
+
+    /* reject a missing or too-small destination instead of writing to it */
+    if ((NULL == pc_version) || (u4_version_bufsize < version_size))
+    {
+        return IV_FAIL;
+    }
+
+    memcpy(pc_version, ac_version_tmp, version_size);
+    return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_version.h b/encoder/ih264e_version.h
new file mode 100755
index 0000000..303a1e2
--- /dev/null
+++ b/encoder/ih264e_version.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_version.h
+*
+* @brief
+*  Contains declarations of miscellaneous utility functions used by the encoder
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_VERSION_H_
+#define IH264E_VERSION_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Copies the codec version string into a caller supplied buffer
+*
+* @par Description:
+*  The string, including its terminating NUL, is copied only if it fits
+*
+* @param[out] pc_version
+*  Buffer that receives the version string
+*
+* @param[in] u4_version_bufsize
+*  Size of the buffer passed
+*
+* @returns IV_SUCCESS when the string is copied, IV_FAIL otherwise
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize);
+
+#endif /* IH264E_VERSION_H_ */
diff --git a/encoder/ime.c b/encoder/ime.c
new file mode 100755
index 0000000..c89aaab
--- /dev/null
+++ b/encoder/ime.c
@@ -0,0 +1,836 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ime.c
+ *
+ * @brief
+ *  Full pel and half pel motion estimation routines for 16x16 macroblocks
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  -
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime.h"
+#include "ime_statistics.h"
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+*  Starting at the current full pel mv of the mb partition, every iteration
+*  computes the sad at the four vertices (left, right, top, bottom) of a radius
+*  one diamond and moves the centre to the vertex of least cost. The search
+*  ends when the centre itself has the least cost, when a vertex would fall
+*  outside the search range, or after u4_num_layers iterations. A result that
+*  lowers the cost is stored in the mb partition ctxt.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The fields used by the search are
+*
+*  s_mb_part : current mb partition ctxt with respect to ME (initial mv,
+*  predicted mv and cost to beat on entry, result of the search on exit)
+*
+*  u4_lambda_motion : lambda motion, weight of the mv bits in the cost
+*
+*  i4_srch_range_n/s/e/w : limits of the search range
+*
+*  u4_num_layers : maximum number of diamond iterations
+*
+* @returns  none; mv pair, distortion and cost are updated in s_mb_part
+*
+* @remarks Diamond Srch, radius is 1
+*
+*******************************************************************************
+*/
+void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
+{
+    /* MB partition info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* lagrange parameter */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* srch range*/
+    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+    /* NOTE: ps_me_ctxt->u4_enable_fast_sad is not read by this function; the
+     * four point sad routine is reached through a single function pointer */
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* least cost */
+    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
+
+    /* least sad */
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* mv pair */
+    WORD16 i2_mvx, i2_mvy;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* temp var */
+    WORD32 i4_cost[4];
+    WORD32 i4_sad[4];
+    UWORD8 *pu1_ref;
+    WORD16 i2_mv_u_x, i2_mv_u_y;
+
+    /* Diamond search Iteration Max Cnt */
+    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
+
+    /* predicted mv. The mv bits table is indexed with
+     * (4 * full pel mv component) - (predicted mv component). The scaling is
+     * written as a multiplication because left shifting a negative value is
+     * undefined behaviour in C, and mv components are frequently negative */
+    WORD32 i4_pred_mvx = ps_mb_part->s_mv_pred.i2_mvx;
+    WORD32 i4_pred_mvy = ps_mb_part->s_mv_pred.i2_mvy;
+
+    /* i2_mvx/i2_mvy hold the centre of the current diamond, i2_mv_u_x/i2_mv_u_y
+     * the least cost position found so far */
+
+    /* mv with best sad during initial evaluation */
+    i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
+    i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
+
+    i2_mv_u_x = i2_mvx;
+    i2_mv_u_y = i2_mvy;
+
+    while (u4_num_layers--)
+    {
+        /* FIXME : is this the right way to check for out of bounds ? */
+        if ( (i2_mvx - 1 < i4_srch_range_w) ||
+                        (i2_mvx + 1 > i4_srch_range_e) ||
+                        (i2_mvy - 1 < i4_srch_range_n) ||
+                        (i2_mvy + 1 > i4_srch_range_s) )
+        {
+            break;
+        }
+
+        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
+
+        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
+                                                pu1_curr_mb,
+                                                i4_ref_strd,
+                                                i4_src_strd,
+                                                i4_sad);
+
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
+
+        /* compute cost : sad + lambda * (bits of mv x + bits of mv y) */
+        i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[((i2_mvx - 1) * 4) - i4_pred_mvx]
+                                                      + pu1_mv_bits[(i2_mvy * 4) - i4_pred_mvy] );
+        i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[((i2_mvx + 1) * 4) - i4_pred_mvx]
+                                                      + pu1_mv_bits[(i2_mvy * 4) - i4_pred_mvy] );
+        i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[(i2_mvx * 4) - i4_pred_mvx]
+                                                      + pu1_mv_bits[((i2_mvy - 1) * 4) - i4_pred_mvy] );
+        i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[(i2_mvx * 4) - i4_pred_mvx]
+                                                      + pu1_mv_bits[((i2_mvy + 1) * 4) - i4_pred_mvy] );
+
+        /* pick the vertex of least cost; a vertex wins only if strictly cheaper */
+        if (i4_cost_least > i4_cost[0])
+        {
+            i4_cost_least = i4_cost[0];
+            i4_distortion_least = i4_sad[0];
+
+            i2_mv_u_x = (i2_mvx - 1);
+            i2_mv_u_y = i2_mvy;
+        }
+
+        if (i4_cost_least > i4_cost[1])
+        {
+            i4_cost_least = i4_cost[1];
+            i4_distortion_least = i4_sad[1];
+
+            i2_mv_u_x = (i2_mvx + 1);
+            i2_mv_u_y = i2_mvy;
+        }
+
+        if (i4_cost_least > i4_cost[2])
+        {
+            i4_cost_least = i4_cost[2];
+            i4_distortion_least = i4_sad[2];
+
+            i2_mv_u_x = i2_mvx;
+            i2_mv_u_y = i2_mvy - 1;
+        }
+
+        if (i4_cost_least > i4_cost[3])
+        {
+            i4_cost_least = i4_cost[3];
+            i4_distortion_least = i4_sad[3];
+
+            i2_mv_u_x = i2_mvx;
+            i2_mv_u_y = i2_mvy + 1;
+        }
+
+        if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
+        {
+            ps_mb_part->u4_exit = 1;
+            break;
+        }
+        else
+        {
+            i2_mvx = i2_mv_u_x;
+            i2_mvy = i2_mv_u_y;
+        }
+
+
+    }
+
+    if (i4_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
+    }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+*  This function determines the position in the search window at which the motion
+*  estimation should begin in order to minimise the number of search iterations.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. The candidate mvs (as_mv_init_search), their count
+*  (u4_num_candidates), u4_lambda_motion and u4_enable_fast_sad are read from
+*  it, and its mb partition ctxt (s_mb_part) receives the result
+*
+* @returns  none; when a candidate lowers the cost held in s_mb_part, its
+*  index, mv pair, distortion and cost are stored there
+*
+* @remarks
+*  a candidate whose mv repeats an earlier entry of the list is not evaluated
+*  again. No bias is applied to any candidate here; the biased cost of skip is
+*  computed by ime_compute_skip_cost
+*
+*******************************************************************************
+*/
+void ime_evaluate_init_srchposn_16x16
+        (
+            me_ctxt_t *ps_me_ctxt
+        )
+{
+    /* lagrange parameter */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* candidate mv cnt */
+    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates;
+
+    /* list of candidate mvs */
+    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search;
+
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD(distortion metric) of the 16x16 mb */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost;
+
+    /* least cost found so far and the distortion that goes with it */
+    WORD32 i4_mb_cost_least = INT_MAX;
+    WORD32 i4_distortion_least = INT_MAX;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* predicted mv, subtracted from the scaled candidate to index mv bits */
+    WORD32 i4_pred_mvx = ps_mb_part->s_mv_pred.i2_mvx;
+    WORD32 i4_pred_mvy = ps_mb_part->s_mv_pred.i2_mvy;
+
+    /* index of the least cost candidate */
+    UWORD32 u4_srch_pos_idx = 0;
+
+    /* temp var */
+    UWORD32 i, j;
+    UWORD8 *pu1_ref;
+    WORD32 i4_cand_mvx, i4_cand_mvy;
+
+    /************************************************************/
+    /* NOTE: no candidate is biased in this function. The cost  */
+    /* of skip, with the bias in its favour, is computed        */
+    /* separately by ime_compute_skip_cost()                    */
+    /************************************************************/
+
+
+    /* Carry out a search using each of the motion vector pairs identified above as predictors. */
+    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
+    for(i = 0; i < u4_num_candidates; i++)
+    {
+        /* cleared when the mv repeats an earlier candidate of the list */
+        WORD32 c_sad = 1;
+
+        /* candidate mv, in full pel units */
+        i4_cand_mvx = ps_mv_list[i].i2_mvx;
+        i4_cand_mvy = ps_mv_list[i].i2_mvy;
+
+        /* look for the same mv among the candidates that precede this one */
+        for(j = 0; j < i; j++ )
+        {
+            if ( (i4_cand_mvx == ps_mv_list[j].i2_mvx) &&
+                            (i4_cand_mvy == ps_mv_list[j].i2_mvy) )
+            {
+                c_sad = 0;
+                break;
+            }
+        }
+
+        if(c_sad)
+        {
+            /* adjust ref pointer */
+            pu1_ref = pu1_ref_mb + i4_cand_mvx + (i4_cand_mvy * i4_ref_strd);
+
+            /* compute distortion; the least cost so far is handed to the sad
+             * routine as its fifth argument */
+            ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
+            DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
+
+            /* compute cost. The mv is scaled by 4 with a multiplication, as
+             * left shifting a negative mv is undefined behaviour in C */
+            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[(i4_cand_mvx * 4) - i4_pred_mvx]
+                            + pu1_mv_bits[(i4_cand_mvy * 4) - i4_pred_mvy] );
+
+            /* strict comparison, the earlier candidate is retained on a tie */
+            if (i4_mb_cost < i4_mb_cost_least)
+            {
+                i4_mb_cost_least = i4_mb_cost;
+
+                i4_distortion_least = i4_mb_distortion;
+
+                u4_srch_pos_idx = i;
+            }
+        }
+    }
+
+    /* the mb partition is updated only if the best candidate lowers the cost
+     * it already holds */
+    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx;
+        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+*  The search range is first restricted to ai2_srch_boundaries about the best
+*  initial candidate. The search selected by u4_me_speed_preset (diamond search
+*  is the only one handled) then refines the mv, and the resulting full pel mv
+*  is scaled by 4 before it is left in the mb partition ctxt.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context; its search range and mb partition ctxt are updated
+*
+* @returns  none; mv pair, distortion and cost are available in
+*  ps_me_ctxt->s_mb_part
+*
+* @remarks any value of u4_me_speed_preset other than DMND_SRCH trips an
+*  assert
+*
+*******************************************************************************
+*/
+void ime_full_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* mb part info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /******************************************************************/
+    /* Modify Search range about initial candidate instead of zero mv */
+    /******************************************************************/
+    /*
+     * FIXME: The motion vectors in a way can become unbounded. It may so happen that
+     * MV might exceed the limit of the profile configured.
+     */
+    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
+                                      -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
+                                       ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
+                                      -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
+                                       ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+
+    /************************************************************/
+    /* Traverse about best initial candidate for mv             */
+    /************************************************************/
+    switch (ps_me_ctxt->u4_me_speed_preset)
+    {
+        case DMND_SRCH:
+            ime_diamond_search_16x16(ps_me_ctxt);
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    /* convert the mv to quarter pel units. The scaling is a multiplication
+     * because left shifting a negative mv is undefined behaviour in C */
+    ps_mb_part->s_mv_curr.i2_mvx = ps_mb_part->s_mv_curr.i2_mvx * 4;
+    ps_mb_part->s_mv_curr.i2_mvy = ps_mb_part->s_mv_curr.i2_mvy * 4;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+*  The sad at the eight half pel positions around the full pel motion vector
+*  is computed in one call. The position with least cost, or the full pel mv
+*  if none is cheaper, is chosen as the mv for the current mb and the half pel
+*  buffer it refers to is recorded in the mb partition ctxt.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context; s_mb_part receives the mv, cost, distortion and the
+*  best half pel buffer pointer
+*
+* @returns none
+*
+* @remarks when HP_PL is not defined, pu1_half_x, pu1_half_y & pu1_half_xy of
+*  the me context are moved to the winning position of their plane. The best
+*  half pel buffer pointer is NULL when the full pel mv is retained
+*
+*******************************************************************************
+*/
+void ime_sub_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+
+
+    /* pointers to ref. half pel planes */
+    UWORD8 *pu1_ref_mb_half_x;
+    UWORD8 *pu1_ref_mb_half_y;
+    UWORD8 *pu1_ref_mb_half_xy;
+
+    /* copies of the above, from which the best half pel buffer is derived */
+    UWORD8 *pu1_ref_mb_half_x_temp;
+    UWORD8 *pu1_ref_mb_half_y_temp;
+    UWORD8 *pu1_ref_mb_half_xy_temp;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+
+    WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* SAD(distortion metric) of an mb */
+    WORD32 i4_mb_distortion;
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost;
+    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
+
+    /* Best half pel buffer; stays NULL if no half pel candidate is chosen */
+    UWORD8 *pu1_best_hpel_buf = NULL;
+
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* Motion vectors in full-pel units */
+    WORD16 mv_x, mv_y;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* Order in which the sad of the half pel candidates is read from ai4_sad */
+    /**************************************/
+    /* 1 entry per half pel candidate     */
+    /* idx 0 - half x = 1, half y = 0     */
+    /* idx 1 - half x = -1, half y = 0    */
+    /* idx 2 - half x = 0, half y = 1     */
+    /* idx 3 - half x = 0, half y = -1    */
+    /* idx 4 - half x = 1, half y = 1     */
+    /* idx 5 - half x = -1, half y = 1    */
+    /* idx 6 - half x = 1, half y = -1    */
+    /* idx 7 - half x = -1, half y = -1   */
+    /**************************************/
+    /* temp var */
+    WORD16 i2_mv_u_x, i2_mv_u_y;
+    WORD32 i, j;
+    WORD32 ai4_sad[8];
+
+    i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
+    i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /************************************************************/
+    /* Evaluate half pel                                        */
+    /************************************************************/
+    mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
+    mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
+    /* NOTE: scaled back below with '* 4'; left shifting a negative mv is undefined */
+
+    /**************************************************************/
+    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
+    /* left side of full pel                                      */
+    /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
+    /* top  side of full pel                                      */
+    /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
+    /* on the top left side of full pel                           */
+    /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
+    /* default positions are                                      */
+    /* ps_me_ctxt->pu1_half_x = right half_pel                    */
+    /*  ps_me_ctxt->pu1_half_y = bottom half_pel                  */
+    /*  ps_me_ctxt->pu1_half_xy = bottom right half_pel           */
+    /* Hence corresponding adjustments made here                  */
+    /**************************************************************/
+
+    pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1;
+    pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd;
+    pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd;
+
+
+    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
+                                                 pu1_ref_mb_half_y,
+                                                 pu1_ref_mb_half_xy,
+                                                 i4_src_strd, i4_ref_strd,
+                                                 ai4_sad);
+
+    /* Half x plane */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = (mv_x * 4) + 2;
+        WORD32 mv_y_tmp = (mv_y * 4);
+
+        mv_x_tmp -= (i * 4);
+
+        i4_mb_distortion = ai4_sad[i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+
+            i4_distortion_least = i4_mb_distortion;
+
+            i2_mv_u_x = mv_x_tmp;
+
+            i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL /*choosing whether left or right half_x*/
+            ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x_temp - i;
+            pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
+#endif
+        }
+
+    }
+
+    /* Half y plane */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = (mv_x * 4);
+        WORD32 mv_y_tmp = (mv_y * 4) + 2;
+
+        mv_y_tmp -= (i * 4);
+
+        i4_mb_distortion = ai4_sad[2 + i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+
+            i4_distortion_least = i4_mb_distortion;
+
+            i2_mv_u_x = mv_x_tmp;
+
+            i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL/*choosing whether top or bottom half_y*/
+            ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
+            pu1_best_hpel_buf = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
+#endif
+        }
+
+    }
+
+    /* Half xy plane */
+    for(j = 0; j < 2; j++)
+    {
+        for(i = 0; i < 2; i++)
+        {
+            WORD32 mv_x_tmp = (mv_x * 4) + 2;
+            WORD32 mv_y_tmp = (mv_y * 4) + 2;
+
+            mv_x_tmp -= (i * 4);
+            mv_y_tmp -= (j * 4);
+
+            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
+
+            /* compute cost */
+            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                            + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+            if (i4_mb_cost < i4_mb_cost_least)
+            {
+                i4_mb_cost_least = i4_mb_cost;
+
+                i4_distortion_least = i4_mb_distortion;
+
+                i2_mv_u_x = mv_x_tmp;
+
+                i2_mv_u_y = mv_y_tmp;
+
+#ifndef HP_PL /*choosing between four half_xy */
+                ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
+                pu1_best_hpel_buf =  pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
+#endif
+            }
+
+        }
+    }
+
+    ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+    ps_mb_part->i4_mb_distortion = i4_distortion_least;
+    ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
+    ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
+    ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+*  Biased sad at the skip mv; stored in ps_smb_part_info if not above its cost
+* @param[in,out] ps_me_ctxt
+*  pointer to me ctxt; i4_min_sad & u4_min_sad_reached may be updated
+* @param[in] pv_skip_mv
+*  pointer to skip mv (an ime_mv_t)
+* @param[in,out] ps_smb_part_info
+*  mb partition ctxt that receives the skip mv, distortion and cost
+* @param[in] u4_use_stat_sad
+*  1 selects pf_ime_compute_sad_stat_luma_16x16 for the sad computation
+* @returns  none
+* @remarks
+* NOTE: while computing the skip cost, do not enable early exit from compute
+* sad function because, a negative bias gets added later
+*******************************************************************************
+*/
+void ime_compute_skip_cost
+    (
+         me_ctxt_t *ps_me_ctxt,
+         void *pv_skip_mv,
+         mb_part_ctxt *ps_smb_part_info,
+         UWORD32 u4_use_stat_sad
+    )
+{
+
+    /* pointers to src & ref macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD(distortion metric) of an mb */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost;
+
+    /* Motion vectors in full-pel units */
+    WORD16 mv_x, mv_y;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* skip mv */
+    ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv;
+
+    /* temp var */
+    UWORD8 *pu1_ref = NULL;
+    UWORD32 u4_is_nonzero;
+    /* NOTE(review): the range is applied to the unscaled skip mv - confirm the units match */
+    s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx);
+    s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy);
+
+    if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) ||
+                    (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy))
+    {
+        /* skip motion vector not with in bounds */
+        /* it is possible that mv is already evaluated */
+        return ;
+    }
+
+    mv_x = (ps_skip_mv->i2_mvx + 2) >> 2;
+    mv_y = (ps_skip_mv->i2_mvy + 2) >> 2;
+
+    /* the rounded mv is scaled back with a multiplication, as left shifting a
+     * negative mv is undefined behaviour in C */
+    if ((mv_x * 4) != ps_skip_mv->i2_mvx || (mv_y * 4) != ps_skip_mv->i2_mvy)
+    {
+        /* the skip mv does not point to a full pel position; only full pel
+         * skip mvs are evaluated by this function */
+        return ;
+    }
+    else
+    {
+        /* adjust ref pointer */
+        pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd);
+    }
+
+    if(u4_use_stat_sad == 1)
+    {
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd,
+                ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,&u4_is_nonzero);
+
+        /*
+         *NOTE The check here is two fold
+         * One is checking if the sad has been reached, ie min sad, which is a configurable parameter
+         * If that is reached,we need not do any mode evaluation
+         * Similarly if we find a distortion of zero there is no point of doing any further mode evaluation
+         * as sad is a non negative quantity
+         * hence in this case too, no further evaluation is necessary
+         */
+        /*
+         *NOTE in case we need to disable the zero check using satdq,
+         *  we need only to set the u4_is_nonzero to a non zero value
+         */
+        if(u4_is_nonzero==0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->u4_min_sad_reached = 1;    /* found min sad*/
+            ps_me_ctxt->i4_min_sad =  (u4_is_nonzero == 0)?0:i4_mb_distortion;
+        }
+    }
+    else
+    {
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion);
+
+        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
+            ps_me_ctxt->u4_min_sad_reached = 1;    /* found min sad*/
+        }
+    }
+
+    /* for skip mode cost & distortion are identical
+     * But we shall add a bias to favor skip mode.
+     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
+     * TODO : Empirical analysis of SKIP_BIAS is necessary */
+#define SKIP_BIAS 8
+    i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS);
+#undef SKIP_BIAS
+
+    if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
+    {
+        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
+        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
+        ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx;
+        ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy;
+    }
+}
+
diff --git a/encoder/ime.h b/encoder/ime.h
new file mode 100755
index 0000000..5c039e8
--- /dev/null
+++ b/encoder/ime.h
@@ -0,0 +1,209 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ime.h
+ *
+ * @brief
+ *  Contains the constant macro and function declarations of the motion estimation module
+ *
+ * @author
+ *  Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IME_H_
+#define IME_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ *  @brief      Number of iterations before exiting during diamond search
+******************************************************************************
+ */
+#define NUM_LAYERS 16
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+*  This function computes the sad at vertices of several layers of diamond grid
+*  at a time. The number of layers of diamond grid that would be evaluated is
+*  configurable. The function computes the sad at vertices of a diamond grid. If
+*  the sad at the center of the diamond grid is less than the sad at any other
+*  point of the diamond grid, the function marks the candidate Mb partition as
+*  mv.
+*
+* @param[in] ps_me_ctxt
+*  pointer to me context; this is the only argument of the function
+*  NOTE(review): whether the context is also written to cannot be seen from
+*  this header - confirm against the definition
+*
+* @note
+*  The mb partition ctxt, lambda motion and fast sad flag are not passed as
+*  separate arguments. NOTE(review): presumably they are carried inside
+*  me_ctxt_t - confirm against the structure definition.
+*
+* @returns  none (the function is declared void)
+*  NOTE(review): the mv pair and the corresponding distortion and cost are
+*  presumably written back through ps_me_ctxt - confirm against ime.c
+*
+* @remarks This module cannot be part of the final product due to its lack of
+* computational feasibility. This is only for quality eval purposes.
+*
+*******************************************************************************
+*/
+extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt);
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+*  This function determines the position in the search window at which the motion
+*  estimation should begin in order to minimise the number of search iterations.
+*
+* @param[in] ps_me_ctxt
+*  pointer to me context; this is the only argument of the function
+*
+* @note
+*  NOTE(review): the mb partition ctxt, lambda motion and fast sad flag are
+*  presumably read from me_ctxt_t rather than passed in - confirm.
+*
+* @returns  none (the function is declared void)
+*  NOTE(review): the mv pair, distortion and cost are presumably written back
+*  through ps_me_ctxt - confirm against ime.c
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_evaluate_init_srchposn_16x16
+        (
+            me_ctxt_t *ps_me_ctxt
+        );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+*  This function begins by computing the mv predict vector for the current mb.
+*  This is used for cost computations. Further, based on the algo. chosen, it
+*  looks through a set of candidate vectors that best represent the mb at least
+*  cost and returns this information.
+*
+* @param[in] ps_me_ctxt
+*  pointer to me context; this is the only argument of the function (no
+*  separate proc ctxt is passed)
+*
+* @returns  none (the function is declared void)
+*  NOTE(review): the mv pair, distortion and cost are presumably written back
+*  through ps_me_ctxt - confirm against ime.c
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_full_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+*  This function begins by searching across all sub pixel sample points
+*  around the full pel motion vector. The vector with least cost is chosen as
+*  the mv for the current mb. If the skip mode is not evaluated while analysing
+*  the initial search candidates then analyse it here and update the mv.
+*
+* @param[in] ps_me_ctxt
+*  pointer to me context; this is the only argument of the function (no
+*  separate proc ctxt is passed)
+*  NOTE(review): the updated mv is presumably stored through this pointer -
+*  confirm against ime.c
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_sub_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me ctxt; i4_min_sad and u4_min_sad_reached may be updated
+* @param[in] pv_skip_mv
+*  pointer to skip mv (passed as void *)
+* @param[in,out] ps_smb_part_info
+*  mb partition info; overwritten when the skip cost is <= its stored cost
+* @param[in] u4_use_stat_sad
+*  NOTE(review): presumably selects an alternate sad routine - confirm
+*
+* @returns  none
+*
+* @remarks NOTE: while computing the skip cost, do not enable early exit from
+* the compute sad function because a negative bias gets added later
+*
+*******************************************************************************
+*/
+extern void ime_compute_skip_cost
+    (
+        me_ctxt_t *ps_me_ctxt,
+        void *pv_skip_mv,
+        mb_part_ctxt *ps_smb_part_info,
+        UWORD32 u4_use_stat_sad
+    );
+
+
+#endif /* IME_H_ */
diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h
new file mode 100755
index 0000000..14d9c55
--- /dev/null
+++ b/encoder/ime_defs.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_defs.h
+*
+* @brief
+*  Macro definitions (candidate labels, directions, MB size, search types) used in the code
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IME_DEFS_H_
+#define _IME_DEFS_H_
+
+
+/* Macros to Label candidates */
+#define     SKIP_CAND 0
+#define     ZERO_CAND 1
+#define     LEFT_CAND 2
+#define     TOP_CAND  3
+#define     TOPR_CAND 4
+
+#define NONE 0
+#define LEFT 1
+#define RIGHT 2
+#define TOP 3
+#define BOTTOM 4
+
+#define MB_SIZE 16
+
+#define FULL_SRCH 0
+#define DMND_SRCH 100
+#define NSTEP_SRCH 50
+#define HEX_SRCH 75
+
+#endif /*_IME_DEFS_H_*/
+
diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c
new file mode 100755
index 0000000..23a1fbc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.c
@@ -0,0 +1,1262 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.c
+*
+* @brief
+*  This file contains definitions of routines that compute distortion
+*  between two macro/sub blocks of identical dimensions
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ime_sub_pel_compute_sad_16x16()
+*  - ime_calculate_sad4_prog()
+*  - ime_calculate_sad3_prog()
+*  - ime_calculate_sad2_prog()
+*  - ime_compute_sad_16x16()
+*  - ime_compute_sad_16x16_fast()
+*  - ime_compute_sad_16x16_ea8()
+*  - ime_compute_sad_8x8()
+*  - ime_compute_sad_4x4()
+*  - ime_compute_sad_16x8()
+*  - ime_compute_satqd_16x16_lumainter()
+*  - ime_compute_satqd_8x16_chroma()
+*  - ime_compute_satqd_16x16_lumaintra()
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) of a 16x16 source block against eight
+*        half pel candidates
+*
+* @par Description
+*   The source block is compared with the three half pel planes and with the
+*   same planes displaced by one pixel to the left and/or one row up, which
+*   gives eight candidate positions. One SAD is accumulated per candidate over
+*   MB_SIZE x MB_SIZE pixels.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ref_half_x
+*  UWORD8 pointer to half pel buffer (only read here); the pixel one column
+*  to its left is read as well
+*
+* @param[in] pu1_ref_half_y
+*  UWORD8 pointer to half pel buffer (only read here); the row above it is
+*  read as well
+*
+* @param[in] pu1_ref_half_xy
+*  UWORD8 pointer to half pel buffer (only read here); the column to its left
+*  and the row above it are read as well
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ref_strd
+*  integer ref stride, shared by all three half pel buffers
+*
+* @param[out] pi4_sad
+*  integer evaluated sad, 8 entries, cleared on entry
+*  pi4_sad[0] - half x
+*  pi4_sad[1] - half x - 1
+*  pi4_sad[2] - half y
+*  pi4_sad[3] - half y - strd
+*  pi4_sad[4] - half xy
+*  pi4_sad[5] - half xy - 1
+*  pi4_sad[6] - half xy - strd
+*  pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*  The half pel buffers must be readable one column to the left of and one
+*  row above the addressed block, as listed for each pointer above.
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
+                                   UWORD8 *pu1_ref_half_x,
+                                   UWORD8 *pu1_ref_half_y,
+                                   UWORD8 *pu1_ref_half_xy,
+                                   WORD32 src_strd,
+                                   WORD32 ref_strd,
+                                   WORD32 *pi4_sad)
+{
+    /*
+     * candidate planes, kept in the same order as the entries of pi4_sad so
+     * that candidate k always accumulates into pi4_sad[k]
+     */
+    UWORD8 *apu1_cand[8];
+    WORD32 i4_row, i4_col, i4_cand;
+
+    /* half x plane and the position one pixel to its left */
+    apu1_cand[0] = pu1_ref_half_x;
+    apu1_cand[1] = pu1_ref_half_x - 1;
+
+    /* half y plane and the position one row above it */
+    apu1_cand[2] = pu1_ref_half_y;
+    apu1_cand[3] = pu1_ref_half_y - ref_strd;
+
+    /* half xy plane, then its left, top and top-left positions */
+    apu1_cand[4] = pu1_ref_half_xy;
+    apu1_cand[5] = pu1_ref_half_xy - 1;
+    apu1_cand[6] = pu1_ref_half_xy - ref_strd;
+    apu1_cand[7] = pu1_ref_half_xy - ref_strd - 1;
+
+    /* every accumulator starts from zero */
+    for(i4_cand = 0; i4_cand < 8; i4_cand++)
+    {
+        pi4_sad[i4_cand] = 0;
+    }
+
+    for(i4_row = 0; i4_row < MB_SIZE; i4_row++)
+    {
+        for(i4_col = 0; i4_col < MB_SIZE; i4_col++)
+        {
+            /* source pixel shared by all eight candidates */
+            WORD32 i4_pel = pu1_src[i4_col];
+
+            for(i4_cand = 0; i4_cand < 8; i4_cand++)
+            {
+                WORD32 i4_err = i4_pel - apu1_cand[i4_cand][i4_col];
+
+                pi4_sad[i4_cand] += ABS(i4_err);
+            }
+        }
+
+        /* advance the source and every candidate plane to the next row */
+        pu1_src += src_strd;
+        for(i4_cand = 0; i4_cand < 8; i4_cand++)
+        {
+            apu1_cand[i4_cand] += ref_strd;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad at vertices of diamond grid
+* centered at reference pointer and at unit distance from it.
+*
+* @param[in] pu1_ref
+*  UWORD8 pointer to the reference (centre of the diamond); the pixels one
+*  column to either side and one row above and below the block are read
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad, 4 entries, cleared on entry
+*  pi4_sad[0] - left   (pu1_ref - 1)
+*  pi4_sad[1] - right  (pu1_ref + 1)
+*  pi4_sad[2] - top    (pu1_ref - ref_strd)
+*  pi4_sad[3] - bottom (pu1_ref + ref_strd)
+*
+* @returns  none; the four sads are stored in pi4_sad
+*
+* @remarks
+*  The pointers are stepped by the signed strides. A 32 bit unsigned row
+*  offset would wrap to a large forward jump for a stride smaller than
+*  MB_SIZE (or a negative stride) where pointers are wider than 32 bits.
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    WORD32 row, col;
+
+    /* the four accumulators start from zero */
+    memset(pi4_sad, 0, 4 * sizeof(WORD32));
+
+    for(row = 0; row < MB_SIZE; row++)
+    {
+        for(col = 0; col < MB_SIZE; col++)
+        {
+            WORD32 src = pu1_src[col];
+            WORD32 diff;
+
+            /* left vertex */
+            diff = src - pu1_ref[col - 1];
+            pi4_sad[0] += ABS(diff);
+
+            /* right vertex */
+            diff = src - pu1_ref[col + 1];
+            pi4_sad[1] += ABS(diff);
+
+            /* top vertex */
+            diff = src - pu1_ref[col - ref_strd];
+            pi4_sad[2] += ABS(diff);
+
+            /* bottom vertex */
+            diff = src - pu1_ref[col + ref_strd];
+            pi4_sad[3] += ABS(diff);
+        }
+
+        /*
+         * next row: the signed strides are added directly, which is valid
+         * for any stride value
+         */
+        pu1_src += src_strd;
+        pu1_ref += ref_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad of the source block against
+* three reference blocks in a single pass. The three reference blocks are
+* addressed by independent pointers that share one stride.
+*
+* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
+*  UWORD8 pointers to the three references
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad, 3 entries, cleared on entry
+*  pi4_sad[0] - sad against pu1_ref1
+*  pi4_sad[1] - sad against pu1_ref2
+*  pi4_sad[2] - sad against pu1_ref3
+*
+* @returns  none; the three sads are stored in pi4_sad
+*
+* @remarks
+*  - pi4_sad is cleared before accumulation, as ime_calculate_sad4_prog does,
+*    so the result does not depend on what the caller left in the array.
+*  - The row offsets are signed. A 32 bit unsigned offset would wrap to a
+*    large forward jump for a stride smaller than MB_SIZE (or a negative
+*    stride) where pointers are wider than 32 bits.
+*  - NOTE(review): USADA8 is assumed to add the sad of 4 consecutive pixels
+*    to its third argument (the pointers are stepped by 4 after each use) -
+*    confirm against ime_platform_macros.h
+*
+*******************************************************************************
+*/
+void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_ref3,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    WORD32 i, j;
+
+    /* distance from the end of one row to the start of the next (signed) */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    /*
+     * USADA8 is used below as an accumulate-only operation, so every sad
+     * has to start from zero here
+     */
+    memset(pi4_sad, 0, 3 * sizeof(WORD32));
+
+    for(i = MB_SIZE; i > 0; i--)
+    {
+        /* one row of MB_SIZE pixels, 4 pixels per step */
+        for(j = MB_SIZE; j > 0; j -= 4)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
+
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+            pu1_ref3 += 4;
+        }
+
+        /* move all four pointers to the start of the next row */
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+        pu1_ref3 += i4_ref_buf_offset;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad of the source block against
+* two reference blocks in a single pass. The two reference blocks are
+* addressed by independent pointers that share one stride.
+*
+* @param[in] pu1_ref1, pu1_ref2
+*  UWORD8 pointers to the two references
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array evaluated sad, 2 entries, cleared on entry
+*  pi4_sad[0] - sad against pu1_ref1
+*  pi4_sad[1] - sad against pu1_ref2
+*
+* @returns  none; the two sads are stored in pi4_sad
+*
+* @remarks
+*  - pi4_sad is cleared before accumulation, as ime_calculate_sad4_prog does,
+*    so the result does not depend on what the caller left in the array.
+*  - The row offsets are signed. A 32 bit unsigned offset would wrap to a
+*    large forward jump for a stride smaller than MB_SIZE (or a negative
+*    stride) where pointers are wider than 32 bits.
+*
+*******************************************************************************
+*/
+void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    WORD32 i, j;
+
+    /* distance from the end of one row to the start of the next (signed) */
+    WORD32 i4_ref_buf_offset = ref_strd - MB_SIZE;
+    WORD32 i4_cur_buf_offset = src_strd - MB_SIZE;
+
+    /* USADA8 is used below as an accumulate-only operation, so every sad
+     * has to start from zero here */
+    memset(pi4_sad, 0, 2 * sizeof(WORD32));
+
+    for(i = MB_SIZE; i > 0; i--)
+    {
+        /* one row of MB_SIZE pixels, 4 pixels per step */
+        for(j = MB_SIZE; j > 0; j -= 4)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+        }
+
+        /* move all three pointers to the start of the next row */
+        pu1_src += i4_cur_buf_offset;
+        pu1_ref1 += i4_ref_buf_offset;
+        pu1_ref2 += i4_ref_buf_offset;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par   Description
+*   This functions computes SAD between 2 16x16 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set i4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad; after an early exit it holds the sad of the rows
+*  processed so far, not of the whole block
+*
+* @remarks
+*  - The row offsets are signed. A 32 bit unsigned offset would wrap to a
+*    large forward jump for a stride smaller than 16 (or a negative stride)
+*    where pointers are wider than 32 bits.
+*  - NOTE(review): USADA8 is assumed to add the sad of 4 consecutive pixels
+*    to its third argument (the pointers are stepped by 4 after each use) -
+*    confirm against ime_platform_macros.h
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16(UWORD8 *pu1_src,
+                           UWORD8 *pu1_est,
+                           WORD32 src_strd,
+                           WORD32 est_strd,
+                           WORD32 i4_max_sad,
+                           WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /* distance from the end of one row to the start of the next (signed) */
+    WORD32 i4_src_offset = src_strd - 16;
+    WORD32 i4_est_offset = est_strd - 16;
+    UWORD32 i, j;
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
+
+    for(i = 16; i > 0; i--)
+    {
+        /* one row of 16 pixels, 4 pixels per step */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
+
+            break;
+        }
+
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+
+    /* full sad, or the partial sad accumulated up to the early exit */
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par   Description
+*   This functions computes an approximate SAD between 2 16x16 blocks. Only
+*   every other row (8 of the 16 rows) is accumulated and the sum is doubled.
+*   There is no early exit in this variant; i4_max_sad is accepted but not
+*   used (the signature matches ime_compute_sad_16x16).
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  unused
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad (twice the sad of the rows that were visited)
+*
+* @remarks
+*  - The row offsets are signed. A 32 bit unsigned offset would wrap to a
+*    large forward jump for a stride smaller than 8 (or a negative stride)
+*    where pointers are wider than 32 bits.
+*  - NOTE(review): USADA8 is assumed to add the sad of 4 consecutive pixels
+*    to its third argument (the pointers are stepped by 4 after each use) -
+*    confirm against ime_platform_macros.h
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /* distance from the end of a visited row to the start of the next one,
+     * two rows further down (signed) */
+    WORD32 i4_src_offset = 2 * src_strd - 16;
+    WORD32 i4_est_offset = 2 * est_strd - 16;
+    UWORD32 i, j;
+
+    UNUSED(i4_max_sad);
+
+    for(i = 16; i > 0; i -= 2)
+    {
+        /* one row of 16 pixels, 4 pixels per step */
+        for(j = 4; j > 0; j--)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        pu1_src += i4_src_offset;
+        pu1_est += i4_est_offset;
+    }
+
+    /* only half of the rows were visited, so scale the sum by two */
+    *pi4_mb_distortion = (i4_sad << 1);
+}
+
+/**
+******************************************************************************
+*
+*  @brief computes distortion (SAD) between 2 8x8 blocks
+*
+*  @par   Description
+*   This functions computes SAD between 2 8x8 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set i4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad; after an early exit it holds the sad of the rows
+*  processed so far, not of the whole block
+*
+* @remarks
+*  The pointers are stepped by the signed strides, so a stride smaller than 8
+*  (or a negative stride) is handled where pointers are wider than 32 bits.
+*
+******************************************************************************
+ */
+void ime_compute_sad_8x8(UWORD8 *pu1_src,
+                         UWORD8 *pu1_est,
+                         WORD32 src_strd,
+                         WORD32 est_strd,
+                         WORD32 i4_max_sad,
+                         WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    WORD32 row, col;
+
+    for(row = 0; row < 8; row++)
+    {
+        for(col = 0; col < 8; col++)
+        {
+            WORD32 diff = pu1_src[col] - pu1_est[col];
+            i4_sad += ABS(diff);
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+            break;
+        }
+        /* next row, stepped by the signed strides */
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+*  @brief computes distortion (SAD) between 2 4x4 blocks
+*
+*  @par   Description
+*   This functions computes SAD between 2 4x4 blocks by accumulating one
+*   USADA8 result per row over 4 rows. There is no early exit in this
+*   function; i4_max_sad is accepted but not used.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  unused
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*  NOTE(review): USADA8 is assumed to add the sad of 4 consecutive pixels to
+*  its third argument - confirm against ime_platform_macros.h
+*
+******************************************************************************
+*/
+void ime_compute_sad_4x4
+        (
+            UWORD8 *pu1_src,
+            UWORD8 *pu1_est,
+            WORD32 src_strd,
+            WORD32 est_strd,
+            WORD32 i4_max_sad,
+            WORD32 *pi4_mb_distortion
+        )
+{
+    WORD32 i4_sad = 0;
+    WORD32 i4_row;
+
+    UNUSED(i4_max_sad);
+
+    /* rows 0 to 2: accumulate the row, then step down to the next one */
+    for(i4_row = 0; i4_row < 3; i4_row++)
+    {
+        USADA8(pu1_src, pu1_est, i4_sad);
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    /* row 3: the pointers are not needed afterwards, so they stay put */
+    USADA8(pu1_src, pu1_est, i4_sad);
+    *pi4_mb_distortion = i4_sad;
+}
+
+
+/**
+******************************************************************************
+*
+*  @brief computes distortion (SAD) between 2 16x8  blocks
+*
+*
+*  @par   Description
+*   This functions computes SAD between 2 16x8 blocks. There is a provision
+*   for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
+*   compute the distortion of the entire block set i4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared after every row
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad; after an early exit it holds the sad of the rows
+*  processed so far, not of the whole block
+*
+* @remarks
+*  The pointers are stepped by the signed strides, so a stride smaller than 16
+*  (or a negative stride) is handled where pointers are wider than 32 bits.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8
+        (
+            UWORD8 *pu1_src,
+            UWORD8 *pu1_est,
+            WORD32 src_strd,
+            WORD32 est_strd,
+            WORD32 i4_max_sad,
+            WORD32 *pi4_mb_distortion
+        )
+{
+    WORD32 i4_sad = 0;
+    UWORD32 i;
+    WORD32 col;
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
+
+    for(i = 8; i > 0; i--)
+    {
+        for(col = 0; col < 16; col++)
+        {
+            WORD32 diff = pu1_src[col] - pu1_est[col];
+
+            i4_sad += ABS(diff);
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
+
+            break;
+        }
+
+        /* next row, stepped by the signed strides */
+        pu1_src += src_strd;
+        pu1_est += est_strd;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks, with a single
+*        early exit check after half of the rows
+*
+* @par   Description
+*   This functions computes SAD between 2 16x16 blocks in two passes. The
+*   first pass accumulates the even rows (0, 2, ... 14). If the sum already
+*   exceeds the maximum allowed SAD the function returns that partial sum.
+*   Otherwise the second pass adds the odd rows (1, 3, ... 15). To compute
+*   the distortion of the entire block set i4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block addressed by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion; compared once, after the first pass
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad; after an early exit it holds the sad of the even
+*  rows only
+*
+* @remarks
+*  - The pointers are stepped by the signed strides. A 32 bit unsigned row
+*    offset would wrap to a large forward jump for a stride smaller than 8
+*    (or a negative stride) where pointers are wider than 32 bits.
+*  - When no early exit is taken every one of the 16 rows has been visited
+*    exactly once, so the value stored is the sad of the complete block; only
+*    the order in which the rows are accumulated is unusual.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
+                               UWORD8 *pu1_est,
+                               WORD32 src_strd,
+                               WORD32 est_strd,
+                               WORD32 i4_max_sad,
+                               WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    WORD32 pass, row, col;
+
+    /*
+     * pass 0 visits rows 0, 2, ... 14 and pass 1 visits rows 1, 3, ... 15;
+     * the early exit check sits between the two passes
+     */
+    for(pass = 0; pass < 2; pass++)
+    {
+        /* first row of this pass */
+        UWORD8 *pu1_src_row = pu1_src + pass * src_strd;
+        UWORD8 *pu1_est_row = pu1_est + pass * est_strd;
+
+        for(row = 0; row < 8; row++)
+        {
+            for(col = 0; col < 16; col++)
+            {
+                /* SAD */
+                WORD32 diff = pu1_src_row[col] - pu1_est_row[col];
+
+                i4_sad += ABS(diff);
+            }
+
+            /* skip the row that belongs to the other pass (signed step) */
+            pu1_src_row += 2 * src_strd;
+            pu1_est_row += 2 * est_strd;
+        }
+
+        /* early exit, evaluated only once, after the even rows */
+        if((0 == pass) && (i4_max_sad < i4_sad))
+        {
+            break;
+        }
+    }
+
+    /* full sad, or the even row sad when the early exit was taken */
+    *pi4_mb_distortion = i4_sad;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes SAD between two 16x16 blocks
+*        It also computes if the block will be zero after H264 transform and quant for
+*        Intra 16x16 blocks
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the estimated block
+*
+* @param[in] pu2_thrsh
+*  Threshold for each element of transformed quantized block (entries 0..8 are read)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @param[out] pu4_is_non_zero
+*  Pointer to store the flag: 1 if any 4x4 sub-block reaches a threshold, else 0
+*
+* @remarks
+*  NOTE(review): the name says "lumainter" while the brief says Intra - confirm.
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         UWORD16 *pu2_thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD32 *pu4_is_non_zero)
+{
+    UWORD32 i,j;
+    WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+    UWORD8 *pu1_src_lp,*pu1_est_lp;
+    UWORD32 sad = 0;
+    /* 'sad' doubles as the result flag: 0 until some sub-block reaches a threshold */
+    (*pi4_mb_distortion) = 0;
+    for(i=0;i<4;i++)
+    {
+        for(j=0;j<4;j++)
+        {
+            pu1_src_lp = pu1_src + 4*j;
+            pu1_est_lp = pu1_est + 4*j;
+            /* row 0 of the 4x4: s1 = outer columns (0,3), s4 = inner columns (1,2) */
+            s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+            /* rows 1 and 2: s2 = outer columns, s3 = inner columns */
+            s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+            /* row 3 is added to the same sums as row 0 */
+            s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+            /* plain SAD of the 4x4 sub-block */
+            sad_1 = s1+s2+s3+s4;
+            /* the threshold test is skipped once the flag has been set */
+            if(sad == 0)
+            {
+                sad_2 = sad_1<<1;
+                /* ls1..ls8: weighted sums of s1..s4 compared against pu2_thrsh[0..7] */
+                ls1 = sad_2 -(s2 + s3);
+                ls2 = sad_2 -(s1 + s4);
+                ls3 = sad_2 -(s3 + s4);
+                ls4 = sad_2 -(s3 - (s1<<1));
+                ls5 = sad_2 -(s4 - (s2<<1));
+                ls6 = sad_2 -(s1 + s2);
+                ls7 = sad_2 -(s2 - (s4<<1));
+                ls8 = sad_2 -(s1 - (s3<<1));
+
+                if(
+                        pu2_thrsh[8] <= sad_1   ||
+                        pu2_thrsh[0] <=  ls2    ||
+                        pu2_thrsh[1] <=  ls1    ||
+                        pu2_thrsh[2] <=  ls8    ||
+                        pu2_thrsh[3] <=  ls5    ||
+
+                        pu2_thrsh[4] <=  ls6    ||
+                        pu2_thrsh[5] <=  ls3    ||
+                        pu2_thrsh[6] <=  ls7    ||
+                        pu2_thrsh[7] <=  ls4
+
+                )sad = 1;
+            }
+            (*pi4_mb_distortion) += sad_1;
+        }
+        pu1_src +=  (src_strd *4);
+        pu1_est +=  (est_strd *4);
+    }
+    *pu4_is_non_zero = sad;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD and SATQD) between 2 16x8 (interleaved) chroma blocks
+*
+*
+* @par   Description
+*   This function computes the SAD between 2 16x8 chroma blocks (interleaved).
+*   It also checks the SATQD (sum of absolute transformed quantized differences) between the blocks.
+*   Per the original notes it should give back zero if the SATQD is zero and the SAD otherwise.
+*   NOTE(review): as written it returns void and stores no result, so nothing reaches
+*   the caller; it also returns early as soon as one threshold check fails.
+*
+*   The transform done here is the transform for chroma blocks in H264
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the estimated block
+*
+* @param[in] max_sad
+*  unused
+*
+* @param[in] thrsh
+*  Threshold for each element of transformed quantized block
+*
+* @remarks
+* Function code is not updated.
+* Will require debugging and minor modifications
+*
+******************************************************************************
+*/
+void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_est,
+                                     WORD32 src_strd,
+                                     WORD32 est_strd,
+                                     WORD32 max_sad,
+                                     UWORD16 *thrsh)
+{
+    WORD32 i,j,plane;
+    WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+    UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
+    WORD32 sad =0;
+    UNUSED(max_sad);
+    /* remember the base pointers: the second plane pass restarts from base + 1 */
+    pu1_src_plane = pu1_src;
+    pu1_est_plane = pu1_est;
+    /* NOTE(review): each plane pass steps over 4 x 8 bytes and 4 x 4 rows - confirm against 16x8 */
+    for(plane =0;plane<2;plane++)
+    {
+        for(i=0;i<4;i++)
+        {
+            for(j=0;j<4;j++)
+            {
+                pu1_src_lp = pu1_src + 8*j;
+                pu1_est_lp = pu1_est + 8*j;
+                /* samples of one plane are 2 bytes apart: the 4 columns sit at offsets 0,2,4,6 */
+                s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+                pu1_src_lp += src_strd;
+                pu1_est_lp += est_strd;
+
+                s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+                s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+                /* plain SAD of this 4x4 sub-block */
+                sad_1 = s1+s2+s3+s4;
+                sad_2 = sad_1<<1;
+
+                ls1 = sad_2 -(s2 + s3);
+                ls2 = sad_2 -(s1 + s4);
+                ls3 = sad_2 -(s3 + s4);
+                ls4 = sad_2 -(s3 - (s1<<1));
+                ls5 = sad_2 -(s4 - (s2<<1));
+                ls6 = sad_2 -(s1 + s2);
+                ls7 = sad_2 -(s2 - (s4<<1));
+                ls8 = sad_2 -(s1 - (s3<<1));
+                /* every bound must stay below its threshold, otherwise the function returns */
+                if(
+                        //thrsh[0] >  sad_1     && Chroma Dc is checked later
+                        thrsh[1] >  ls1     &&
+                        thrsh[2] >  sad_1   &&
+                        thrsh[3] >  ls2     &&
+
+                        thrsh[4] >  ls3     &&
+                        thrsh[5] >  ls4     &&
+                        thrsh[6] >  ls3     &&
+                        thrsh[7] >  ls5     &&
+
+                        thrsh[8] >  sad_1   &&
+                        thrsh[9] >  ls1     &&
+                        thrsh[10]>  sad_1   &&
+                        thrsh[11]>  ls2     &&
+
+                        thrsh[12]>  ls6     &&
+                        thrsh[13]>  ls7     &&
+                        thrsh[14]>  ls6     &&
+                        thrsh[15]>  ls8
+                )
+                {
+                    /*set current sad to be zero*/
+                }
+                else
+                    return ;
+
+                sad += sad_1;
+            }
+            pu1_src +=  (src_strd *4);
+            pu1_est +=  (est_strd *4);
+        }
+        if(sad < (thrsh[0]<<1))sad = 0;
+        else return ;
+        /* restart from the saved base + 1 byte for the second interleaved plane */
+        pu1_src = pu1_src_plane+1;
+        pu1_est = pu1_est_plane+1;
+    }
+    return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD and SATQD) between 2 16x16 blocks
+*
+* @par   Description
+*   This function computes the SAD between 2 16x16 blocks, one 4x4 sub-block at a time.
+*   It also checks the SATQD (sum of absolute transformed quantized differences) between the blocks:
+*   each sub-block is compared against thrsh[1..15] and flagged in sig_nz_sad.
+*   If no sub-block is flagged and the SAD is below thrsh[0]<<2 it gives back zero, otherwise the SAD.
+*   There is no provision for early exit
+*
+*   The transform done here is the transform for inter 16x16 blocks in H264
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd, est_strd
+*  integer strides of the source and of the estimated block
+*
+* @param[in] max_sad
+*  unused
+*
+* @param[in] thrsh
+*  Threshold for each element of transformed quantized block
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @param[out] sig_nz_sad
+*  17 bytes are written: [0] is the DC flag, [1..16] the sub-block flags in raster order
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         WORD32 max_sad,
+                                         UWORD16 *thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD8 *sig_nz_sad)
+{
+    UWORD32 i,j;
+    WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
+    UWORD8 *pu1_src_lp,*pu1_est_lp;
+    UWORD8 *sig_sad_dc;
+    UWORD32 nz_sad_sig = 0;
+    UNUSED(max_sad);
+    *pi4_mb_distortion =0;
+    /* sig_nz_sad[0] is kept for the DC flag; the sub-block flags start at [1] */
+    sig_sad_dc = sig_nz_sad;
+    sig_nz_sad++;
+    /* i: row of 4x4 sub-blocks, j: sub-block within that row */
+    for(i=0;i<4;i++)
+    {
+        for(j=0;j<4;j++)
+        {
+            pu1_src_lp = pu1_src + 4*j;
+            pu1_est_lp = pu1_est + 4*j;
+            /* row 0 of the 4x4: s1 = outer columns (0,3), s4 = inner columns (1,2) */
+            s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+            /* rows 1 and 2: s2 = outer columns, s3 = inner columns */
+            s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+
+            s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+            pu1_src_lp += src_strd;
+            pu1_est_lp += est_strd;
+            /* row 3 is added to the same sums as row 0 */
+            s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+            s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+            /* sad[j] holds twice the sub-block SAD; it is halved where the plain SAD is needed */
+            sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
+        }
+        /* second pass over the same row of sub-blocks: threshold tests and accumulation */
+        for(j=0;j<4;j++)
+        {
+            /* the flag is 0 only if every bound stays below its threshold */
+            if(
+                    //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
+                    thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
+                    thrsh[2] > (sad[j]>>1) &&
+                    thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
+
+                    thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
+                    thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
+                    thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
+                    thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
+
+                    thrsh[8] > (sad[j]>>1) &&
+                    thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
+                    thrsh[10]> (sad[j]>>1) &&
+                    thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
+
+                    thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
+                    thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
+                    thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
+                    thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
+            )
+            {
+                //sad[j] = 0;   /*set current sad to be zero*/
+                sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
+            }
+            else
+            {
+                sig_nz_sad[j] = 1;/*signal that sad is non zero*/
+                nz_sad_sig = 1;
+            }
+            /* the distortion always accumulates the plain SAD, whatever the flag */
+            (*pi4_mb_distortion) += (sad[j]>>1);
+            //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
+        }
+
+        sig_nz_sad += 4;
+        pu1_src +=  (src_strd *4);
+        pu1_est +=  (est_strd *4);
+    }
+    /* DC test on the whole block; '<<' binds tighter than '<', i.e. dist < (thrsh[0]<<2) */
+    if((*pi4_mb_distortion) < thrsh[0]<<2)
+    {
+        *sig_sad_dc = 0;
+        if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
+    }
+    else *sig_sad_dc = 1;
+}
+
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
new file mode 100755
index 0000000..a30e1fc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.h
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.h
+*
+* @brief
+*  This file contains declarations of routines that compute distortion
+*  between two macro/sub blocks of identical dimensions
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef IME_DISTORTION_METRICS_H_
+#define IME_DISTORTION_METRICS_H_
+
+
+/*****************************************************************************/
+/* Type definitions for function prototypes                                  */
+/*****************************************************************************/
+/* Signature of the block SAD routines declared below (16x16, 16x16_fast, 16x8, 16x16_ea8, 8x8, 4x4) */
+typedef void ime_compute_sad_ft(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion);
+/* Signature of ime_calculate_sad4_prog* (one reference pointer; SADs presumably returned via pi4_sad) */
+typedef void ime_compute_sad4_diamond(UWORD8 *pu1_ref,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+/* Signature of ime_calculate_sad3_prog* (three reference pointers sharing one stride) */
+typedef void ime_compute_sad3_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_ref3,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+/* Signature of ime_calculate_sad2_prog* (two reference pointers sharing one stride) */
+typedef void ime_compute_sad2_diamond(UWORD8 *pu1_ref1,
+                                      UWORD8 *pu1_ref2,
+                                      UWORD8 *pu1_src,
+                                      WORD32 ref_strd,
+                                      WORD32 src_strd,
+                                      WORD32 *pi4_sad);
+/* Signature of ime_sub_pel_compute_sad_16x16*: source plus half_x, half_y and half_xy reference pointers */
+typedef void ime_sub_pel_compute_sad_16x16_ft(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_ref_half_x,
+                                              UWORD8 *pu1_ref_half_y,
+                                              UWORD8 *pu1_ref_half_xy,
+                                              WORD32 src_strd,
+                                              WORD32 ref_strd,
+                                              WORD32 *pi4_sad);
+/* Signature of ime_compute_16x16_sad_stat*: SAD plus a threshold based flag written through pu4_is_zero */
+typedef void ime_compute_sad_stat(UWORD8 *pu1_src,
+                                  UWORD8 *pu1_est,
+                                  WORD32 src_strd,
+                                  WORD32 est_strd,
+                                  UWORD16 *pu2_thrsh,
+                                  WORD32 *pi4_mb_distortion,
+                                  UWORD32 *pu4_is_zero);
+/* Signature of ime_compute_satqd_16x16_lumainter*; the last argument receives the non-zero flag */
+typedef void ime_compute_satqd_16x16_lumainter_ft(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         UWORD16 *pu2_thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD32 *pu4_is_non_zero);
+/* Signature of ime_compute_satqd_8x16_chroma: returns void and has no output pointer */
+typedef void ime_compute_satqd_8x16_chroma_ft(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_est,
+                                     WORD32 src_strd,
+                                     WORD32 est_strd,
+                                     WORD32 i4_max_sad,
+                                     UWORD16 *thrsh);
+/* Signature of ime_compute_satqd_16x16_lumaintra: SAD via pi4_mb_distortion, flags via sig_nz_sad */
+typedef void ime_compute_satqd_16x16_lumaintra_ft(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         WORD32 i4_max_sad,
+                                         UWORD16 *thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD8 *sig_nz_sad);
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+/* Generic (unsuffixed) versions */
+ime_compute_sad_ft ime_compute_sad_16x16;
+ime_compute_sad_ft ime_compute_sad_16x16_fast;
+ime_compute_sad_ft ime_compute_sad_16x8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8;
+ime_compute_sad_ft ime_compute_sad_8x8;
+ime_compute_sad_ft ime_compute_sad_4x4;
+ime_compute_sad4_diamond ime_calculate_sad4_prog;
+ime_compute_sad3_diamond ime_calculate_sad3_prog;
+ime_compute_sad2_diamond ime_calculate_sad2_prog;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16;
+ime_compute_sad_stat ime_compute_16x16_sad_stat;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter;
+ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma;
+ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra;
+
+/* SSE4.2 declarations */
+ime_compute_sad_ft ime_compute_sad_16x16_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42;
+ime_compute_sad_ft ime_compute_sad_16x8_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_sse42;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_sse42;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_sse42;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_sse42;
+
+/* assembly - A9Q declarations */
+ime_compute_sad_ft ime_compute_sad_16x16_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_a9q;
+ime_compute_sad_ft ime_compute_sad_16x8_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_a9q;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_a9q;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_a9q;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_a9q;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_a9q;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_a9; /* NOTE(review): suffix is _a9, neighbours use _a9q - confirm against the assembly symbol */
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_a9q;
+
+
+/* assembly - AV8 declarations */
+ime_compute_sad_ft ime_compute_sad_16x16_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_av8;
+ime_compute_sad_ft ime_compute_sad_16x8_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_av8;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_av8;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_av8;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_av8;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_av8;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8;
+
+
+#endif /* IME_DISTORTION_METRICS_H_ */
+
+
diff --git a/encoder/ime_macros.h b/encoder/ime_macros.h
new file mode 100755
index 0000000..a7b8c65
--- /dev/null
+++ b/encoder/ime_macros.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_macros.h
+*
+* @brief
+*  Macro definitions used in the code
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IME_MACROS_H_
+#define _IME_MACROS_H_
+/* NOTE: arguments may be evaluated more than once - do not pass expressions with side effects */
+#define ABS(x)          ((x) < 0 ? (-(x)) : (x))
+#define MAX(a,b)        (((a) > (b)) ? (a) : (b))
+#define MIN(a,b)        (((a) < (b)) ? (a) : (b))
+/* clamps y to the range [miny, maxy] */
+#define CLIP3(miny, maxy, y) (((y) < (miny)) ? (miny) : (((y) > (maxy)) ? (maxy) : (y)))
+#define UNUSED(x) ((void)(x))
+
+#endif /*_IME_MACROS_H_*/
diff --git a/encoder/ime_statistics.h b/encoder/ime_statistics.h
new file mode 100755
index 0000000..eeacaf2
--- /dev/null
+++ b/encoder/ime_statistics.h
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_statistics.h
+*
+* @brief
+*  Macros for gathering SAD early exit statistics and debug histograms
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IME_STATISTICS_H_
+#define _IME_STATISTICS_H_
+#define DEBUG_HISTOGRAM_ENABLE 0
+#define SAD_EXIT_STATS 0
+/* Compile-time switches: set to 1 to compile in the matching instrumentation below */
+
+#if SAD_EXIT_STATS
+
+/**
+******************************************************************************
+* @brief  While computing a SAD with early exit, how often we should check
+* whether the SAD computed so far has exceeded the min sad param is
+* chosen statistically.
+******************************************************************************
+*/
+extern UWORD32 gu4_16x16_sad_ee_stats[16+1];
+extern UWORD32 gu4_16x8_sad_ee_stats[8+1];
+
+/**
+******************************************************************************
+*  @brief print sad early exit stats
+******************************************************************************
+*/
+extern void print_sad_ee_stats(void);
+/* NOTE(review): both macros expand with a trailing ';' - take care inside an unbraced if/else */
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) \
+                gu4_16x16_sad_ee_stats[i]++;
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) \
+                gu4_16x8_sad_ee_stats[i]++;
+
+#else
+/* statistics disabled: the gather macros expand to nothing */
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i)
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i)
+
+#endif
+
+
+#if DEBUG_HISTOGRAM_ENABLE
+#define DEBUG_HISTOGRAM_INIT() debug_histogram_init()
+#define DEBUG_HISTOGRAM_DUMP(condition) if(condition) debug_histogram_dump()
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y) debug_mv_histogram_add(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level) debug_sad_histogram_add(sad, level)
+#else
+#define DEBUG_HISTOGRAM_INIT()
+#define DEBUG_HISTOGRAM_DUMP(condition)
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level)
+#endif
+
+
+
+#endif /*_IME_STATISTICS_H_*/
diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h
new file mode 100755
index 0000000..7819b91
--- /dev/null
+++ b/encoder/ime_structs.h
@@ -0,0 +1,305 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ime_structs.h
+ *
+ * @brief
+ *  Structures holding the motion vectors and the motion estimation context
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  -
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IME_STRUCTS_H_
+#define _IME_STRUCTS_H_
+
+/**
+ * Motion vector, stored as two WORD16 components
+ */
+typedef struct
+{
+    /**
+     * Horizontal component of the motion vector
+     */
+    WORD16 i2_mvx;
+
+    /**
+     * Vertical component of the motion vector
+     */
+    WORD16 i2_mvy;
+} ime_mv_t;
+
+
+/**
+**************************************************************************
+*   @brief   mb_part_ctxt
+*
+*   Structure that holds the information for an individual MB partition
+*   gathered during the full pel ME stage
+**************************************************************************
+*/
+typedef struct
+{
+    /**
+     * best motion vector found for the partition
+     */
+    ime_mv_t  s_mv_curr;
+
+    /**
+     * motion vector predictor
+     */
+    ime_mv_t  s_mv_pred;
+
+    /**
+     * SAD associated with the MB partition
+     */
+    WORD32 i4_mb_distortion;
+
+    /**
+     * cost for the MB partition
+     */
+    WORD32 i4_mb_cost;
+
+    /**
+     * Search position for least cost among the list of candidates
+     */
+    UWORD32 u4_srch_pos_idx;
+
+    /**
+     * NOTE(review): old text was a copy of u4_srch_pos_idx; presumably an exit flag - confirm
+     */
+    UWORD32 u4_exit;
+
+    /**
+     * Buffer corresponding to best half pel cost
+     */
+    UWORD8 *pu1_best_hpel_buf;
+
+} mb_part_ctxt;
+
+
+/**
+**************************************************************************
+*   @brief   me_ctxt_t
+*
+*   Structure encapsulating the parameters used in the motion estimation
+*   context
+**************************************************************************
+*/
+typedef struct
+{
+    /**
+     * Ref pointer to current MB luma
+     */
+    UWORD8 *pu1_ref_buf_luma;
+
+    /**
+     * Src pointer to current MB luma
+     */
+    UWORD8 *pu1_src_buf_luma;
+
+    /**
+     * source stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_src_strd;
+
+    /**
+     * recon stride
+     * (strides for luma and chroma are the same)
+     */
+    WORD32 i4_rec_strd;
+
+    /**
+     * Offset for half pel x plane from the pic buf
+     */
+    UWORD32 u4_half_x_offset;
+
+    /**
+     * Offset for half pel y plane from half x plane
+     */
+    UWORD32 u4_half_y_offset;
+
+    /**
+     * Offset for half pel xy plane from half y plane
+     */
+    UWORD32 u4_half_xy_offset;
+
+    /**
+     *  Search range in the X, Y axis in terms of pixels (WORD32 despite the ai2 prefix)
+     */
+    WORD32 ai2_srch_boundaries[2];
+
+    /**
+     *  Search range in the north direction in terms of pixels
+     */
+    WORD32 i4_srch_range_n;
+
+    /**
+     *  Search range in the south direction in terms of pixels
+     */
+    WORD32 i4_srch_range_s;
+
+    /**
+     *  Search range in the east direction in terms of pixels
+     */
+    WORD32 i4_srch_range_e;
+
+    /**
+     *  Search range in the west direction in terms of pixels
+     */
+    WORD32 i4_srch_range_w;
+
+    /**
+     * left mb motion vector
+     */
+    ime_mv_t s_left_mv;
+
+    /**
+     * top left mb motion vector
+     */
+    ime_mv_t s_top_left_mv;
+
+    /**
+     * Number of valid candidates for the Initial search position
+     */
+    UWORD32 u4_num_candidates;
+
+    /**
+     * Motion vector predictors derived from neighbouring blocks (initial search
+     * candidates). NOTE(review): 5 entries; the old text said "six block partitions"
+     */
+    ime_mv_t as_mv_init_search[5];
+
+    /**
+     * mv bits
+     */
+    UWORD8 *pu1_mv_bits;
+
+    /**
+     * lambda (lagrange multiplier for cost computation)
+     */
+    UWORD32 u4_lambda_motion;
+
+    /**
+     * enable fast sad computation
+     */
+    UWORD32 u4_enable_fast_sad;
+
+    /**
+     * Enable SKIP block prediction based on SATQD
+     */
+    UWORD32 u4_enable_stat_sad;
+
+    /**
+     * Minimum distortion to search for
+     */
+    WORD32 i4_min_sad;
+
+    /**
+     * Signal that minimum sad has been reached in ME
+     */
+    UWORD32 u4_min_sad_reached;
+
+    /**
+     * Flag to enable/disable half pel motion estimation
+     */
+    UWORD32 u4_enable_hpel;
+
+    /**
+     * Diamond search Iteration Max Cnt
+     */
+    UWORD32 u4_num_layers;
+
+    /**
+     * encoder me speed
+     */
+    UWORD32 u4_me_speed_preset;
+    /* presumably set when the left MB is intra coded - confirm against the writer */
+    UWORD32 u4_left_is_intra;
+    /* presumably set when the left MB is a skip MB - confirm against the writer */
+    UWORD32 u4_left_is_skip;
+
+    /**
+     * Structure to store the MB partition info
+     */
+    mb_part_ctxt s_mb_part;
+    /**
+     * Threshold to compare the sad with
+     */
+    UWORD16 *pu2_sad_thrsh;
+
+    /**
+     * fn ptrs for compute sad routines
+     */
+    ime_compute_sad_ft *pf_ime_compute_sad_16x16[2];
+    ime_compute_sad_ft *pf_ime_compute_sad_16x8;
+    ime_compute_sad4_diamond *pf_ime_compute_sad4_diamond;
+    ime_compute_sad3_diamond *pf_ime_compute_sad3_diamond;
+    ime_compute_sad2_diamond *pf_ime_compute_sad2_diamond;
+    ime_sub_pel_compute_sad_16x16_ft *pf_ime_sub_pel_compute_sad_16x16;
+
+    /**
+     * Function pointers for SATQD
+     */
+    ime_compute_sad_stat *pf_ime_compute_sad_stat_luma_16x16;
+
+    /**
+     * Qp
+     */
+    UWORD8 u1_mb_qp;
+
+    /**
+     * Buffers for holding half_x , half_y and half_xy
+     * values when halfpel generation
+     *  for the entire plane is not enabled
+     */
+    UWORD8 *pu1_half_x;
+    UWORD8 *pu1_half_y;
+    UWORD8 *pu1_half_xy;
+
+
+    /**
+     * Buffer to store the best halfpel plane
+     */
+    UWORD8 *pu1_hpel_buf;
+
+    /**
+     * Stride for hpel buffer
+     */
+    UWORD32 u4_hpel_buf_strd;
+    /* NOTE(review): second stride field, signed despite the u4 prefix; its relation to u4_hpel_buf_strd is not visible here */
+    WORD32 u4_hp_buf_strd;
+
+} me_ctxt_t;
+
+
+#endif  // _IME_STRUCTS_H_
+
diff --git a/encoder/ime_typedefs.h b/encoder/ime_typedefs.h
new file mode 100755
index 0000000..d36632d
--- /dev/null
+++ b/encoder/ime_typedefs.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_typedefs.h
+*
+* @brief
+*  Type definitions used in the code
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IME_TYPEDEFS_H_
+#define _IME_TYPEDEFS_H_
+
+
+typedef unsigned char       UWORD8;
+typedef unsigned short      UWORD16;
+typedef unsigned int        UWORD32;
+typedef unsigned long long  UWORD64; /* plain 'long' is 32 bits on ILP32/LLP64 */
+
+typedef signed char         WORD8;
+typedef short               WORD16;
+typedef int                 WORD32;
+typedef long long           WORD64;  /* plain 'long' is 32 bits on ILP32/LLP64 */
+
+typedef char                CHAR;
+
+#endif /*_IME_TYPEDEFS_H_*/
diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c
new file mode 100755
index 0000000..1dfd9de
--- /dev/null
+++ b/encoder/irc_bit_allocation.c
@@ -0,0 +1,859 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/** Includes */
+#include <stdio.h>
+#include <string.h>
+#include "irc_datatypes.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_trace_support.h"
+
+/** Macros: MIN is fully parenthesised; it may evaluate an argument twice **/
+#define MIN(x,y)  (((x) < (y)) ? (x) : (y))
+
+/* Remaining-bits-in-period (rbip) state: the bit budget and the inputs it was built from */
+typedef struct
+{
+    /* Remaining bit budget; var_q number as it can cross 31 bits for large intra frame interval */
+    number_t vq_rem_bits_in_period;
+
+    /* Stored inputs, kept so that later changes can be applied as deltas */
+    WORD32 i4_tot_frms_in_gop;          /* frames per GOP as last queried */
+
+    WORD32 i4_num_intra_frm_interval;   /* number of intra frame intervals in the period */
+
+    WORD32 i4_bits_per_frm;             /* bits per frame the budget was computed with */
+
+} rem_bit_in_prd_t;
+/* Bit allocation state: period budget, per picture type weights and rate inputs */
+typedef struct bit_allocation_t
+{
+    rem_bit_in_prd_t s_rbip;    /* remaining bits in period bookkeeping */
+
+    /* Relative complexity per picture type, stored in K_Q format (WORD32 despite the i2 prefix) */
+    WORD32 i2_K[MAX_PIC_TYPE];
+
+    /* Header bits of the last frame of each picture type; used as the next header estimate */
+    WORD32 i4_prev_frm_header_bits[MAX_PIC_TYPE];
+
+    WORD32 i4_bits_per_frm;     /* average bits per frame from bit rate and frame rate */
+
+    WORD32 i4_num_gops_in_period;   /* current value; raised temporarily on scene change */
+
+    /* Num gops as set by rate control module */
+    WORD32 i4_actual_num_gops_in_period;
+
+    number_t vq_saved_bits;     /* saved bits pool; set to 0 at init */
+
+    WORD32 i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];    /* derived from the peak bit rates */
+
+    WORD32 i4_min_bits_per_frm; /* derived from the minimum bit rate */
+
+    /* Error bits module */
+    error_bits_handle ps_error_bits;
+
+    /* Stored rate inputs: frame rate, bit rate and peak bit rates */
+    WORD32 i4_frame_rate;
+
+    WORD32 i4_bit_rate;
+
+    WORD32 ai4_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+
+} bit_allocation_t;
+/* Returns the number of frames in a GOP, summed over all picture types. */
+static WORD32 get_number_of_frms_in_a_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_tot_frms_in_gop = 0, i;
+    WORD32 ai4_frms_in_gop[MAX_PIC_TYPE];
+
+    /* Query the pic_handling struct for the frames in gop, one entry per pic type */
+    irc_pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_gop);
+
+    /* Accumulate the entries to get the total frms in the gop */
+    i4_tot_frms_in_gop = 0;
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        i4_tot_frms_in_gop += ai4_frms_in_gop[i];
+    }
+    return (i4_tot_frms_in_gop);
+}
+/* Initialises *ps_rbip: computes the period bit budget and stores the inputs. */
+static void init_rbip(rem_bit_in_prd_t *ps_rbip,
+                      pic_handling_handle ps_pic_handling,
+                      WORD32 i4_bits_per_frm,
+                      WORD32 i4_num_intra_frm_interval)
+{
+    WORD32 i4_tot_frms_in_gop = get_number_of_frms_in_a_gop(ps_pic_handling);
+
+    /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop * num_intra_frm_interval */
+    {
+        number_t vq_bits_per_frm, vq_tot_frms_in_gop, vq_num_intra_frm_interval;
+        number_t *pvq_rem_bits_in_period = &ps_rbip->vq_rem_bits_in_period;
+
+        SET_VAR_Q(vq_bits_per_frm, i4_bits_per_frm, 0);
+        SET_VAR_Q(vq_tot_frms_in_gop, i4_tot_frms_in_gop, 0);
+        SET_VAR_Q(vq_num_intra_frm_interval, i4_num_intra_frm_interval, 0);
+
+        /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop */
+        mult32_var_q(vq_bits_per_frm, vq_tot_frms_in_gop,
+                     pvq_rem_bits_in_period);
+
+        /* rem_bits_in_period *= num_intra_frm_interval (result written in place) */
+        mult32_var_q(vq_num_intra_frm_interval, pvq_rem_bits_in_period[0],
+                     pvq_rem_bits_in_period);
+    }
+
+    /*
+     * Store the inputs; check_update_rbip and irc_ba_change_rbip compare
+     * against these values to apply later changes as deltas
+     */
+    ps_rbip->i4_tot_frms_in_gop = i4_tot_frms_in_gop;
+    ps_rbip->i4_num_intra_frm_interval = i4_num_intra_frm_interval;
+    ps_rbip->i4_bits_per_frm = i4_bits_per_frm;
+}
+/* Re-queries the GOP length and, if it changed, adjusts the remaining bits. */
+static void check_update_rbip(rem_bit_in_prd_t *ps_rbip,
+                              pic_handling_handle ps_pic_handling)
+{
+    /*
+     * NOTE: Intra frame interval changes after the first I frame that is
+     * encoded in a GOP
+     */
+    WORD32 i4_new_tot_frms_in_gop = get_number_of_frms_in_a_gop(
+                    ps_pic_handling);
+
+    if(i4_new_tot_frms_in_gop != ps_rbip->i4_tot_frms_in_gop)
+    {
+        /* Frame count change over the period; negative when the GOP shrank */
+        WORD32 i4_rem_frames_in_period =
+                        ps_rbip->i4_num_intra_frm_interval
+                                        * (i4_new_tot_frms_in_gop
+                                                        - ps_rbip->i4_tot_frms_in_gop);
+
+        number_t vq_rem_frms_in_period, s_bits_per_frm, vq_delta_bits_in_period;
+
+        SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frames_in_period, 0);
+        SET_VAR_Q(s_bits_per_frm, ps_rbip->i4_bits_per_frm, 0);
+
+        /* delta_bits_in_period = bits_per_frm * rem_frms_in_period */
+        mult32_var_q(s_bits_per_frm, vq_rem_frms_in_period,
+                     &vq_delta_bits_in_period);
+
+        /* rem_bits_in_period += delta_bits_in_period */
+        add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+                    &ps_rbip->vq_rem_bits_in_period);
+    }
+    /* Remember the GOP length that the budget now reflects */
+    ps_rbip->i4_tot_frms_in_gop = i4_new_tot_frms_in_gop;
+}
+/* Adds i4_num_of_bits (may be negative or 0) to the remaining bits in period. */
+static void irc_ba_update_rbip(rem_bit_in_prd_t *ps_rbip,
+                               pic_handling_handle ps_pic_handling,
+                               WORD32 i4_num_of_bits)
+{
+    number_t vq_num_bits;
+
+    check_update_rbip(ps_rbip, ps_pic_handling);    /* account for a GOP length change first */
+
+    /* rem_bits_in_period += num_of_bits */
+    SET_VAR_Q(vq_num_bits, i4_num_of_bits, 0);
+    add32_var_q(vq_num_bits, ps_rbip->vq_rem_bits_in_period,
+                &ps_rbip->vq_rem_bits_in_period);
+}
+/* Re-bases the remaining bits on a new bits-per-frame and/or intra frame interval. */
+static void irc_ba_change_rbip(rem_bit_in_prd_t *ps_rbip,
+                               pic_handling_handle ps_pic_handling,
+                               WORD32 i4_new_bits_per_frm,
+                               WORD32 i4_new_num_intra_frm_interval)
+{
+    WORD32 ai4_rem_frms_in_period[MAX_PIC_TYPE], i4_rem_frms_in_gop, i;
+    irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period);
+
+    i4_rem_frms_in_gop = 0;     /* remaining frames in the gop, over all pic types */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+        i4_rem_frms_in_gop += ai4_rem_frms_in_period[i];
+
+    if(i4_new_bits_per_frm != ps_rbip->i4_bits_per_frm)
+    {
+        /* rem frames = (stored intervals - 1) whole GOPs + rest of this GOP */
+        WORD32 i4_rem_frms_in_period = (ps_rbip->i4_num_intra_frm_interval - 1)
+                        * ps_rbip->i4_tot_frms_in_gop + i4_rem_frms_in_gop;
+
+        number_t vq_rem_frms_in_period, vq_delta_bits_per_frm,
+                        vq_delta_bits_in_period;
+
+        /* delta_bits_per_frm = new_bits_per_frm - old_bits_per_frm */
+        SET_VAR_Q(vq_delta_bits_per_frm,
+                  (i4_new_bits_per_frm - ps_rbip->i4_bits_per_frm), 0);
+
+        SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+        /* delta_bits_in_period = delta_bits_per_frm * rem_frms_in_period */
+        mult32_var_q(vq_delta_bits_per_frm, vq_rem_frms_in_period,
+                     &vq_delta_bits_in_period);
+
+        /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+        add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+                    &ps_rbip->vq_rem_bits_in_period);
+    }
+
+    if(i4_new_num_intra_frm_interval != ps_rbip->i4_num_intra_frm_interval)
+    {
+        /* Frames gained (or lost, if negative) by the interval count change */
+        WORD32 i4_rem_frms_in_period = ps_rbip->i4_tot_frms_in_gop
+                        * (i4_new_num_intra_frm_interval
+                                        - ps_rbip->i4_num_intra_frm_interval);
+
+        number_t vq_rem_frms_in_period, vq_new_bits_per_frm,
+                        vq_delta_bits_in_period;
+
+        /* The gained/lost frames are costed at the new bits_per_frm */
+        SET_VAR_Q(vq_new_bits_per_frm, i4_new_bits_per_frm, 0);
+
+        SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+        /* delta_bits_in_period = new_bits_per_frm * rem_frms_in_period */
+        mult32_var_q(vq_new_bits_per_frm, vq_rem_frms_in_period,
+                     &vq_delta_bits_in_period);
+
+        /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+        add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+                    &ps_rbip->vq_rem_bits_in_period);
+    }
+    /* Store the new inputs for the next comparison */
+    ps_rbip->i4_num_intra_frm_interval = i4_new_num_intra_frm_interval;
+    ps_rbip->i4_bits_per_frm = i4_new_bits_per_frm;
+}
+/* Memtab handling for the bit allocation state; returns the number of memtabs consumed. */
+WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_t **pps_bit_allocation,
+                                       itt_memtab_t *ps_memtab,
+                                       ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static bit_allocation_t s_bit_allocation_temp;
+
+    /*
+     * Hack for all alloc, during which we don't have any state memory: point
+     * at a static dummy so that the ps_error_bits dereference below is valid
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        (*pps_bit_allocation) = &s_bit_allocation_temp;
+
+    /* Memtab 0: the bit_allocation_t state structure itself */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(bit_allocation_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_bit_allocation,
+                         e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    /* Remaining memtabs belong to the error bits module */
+    i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+                    &pps_bit_allocation[0]->ps_error_bits,
+                    &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+    return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_init_bit_allocation
+ Description   : Initialize the bit_allocation state from the bit, peak and min rates.
+ ******************************************************************************/
+void irc_ba_init_bit_allocation(bit_allocation_t *ps_bit_allocation,
+                                pic_handling_handle ps_pic_handling,
+                                WORD32 i4_num_intra_frm_interval,
+                                WORD32 i4_bit_rate,
+                                WORD32 i4_frm_rate,
+                                WORD32 *i4_peak_bit_rate,
+                                WORD32 i4_min_bitrate)
+{
+    WORD32 i;
+    WORD32 i4_bits_per_frm, i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+
+    /* bits per frame = (bit_rate * 1000) / frm_rate, going by the macro name */
+    X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frm_rate, i4_bits_per_frm);
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frm_rate,
+                       i4_max_bits_per_frm[i]);
+    }
+    /* Store the average and the per drain rate maximum bits per frame */
+    ps_bit_allocation->i4_bits_per_frm = i4_bits_per_frm;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_bit_allocation->i4_max_bits_per_frm[i] = i4_max_bits_per_frm[i];
+    }
+    X_PROD_Y_DIV_Z(i4_min_bitrate, 1000, i4_frm_rate,
+                   ps_bit_allocation->i4_min_bits_per_frm);
+
+    /*
+     * Initialize the rem_bits in period
+     * The first gop in case of an OPEN GOP may have fewer B_PICs,
+     * That condition is not taken care of
+     */
+    init_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_bits_per_frm,
+              i4_num_intra_frm_interval);
+
+    /* Initialize the num_gops_in_period */
+    ps_bit_allocation->i4_num_gops_in_period = i4_num_intra_frm_interval;
+    ps_bit_allocation->i4_actual_num_gops_in_period = i4_num_intra_frm_interval;
+
+    /* Relative complexity (K) per picture type; values are in K_Q format */
+    ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q);
+    ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO;
+    ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q;
+
+    /* Initialize the saved bits to 0*/
+    SET_VAR_Q(ps_bit_allocation->vq_saved_bits, 0, 0);
+
+    /* Update the error bits module with average bits */
+    irc_init_error_bits(ps_bit_allocation->ps_error_bits, i4_frm_rate,
+                        i4_bit_rate);
+    /* Store the input for implementing change in values */
+    ps_bit_allocation->i4_frame_rate = i4_frm_rate;
+    ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+
+    memset(ps_bit_allocation->i4_prev_frm_header_bits, 0, sizeof(ps_bit_allocation->i4_prev_frm_header_bits));
+    for(i=0;i<MAX_NUM_DRAIN_RATES;i++)
+        ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_texture_bits
+ Description   : Based on remaining bits in period and rd_model, the number
+ of texture bits for the current frame is estimated (clamped to be >= 0).
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_t *ps_bit_allocation,
+                                           rc_rd_model_handle *pps_rd_model,
+                                           est_sad_handle ps_est_sad,
+                                           pic_handling_handle ps_pic_handling,
+                                           picture_type_e e_pic_type)
+{
+    WORD32 i, j;
+    WORD32 i4_est_texture_bits_for_frm;
+    number_t vq_rem_texture_bits;
+    number_t vq_complexity_estimate[MAX_PIC_TYPE];
+    WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE], i4_frms_in_period[MAX_PIC_TYPE];
+    number_t vq_max_consumable_bits;
+    number_t vq_rem_frms_in_period[MAX_PIC_TYPE], vq_est_texture_bits_for_frm;
+    number_t vq_prev_hdr_bits[MAX_PIC_TYPE];
+
+    WORD32 complexity_est = 0;
+
+    /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+    irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+    irc_pic_type_get_frms_in_gop(ps_pic_handling, i4_frms_in_period);
+
+    /* Scale the per GOP counts up to the period: (num_gops - 1) further GOPs */
+    for(j = 0; j < MAX_PIC_TYPE; j++)
+    {
+        i4_rem_frms_in_period[j] += (i4_frms_in_period[j]
+                        * (ps_bit_allocation->i4_num_gops_in_period - 1));
+        i4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+    }
+
+    /* Refresh rbip (0 bits added, only a GOP length change is applied), then
+     remove the header bits from the remaining bits to get the texture bits.*/
+    irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        SET_VAR_Q(vq_rem_frms_in_period[i], i4_rem_frms_in_period[i], 0);
+        SET_VAR_Q(vq_prev_hdr_bits[i],
+                  ps_bit_allocation->i4_prev_frm_header_bits[i], 0);
+    }
+    {
+        /*
+         *rem_texture_bits = rem_bits_in_period -
+         *(rem_frms_in_period[I_PIC] * prev_frm_header_bits[I_PIC]) -
+         *(rem_frms_in_period[P_PIC] * prev_frm_header_bits[P_PIC]) -
+         *(rem_frms_in_period[B_PIC] * prev_frm_header_bits[B_PIC]);
+         */
+        number_t vq_rem_hdr_bits;
+        vq_rem_texture_bits = ps_bit_allocation->s_rbip.vq_rem_bits_in_period;
+
+        mult32_var_q(vq_prev_hdr_bits[I_PIC], vq_rem_frms_in_period[I_PIC],
+                     &vq_rem_hdr_bits);
+        sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+        mult32_var_q(vq_prev_hdr_bits[P_PIC], vq_rem_frms_in_period[P_PIC],
+                     &vq_rem_hdr_bits);
+        sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+        mult32_var_q(vq_prev_hdr_bits[B_PIC], vq_rem_frms_in_period[B_PIC],
+                     &vq_rem_hdr_bits);
+        sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+    }
+    {
+        /* max_consumable_bits =
+         *(rem_frms_in_period[I_PIC] * max_bits_per_frm[0] ) +
+         *(rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC] ) * max_bits_per_frm[1];
+         */
+        number_t vq_max_bits, vq_max_bits_per_frm[2];
+
+        SET_VAR_Q(vq_max_bits_per_frm[0],
+                  ps_bit_allocation->i4_max_bits_per_frm[0], 0);
+        SET_VAR_Q(vq_max_bits_per_frm[1],
+                  ps_bit_allocation->i4_max_bits_per_frm[1], 0);
+
+        mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_max_bits_per_frm[0],
+                     &vq_max_bits);
+        vq_max_consumable_bits = vq_max_bits;
+
+        mult32_var_q(vq_rem_frms_in_period[P_PIC], vq_max_bits_per_frm[1],
+                     &vq_max_bits);
+        add32_var_q(vq_max_bits, vq_max_consumable_bits,
+                    &vq_max_consumable_bits);
+
+        mult32_var_q(vq_rem_frms_in_period[B_PIC], vq_max_bits_per_frm[1],
+                     &vq_max_bits);
+        add32_var_q(vq_max_bits, vq_max_consumable_bits,
+                    &vq_max_consumable_bits);
+    }
+
+    /* rem_texture_bits = MIN(rem_texture_bits, max_consumable_bits) */
+    MIN_VARQ(vq_max_consumable_bits, vq_rem_texture_bits, vq_rem_texture_bits);
+
+    /* The bits are then allocated based on the relative complexity of the
+     current frame with respect to that of the rest of the frames in period */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        number_t vq_lin_mod_coeff, vq_est_sad, vq_K;
+
+        /* Getting the linear model coefficient */
+        vq_lin_mod_coeff = irc_get_linear_coefficient(pps_rd_model[i]);
+
+        /* Getting the estimated SAD */
+        SET_VAR_Q(vq_est_sad, irc_get_est_sad(ps_est_sad,i), 0);
+
+        /* Making K factor a var Q format */
+        SET_VAR_Q(vq_K, ps_bit_allocation->i2_K[i], K_Q);
+
+        /* Complexity_estimate = [ (lin_mod_coeff * estimated_sad) / K factor ]  */
+        mult32_var_q(vq_lin_mod_coeff, vq_est_sad, &vq_lin_mod_coeff);
+        div32_var_q(vq_lin_mod_coeff, vq_K, &vq_complexity_estimate[i]);
+    }
+
+    /*
+     * For simple cases, one of the complexities go to zero and in those cases
+     * distribute the bits evenly among frames based on I_TO_P_RATIO
+     */
+
+    /* complexity_est is non-zero only if every checked estimate is non-zero; B is checked only when present */
+    if(i4_frms_in_period[B_PIC] == 0)
+    {
+        complexity_est = (vq_complexity_estimate[I_PIC]
+                        && vq_complexity_estimate[P_PIC]);
+    }
+    else
+    {
+        complexity_est = (vq_complexity_estimate[I_PIC]
+                        && vq_complexity_estimate[P_PIC]
+                        && vq_complexity_estimate[B_PIC]);
+    }
+
+    if(complexity_est)
+    {
+        /*
+         * Estimated texture bits =
+         * (remaining bits) * (cur frm complexity)
+         * ---------------------------------------
+         * (num_i_frm*i_frm_complexity) + (num_p_frm*pfrm_complexity)
+         *  + (b_frm * b_frm_cm)
+         */
+        mult32_var_q(vq_rem_texture_bits, vq_complexity_estimate[e_pic_type],
+                     &vq_rem_texture_bits);
+
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            mult32_var_q(vq_rem_frms_in_period[i], vq_complexity_estimate[i],
+                         &vq_rem_frms_in_period[i]);
+        }
+
+        add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[P_PIC],
+                    &vq_rem_frms_in_period[I_PIC]);
+
+        add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[B_PIC],
+                    &vq_rem_frms_in_period[I_PIC]);
+
+        div32_var_q(vq_rem_texture_bits, vq_rem_frms_in_period[I_PIC],
+                    &vq_est_texture_bits_for_frm);
+
+        number_t_to_word32(vq_est_texture_bits_for_frm,
+                           &i4_est_texture_bits_for_frm);
+    }
+    else
+    {
+        number_t vq_i_to_p_bit_ratio, vq_rem_frms;
+
+        SET_VAR_Q(vq_i_to_p_bit_ratio, I_TO_P_BIT_RATIO, 0);
+
+        /* rem_frms = ((I_TO_P_BIT_RATIO * rem_frms_in_period[I_PIC]) +
+         * rem_frms_in_period[P_PIC]  +  rem_frms_in_period[B_PIC]);
+         */
+        mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_i_to_p_bit_ratio,
+                     &vq_rem_frms);
+        add32_var_q(vq_rem_frms_in_period[P_PIC], vq_rem_frms, &vq_rem_frms);
+        add32_var_q(vq_rem_frms_in_period[B_PIC], vq_rem_frms, &vq_rem_frms);
+
+        /* est_texture_bits_for_frm = rem_texture_bits / rem_frms */
+        div32_var_q(vq_rem_texture_bits, vq_rem_frms,
+                    &vq_est_texture_bits_for_frm);
+        number_t_to_word32(vq_est_texture_bits_for_frm,
+                           &i4_est_texture_bits_for_frm);
+
+        i4_est_texture_bits_for_frm =
+                        (I_PIC == e_pic_type) ?
+                                        (i4_est_texture_bits_for_frm
+                                                        * I_TO_P_BIT_RATIO) :
+                                        i4_est_texture_bits_for_frm;
+    }
+
+    /*
+     * If the remaining bits in the period becomes negative then the estimated
+     * texture bits would also become negative. This would send a feedback to
+     * the model which may go for a toss. Thus sending the minimum possible
+     * value = 0
+     */
+    if(i4_est_texture_bits_for_frm < 0)
+    {
+        i4_est_texture_bits_for_frm = 0;
+    }
+
+    return (i4_est_texture_bits_for_frm);
+}
+
+/******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_header_bits
+ Description   : Returns the header bits stored for the previous frame of the
+                 given picture type as the estimate for the current frame.
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_t *ps_bit_allocation,
+                                          picture_type_e e_pic_type)
+{
+    return (ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type]);
+}
+/* Returns the remaining bits in period, converted to a WORD32. */
+WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_t *ps_bit_allocation,
+                                     pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_rem_bits_in_gop = 0;
+    irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0); /* 0 bits: refresh only */
+    number_t_to_word32(ps_bit_allocation->s_rbip.vq_rem_bits_in_period,
+                       &i4_rem_bits_in_gop);
+    return (i4_rem_bits_in_gop);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_update_cur_frm_consumed_bits
+ Description   : Deducts the bits consumed by the current frame from the
+                 remaining bits in period and stores its header bits.
+ ******************************************************************************/
+void irc_ba_update_cur_frm_consumed_bits(bit_allocation_t *ps_bit_allocation,
+                                         pic_handling_handle ps_pic_handling,
+                                         WORD32 i4_total_frame_bits,
+                                         WORD32 i4_model_updation_hdr_bits,
+                                         picture_type_e e_pic_type,
+                                         UWORD8 u1_is_scd,
+                                         WORD32 i4_last_frm_in_gop)
+{
+    WORD32 i4_error_bits = irc_get_error_bits(ps_bit_allocation->ps_error_bits);
+
+    /* rem_bits_in_period += error_bits - total_frame_bits */
+    irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                       (-i4_total_frame_bits + i4_error_bits));
+
+    /*
+     * Update the header bits so that it can be used as an estimate to the next
+     * frame
+     */
+    if(u1_is_scd)
+    {
+        /*
+         * In case of SCD, even though the frame type is P, it is equivalent to
+         * a I frame and so the corresponding header bits is updated
+         */
+        ps_bit_allocation->i4_prev_frm_header_bits[I_PIC] =
+                        i4_model_updation_hdr_bits;
+
+#define MAX_NUM_GOPS_IN_PERIOD (3)
+        if(ps_bit_allocation->i4_num_gops_in_period < MAX_NUM_GOPS_IN_PERIOD)
+        {
+            /*
+             * Whenever there is a scene change increase the number of gops by
+             * 2 so that the number of bits allocated is not very constrained
+             */
+            ps_bit_allocation->i4_num_gops_in_period += 2;
+            /* Add the extra bits in GOP to remaining bits in period */
+            irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                               ps_bit_allocation->i4_bits_per_frm,
+                               ps_bit_allocation->i4_num_gops_in_period);
+        }
+    }
+    else
+    {
+        ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] =
+                        i4_model_updation_hdr_bits;
+    }
+
+    if(i4_last_frm_in_gop)
+    {
+        WORD32 i4_num_bits_in_a_gop = get_number_of_frms_in_a_gop(
+                        ps_pic_handling) * ps_bit_allocation->i4_bits_per_frm;
+        /*
+         * If the number of gops in period has been increased due to scene
+         * change, slowly bring in down across the gops
+         */
+        if(ps_bit_allocation->i4_num_gops_in_period
+                        > ps_bit_allocation->i4_actual_num_gops_in_period)
+        {
+            ps_bit_allocation->i4_num_gops_in_period--;
+            irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                               ps_bit_allocation->i4_bits_per_frm,
+                               ps_bit_allocation->i4_num_gops_in_period);
+        }
+        /*
+         * Add one GOP worth of bits for the GOP that follows. Whatever is
+         * left in rem_bits_in_period (deficit or surplus) is carried over
+         */
+        irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                           i4_num_bits_in_a_gop);
+    }
+    /* Update the lower modules */
+    irc_update_error_bits(ps_bit_allocation->ps_error_bits);
+}
+/* Applies a new bit rate, frame rate and peak bit rates to the allocation state. */
+void irc_ba_change_remaining_bits_in_period(bit_allocation_t *ps_bit_allocation,
+                                            pic_handling_handle ps_pic_handling,
+                                            WORD32 i4_bit_rate,
+                                            WORD32 i4_frame_rate,
+                                            WORD32 *i4_peak_bit_rate)
+{
+    WORD32 i4_new_avg_bits_per_frm;
+    WORD32 i4_new_peak_bits_per_frm[MAX_NUM_DRAIN_RATES];
+    WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE];
+    int i;
+
+    /* Calculate the new per frame bits */
+    X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frame_rate, i4_new_avg_bits_per_frm);
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frame_rate,
+                       i4_new_peak_bits_per_frm[i]);
+    }
+
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_bit_allocation->i4_max_bits_per_frm[i] = i4_new_peak_bits_per_frm[i];
+    }
+
+    /*
+     * Get the rem_frms_in_gop from the pic_type state struct.
+     * NOTE(review): the result is never read here - confirm the call is needed
+     */
+    irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+
+    /*
+     * If the difference > 0(/ <0), the remaining bits in period needs to be
+     * increased(/decreased) based on the remaining number of frames
+     */
+    irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                       i4_new_avg_bits_per_frm,
+                       ps_bit_allocation->i4_num_gops_in_period);
+
+    /* Update the new average bits per frame */
+    ps_bit_allocation->i4_bits_per_frm = i4_new_avg_bits_per_frm;
+    /* change the lower modules state */
+    irc_change_bitrate_in_error_bits(ps_bit_allocation->ps_error_bits,
+                                     i4_bit_rate);
+    irc_change_frm_rate_in_error_bits(ps_bit_allocation->ps_error_bits,
+                                      i4_frame_rate);
+
+    /* Store the modified frame rate, bit rate and peak bit rates */
+    ps_bit_allocation->i4_frame_rate = i4_frame_rate;
+    ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+/* Stores new peak bit rates and recomputes the max bits per frame from them. */
+void irc_ba_change_ba_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+                                    WORD32 *ai4_peak_bit_rate)
+{
+    WORD32 i;
+
+    /* Max bits per frame for each drain rate, using the stored frame rate */
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(ai4_peak_bit_rate[i], 1000,
+                       ps_bit_allocation->i4_frame_rate,
+                       ps_bit_allocation->i4_max_bits_per_frm[i]);
+        ps_bit_allocation->ai4_peak_bit_rate[i] = ai4_peak_bit_rate[i];
+    }
+}
+
+/******************************************************************************
+ * @brief Modifies the remaining bits in period for the gop which has a forced
+ *      I frame (fif). Since fif would cause a new gop to be created, we need
+ *      to add the number of encoded frames in the fif GOP worth of bits
+ *      (bits_per_frm * frames) to the remaining bits in period
+ ******************************************************************************/
+void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_t *ps_bit_allocation,
+                                                    pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_frms_in_period;
+    i4_frms_in_period = irc_pic_type_get_frms_in_gop_force_I_frm(
+                    ps_pic_handling);
+    irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+                       ps_bit_allocation->i4_bits_per_frm * i4_frms_in_period);
+}
+
+/*
+ * Keeps the remaining bits in period inside [min_bits_for_period,
+ * max_drain_bits]. Bits clipped off or borrowed are booked in vq_saved_bits;
+ * a budget already inside the range is topped up from positive saved bits.
+ * i4_cur_buf_size, i4_max_buf_size and i4_tot_frame_bits are not used.
+ */
+void irc_ba_check_and_update_bit_allocation(bit_allocation_t *ps_bit_allocation,
+                                            pic_handling_handle ps_pic_handling,
+                                            WORD32 i4_cur_buf_size,
+                                            WORD32 i4_max_buf_size,
+                                            WORD32 i4_max_bits_inflow_per_frm,
+                                            WORD32 i4_tot_frame_bits)
+{
+    number_t vq_max_drain_bits, vq_extra_bits, vq_less_bits,
+                    vq_allocated_saved_bits, vq_min_bits_for_period;
+    WORD32 i4_num_frms_in_period = get_number_of_frms_in_a_gop(ps_pic_handling);
+    number_t vq_rem_bits_in_period, vq_num_frms_in_period, vq_zero;
+    WORD32 b_rem_bits_gt_max_drain, b_rem_bits_lt_min_bits,
+                    b_saved_bits_gt_zero;
+    rem_bit_in_prd_t *ps_rbip = &ps_bit_allocation->s_rbip;
+
+    UNUSED(i4_cur_buf_size);
+    UNUSED(i4_max_buf_size);
+    UNUSED(i4_tot_frame_bits);
+
+    /*
+     * Three mutually exclusive cases are handled below:
+     * 1. rem_bits_in_period > max_drain_bits: clip it to max_drain_bits and
+     *    accumulate the clipped-off excess in saved_bits.
+     * 2. rem_bits_in_period < min_bits_for_period: raise it to the promised
+     *    minimum and deduct the shortfall from saved_bits.
+     * 3. Otherwise, if saved_bits > 0: top rem_bits_in_period up from the
+     *    "saved bits pool", without exceeding max_drain_bits.
+     */
+    /* max_drain_bits = num_gops * frms_per_gop * max_bits_inflow_per_frm */
+    SET_VAR_Q(vq_num_frms_in_period,
+              (ps_bit_allocation->i4_num_gops_in_period * i4_num_frms_in_period),
+              0);
+    SET_VAR_Q(vq_max_drain_bits, i4_max_bits_inflow_per_frm, 0);
+    SET_VAR_Q(vq_zero, 0, 0);
+    mult32_var_q(vq_max_drain_bits, vq_num_frms_in_period, &vq_max_drain_bits);
+
+    /* min_bits_for_period = num_gops * frms_per_gop * min_bits_per_frm */
+    SET_VAR_Q(vq_min_bits_for_period, ps_bit_allocation->i4_min_bits_per_frm,
+              0);
+    mult32_var_q(vq_min_bits_for_period, vq_num_frms_in_period,
+                 &vq_min_bits_for_period);
+
+    vq_rem_bits_in_period = ps_rbip->vq_rem_bits_in_period;
+
+    /* Evaluate rem_bits_in_period  > max_drain_bits      */
+    VQ_A_GT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                 b_rem_bits_gt_max_drain);
+
+    /* Evaluate rem_bits_in_period  < min_bits_for_period */
+    VQ_A_LT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                 b_rem_bits_lt_min_bits);
+
+    /* Evaluate saved_bits > 0. This has to be a greater-than compare so that
+     * the last branch below only draws from a pool that really holds bits
+     */
+    VQ_A_GT_VQ_B(ps_bit_allocation->vq_saved_bits, vq_zero,
+                 b_saved_bits_gt_zero);
+
+    /* (i4_rem_bits_in_period > i4_max_drain_bits) */
+    if(b_rem_bits_gt_max_drain)
+    {
+        /* extra_bits = rem_bits_in_period - max_drain_bits */
+        sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                    &vq_extra_bits);
+
+        /* saved_bits += extra_bits */
+        add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+
+        /* rem_bits_in_period = vq_max_drain_bits */
+        ps_rbip->vq_rem_bits_in_period = vq_max_drain_bits;
+    }
+    else if(b_rem_bits_lt_min_bits)
+    {
+        /* extra_bits(-ve) =  rem_bits_in_period - i4_min_bits_for_period */
+        sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                    &vq_extra_bits);
+
+        /* saved_bits += extra_bits(-ve) */
+        add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+
+        /* rem_bits_in_period = min_bits_for_period */
+        ps_rbip->vq_rem_bits_in_period = vq_min_bits_for_period;
+    }
+    else if(b_saved_bits_gt_zero)
+    {
+        /* less_bits = max_drain_bits - rem_bits_in_period (room to the cap) */
+        sub32_var_q(vq_max_drain_bits, vq_rem_bits_in_period, &vq_less_bits);
+
+        /* allocated_saved_bits = MIN (less_bits, saved_bits) */
+        MIN_VARQ(ps_bit_allocation->vq_saved_bits, vq_less_bits,
+                 vq_allocated_saved_bits);
+
+        /* rem_bits_in_period += allocated_saved_bits */
+        add32_var_q(ps_rbip->vq_rem_bits_in_period, vq_allocated_saved_bits,
+                    &ps_rbip->vq_rem_bits_in_period);
+
+        /* saved_bits -= allocated_saved_bits */
+        sub32_var_q(ps_bit_allocation->vq_saved_bits, vq_allocated_saved_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+    }
+}
+
+WORD32 irc_ba_get_frame_rate(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->i4_frame_rate; /* frame rate held in the state */
+}
+
+WORD32 irc_ba_get_bit_rate(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->i4_bit_rate; /* bit rate held in the state */
+}
+
+void irc_ba_get_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+                              WORD32 *pi4_peak_bit_rate)
+{
+    WORD32 i4_rate_idx = 0;
+    /* Copy out one stored peak bit rate per drain rate */
+    for(; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+        pi4_peak_bit_rate[i4_rate_idx] =
+                        ps_bit_allocation->ai4_peak_bit_rate[i4_rate_idx];
+}
diff --git a/encoder/irc_bit_allocation.h b/encoder/irc_bit_allocation.h
new file mode 100755
index 0000000..19ba0df
--- /dev/null
+++ b/encoder/irc_bit_allocation.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _BIT_ALLOCATION_H_
+#define _BIT_ALLOCATION_H_
+
+typedef struct bit_allocation_t *bit_allocation_handle;
+
+WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_handle *pps_bit_allocation,
+                                       itt_memtab_t *ps_memtab,
+                                       ITT_FUNC_TYPE_E e_func_type);
+
+void irc_ba_init_bit_allocation(bit_allocation_handle ps_bit_allocation,
+                                pic_handling_handle ps_pic_handling,
+                                WORD32 i4_num_intra_frm_interval,
+                                WORD32 i4_bit_rate,
+                                WORD32 i4_frm_rate,
+                                WORD32 *u4_peak_bit_rate,
+                                WORD32 i4_min_bitrate);
+
+/* Estimates the number of texture bits required by the current frame */
+WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_handle ps_bit_allocation,
+                                           rc_rd_model_handle *pps_rd_model,
+                                           est_sad_handle ps_est_sad,
+                                           pic_handling_handle ps_pic_handling,
+                                           picture_type_e e_pic_type);
+
+/* Estimate the number of header bits required by the current frame */
+WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_handle ps_bit_allocation,
+                                          picture_type_e e_pic_type);
+
+/* Get the remaining bits allocated in the period */
+WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_handle ps_bit_allocation,
+                                     pic_handling_handle ps_pic_handling);
+
+/* Accessors: stored frame rate, bit rate and per-drain-rate peak bit rates */
+WORD32 irc_ba_get_frame_rate(bit_allocation_handle ps_bit_allocation);
+WORD32 irc_ba_get_bit_rate(bit_allocation_handle ps_bit_allocation);
+void irc_ba_get_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+                              WORD32 *pi4_peak_bit_rate);
+
+/* Updates the bit allocation module with the actual encoded values */
+void irc_ba_update_cur_frm_consumed_bits(bit_allocation_handle ps_bit_allocation,
+                                         pic_handling_handle ps_pic_handling,
+                                         WORD32 i4_total_frame_bits,
+                                         WORD32 i4_model_updation_hdr_bits,
+                                         picture_type_e e_pic_type,
+                                         UWORD8 u1_is_scd,
+                                         WORD32 i4_last_frm_in_gop);
+
+void irc_ba_check_and_update_bit_allocation(bit_allocation_handle ps_bit_allocation,
+                                            pic_handling_handle ps_pic_handling,
+                                            WORD32 i4_cur_buf_size,
+                                            WORD32 i4_max_buf_size,
+                                            WORD32 i4_max_bits_inflow_per_frm,
+                                            WORD32 i4_tot_frame_bits);
+
+/* Based on the change in frame/bit rate update the remaining bits in period */
+void irc_ba_change_remaining_bits_in_period(bit_allocation_handle ps_bit_allocation,
+                                            pic_handling_handle ps_pic_handling,
+                                            WORD32 i4_bit_rate,
+                                            WORD32 i4_frame_rate,
+                                            WORD32 *i4_peak_bit_rate);
+
+/* Change the gop size in the middle of a current gop */
+void change_gop_size(bit_allocation_handle ps_bit_allocation,
+                     WORD32 i4_intra_frm_interval,
+                     WORD32 i4_inter_frm_interval,
+                     WORD32 i4_num_intra_frm_interval);
+
+void update_rem_frms_in_period(bit_allocation_handle ps_bit_allocation,
+                               picture_type_e e_pic_type,
+                               UWORD8 u1_is_first_frm,
+                               WORD32 i4_intra_frm_interval,
+                               WORD32 i4_num_intra_frm_interval);
+
+void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_handle ps_bit_allocation,
+                                                    pic_handling_handle ps_pic_handling);
+
+void irc_ba_change_ba_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+                                    WORD32 *ai4_peak_bit_rate);
+#endif
diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c
new file mode 100755
index 0000000..c179a28
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.c
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_trace_support.h"
+
+typedef struct cbr_buffer_t
+{
+    /* Buffer size: bitrate * delay in CBR mode; clipped to the max vbv size */
+    WORD32 i4_buffer_size;
+
+    /* Bits drained per frame, one entry per drain rate */
+    WORD32 i4_drain_bits_per_frame[MAX_NUM_DRAIN_RATES];
+
+    /* Encoder Buffer Fullness */
+    WORD32 i4_ebf;
+
+    /* Upper threshold of the Buffer per picture type (7/8 of buffer size) */
+    WORD32 i4_upr_thr[MAX_PIC_TYPE];
+
+    /* Lower threshold of the Buffer per picture type (bits per frame) */
+    WORD32 i4_low_thr[MAX_PIC_TYPE];
+
+    /* Stuffing threshold equal to error bits per second in the drain bits
+     * fixed point computation */
+    WORD32 i4_stuffing_threshold;
+
+    /* For error due to bits per frame calculation, one per drain rate */
+    error_bits_handle aps_bpf_error_bits[MAX_NUM_DRAIN_RATES];
+
+    /* 1 when both bit rates are equal (CBR), 0 for VBR streaming */
+    WORD32 i4_is_cbr_mode;
+
+    /* Inputs retained so that the change functions can recompute the state */
+    WORD32 ai4_bit_rate[MAX_NUM_DRAIN_RATES];
+    /* Buffer delay, in ms */
+    WORD32 i4_max_delay;
+    /* Number of pictures of each type in the delay period */
+    WORD32 ai4_num_pics_in_delay_period[MAX_PIC_TYPE];
+    /* Target frame rate last given to init / change_cbr_vbv_tgt_frame_rate */
+    WORD32 i4_tgt_frm_rate;
+    /* Upper limit applied to i4_buffer_size whenever it is recomputed */
+    UWORD32 u4_max_vbv_buf_size;
+
+} cbr_buffer_t;
+
+WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_t **pps_cbr_buffer,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    static cbr_buffer_t s_cbr_buffer_temp;
+    /* Memtab 0 is the cbr_buffer_t state itself, so the count starts at 1 */
+    WORD32 i4_num_memtabs = 1, i4_rate_idx;
+
+    /*
+     * While counting or filling memtabs there is no state memory yet; aim the
+     * handle at a static dummy so the dereference in the loop stays valid
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        *pps_cbr_buffer = &s_cbr_buffer_temp;
+
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(cbr_buffer_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_cbr_buffer, e_func_type);
+    }
+
+    /* Each per-drain-rate error-bits module adds its own memtabs */
+    for(i4_rate_idx = 0; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+        i4_num_memtabs += irc_error_bits_num_fill_use_free_memtab(
+                        &(*pps_cbr_buffer)->aps_bpf_error_bits[i4_rate_idx],
+                        &ps_memtab[i4_num_memtabs], e_func_type);
+
+    return i4_num_memtabs;
+}
+
+/******************************************************************************
+ * @brief Initialize the CBR VBV buffer state.
+ * Also serves VBR streaming, which is selected when the two bit rates differ;
+ * the buffer size is then built from the pictures in the delay period.
+ ******************************************************************************/
+void irc_init_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+                         WORD32 i4_buffer_delay,
+                         WORD32 i4_tgt_frm_rate,
+                         WORD32 *i4_bit_rate,
+                         UWORD32 *u4_num_pics_in_delay_prd,
+                         UWORD32 u4_vbv_buf_size)
+{
+    WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+    int i;
+
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, i4_tgt_frm_rate,
+                       i4_bits_per_frm[i]);
+        /* Drain rate = bitrate/(framerate/1000) */
+        ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+        /* Initialize the bits per frame error bits calculation */
+        irc_init_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+                            i4_tgt_frm_rate, i4_bit_rate[i]);
+    }
+
+    /* Equal bit rates for both drain rates select CBR mode, in which */
+    /* buffer size = bitrate * delay / 1000, the delay being in ms */
+    if(i4_bit_rate[0] == i4_bit_rate[1])
+    {
+        X_PROD_Y_DIV_Z(i4_bit_rate[0], i4_buffer_delay, 1000,
+                       ps_cbr_buffer->i4_buffer_size);
+        ps_cbr_buffer->i4_is_cbr_mode = 1;
+    }
+    else
+    {
+        /* VBR streaming case which has different drain rates for I and P */
+        ps_cbr_buffer->i4_buffer_size = u4_num_pics_in_delay_prd[0]
+                                        * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                                        + u4_num_pics_in_delay_prd[1]
+                                        * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+        ps_cbr_buffer->i4_is_cbr_mode = 0;
+    }
+
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)u4_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = u4_vbv_buf_size;
+    }
+
+    /* Initially Encoder buffer fullness is zero */
+    ps_cbr_buffer->i4_ebf = 0;
+
+    /* tgt_frame_rate is divided by 1000 because, an approximate value is fine
+     * as this is just a threshold below which stuffing is done to avoid buffer
+     * underflow due to fixed point error in drain rate
+     */
+    ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+                    - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+    for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+    {
+        /*
+         * Upper threshold: identical for every picture type, namely the
+         * buffer size minus one eighth of it (i.e. 7/8 of the buffer size).
+         * The per-picture-type arrays only differ in the lower threshold.
+         *
+         * i4_index selects the drain rate used for the lower threshold:
+         * picture type 0 uses drain rate 0, all other picture types use
+         * drain rate 1.
+         */
+        WORD32 i4_index;
+        i4_index = i4_i > 0 ? 1 : 0;
+        ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+                        - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+        /*
+         * The lower threshold equals the bits drained per frame: even if the
+         * encoder consumes zero bits it should have enough bits to drain
+         */
+        ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+    }
+
+    /* Storing the input parameters for using it for change functions */
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+    }
+
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_cbr_buffer->ai4_num_pics_in_delay_period[i] =
+                        u4_num_pics_in_delay_prd[i];
+    }
+    ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+    ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+    ps_cbr_buffer->u4_max_vbv_buf_size = u4_vbv_buf_size;
+}
+
+/******************************************************************************
+ * @brief Condition check for constraining the number of bits allocated based on
+ * buffer size
+ *
+ * The target is clipped against two bounds derived from the encoder buffer
+ * fullness: the room left below the upper threshold, and the bits needed to
+ * stay at the lower threshold once one frame's worth of bits has drained.
+ ******************************************************************************/
+WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+                                       WORD32 i4_tgt_bits,
+                                       picture_type_e e_pic_type)
+{
+    /* Drain rate 0 serves I pictures, drain rate 1 all other picture types */
+    const WORD32 i4_drain_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    const WORD32 i4_ebf_after_drain = ps_cbr_buffer->i4_ebf
+                    - ps_cbr_buffer->i4_drain_bits_per_frame[i4_drain_idx];
+
+    /* Ceiling = upper threshold - current encoder buffer fullness */
+    WORD32 i4_ceil_bits = ps_cbr_buffer->i4_upr_thr[e_pic_type]
+                    - ps_cbr_buffer->i4_ebf;
+    /* Floor = lower threshold - fullness left after this frame's drain */
+    WORD32 i4_floor_bits = ps_cbr_buffer->i4_low_thr[e_pic_type]
+                    - i4_ebf_after_drain;
+
+    /* Neither bound is allowed to go negative */
+    if(i4_ceil_bits < 0)
+        i4_ceil_bits = 0;
+    if(i4_floor_bits < 0)
+        i4_floor_bits = 0;
+
+    /* Bring the requested target in between the two bounds */
+    CLIP(i4_tgt_bits, i4_ceil_bits, i4_floor_bits);
+    return i4_tgt_bits;
+}
+
+/* *****************************************************************************
+ * @brief Constrains the bit allocation based on buffer size (VBR streaming):
+ * the target is limited to the room left below the upper threshold
+ *
+ ******************************************************************************/
+WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+                                              WORD32 i4_tgt_bits,
+                                              picture_type_e e_pic_type)
+{
+    /* Room left between the encoder buffer fullness and the upper threshold */
+    WORD32 i4_headroom = ps_cbr_buffer->i4_upr_thr[e_pic_type]
+                    - ps_cbr_buffer->i4_ebf;
+
+    /* A negative headroom is treated as no headroom at all */
+    if(i4_headroom < 0)
+        i4_headroom = 0;
+
+    /* Only an upper limit is enforced; smaller targets pass through as is */
+    if(i4_tgt_bits > i4_headroom)
+        return i4_headroom;
+
+    return i4_tgt_bits;
+}
+
+/* *****************************************************************************
+ * @brief Verifies the buffer state and returns whether it is overflowing,
+ * underflowing or normal
+ *
+ * The buffer level is evaluated after adding i4_tot_consumed_bits and, unless
+ * that already exceeds the buffer size, after removing the per-frame drain
+ * and error bits. No field of ps_cbr_buffer is written here.
+ *
+ * Element 0 of pi4_num_bits_to_prevent_overflow receives the buffer size
+ * minus the evaluated level. The returned status uses the decoder (VBV)
+ * point of view: an over-full encoder buffer yields VBV_UNDERFLOW and a
+ * level below the stuffing threshold yields VBV_OVERFLOW.
+ ******************************************************************************/
+vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_t *ps_cbr_buffer,
+                                           WORD32 i4_tot_consumed_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_overflow,
+                                           picture_type_e e_pic_type)
+{
+    vbv_buf_status_e e_vbv_status;
+    WORD32 i4_enc_buf_level;
+    WORD32 i4_error_bits, i4_drain_bits_per_frame;
+    /* Drain rate 0 serves I pictures, drain rate 1 all other picture types */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+
+    i4_error_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    i4_drain_bits_per_frame =
+                    ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+
+    /* Encoder buffer level once the bits of this frame have been added */
+    i4_enc_buf_level = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+
+    if(i4_enc_buf_level > ps_cbr_buffer->i4_buffer_size)
+    {
+        /*
+         * Encoder buffer over-full. The RC lib models the encoder buffer
+         * while the VBV characterizes the decoder buffer, so this encoder
+         * side overflow is reported as a VBV underflow.
+         */
+        e_vbv_status = VBV_UNDERFLOW;
+        i4_enc_buf_level = ps_cbr_buffer->i4_buffer_size;
+    }
+    else
+    {
+        /*
+         * Remove the constant drain bits and the error bits caused by the
+         * fixed point implementation
+         */
+        i4_enc_buf_level -= (i4_drain_bits_per_frame + i4_error_bits);
+
+        /*
+         * A level below the stuffing threshold is the encoder side
+         * underflow, i.e. a VBV overflow; anything else is normal
+         */
+        if(i4_enc_buf_level < ps_cbr_buffer->i4_stuffing_threshold)
+        {
+            e_vbv_status = VBV_OVERFLOW;
+        }
+        else
+        {
+            e_vbv_status = VBV_NORMAL;
+        }
+
+        /* The level used for the output below is never negative */
+        if(i4_enc_buf_level < 0)
+            i4_enc_buf_level = 0;
+    }
+
+    /* Space still free in the encoder buffer at the evaluated level */
+    pi4_num_bits_to_prevent_overflow[0] = ps_cbr_buffer->i4_buffer_size
+                    - i4_enc_buf_level;
+
+    return e_vbv_status;
+}
+
+/*******************************************************************************
+ * @brief Based on the bits consumed the buffer model is updated
+ *
+ * New fullness = old fullness + consumed bits - (drain bits + error bits),
+ * limited to the range [0, buffer size]. Hitting the upper limit is traced as
+ * an error.
+ ******************************************************************************/
+void irc_update_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+                           WORD32 i4_tot_consumed_bits,
+                           picture_type_e e_pic_type)
+{
+    /* Drain rate 0 serves I pictures, drain rate 1 all other picture types */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_drain_bits_per_frame =
+                    ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    /* Work on a local copy; it is written back once at the end */
+    WORD32 i4_fullness = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+
+    /* Remove the drain bits and the fixed point error bits */
+    i4_fullness -= (i4_drain_bits_per_frame + i4_error_bits);
+
+    /* The buffer cannot hold less than nothing */
+    if(i4_fullness < 0)
+        i4_fullness = 0;
+
+    /* SS - Fix for lack of stuffing: never exceed the buffer size */
+    if(i4_fullness > ps_cbr_buffer->i4_buffer_size)
+    {
+        trace_printf(
+             (const WORD8*)"Error: Should not be coming here with stuffing\n");
+        i4_fullness = ps_cbr_buffer->i4_buffer_size;
+    }
+
+    ps_cbr_buffer->i4_ebf = i4_fullness;
+}
+
+/*******************************************************************************
+ * @brief If the buffer underflows then return the number of bits to prevent
+ * underflow
+ *
+ * The value is the amount by which the buffer level after this frame's drain
+ * falls short of the stuffing threshold. It is returned as computed, without
+ * clamping, so it is not positive when the level reaches the threshold.
+ ******************************************************************************/
+WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_t *ps_cbr_buffer,
+                                 WORD32 i4_tot_consumed_bits,
+                                 picture_type_e e_pic_type)
+{
+    /* Drain rate 0 serves I pictures, drain rate 1 all other picture types */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_drain_bits_per_frame =
+                    ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    /* Buffer level reached after adding the consumed bits of this frame */
+    WORD32 i4_level_reached = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+    /* Level that must be present so that draining ends on the threshold */
+    WORD32 i4_level_needed = i4_drain_bits_per_frame + i4_error_bits
+                    + ps_cbr_buffer->i4_stuffing_threshold;
+
+    /*
+     * Solves  stuffing_threshold = ebf + tcb - drain bits - error bits
+     *                              + stuff_bits   for stuff_bits
+     */
+    return i4_level_needed - i4_level_reached;
+}
+
+/*******************************************************************************
+ * @brief Update the state for change in number of pics in the delay period.
+ * Has no effect in CBR mode.
+ ******************************************************************************/
+void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_t *ps_cbr_buffer,
+                                                 UWORD32 *u4_num_pics_in_delay_prd)
+{
+    WORD32 i4_pic_type, i4_upper_thr;
+
+    /* Only the VBR streaming buffer size depends on the picture counts */
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+        return;
+
+    ps_cbr_buffer->i4_buffer_size =
+                    u4_num_pics_in_delay_prd[0]
+                    * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                    + u4_num_pics_in_delay_prd[1]
+                    * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+    /* Never exceed the stored maximum vbv buffer size */
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+
+    /* Upper threshold (7/8 of the buffer size) is common to all pic types */
+    i4_upper_thr = ps_cbr_buffer->i4_buffer_size
+                    - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i4_pic_type] = i4_upper_thr;
+        /* Remember the new picture counts for later change calls */
+        ps_cbr_buffer->ai4_num_pics_in_delay_period[i4_pic_type] =
+                        u4_num_pics_in_delay_prd[i4_pic_type];
+    }
+}
+
+/******************************************************************************
+ * @brief Update the state for change in target frame rate: drain bits per
+ * frame, VBR buffer size, stuffing threshold and the buffer thresholds
+ ******************************************************************************/
+void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_t *ps_cbr_buffer,
+                                       WORD32 i4_tgt_frm_rate)
+{
+    WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+    int i;
+
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[i], 1000, i4_tgt_frm_rate,
+                       i4_bits_per_frm[i]);
+        /* Drain rate = bitrate/(framerate/1000) */
+        ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+        /* Pass the new frame rate on to the error bits calculation */
+        irc_change_frm_rate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+                                          i4_tgt_frm_rate);
+    }
+
+    /* The buffer size is recomputed only in VBR mode; CBR keeps its value */
+    if(!ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        /* VBR streaming case which has different drain rates for I and P */
+        ps_cbr_buffer->i4_buffer_size =
+                        ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+                      * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                      + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+                      * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+    }
+
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /*
+     * Tgt_frame_rate is divided by 1000 because an approximate value is fine as
+     * this is just a threshold below which stuffing is done to avoid buffer
+     * underflow due to fixed point error in drain rate
+     */
+    ps_cbr_buffer->i4_stuffing_threshold = (ps_cbr_buffer->ai4_bit_rate[0]
+                    - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+    for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+    {
+        /*
+         * Upper threshold: identical for every picture type, namely the
+         * buffer size minus one eighth of it (i.e. 7/8 of the buffer size).
+         * The per-picture-type arrays only differ in the lower threshold.
+         *
+         * i4_index selects the drain rate used for the lower threshold:
+         * picture type 0 uses drain rate 0, all other picture types use
+         * drain rate 1.
+         */
+        WORD32 i4_index;
+        i4_index = i4_i > 0 ? 1 : 0;
+        ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+                        - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+        /*
+         * The lower threshold equals the bits drained per frame.
+         * Even if the encoder consumes zero bits it should have enough bits to
+         * drain
+         */
+        ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+    }
+
+    /* Storing the input parameters for using it for change functions */
+    ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the state for change in bit rate: drain bits per frame,
+ * CBR/VBR mode, buffer size, stuffing threshold and the buffer thresholds
+ ******************************************************************************/
+void irc_change_cbr_vbv_bit_rate(cbr_buffer_t *ps_cbr_buffer,
+                                 WORD32 *i4_bit_rate)
+{
+    WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+    int i;
+
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, ps_cbr_buffer->i4_tgt_frm_rate,
+                       i4_bits_per_frm[i]);
+        /* Drain rate = bitrate/(framerate/1000) */
+        ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+        /* Pass the new bit rate on to the error bits calculation */
+        irc_change_bitrate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+                                         i4_bit_rate[i]);
+    }
+
+    /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+    if(i4_bit_rate[0] == i4_bit_rate[1]) /* This would mean CBR mode */
+    {
+        X_PROD_Y_DIV_Z(i4_bit_rate[0], ps_cbr_buffer->i4_max_delay, 1000,
+                       ps_cbr_buffer->i4_buffer_size);
+        ps_cbr_buffer->i4_is_cbr_mode = 1;
+    }
+    else
+    {
+        /* VBR streaming case which has different drain rates for I and P */
+        ps_cbr_buffer->i4_buffer_size =
+                        ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+                      * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                      + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+                      * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+        ps_cbr_buffer->i4_is_cbr_mode = 0;
+    }
+
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /*
+     * tgt_frame_rate is divided by 1000 because
+     * an approximate value is fine as this is just a threshold below which
+     * stuffing is done to avoid buffer underflow due to fixed point
+     * error in drain rate
+     */
+    ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+                    - (i4_bits_per_frm[0]
+                                    * (ps_cbr_buffer->i4_tgt_frm_rate / 1000)));
+
+    for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+    {
+        /*
+         * Upper threshold: identical for every picture type, namely the
+         * buffer size minus one eighth of it (i.e. 7/8 of the buffer size).
+         * The per-picture-type arrays only differ in the lower threshold.
+         *
+         * i4_index selects the drain rate used for the lower threshold:
+         * picture type 0 uses drain rate 0, all other picture types use
+         * drain rate 1.
+         */
+
+        WORD32 i4_index;
+        i4_index = i4_i > 0 ? 1 : 0;
+        ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+                        - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+        /* The lower threshold equals the bits drained per frame.
+         * Even if the encoder consumes zero bits it should have enough bits to
+         * drain
+         */
+        ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+    }
+
+    /* Storing the input parameters for using it for change functions */
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+    }
+}
+
+void irc_change_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer,
+                                 WORD32 i4_buffer_delay)
+{
+    WORD32 i4_pic_type, i4_upper_thr;
+
+    /*
+     * CBR mode: buffer size = bitrate * delay / 1000 (delay is in ms).
+     * In VBR streaming mode the buffer size is not derived from the delay
+     * here and is left untouched.
+     */
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[0], i4_buffer_delay, 1000,
+                       ps_cbr_buffer->i4_buffer_size);
+    }
+
+    /* Never exceed the stored maximum vbv buffer size */
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /*
+     * The upper threshold is the same for every picture type: the buffer
+     * size minus one eighth of it. The lower thresholds are not touched by
+     * a change of delay.
+     */
+    i4_upper_thr = ps_cbr_buffer->i4_buffer_size
+                    - (ps_cbr_buffer->i4_buffer_size >> 3);
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+        ps_cbr_buffer->i4_upr_thr[i4_pic_type] = i4_upper_thr;
+
+    /* Keep the delay for the other change functions */
+    ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+}
+
+WORD32 irc_get_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer)
+{
+    return ps_cbr_buffer->i4_max_delay; /* delay last given to init/change */
+}
+
+WORD32 irc_get_cbr_buffer_size(cbr_buffer_t *ps_cbr_buffer)
+{
+    return ps_cbr_buffer->i4_buffer_size; /* current (clipped) buffer size */
+}
diff --git a/encoder/irc_cbr_buffer_control.h b/encoder/irc_cbr_buffer_control.h
new file mode 100755
index 0000000..2534961
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : irc_cbr_buffer_control.h                             */
+/*                                                                           */
+/*  Description       : This file contains all the necessary declarations    */
+/*                      for cbr_buffer_control functions                     */
+/*                                                                           */
+/*                                                                           */
+/*  List of Functions : <List the functions defined in this file>            */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2008   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef CBR_BUFFER_CONTROL_H
+#define CBR_BUFFER_CONTROL_H
+
+/* Clamp Number into [Min, Max]; bare if/else-if, so brace it when nested */
+#define CLIP(Number,Max,Min)    if((Number) > (Max)) (Number) = (Max); \
+                                else if((Number) < (Min)) (Number) = (Min);
+/*****************************************************************************/
+/* Structure                                                                 */
+/*****************************************************************************/
+typedef struct cbr_buffer_t *cbr_buffer_handle;
+
+WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_handle *pps_cbr_buffer,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type);
+
+/* Initialize the cbr Buffer*/
+void irc_init_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+                         WORD32 i4_buffer_delay,
+                         WORD32 i4_tgt_frm_rate,
+                         WORD32 *i4_bit_rate,
+                         UWORD32 *u4_num_pics_in_delay_prd,
+                         UWORD32 u4_vbv_buf_size);
+
+/* Check for tgt bits with in CBR buffer*/
+WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+                                       WORD32 i4_tgt_bits,
+                                       picture_type_e e_pic_type);
+
+/* Get the buffer status with the current consumed bits*/
+vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_handle ps_cbr_buffer,
+                                           WORD32 i4_tot_consumed_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_overflow,
+                                           picture_type_e e_pic_type);
+
+/* Update the CBR buffer at the end of the VOP*/
+void irc_update_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+                           WORD32 i4_tot_consumed_bits,
+                           picture_type_e e_pic_type);
+
+/*Get the bits needed to stuff in case of Underflow*/
+WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_handle ps_cbr_buffer,
+                                 WORD32 i4_tot_consumed_bits,
+                                 picture_type_e e_pic_type);
+
+WORD32 irc_get_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 irc_get_cbr_buffer_size(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+                                              WORD32 i4_tgt_bits,
+                                              picture_type_e e_pic_type);
+
+void irc_change_cbr_vbv_bit_rate(cbr_buffer_handle ps_cbr_buffer,
+                                 WORD32 *i4_bit_rate);
+
+void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_handle ps_cbr_buffer,
+                                       WORD32 i4_tgt_frm_rate);
+
+void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_handle ps_cbr_buffer,
+                                                 UWORD32 *u4_num_pics_in_delay_prd);
+
+void irc_change_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer,
+                                 WORD32 i4_buffer_delay);
+#endif /* CBR_BUFFER_CONTROL_H */
+
diff --git a/encoder/irc_cntrl_param.h b/encoder/irc_cntrl_param.h
new file mode 100755
index 0000000..82235f7
--- /dev/null
+++ b/encoder/irc_cntrl_param.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_CNTRL_PARAM_H_
+#define _RC_CNTRL_PARAM_H_
+
+/* This file should contain only enumerations exported to codec by RC */
+
+/* RC algo type */
+typedef enum
+{
+    VBR_STORAGE = 0,
+    VBR_STORAGE_DVD_COMP = 1,
+    VBR_STREAMING = 2,
+    CONST_QP = 3,
+    CBR_LDRC = 4,
+    CBR_NLDRC = 5
+
+} rc_type_e;
+
+/* Picture types; MAX_PIC_TYPE (3) counts I, P and B, BUF_PIC (-1) is outside */
+typedef enum
+{
+    BUF_PIC = -1, I_PIC = 0, P_PIC, B_PIC, MAX_PIC_TYPE
+
+} picture_type_e;
+
+/* MB Type structure*/
+typedef enum
+{
+    /* Based on MB TYPES added the array size increases */
+    MB_TYPE_INTRA, MB_TYPE_INTER, MAX_MB_TYPE
+} mb_type_e;
+
+typedef enum
+{
+    VBV_NORMAL, VBV_UNDERFLOW, VBV_OVERFLOW, VBR_CAUTION
+
+} vbv_buf_status_e;
+
+#endif
+
diff --git a/encoder/irc_common.h b/encoder/irc_common.h
new file mode 100755
index 0000000..c341de4
--- /dev/null
+++ b/encoder/irc_common.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_COMMON_H_
+#define _RC_COMMON_H_
+
+/****************************************************************************
+ NOTE : Put only those things into this file which are common across many
+ files, say I_TO_P_BIT_RATIO macro is used across irc_bit_allocation.c
+ and irc_rate_control_api.c.If anything is exclusive only to one file,
+ define it in the same file
+
+ This file is an RC private file. It should not be exported to Codec
+ ****************************************************************************/
+
+#define UNUSED(x) ((void)(x))
+
+typedef float number_t;
+/* var_q helpers: every argument is parenthesised so expressions expand safely */
+#define mult32_var_q(a,b,c) (*(c) = (a) * (b))
+/* Division; the result is the numerator itself when the divisor is zero */
+#define div32_var_q(a,b,c) (*(c) = (((b) == 0) ? (a) : ((a) / (b))))
+
+#define add32_var_q(a,b,c) (*(c) = (a) + (b))
+
+#define sub32_var_q(a,b,c) (*(c) = (a) - (b))
+/* Expands to a call of sqrt(); the including file has to provide <math.h> */
+#define sqrt32_var_q(a, c) (*(c) = sqrt(a))
+
+#define number_t_to_word32(num_a, a) (*(a) = (WORD32)(num_a))
+
+#define convert_float_to_fix(a_f, a) (*(a) = (WORD32)(a_f))
+
+#define convert_fix_to_float(a, a_f) (*(a_f) = (float)(a))
+/* a = (float)b; the third argument is accepted but ignored */
+#define SET_VAR_Q(a,b,c) {(a) = (float)(b);}
+
+
+/* Defines the maximum and the minimum quantizer allowed in the stream.*/
+#define MAX_MPEG2_QP        255 /* 127*/
+
+/* Bits ratio between I and P frame */
+#define I_TO_P_BIT_RATIO 5
+
+/* P = (X * Y) / Z, evaluated in number_t (float); P = X * Y when Z is zero */
+#define X_PROD_Y_DIV_Z(X1,Y1,Z1,P1)\
+{\
+    number_t vq_a,vq_b,vq_c;\
+    SET_VAR_Q(vq_a,(X1),0);\
+    SET_VAR_Q(vq_b,(Y1),0);\
+    SET_VAR_Q(vq_c,(Z1),0);\
+    mult32_var_q(vq_a,vq_b,&vq_a);\
+    div32_var_q(vq_a,vq_c,&vq_a);\
+    number_t_to_word32(vq_a,&(P1));\
+}
+#define VQ_A_LT_VQ_B(A,B, Z) Z = A < B;
+#define VQ_A_GT_VQ_B(A,B, Z) Z = A > B;
+
+/* Z=MAX(A,B) where A, B  and Z are var_q variables */
+#define MAX_VARQ(A,B, Z)\
+{\
+    WORD32 a_gt_b;\
+    VQ_A_GT_VQ_B((A), (B), a_gt_b);\
+    (Z) = (a_gt_b) ? (A) : (B);\
+}
+
+/* Z=MIN(A,B) where A, B  and Z are var_q variables */
+#define MIN_VARQ(A,B, Z)\
+{\
+    WORD32 a_lt_b;\
+    VQ_A_LT_VQ_B((A), (B), a_lt_b);\
+    (Z) = (a_lt_b) ? (A) : (B);\
+}
+
+/* Maximum number of drain-rates supported. Currently a maximum of only 2
+ drain-rates supported. One for
+ I pictures and the other for P & B pictures */
+#define MAX_NUM_DRAIN_RATES 2
+
+/* The ratios between I to P and P to B Qp is specified here */
+#define K_Q 4
+#define I_TO_P_RATIO (19) /* In K_Q Q factor */
+#define P_TO_B_RATIO (21) /* In K_Q Q factor */
+#define P_TO_I_RATIO (13) /* In K_Q Q factor */
+
+#endif /* _RC_COMMON_H_ */
+
diff --git a/encoder/irc_datatypes.h b/encoder/irc_datatypes.h
new file mode 100755
index 0000000..8e4685a
--- /dev/null
+++ b/encoder/irc_datatypes.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  irc_datatypes.h
+*
+* @brief
+*  Type definitions used in the code
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_TYPEDEFS_H_
+#define _IH264_TYPEDEFS_H_
+
+
+/*****************************************************************************/
+/* Unsigned data types                                                       */
+/*****************************************************************************/
+typedef unsigned char   UWORD8;
+typedef unsigned short  UWORD16;
+typedef unsigned int    UWORD32;
+typedef unsigned long long   UWORD64;
+
+
+/*****************************************************************************/
+/* Signed data types                                                         */
+/*****************************************************************************/
+typedef signed char     WORD8;
+typedef short           WORD16;
+typedef int             WORD32;
+
+
+/*****************************************************************************/
+/* Miscellaneous data types                                                  */
+/*****************************************************************************/
+typedef char            CHAR;
+typedef double          DOUBLE;
+
+#endif /*   _IH264_TYPEDEFS_H_ */
diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c
new file mode 100755
index 0000000..0d8abc2
--- /dev/null
+++ b/encoder/irc_est_sad.c
@@ -0,0 +1,260 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_est_sad.h"
+#include "irc_common.h"
+
+typedef struct est_sad_t
+{
+    WORD32 i4_use_est_intra_sad; /* non-zero: I frame SAD is estimated */
+
+    /* SAD of the most recently coded frame of each picture type */
+    UWORD32 au4_prev_frm_sad[MAX_PIC_TYPE];
+
+    /* Running sum of non-I frame SADs of the current (nth) ifi */
+    UWORD32 u4_n_p_frm_ifi_avg_sad;
+
+    /* (n-1)th ifi average P frame SAD */
+    UWORD32 u4_n_1_p_frm_ifi_avg_sad;
+
+    /* (n-2)th ifi average P frame SAD */
+    UWORD32 u4_n_2_p_frm_ifi_avg_sad;
+
+    /* number of ifi encoded till now; the update code stops counting at 2 */
+    WORD32 i4_num_ifi_encoded;
+
+    /* number of non-I frames accumulated in the current ifi */
+    WORD32 i4_num_p_frm_in_cur_ifi;
+
+} est_sad_t;
+
+WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_t **pps_est_sad,
+                                            itt_memtab_t *ps_memtab,
+                                            ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static est_sad_t s_est_sad;
+
+    /* Hack for all alloc, during which we don't have any state memory:
+     * the handle is pointed at a static dummy so it can be dereferenced
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        (*pps_est_sad) = &s_est_sad;
+
+    /* One memtab sized for est_sad_t; the return value is the memtab count */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(est_sad_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_est_sad, e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    return (i4_mem_tab_idx);
+}
+
+void irc_init_est_sad(est_sad_t *ps_est_sad, WORD32 i4_use_est_intra_sad)
+{
+    WORD32 i4_pic_type;
+
+    /* Forget every stored SAD and restart the intra frame interval history */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+        ps_est_sad->au4_prev_frm_sad[i4_pic_type] = 0;
+
+    ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+    ps_est_sad->i4_num_ifi_encoded = 0;
+    ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = 0;
+    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = 0;
+    ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+    /* Selects how irc_get_est_sad() arrives at the SAD of an I frame */
+    ps_est_sad->i4_use_est_intra_sad = i4_use_est_intra_sad;
+}
+
+void irc_reset_est_sad(est_sad_t *ps_est_sad) /* reset, keeping the mode */
+{
+    irc_init_est_sad(ps_est_sad, ps_est_sad->i4_use_est_intra_sad);
+}
+
+/*
+ * The estimated SAD can be requested at any point. When i4_use_est_intra_sad
+ * is zero the last coded SAD of the requested type is returned. Otherwise:
+ * 1) While an I frame is being encoded,
+ *    - estimate for P (or B) => the last coded SAD of that type is returned
+ *    - estimate for I => there are two cases:
+ *    => a) if num_ifi_encoded is less than 2
+ *          the SAD of the previously encoded I frame is returned
+ *    => b) once num_ifi_encoded has reached 2, the previous I frame SAD is
+ *          scaled by the ratio of the (n-1)th to the (n-2)th ifi P frame SAD
+ * 2) While a P frame is being encoded,
+ *    - estimate for P => the last coded P frame SAD is returned
+ *    - estimate for I => similar to 1), with the same two cases. In case b),
+ *      if the current ifi already holds frames, its running average and the
+ *      (n-1)th average are used instead of the (n-1)th and (n-2)th ones.
+ */
+UWORD32 irc_get_est_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type)
+{
+    if(ps_est_sad->i4_use_est_intra_sad)
+    {
+        UWORD32 u4_estimated_sad;
+        if(e_pic_type == P_PIC)
+        {
+            u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[P_PIC];
+        }
+        else if(e_pic_type == B_PIC)
+        {
+            u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[B_PIC];
+        }
+        else
+        {
+            if(ps_est_sad->i4_num_ifi_encoded < 2)
+            {
+                /*
+                 * Fewer than two IFIs have been encoded and so use the
+                 * previous I frames SAD
+                 */
+                u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+            }
+            else
+            {
+                /*
+                 * Since the n-1 'P' frame IFI would have just accumulated the
+                 * frame sads we average it out here
+                 */
+                UWORD32 u4_n_1_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad;
+                number_t vq_n_1_p_frm_ifi_avg_sad, vq_n_2_p_frm_ifi_avg_sad;
+                number_t vq_prev_frm_sad_i;
+
+                /*
+                 * If there are frames in the current IFI start using it to
+                 * estimate the I frame SAD
+                 */
+                if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+                {
+                    u4_n_1_p_frm_ifi_avg_sad =
+                                    (ps_est_sad->u4_n_p_frm_ifi_avg_sad
+                                     / ps_est_sad->i4_num_p_frm_in_cur_ifi);
+                    u4_n_2_p_frm_ifi_avg_sad =
+                                    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+                }
+                else
+                {
+                    u4_n_1_p_frm_ifi_avg_sad =
+                                    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+                    u4_n_2_p_frm_ifi_avg_sad =
+                                    ps_est_sad->u4_n_2_p_frm_ifi_avg_sad;
+                }
+
+                /*
+                 * If either of the two P frame average SADs is zero we just
+                 * return the previous I frame SAD
+                 */
+                if(u4_n_1_p_frm_ifi_avg_sad && u4_n_2_p_frm_ifi_avg_sad)
+                {
+                    SET_VAR_Q(vq_prev_frm_sad_i,
+                              ps_est_sad->au4_prev_frm_sad[I_PIC], 0);
+                    SET_VAR_Q(vq_n_1_p_frm_ifi_avg_sad,
+                              u4_n_1_p_frm_ifi_avg_sad, 0);
+                    SET_VAR_Q(vq_n_2_p_frm_ifi_avg_sad,
+                              u4_n_2_p_frm_ifi_avg_sad, 0);
+                    /*
+                     * Estimated SAD = prev I frame SAD * (n-1)th ifi P frame
+                     * avg SAD / (n-2)th ifi P frame avg SAD, done in float.
+                     * NOTE(review): the float to WORD32 conversion assumes the
+                     * result fits in WORD32 - confirm the possible SAD range.
+                     */
+                    mult32_var_q(vq_prev_frm_sad_i, vq_n_1_p_frm_ifi_avg_sad,
+                                 &vq_prev_frm_sad_i);
+                    div32_var_q(vq_prev_frm_sad_i, vq_n_2_p_frm_ifi_avg_sad,
+                                &vq_prev_frm_sad_i);
+                    number_t_to_word32(vq_prev_frm_sad_i,
+                                       (WORD32*)&u4_estimated_sad);
+                }
+                else
+                {
+                    u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+                }
+            }
+        }
+        return u4_estimated_sad;
+    }
+    else
+    {
+        return ps_est_sad->au4_prev_frm_sad[e_pic_type];
+    }
+}
+
+void irc_update_actual_sad(est_sad_t *ps_est_sad,
+                           UWORD32 u4_actual_sad,
+                           picture_type_e e_pic_type)
+{
+    UWORD32 u4_cur_ifi_avg_sad = 0;
+
+    /* The actual SAD always becomes the "previous" SAD of its picture type */
+    ps_est_sad->au4_prev_frm_sad[e_pic_type] = u4_actual_sad;
+
+    /* The ifi history below is maintained only in estimation mode */
+    if(!ps_est_sad->i4_use_est_intra_sad)
+    {
+        return;
+    }
+
+    if(e_pic_type != I_PIC)
+    {
+        /* Inside an ifi: keep summing the SADs and counting the frames */
+        ps_est_sad->i4_num_p_frm_in_cur_ifi++;
+        ps_est_sad->u4_n_p_frm_ifi_avg_sad += u4_actual_sad;
+        return;
+    }
+
+    /* An I frame closes the ifi; the ifi count saturates at two */
+    ps_est_sad->i4_num_ifi_encoded += (ps_est_sad->i4_num_ifi_encoded < 2);
+
+    /* Average of the ifi that just ended, zero when it held no frames */
+    if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+    {
+        u4_cur_ifi_avg_sad = ps_est_sad->u4_n_p_frm_ifi_avg_sad
+                        / ps_est_sad->i4_num_p_frm_in_cur_ifi;
+    }
+
+    /* Shift the history: (n-1)th -> (n-2)th, nth -> (n-1)th */
+    ps_est_sad->u4_n_2_p_frm_ifi_avg_sad =
+                    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = u4_cur_ifi_avg_sad;
+    /* Reset the SAD sum and the frame count for the next ifi */
+    ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+    ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+}
+
+void irc_update_actual_sad_for_intra(est_sad_t *ps_est_sad,
+                                     WORD32 i4_intra_frm_cost)
+{
+    /* In estimation mode the supplied intra cost is ignored */
+    if(ps_est_sad->i4_use_est_intra_sad)
+        return;
+    irc_update_actual_sad(ps_est_sad, i4_intra_frm_cost, I_PIC);
+}
diff --git a/encoder/irc_est_sad.h b/encoder/irc_est_sad.h
new file mode 100755
index 0000000..c8238c9
--- /dev/null
+++ b/encoder/irc_est_sad.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _EST_SAD_H_
+#define _EST_SAD_H_
+
+/*
+ * "est_sad_t->i4_use_est_intra_sad" Flag to control how the I frame SAD is estimated.
+ * If set to zero
+ * - it uses the Intra sad calculated by the previous P frame as
+ * the estimated sad for the current I frame
+ * else
+ * - it uses the ratio of P frame sads of the previous two GOPS and
+ * scales the I Frame sad with this ratio to estimate the current
+ * I frame SAD
+ */
+
+/* Estimating the Average SAD for the current picture type is done by:
+ * 1) if picture_type is I
+ * - Estimated SAD = (n-1)th intra frame interval(ifi) P frame Avg SAD *
+ * ( prev I frame SAD / (n-2)th intra frame interval(ifi) P frame Avg SAD)
+ * - if only one IFI is encoded use the previous I frame SAD
+ * 2) if picture type is P
+ * - Estimated SAD is the previous P frame SAD
+ * 3) The first P frame in an IFI could use a little better logic to decide the
+ * estimated SAD but currently we assume the last coded P frame's SAD
+ */
+
+typedef struct est_sad_t *est_sad_handle;
+
+WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_handle *est_sad,
+                                            itt_memtab_t *ps_memtab,
+                                            ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_est_sad(est_sad_handle est_sad, WORD32 i4_use_est_frame_sad);
+
+UWORD32 irc_get_est_sad(est_sad_handle est_sad, picture_type_e e_pic_type);
+
+void irc_update_actual_sad(est_sad_handle est_sad,
+                           UWORD32 u4_actual_sad,
+                           picture_type_e e_pic_type);
+
+void irc_update_actual_sad_for_intra(est_sad_handle est_sad,
+                                     WORD32 i4_intra_frm_cost);
+
+void irc_reset_est_sad(est_sad_handle ps_est_sad);
+#endif
diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c
new file mode 100755
index 0000000..42dcfc5
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.c
@@ -0,0 +1,185 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+
+typedef struct error_bits_t
+{
+    /* Max tgt frm rate; a period ends when the running counter reaches it */
+    WORD32 i4_max_tgt_frm_rate;
+
+    /* Running counter, advanced by i4_tgt_frm_rate_incr on every update */
+    WORD32 i4_cur_tgt_frm_rate;
+
+    /* tgt frame rate; bits per frame = bitrate * 1000 / this value */
+    WORD32 i4_tgt_frm_rate;
+
+    /* Per-update increment of the running counter (1000 after init) */
+    WORD32 i4_tgt_frm_rate_incr;
+
+    /* Set to 1 by the update that completes a period (1 second), else 0 */
+    UWORD8 u1_compute_error_bits;
+
+    /* Sum of the bits per frame added over the current period */
+    WORD32 i4_accum_bitrate;
+
+    /* bitrate */
+    WORD32 i4_bitrate;
+
+} error_bits_t;
+
+WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_t **pps_error_bits,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static error_bits_t s_error_bits_temp;
+
+    /*
+     * Hack for all alloc, during which we don't have any state memory:
+     * the handle is pointed at a static dummy so it can be dereferenced
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        (*pps_error_bits) = &s_error_bits_temp;
+
+    /* One memtab sized for error_bits_t; the return value is the memtab count */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(error_bits_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_error_bits, e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ * @brief Initialises the error bits state for a given frame rate and bitrate
+ ******************************************************************************/
+void irc_init_error_bits(error_bits_t *ps_error_bits,
+                         WORD32 i4_max_tgt_frm_rate,
+                         WORD32 i4_bitrate)
+{
+    /* Rates: the target frame rate starts out equal to the maximum */
+    ps_error_bits->i4_bitrate = i4_bitrate;
+    ps_error_bits->i4_max_tgt_frm_rate = i4_max_tgt_frm_rate;
+    ps_error_bits->i4_tgt_frm_rate = i4_max_tgt_frm_rate;
+
+    /* Value by which i4_cur_tgt_frm_rate is incremented every VOP */
+    ps_error_bits->i4_tgt_frm_rate_incr = 1000;
+
+    /* Nothing accumulated yet; the flag is set to 1 at the end of 1 second */
+    ps_error_bits->i4_cur_tgt_frm_rate = 0;
+    ps_error_bits->i4_accum_bitrate = 0;
+    ps_error_bits->u1_compute_error_bits = 0;
+}
+
+/*******************************************************************************
+ * @brief Updates the error state
+ ******************************************************************************/
+void irc_update_error_bits(error_bits_t *ps_error_bits)
+{
+    WORD32 i4_frm_bits;
+
+    /* Bits allotted to one frame: bitrate * 1000 / target frame rate */
+    X_PROD_Y_DIV_Z(ps_error_bits->i4_bitrate, 1000,
+                   ps_error_bits->i4_tgt_frm_rate, i4_frm_bits);
+
+    /* If the previous call closed a 1 second period the sum restarts here */
+    if(ps_error_bits->u1_compute_error_bits == 1)
+    {
+        ps_error_bits->i4_accum_bitrate = i4_frm_bits;
+    }
+    else
+    {
+        ps_error_bits->i4_accum_bitrate += i4_frm_bits;
+    }
+
+    /*
+     * Advance the counter by i4_tgt_frm_rate_incr; once it reaches the max
+     * tgt frame rate 1 second is up and the error bits are to be computed
+     */
+    ps_error_bits->i4_cur_tgt_frm_rate += ps_error_bits->i4_tgt_frm_rate_incr;
+
+    ps_error_bits->u1_compute_error_bits =
+                    (ps_error_bits->i4_cur_tgt_frm_rate
+                                    >= ps_error_bits->i4_max_tgt_frm_rate);
+    if(ps_error_bits->u1_compute_error_bits)
+    {
+        ps_error_bits->i4_cur_tgt_frm_rate -=
+                        ps_error_bits->i4_max_tgt_frm_rate;
+    }
+}
+
+/*******************************************************************************
+ * @brief Returns the error bits for the current frame if there are any
+ *
+ ******************************************************************************/
+WORD32 irc_get_error_bits(error_bits_t *ps_error_bits)
+{
+    /* Error bits exist only on the frame that completes a 1 second period */
+    if(ps_error_bits->u1_compute_error_bits != 1)
+    {
+        return 0;
+    }
+
+    /*
+     * Error = actual bitrate - sum of the bits per frame that were
+     * accumulated over the frames of that second
+     */
+    return (ps_error_bits->i4_bitrate - ps_error_bits->i4_accum_bitrate);
+}
+
+/*******************************************************************************
+ *
+ * @brief Change the frame rate parameter for the error bits state
+ * NOTE(review): i4_tgt_frm_rate is a divisor below - confirm it is never 0
+ ******************************************************************************/
+void irc_change_frm_rate_in_error_bits(error_bits_t *ps_error_bits,
+                                       WORD32 i4_tgt_frm_rate)
+{
+    WORD32 i4_scaled_max_rate = ps_error_bits->i4_max_tgt_frm_rate * 1000;
+    /* Value by which i4_cur_tgt_frm_rate is incremented every VOP */
+    ps_error_bits->i4_tgt_frm_rate_incr = i4_scaled_max_rate / i4_tgt_frm_rate;
+    ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the bitrate read by later update/get error bits calls
+ ******************************************************************************/
+void irc_change_bitrate_in_error_bits(error_bits_t *ps_error_bits,
+                                      WORD32 i4_bitrate)
+{
+    ps_error_bits->i4_bitrate = i4_bitrate;
+}
+
diff --git a/encoder/irc_fixed_point_error_bits.h b/encoder/irc_fixed_point_error_bits.h
new file mode 100755
index 0000000..4ddf1eb
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : irc_fixed_point_error_bits.h                         */
+/*                                                                           */
+/*  Description       : This file contains all the necessary declarations    */
+/*                      for the fixed point error bits functions             */
+/*                                                                           */
+/*                                                                           */
+/*  List of Functions : <List the functions defined in this file>            */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2008   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef FIXED_POINT_ERROR_BITS_H
+#define FIXED_POINT_ERROR_BITS_H
+
+typedef struct error_bits_t *error_bits_handle;
+
+WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_handle *pps_error_bits,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_error_bits(error_bits_handle ps_error_bits,
+                         WORD32 i4_max_tgt_frm_rate,
+                         WORD32 i4_bitrate);
+
+void irc_update_error_bits(error_bits_handle ps_error_bits);
+
+WORD32 irc_get_error_bits(error_bits_handle ps_error_bits);
+
+void irc_change_frm_rate_in_error_bits(error_bits_handle ps_error_bits,
+                                       WORD32 i4_tgt_frm_rate);
+
+void irc_change_bitrate_in_error_bits(error_bits_handle ps_error_bits,
+                                      WORD32 i4_bitrate);
+
+#endif
+
diff --git a/encoder/irc_frame_info_collector.c b/encoder/irc_frame_info_collector.c
new file mode 100755
index 0000000..65f24c4
--- /dev/null
+++ b/encoder/irc_frame_info_collector.c
@@ -0,0 +1,177 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/******************************************************************************/
+/* File Includes                                                              */
+/******************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+
+void irc_init_frame_info(frame_info_t *frame_info)
+{
+    WORD32 i4_mb_type;
+
+    /* Clear the frame-wide accumulators, then the per-MB-type ones */
+    frame_info->intra_mb_cost_sum = 0;
+    frame_info->activity_sum = 0;
+    frame_info->other_header_bits = 0;
+    for(i4_mb_type = 0; i4_mb_type < MAX_MB_TYPE; i4_mb_type++)
+    {
+        frame_info->num_mbs[i4_mb_type] = 0;
+        frame_info->tot_mb_sad[i4_mb_type] = 0;
+        frame_info->qp_sum[i4_mb_type] = 0;
+        frame_info->mb_header_bits[i4_mb_type] = 0;
+        frame_info->mb_texture_bits[i4_mb_type] = 0;
+    }
+}
+
+/******************************************************************************
+ * GET Functions: Sending back collected information to the rate control module
+ ******************************************************************************/
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info)
+{
+    /* Seed with the non-MB header bits, then add each MB type's share */
+    WORD32 i4_hdr_bits = frame_info->other_header_bits;
+    WORD32 i4_type;
+
+    for(i4_type = 0; i4_type < MAX_MB_TYPE; i4_type++)
+    {
+        i4_hdr_bits += frame_info->mb_header_bits[i4_type];
+    }
+    return i4_hdr_bits;
+}
+
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info)
+{
+    /* Texture bits of the frame = sum over every MB type */
+    const WORD32 *pi4_bits = frame_info->mb_texture_bits;
+    WORD32 i4_sum = 0, i4_type;
+    for(i4_type = 0; i4_type < MAX_MB_TYPE; i4_type++)
+    {
+        i4_sum += pi4_bits[i4_type];
+    }
+    return i4_sum;
+}
+
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info)
+{
+    /* Frame SAD = sum of the SAD totals kept for each MB type */
+    WORD32 i4_frame_sad = 0;
+    WORD32 i4_type = 0;
+    while(i4_type < MAX_MB_TYPE)
+    {
+        i4_frame_sad += frame_info->tot_mb_sad[i4_type++];
+    }
+    return i4_frame_sad;
+}
+
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info)
+{
+    WORD32 i4_qp_acc = 0, i4_mb_cnt = 0;
+    WORD32 i4_type;
+
+    /* Accumulate the QP sums and the MB counts across all MB types */
+    for(i4_type = 0; i4_type < MAX_MB_TYPE; i4_type++)
+    {
+        i4_mb_cnt += frame_info->num_mbs[i4_type];
+        i4_qp_acc += frame_info->qp_sum[i4_type];
+    }
+
+    /* No MBs recorded: report 0 rather than dividing by zero */
+    if(i4_mb_cnt == 0)
+    {
+        return 0;
+    }
+    return (i4_qp_acc / i4_mb_cnt);
+}
+
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    /* Header bits per MB of the given type; 0 when num_mbs for it is 0 */
+    WORD32 i4_num_mbs = frame_info->num_mbs[mb_type];
+    WORD32 i4_avg_hdr = 0;
+
+    if(i4_num_mbs != 0)
+    {
+        i4_avg_hdr = frame_info->mb_header_bits[mb_type] / i4_num_mbs;
+    }
+    return i4_avg_hdr;
+}
+
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+                                        UWORD8 mb_type)
+{   /* Texture bits accumulated for mb_type; the index is not range checked */
+    return (frame_info->mb_texture_bits[mb_type]);
+}
+
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type)
+{   /* SAD total accumulated for mb_type; the index is not range checked */
+    return (frame_info->tot_mb_sad[mb_type]);
+}
+
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    /*
+     * QP sum of this MB type; reported as 0 while its MB count is zero
+     */
+    if(frame_info->num_mbs[mb_type] == 0)
+    {
+        return 0;
+    }
+    return frame_info->qp_sum[mb_type];
+}
+
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type)
+{   /* Number of MBs recorded for mb_type; the index is not range checked */
+    return (frame_info->num_mbs[mb_type]);
+}
+
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info)
+{   /* MB count stored under the MB_TYPE_INTRA slot of num_mbs */
+    return (frame_info->num_mbs[MB_TYPE_INTRA]);
+}
+
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info)
+{
+    WORD32 i4_mb_cnt = 0;
+    WORD32 i4_avg_act = 0;
+    WORD32 i4_type;
+
+    /* Count the MBs of every type */
+    for(i4_type = 0; i4_type < MAX_MB_TYPE; i4_type++)
+    {
+        i4_mb_cnt += frame_info->num_mbs[i4_type];
+    }
+
+    /* Average stays 0 when there is no MB to divide by */
+    if(i4_mb_cnt != 0)
+    {
+        i4_avg_act = frame_info->activity_sum / i4_mb_cnt;
+    }
+    return i4_avg_act;
+}
+
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info)
+{   /* Frame-wide sum held in intra_mb_cost_sum */
+    return (frame_info->intra_mb_cost_sum);
+}
diff --git a/encoder/irc_frame_info_collector.h b/encoder/irc_frame_info_collector.h
new file mode 100755
index 0000000..58dc467
--- /dev/null
+++ b/encoder/irc_frame_info_collector.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _FRAME_INFO_COLLECTOR_H_
+#define _FRAME_INFO_COLLECTOR_H_
+
+typedef struct  /* Per-frame statistics, cleared by irc_init_frame_info() */
+{
+    /* Number of MBs in each type (incremented by FI_UPDATE_MB_QP) */
+    WORD32 num_mbs[MAX_MB_TYPE];
+
+    /* Sum of all MB SADs of each MB type (FI_UPDATE_MB_SAD) */
+    WORD32 tot_mb_sad[MAX_MB_TYPE];
+
+    /* Sum of QPs for each mb type (FI_UPDATE_MB_QP) */
+    WORD32 qp_sum[MAX_MB_TYPE];
+
+    /* Header bits consumed other than MB headers (FI_UPDATE_OTHER_HEADER_BITS) */
+    WORD32 other_header_bits;
+
+    /* Header bits consumed for each type of MBs (FI_UPDATE_MB_HEADER) */
+    WORD32 mb_header_bits[MAX_MB_TYPE];
+
+    /* Texture bits consumed for each type of MBs (FI_UPDATE_MB_TEXTURE) */
+    WORD32 mb_texture_bits[MAX_MB_TYPE];
+
+    /* Sum of all MB activity (FI_UPDATE_ACTIVITY) */
+    WORD32 activity_sum;
+
+    /* Sum of all the Intra MB cost values for the entire frame */
+    WORD32 intra_mb_cost_sum;
+
+} frame_info_t;
+
+void irc_init_frame_info(frame_info_t *frame_info);
+
+/*
+ * Update macros: accumulate encoder data; FI_UPDATE_MB_QP also counts the MB
+ */
+#define FI_UPDATE_OTHER_HEADER_BITS(frame_info,header_bits)\
+    {(frame_info)->other_header_bits += (header_bits);}
+
+#define FI_UPDATE_MB_HEADER(frame_info,header_bits,mb_type)\
+    {(frame_info)->mb_header_bits[(mb_type)] += (header_bits);}
+
+#define FI_UPDATE_MB_TEXTURE(frame_info,texture_bits,mb_type)\
+    {(frame_info)->mb_texture_bits[(mb_type)] += (texture_bits);}
+
+#define FI_UPDATE_MB_SAD(frame_info,mb_sad,mb_type)\
+    {(frame_info)->tot_mb_sad[(mb_type)] += (mb_sad);}
+
+#define FI_UPDATE_MB_QP(frame_info,qp,mb_type)\
+    {(frame_info)->qp_sum[(mb_type)] += (qp);(frame_info)->num_mbs[(mb_type)]++;}
+
+#define FI_UPDATE_ACTIVITY(frame_info,mb_activity)\
+    {(frame_info)->activity_sum += (mb_activity);}
+
+#define FI_UPDATE_INTRA_MB_COST(frame_info,intra_mb_cost)\
+    {(frame_info)->intra_mb_cost_sum += (intra_mb_cost);}
+
+/*
+ * GET Functions: Sending back collected information to the rate control module
+ */
+
+/* Frame Level Model Information */
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info);
+
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info);
+
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info);
+
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info);
+
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info);
+
+/* Number of Intra MBs for Scene Change Detection */
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info);
+
+/* MB Level Model Information */
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+                                        UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type);
+
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info);
+#endif
diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c
new file mode 100755
index 0000000..880ee19
--- /dev/null
+++ b/encoder/irc_mb_model_based.c
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_mb_model_based.h"
+
+typedef struct mb_rate_control_t
+{
+    /* Frame level QP, stored by irc_mb_init_frame_level() */
+    UWORD8 u1_frm_qp;
+
+    /*
+     * Estimated average activity for the current frame (updated with the
+     * previous frame activity since it is independent of picture type whether
+     * it is I or P)
+     */
+    WORD32 i4_avg_activity; /* 0 skips qp modulation in irc_get_mb_qp() */
+
+} mb_rate_control_t;
+
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_t **pps_mb_rate_control,
+                                         itt_memtab_t *ps_memtab,
+                                         ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static mb_rate_control_t s_mb_rate_control_temp;
+
+    /*
+     * Hack for the GET_NUM_MEMTAB / FILL_MEMTAB phases, where no state memory
+     * exists yet: hand out a static dummy so the handle is safe to dereference
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+    {
+        (*pps_mb_rate_control) = &s_mb_rate_control_temp;
+    }
+
+    /* Memtab for the mb rate control state structure */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(mb_rate_control_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_mb_rate_control,
+                         e_func_type); /* NOTE(review): [0] == [idx] here */
+    }
+    i4_mem_tab_idx++; /* the single memtab is counted in every mode */
+
+    return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ MB LEVEL API FUNCTIONS
+ ******************************************************************************/
+
+/******************************************************************************
+ Description     : Initialize the mb model and the average activity to default
+                   values
+ ******************************************************************************/
+void irc_init_mb_level_rc(mb_rate_control_t *ps_mb_rate_control)
+{
+    /* Only the average activity is defaulted; u1_frm_qp is not written here */
+    ps_mb_rate_control->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description     : Initialize the mb state with frame level decisions (the QP)
+ *********************************************************************************/
+void irc_mb_init_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                             UWORD8 u1_frame_qp)
+{
+    /* Update frame level QP - the starting value used by irc_get_mb_qp() */
+    ps_mb_rate_control->u1_frm_qp = u1_frame_qp;
+}
+
+/******************************************************************************
+ Description     : Reset the mb activity - Whenever there is SCD
+                   the mb activity is reset (0 skips modulation in irc_get_mb_qp)
+ *********************************************************************************/
+void irc_reset_mb_activity(mb_rate_control_t *ps_mb_rate_control)
+{
+    ps_mb_rate_control->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description     : Calculates the mb level qp (model qp and modulated qp)
+ *********************************************************************************/
+void irc_get_mb_qp(mb_rate_control_t *ps_mb_rate_control,
+                   WORD32 i4_cur_mb_activity,
+                   WORD32 *pi4_mb_qp)
+{
+    WORD32 i4_mod_qp = ps_mb_rate_control->u1_frm_qp;
+
+    /* Model based QP - used for updating the rate control model */
+    pi4_mb_qp[0] = i4_mod_qp;
+
+    /* Activity modulation needs a non-zero average and a frame qp below 100 */
+    if((ps_mb_rate_control->i4_avg_activity != 0) && (i4_mod_qp < 100))
+    {
+        WORD32 i4_avg_act = ps_mb_rate_control->i4_avg_activity;
+        WORD32 i4_qp_cap = (3 * ps_mb_rate_control->u1_frm_qp) >> 1;
+        WORD32 i4_denom = i4_cur_mb_activity + 2 * i4_avg_act;
+        WORD32 i4_numer = (2 * i4_cur_mb_activity + i4_avg_act) * i4_mod_qp;
+
+        /* qp * (2 * cur + avg) / (cur + 2 * avg), plus half the divisor */
+        i4_mod_qp = (i4_numer + (i4_denom >> 1)) / i4_denom;
+
+        /* Clamp to (3 * frame qp) >> 1 */
+        if(i4_mod_qp > i4_qp_cap)
+        {
+            i4_mod_qp = i4_qp_cap;
+        }
+    }
+
+    /* Activity modulated QP - used for encoding the MB */
+    pi4_mb_qp[1] = i4_mod_qp;
+}
+
+/*******************************************************************************
+ Description     : Returns the frame level QP last stored in u1_frm_qp
+ ******************************************************************************/
+UWORD8 irc_get_frm_level_qp(mb_rate_control_t *ps_mb_rate_control)
+{
+    return (ps_mb_rate_control->u1_frm_qp);
+}
+
+/*******************************************************************************
+ Description     : Update the frame level info collected (average activity)
+ ******************************************************************************/
+void irc_mb_update_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                               WORD32 i4_avg_activity)
+{
+     /* Update the Average Activity - read back by irc_get_mb_qp() */
+     ps_mb_rate_control->i4_avg_activity = i4_avg_activity;
+}
diff --git a/encoder/irc_mb_model_based.h b/encoder/irc_mb_model_based.h
new file mode 100755
index 0000000..aad520a
--- /dev/null
+++ b/encoder/irc_mb_model_based.h
@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _MB_MODEL_BASED_H_
+#define _MB_MODEL_BASED_H_
+
+typedef struct mb_rate_control_t *mb_rate_control_handle;
+
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_handle *pps_mb_rate_control,
+                                         itt_memtab_t *ps_memtab,
+                                         ITT_FUNC_TYPE_E e_func_type);
+
+/* Initializing the state structure */
+void irc_init_mb_level_rc(mb_rate_control_handle ps_mb_rate_control);
+
+/* MB parameters that are to be initialized at a frame level */
+void irc_mb_init_frame_level(mb_rate_control_handle ps_mb_rate_control,
+                             UWORD8 u1_frame_qp);
+
+/* MB Level call to get the mb_level QP */
+void irc_get_mb_qp(mb_rate_control_handle ps_mb_rate_control,
+                   WORD32 i4_cur_mb_activity,
+                   WORD32 *pi4_mb_qp);
+
+/* MB Parameters that are to be updated at a frame level */
+void irc_mb_update_frame_level(mb_rate_control_handle ps_mb_rate_control,
+                               WORD32 i4_avg_activity);
+
+/****************************************************************************
+ CONTROL FUNCTIONS FROM FRAME LEVEL
+ ****************************************************************************/
+
+/* Returns the stored frame level QP */
+UWORD8 irc_get_frm_level_qp(mb_rate_control_handle ps_mb_rate_control);
+
+/* Disables activity based qp modulation */
+void irc_reset_mb_activity(mb_rate_control_handle ps_mb_rate_control);
+
+#endif
+
diff --git a/encoder/irc_mem_req_and_acq.h b/encoder/irc_mem_req_and_acq.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/irc_mem_req_and_acq.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+*  irc_mem_req_and_acq.h
+*
+* @brief
+*  This file contains function declaration and structures for rate control
+*  memtabs
+*
+* @author
+*  ittiam
+*
+* @remarks
+*  The rate control library is a global library across various codecs. It
+*  anticipates certain structures definitions. Those definitions are to be
+*  imported from global workspace. Instead of that, the structures needed for
+*  rc library are copied in to this file and exported to rc library. If the
+*  structures / enums / ... in the global workspace change, this file also needs
+*  to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type)      \
+{                                                                        \
+    m_pv_mem_rec[m_j].u4_size = sizeof(iv_mem_rec_t);                    \
+    m_pv_mem_rec[m_j].u4_mem_size = m_mem_size;                          \
+    m_pv_mem_rec[m_j].u4_mem_alignment = m_align;                        \
+    m_pv_mem_rec[m_j].e_mem_type = m_type;                               \
+} /* NOTE(review): shares its name with enumerator FILL_MEMTAB declared below */
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+typedef enum    /* alignment values, passed as fill_memtab()'s i4_alignment */
+{
+    ALIGN_BYTE = 1,
+    ALIGN_WORD16 = 2,
+    ALIGN_WORD32 = 4,
+    ALIGN_WORD64 = 8,
+    ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+typedef enum    /* usage values, passed as fill_memtab()'s e_usage */
+{
+    SCRATCH = 0,
+    PERSISTENT = 1,
+    WRITEONCE  = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+typedef enum    /* region values, passed as fill_memtab()'s e_mem_region */
+{
+    L1D = 0,
+    SL2 = 1,
+    DDR = 3     /* note: the value 2 is not assigned to any region */
+}ITT_MEM_REGION_E;
+
+typedef enum    /* mode selector of the *_num_fill_use_free_memtab() functions */
+{
+    GET_NUM_MEMTAB = 0,
+    FILL_MEMTAB = 1, /* NOTE(review): FILL_MEMTAB(...) is also a macro above */
+    USE_BASE = 2,
+    FILL_BASE =3
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+/* NOTE : This should be an exact replica of IALG_MemRec; any change in
+          IALG_MemRec must be replicated here */
+typedef struct
+{
+    /* Size in bytes */
+    UWORD32 u4_size;
+
+    /* Alignment in bytes (see ITT_MEM_ALIGNMENT_TYPE_E) */
+    WORD32 i4_alignment;
+
+    /* Decides the memory region in which the memory is to be placed */
+    ITT_MEM_REGION_E e_mem_region;
+
+    /* Whether the memory is scratch or persistent (see ITT_MEM_USAGE_TYPE_E) */
+    ITT_MEM_USAGE_TYPE_E e_usage;
+
+    /* Base pointer for allocated memory */
+    void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par   Description
+*  This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+*  pointer to mem records
+*
+* @param[in] u4_size
+*  size of the record
+*
+* @param[in] i4_alignment
+*  memory alignment size
+*
+* @param[in] e_usage
+*  usage
+*
+* @param[in] e_mem_region
+*  mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief This function uses or fills the base pointer of a memory record
+*
+* @par   Description
+*  Depending on e_func_type, the memory record is either filled or used
+*
+* @param[in] ps_mem_tab
+*  pointer to mem records
+*
+* @param[in] ptr_to_be_filled
+*  handle to the memory record storage space
+*
+* @param[in] e_func_type
+*  enum that dictates fill memory records or use memory records
+*
+* @return WORD32 (the meaning of the value is not documented here)
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
new file mode 100755
index 0000000..186188c
--- /dev/null
+++ b/encoder/irc_picture_type.c
@@ -0,0 +1,1585 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+#include "string.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_picture_type.h"
+#include "irc_trace_support.h"
+
+#define MAX_INTER_FRM_INT   10 /* as_pic_stack has MAX_INTER_FRM_INT + 2 slots */
+
+/********** Pic_details: one picture slot of the pic stack ********************/
+typedef struct
+{
+    /* The id sent by the codec (i4_enc_pic_id of irc_add_pic_to_stack) */
+    WORD32 i4_pic_id;
+
+    /* Display (input) order number of the pic, counted from the gop start */
+    WORD32 i4_pic_disp_order_no;
+
+    /* Pic type assigned when the pic is stored: I_PIC, P_PIC or B_PIC */
+    picture_type_e e_pic_type;
+
+} pic_details_t;
+
+/************ Pic_handling structure: complete pic handling state *************/
+typedef struct pic_handling_t
+{
+    /***************************************************************************
+     * Inputs from the codec
+     **************************************************************************/
+
+    /* Number of frames after which an I frame will repeat in display order */
+    WORD32 i4_intra_frm_int;
+
+    /* Current inter frame interval: (num_b_pics_in_subgop + 1) */
+    WORD32 i4_inter_frm_int;
+
+    /* After these many buffered frames, the pics are encoded */
+    WORD32 i4_max_inter_frm_int;
+
+    /* OPEN or CLOSED gop; tested as a boolean (non-zero means CLOSED) */
+    WORD32 i4_is_gop_closed;
+
+    /* The pic stack */
+    /* Stack used to store the input pics in encode order */
+    pic_details_t as_pic_stack[MAX_INTER_FRM_INT + 2];
+
+    /***************************************************************************
+     * Counters
+     **************************************************************************/
+
+    /* Decides whether a B or ref pic (tested modulo i4_inter_frm_int) */
+    WORD32 i4_buf_pic_no;
+
+    /* Current pic's number in display order; gets reset after an I-frm */
+    WORD32 i4_pic_disp_order_no;
+
+    /* Number of P frms that have come, in the current gop, so far */
+    WORD32 i4_p_count_in_gop;
+
+    /* Number of B frms that have come, in the current gop, so far */
+    WORD32 i4_b_count_in_gop;
+
+    /* Number of B frms that have come, in the current subgop, so far */
+    WORD32 i4_b_count_in_subgop;
+
+    /***************************************************************************
+     * Indices to the pic stack (Since we store the pics in the encode order,
+     * these vars are modified to meet that)
+     **************************************************************************/
+
+    /* Stack slot in which the next B_PIC will be stored */
+    WORD32 i4_b_pic_idx;
+
+    /* Stack slot in which the next I or P pic will be stored */
+    WORD32 i4_ref_pic_idx;
+
+    /***************************************************************************
+     * Variables operating on the input pics
+     **************************************************************************/
+
+    /* Flag denoting whether it's the first gop or not */
+    WORD32 i4_is_first_gop;
+
+    /* Number of B_PICs in an incomplete subgop */
+    WORD32 i4_b_in_incomp_subgop;
+
+    /* In CLOSED_GOPs, even if inter_frm_int > 1, there can be 2 continuous
+     * P_PICs at the GOP end. This takes values of 0 or 1 */
+    WORD32 i4_extra_p;
+
+    /***************************************************************************
+     * Arrays storing the number of frms in the gop
+     **************************************************************************/
+
+    /* In the steady state, what's the pic distribution in display order */
+    WORD32 i4_frms_in_gop[MAX_PIC_TYPE];
+
+    /*
+     * In case of a change in inter frm int call, the pic distribution in
+     * that gop in display order
+     */
+    WORD32 i4_frms_in_cur_gop[MAX_PIC_TYPE];
+
+    /*
+     * This is used to denote the number of frms remaining to be encoded in the
+     * current gop
+     */
+    WORD32 i4_rem_frms_in_gop[MAX_PIC_TYPE];
+
+    /***************************************************************************
+     * Variables operating on the output pics
+     **************************************************************************/
+
+    /* Counts the frms encoded in a gop (-1 after init) */
+    WORD32 i4_coded_pic_no;
+
+    /* Counts from the start of stack to the end repeatedly (-1 after init) */
+    WORD32 i4_stack_count;
+
+    /***************************************************************************
+     * Tracking a change in the inputs from the codec
+     **************************************************************************/
+
+    /* A flag that is set when the codec calls for a change in inter_frm_int */
+    WORD32 i4_change_in_inter_frm_int;
+
+    /*
+     * When a change_in_inter_frm_int is called, this stores the new
+     * inter_frm_int
+     */
+    WORD32 i4_new_inter_frm_int;
+
+    /*
+     * When a change_in_inter_frm_int is called in the middle of a gop, this
+     * stores the B_PICs in the incomplete subgop of the mixed gop
+     */
+    WORD32 i4_b_in_incomp_subgop_mix_gop;
+
+    /*
+     * For a CLOSED GOP, when a change_in_inter_frm_int is called in the middle
+     * of a gop, this is a flag denoting if there is an extra P_PIC in the mixed
+     * gop
+     */
+    WORD32 i4_extra_p_mix_gop;
+
+    /* A flag that is set when the codec calls for a change in intra_frm_int */
+    WORD32 i4_change_in_intra_frm_int;
+
+    /*
+     * When a change_in_intra_frm_int is called, this stores the new
+     * intra_frm_int
+     */
+    WORD32 i4_new_intra_frm_int;
+
+    /***************************************************************************
+     * Previous pic_stack_indices & details
+     **************************************************************************/
+    pic_details_t s_prev_pic_details; /* copy of the last slot that was filled */
+
+    WORD32 i4_prev_b_pic_idx; /* i4_b_pic_idx as it was before the last add */
+
+    WORD32 i4_last_frm_in_gop; /* 0 after init; other uses outside this view */
+
+    WORD32 i4_first_gop_encoded; /* 0 after init; other uses outside this view */
+
+    /* NITT TBR: type given to the most recently added pic (I_PIC after init) */
+    picture_type_e e_previous_pic_type;
+
+    WORD32 i4_force_I_frame; /* non-zero: irc_add_pic_to_stack tries to force I */
+
+    WORD32 i4_forced_I_frame_cur_frame; /* 0 after init; uses outside this view */
+
+    WORD32 i4_sum_remaining_frm_in_gop; /* rem frms summed by start_new_gop */
+
+    WORD32 i4_mod_temp_ref_cnt; /* 0 after init; other uses outside this view */
+
+    WORD32 i4_frames_in_fif_gop; /* b + p counts + 1, taken by start_new_gop */
+
+    WORD32 i4_prev_intra_frame_interval; /* intra_frm_int latched on each P pic */
+
+} pic_handling_t;
+
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+                                  WORD32 i4_intra_frm_int,
+                                  WORD32 i4_inter_frm_int,
+                                  WORD32 i4_gop_boundary); /* 1: gop, 0: subgop */
+
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+                                   WORD32 i4_intra_frm_int,
+                                   WORD32 i4_inter_frm_int,
+                                   WORD32 i4_is_gop_closed,
+                                   WORD32 *pi4_b_in_incomp_subgop, /* likely out */
+                                   WORD32 *pi4_extra_p); /* likely out; confirm */
+
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Counts, fills or uses the single memtab that backs pic_handling_t */
+    static pic_handling_t s_dummy_state; /* stand-in while no memory exists */
+    WORD32 i4_num_memtabs = 0;
+
+    /* Count and fill passes have no state memory yet: hand out the dummy */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_pic_handling = &s_dummy_state;
+    }
+
+    /* Every pass except the pure count touches the record itself */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_rec = &ps_memtab[i4_num_memtabs];
+
+        /* Describe the record, then fill or use its base pointer */
+        fill_memtab(ps_rec, sizeof(pic_handling_t), ALIGN_128_BYTE,
+                    PERSISTENT, DDR);
+        use_or_fill_base(ps_rec, (void**)pps_pic_handling, e_func_type);
+    }
+
+    return (i4_num_memtabs + 1); /* one memtab, whatever the pass */
+}
+
+/******************************************************************************
+ Description     : initializes the pic handling state struct. Afterwards
+                   irc_get_pic_from_stack reports BUF_PIC for as long as
+                   i4_stack_count stays negative.
+ Inputs          : ps_pic_handling      - state to initialize
+                   i4_intra_frm_int     - I frame period in display order
+                   i4_max_inter_frm_int - largest inter frame interval; also
+                                          used as the initial interval
+                   i4_is_gop_closed     - non-zero for CLOSED gop, 0 for OPEN
+ Returns         : void
+ *****************************************************************************/
+void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed)
+{
+    pic_handling_t *ps_state = ps_pic_handling;
+    WORD32 e_type;
+
+    /* Start from empty picture records */
+    memset(&ps_state->s_prev_pic_details, 0, sizeof(pic_details_t));
+    memset(ps_state->as_pic_stack, 0, sizeof(ps_state->as_pic_stack));
+
+    /* Configuration supplied by the codec */
+    ps_state->i4_is_gop_closed = i4_is_gop_closed;
+    ps_state->i4_intra_frm_int = i4_intra_frm_int;
+    ps_state->i4_max_inter_frm_int = i4_max_inter_frm_int;
+    ps_state->i4_inter_frm_int = i4_max_inter_frm_int;
+
+    /* No change request is pending; the "new" values mirror the current */
+    ps_state->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_state->i4_new_inter_frm_int = i4_max_inter_frm_int;
+    ps_state->i4_change_in_intra_frm_int = 0;
+    ps_state->i4_change_in_inter_frm_int = 0;
+
+    /* Input side: gop position, per-type counters, stack write indices */
+    ps_state->i4_is_first_gop = 1;
+    ps_state->i4_buf_pic_no = 0;
+    ps_state->i4_pic_disp_order_no = 0;
+    ps_state->i4_p_count_in_gop = 0;
+    ps_state->i4_b_count_in_gop = 0;
+    ps_state->i4_b_count_in_subgop = 0;
+    ps_state->i4_ref_pic_idx = 0;
+    ps_state->i4_b_pic_idx = 2;
+    ps_state->i4_prev_b_pic_idx = 2;
+
+    /* Output side: negative until irc_add_pic_to_stack arms the counters */
+    ps_state->i4_coded_pic_no = -1;
+    ps_state->i4_stack_count = -1;
+
+    /* Force I frame handling and remaining bookkeeping */
+    ps_state->e_previous_pic_type = I_PIC;
+    ps_state->i4_prev_intra_frame_interval = i4_intra_frm_int;
+    ps_state->i4_force_I_frame = 0;
+    ps_state->i4_forced_I_frame_cur_frame = 0;
+    ps_state->i4_sum_remaining_frm_in_gop = 0;
+    ps_state->i4_mod_temp_ref_cnt = 0;
+    ps_state->i4_last_frm_in_gop = 0;
+    ps_state->i4_first_gop_encoded = 0;
+    ps_state->i4_frames_in_fif_gop = 0;
+
+    /* Steady-state pic distribution of a gop; bit allocation depends on it */
+    find_pic_distbn_in_gop(ps_state->i4_frms_in_gop,
+                           i4_intra_frm_int,
+                           i4_max_inter_frm_int,
+                           i4_is_gop_closed,
+                           &ps_state->i4_b_in_incomp_subgop,
+                           &ps_state->i4_extra_p);
+
+    /* The first gop starts out with that same distribution */
+    ps_state->i4_extra_p_mix_gop = ps_state->i4_extra_p;
+    ps_state->i4_b_in_incomp_subgop_mix_gop = ps_state->i4_b_in_incomp_subgop;
+
+    for(e_type = 0; e_type < MAX_PIC_TYPE; e_type++)
+    {
+        const WORD32 i4_frms = ps_state->i4_frms_in_gop[e_type];
+
+        ps_state->i4_rem_frms_in_gop[e_type] = i4_frms;
+        ps_state->i4_frms_in_cur_gop[e_type] = i4_frms;
+    }
+
+}
+
+/*******************************************************************************
+ * @brief latches a new intra frame interval; irc_add_pic_to_stack applies it
+ ******************************************************************************/
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_t *ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int)
+{
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_change_in_intra_frm_int = 1;
+}
+
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_t *ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int)
+{
+    /* Only latch the request here; irc_add_pic_to_stack applies it later */
+    ps_pic_handling->i4_new_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_change_in_inter_frm_int = 1;
+}
+
+static void start_new_gop(pic_handling_t *ps_pic_handling)
+{
+    /* Ends the current gop early (force I frame path of the caller) */
+    WORD32 e_type;
+    WORD32 i4_frms_left = 0;
+
+    /*
+     * Frames the interrupted gop had taken in before the force I frame
+     * call: B and P counts so far plus one
+     */
+    ps_pic_handling->i4_frames_in_fif_gop = ps_pic_handling->i4_b_count_in_gop
+                    + ps_pic_handling->i4_p_count_in_gop + 1;
+
+    for(e_type = 0; e_type < MAX_PIC_TYPE; e_type++)
+    {
+        const WORD32 i4_steady = ps_pic_handling->i4_frms_in_gop[e_type];
+
+        /* Total what was still pending, then reload the distribution */
+        i4_frms_left += ps_pic_handling->i4_rem_frms_in_gop[e_type];
+        ps_pic_handling->i4_frms_in_cur_gop[e_type] = i4_steady;
+        ps_pic_handling->i4_rem_frms_in_gop[e_type] = i4_steady;
+    }
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = i4_frms_left;
+
+    /* Rewind the input counters to the start of a gop */
+    ps_pic_handling->i4_is_first_gop = 0;
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+    if(ps_pic_handling->i4_is_gop_closed)
+    {
+        ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                        ps_pic_handling->i4_b_in_incomp_subgop;
+    }
+}
+
+/* @brief Fills the pic_stack with the incoming pics in encode order.
+ * Handles a pending force-I request and intra/inter interval changes, then
+ * stores i4_enc_pic_id as a B, I or P pic and advances the gop counters. */
+void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
+{
+    /* Declarations */
+    WORD32 i4_inter_frm_int, i4_max_inter_frm_int,
+           i4_intra_frm_int, i4_new_inter_frm_int;
+    WORD32 i4_is_gop_closed;
+    WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
+    WORD32 i4_b_pic_idx, i4_ref_pic_idx;
+    WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop,
+           i4_b_count_in_gop, i4_b_count_in_subgop;
+    WORD32 i, i4_p_frms_in_prd, i4_b_frms_in_prd,
+           i4_num_b_in_subgop, i4_extra_p;
+    WORD32 i4_condn_for_change_in_inter_frm_int;
+    picture_type_e e_previous_pic_type, e_cur_pic_type;
+    WORD32 i4_force_I_frame;
+
+    /*
+     * Initialize the local vars with the state struct values needed by the
+     * change calls
+     */
+    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+
+    i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+    i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+    i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+    i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+    i4_new_inter_frm_int = ps_pic_handling->i4_new_inter_frm_int;
+    e_previous_pic_type = ps_pic_handling->e_previous_pic_type;
+    i4_force_I_frame = ps_pic_handling->i4_force_I_frame;
+
+    /*  Force I frame :
+     *  Two different cases
+     *  1)OPEN_GOP: New GOP is started after number of B pictures in the last
+     *              sub gop of a gop to mimic the GOP structure.
+     *  2)Closed GOP: Wait till a P frame at input; on the frame after that
+     *               P frame a new GOP is started to mimic the GOP structure.
+     */
+    if(i4_force_I_frame)
+    {
+        WORD32 i4_temp_is_gop_closed;
+        WORD32 i4_codn = 0;
+        /* A special case of Open GOP where it behaves like a Closed GOP */
+        if((i4_intra_frm_int % i4_inter_frm_int) == 1)
+        {
+            i4_temp_is_gop_closed = 1;
+        }
+        else
+        {
+            i4_temp_is_gop_closed = i4_is_gop_closed;
+        }
+        /* Get the current picture type to aid decision to force an I frame */
+        if((i4_buf_pic_no % i4_inter_frm_int)
+           && !(i4_is_gop_closed&& (i4_b_count_in_gop == i4_b_frms_in_prd)))
+        {
+            e_cur_pic_type = B_PIC;
+        }
+        else
+        {
+            if(i4_pic_disp_order_no == 0)
+            {
+                e_cur_pic_type = I_PIC;
+            }
+            else
+            {
+                e_cur_pic_type = P_PIC;
+            }
+        }
+        if((i4_intra_frm_int % i4_inter_frm_int) == 0)
+        {
+            i4_codn = (e_cur_pic_type == P_PIC);
+        }
+        else
+        {
+            i4_codn = (ps_pic_handling->i4_b_count_in_subgop
+                            == ps_pic_handling->i4_b_in_incomp_subgop);
+        }
+        if(e_cur_pic_type == I_PIC)
+        {
+            /*
+             * Don't do anything. Resetting the force I frame flag
+             * since the current picture type is already an I frame
+             */
+            i4_force_I_frame = 0;
+        }
+        else if(i4_inter_frm_int == 1)
+        {
+            /* IPP case: force the I frame immediately */
+            start_new_gop(ps_pic_handling);
+        }
+        else if((!i4_temp_is_gop_closed) && i4_codn)
+        {
+            start_new_gop(ps_pic_handling);
+            if(ps_pic_handling->i4_b_count_in_subgop)
+            {
+                ps_pic_handling->i4_b_pic_idx += 1;
+                ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+            }
+        }
+        else if(i4_temp_is_gop_closed && (e_previous_pic_type == P_PIC)
+                        && (e_cur_pic_type != P_PIC))
+        {
+            start_new_gop(ps_pic_handling);
+            ps_pic_handling->i4_b_pic_idx++;
+            ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+        }
+        i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+    }
+
+
+     /***********************CHANGE_INTRA_FRM_INTERVAL**************************
+     *
+     * Call the irc_update_pic_distbn if
+     *      1)Change in intra frm interval flag is set
+     *      2)It's the first B_PIC of a gop (i4_pic_disp_order_no == 1)
+     */
+    if((ps_pic_handling->i4_change_in_intra_frm_int == 1)
+                    && ((i4_pic_disp_order_no == 1)))
+    {
+        irc_update_pic_distbn(ps_pic_handling,
+                              ps_pic_handling->i4_new_intra_frm_int,
+                              ps_pic_handling->i4_inter_frm_int, 1);
+
+        ps_pic_handling->i4_change_in_intra_frm_int = 0;
+
+        if(ps_pic_handling->i4_new_intra_frm_int == 1)
+        {
+            ps_pic_handling->i4_pic_disp_order_no = 0;
+        }
+    }
+    /*********************CHANGE_INTER_FRM_INTERVAL****************************/
+    /* Call irc_update_pic_distbn if
+     *      1)Change in inter frm interval flag is set
+     *      2)It's the first B_PIC after gop/subgop start, and
+     *      3)The new inter-frm-interval won't cross the intra_frm_interval
+     */
+    if((ps_pic_handling->i4_change_in_inter_frm_int == 1)
+       && ((i4_buf_pic_no % i4_inter_frm_int == 1)
+       || (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1)))
+    {
+        /*
+         * Condition which checks if the new inter_frm_int will cross the
+         * intra_frm_int
+         */
+        i4_condn_for_change_in_inter_frm_int = ((i4_pic_disp_order_no
+                        + i4_new_inter_frm_int - 1) < i4_intra_frm_int);
+
+        if(i4_condn_for_change_in_inter_frm_int)
+        {
+            /* If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
+            if(i4_inter_frm_int == 1)
+            {
+                ps_pic_handling->i4_b_pic_idx = (1
+                                + ps_pic_handling->i4_ref_pic_idx)
+                                % (i4_max_inter_frm_int + 1);
+            }
+
+            /*
+             * Depending on the gop/subgop boundary, call the change_inter_frm_int
+             *
+             * TO DO: make a single call, change the name of the fxn to
+             * update_state,
+             * where state = frms_in_gop + b_incomp_subgop + extra_p
+             */
+
+            /* GOP boundary */
+            if(i4_pic_disp_order_no == 1)
+            {
+                irc_update_pic_distbn(ps_pic_handling,
+                                      ps_pic_handling->i4_intra_frm_int,
+                                      ps_pic_handling->i4_new_inter_frm_int, 1);
+            }
+            /* Subgop boundary */
+            else
+            {
+                irc_update_pic_distbn(ps_pic_handling,
+                                      ps_pic_handling->i4_intra_frm_int,
+                                      ps_pic_handling->i4_new_inter_frm_int, 0);
+            }
+
+            ps_pic_handling->i4_change_in_inter_frm_int = 0;
+            ps_pic_handling->i4_new_inter_frm_int =
+                            ps_pic_handling->i4_inter_frm_int;
+        }
+
+    }
+
+    /* Re-read the locals: the change handling above may have updated state */
+    i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+    i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+    i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+    i4_p_count_in_gop = ps_pic_handling->i4_p_count_in_gop;
+    i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+    i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+    i4_p_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[P_PIC];
+    i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+    i4_extra_p = ps_pic_handling->i4_extra_p_mix_gop;
+    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+
+    /* Initializing the prev_state vars */
+    ps_pic_handling->i4_prev_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+
+    i4_num_b_in_subgop = (i4_inter_frm_int - 1);
+
+    /*********************** Fill the stack ***********************************/
+    /* The next part of the code is organized as
+     *
+     * if(B_PIC conditions satisfied)
+     * {
+     *  Fill the pic_stack using the b_pic_index
+     *  Update the b_pic_index and the other b_pic related vars for the
+     *      next B_PIC
+     * }
+     *  else
+     * {
+     *  if(I_PIC conditions are satisfied)
+     * {
+     *  Fill the pic_stack using the ref_pic_index
+     *  Update the ref_pic_index and the other ref_pic related vars for the next
+     *      I_PIC/P_PIC
+     * }
+     *  else
+     * {
+     *  Fill the pic_stack using the ref_pic_index
+     *  Update the ref_pic_index and the other ref_pic related vars for the next
+     *      I_PIC/P_PIC
+     * }
+     * }
+     */
+    /*
+     * Condition for a B_PIC -
+     * 1) Other than the first I_PIC and the periodically appearing P_PICs, after
+     *    every inter_frm_int, rest all pics are B_PICs
+     * 2) In case of CLOSED_GOP, the last frame of the gop has to be a P_PIC
+     */
+
+    if((i4_buf_pic_no % i4_inter_frm_int)&& !(i4_is_gop_closed
+       && (i4_b_count_in_gop == i4_b_frms_in_prd))) /**** B_PIC ****/
+    {
+        /* Fill the pic_stack */
+        ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_id = i4_enc_pic_id;
+        ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = B_PIC;
+        ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_disp_order_no =
+                        i4_pic_disp_order_no;
+
+        /* Store Pic type */
+        e_previous_pic_type = B_PIC;
+
+        /* Update the prev_pic_details */
+        memcpy(&ps_pic_handling->s_prev_pic_details,
+               &ps_pic_handling->as_pic_stack[i4_b_pic_idx],
+               sizeof(pic_details_t));
+
+        i4_b_count_in_gop++;
+        i4_b_count_in_subgop++;
+
+        /* Update the i4_b_pic_idx */
+        if(!i4_is_gop_closed)
+        {
+            /* If this B_PIC features in one of the complete subgops */
+            if((i4_b_count_in_subgop < i4_num_b_in_subgop)
+                            && !(i4_b_count_in_gop == i4_b_frms_in_prd))
+            {
+                i4_b_pic_idx++;
+            }
+            else /* Else if this B_PIC is the last one in a subgop or gop  */
+            {
+                /*
+                 * If this is the last B_PIC of a GOP, depending on the number
+                 * of incomp B_pics in the subgop, there can be either only I
+                 * or I,P pics between this and the next B_PIC
+                 */
+                if(i4_b_count_in_gop == i4_b_frms_in_prd)
+                {
+                    i4_b_pic_idx += (2 + (!i4_b_in_incomp_subgop)); /*Prev*/
+                    i4_b_count_in_gop = 0;
+                }
+                /*
+                 * For the last B_PIC of a subgop, there's always a P b/w
+                 * this & the next B_PIC
+                 */
+                else
+                {
+                    i4_b_pic_idx += 2;
+                }
+                i4_b_count_in_subgop = 0;
+            }
+        }
+        else
+        {
+            /* For the last B_PIC of a gop
+             * Normally,there will be 3 pics (P,I,P) between this and the next
+             * B_PIC for a CLOSED gop, except when
+             *  1)Number of P_pics in the gop = 1
+             *  2)There is an extra P at the end of the gop
+             */
+            if(i4_b_count_in_gop == i4_b_frms_in_prd)
+            {
+                i4_b_pic_idx += (3 + ((i4_b_in_incomp_subgop == 0)
+                                 && (i4_p_frms_in_prd> 1)
+                                 && (i4_pic_disp_order_no
+                                 != (i4_p_frms_in_prd+ i4_b_frms_in_prd- 1))));
+
+                i4_b_count_in_subgop = 0;
+            }
+            /* For a B_PIC which is not the last one in a subgop */
+            else if(i4_b_count_in_subgop < i4_num_b_in_subgop)
+            {
+                i4_b_pic_idx++;
+            }
+            else /* For the last B_PIC of a subgop */
+            {
+                i4_b_pic_idx += 2;
+                i4_b_count_in_subgop = 0;
+            }
+        }
+        i4_b_pic_idx %= (i4_max_inter_frm_int + 1); /* wrap within the stack */
+    }
+    /*********** I or P pic *********/
+    else
+    {
+        ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = i4_enc_pic_id;
+        ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no =
+                        i4_pic_disp_order_no;
+        /* Store Pic type (overwritten with P_PIC in the P_PIC branch below) */
+        e_previous_pic_type = I_PIC;
+
+        /**** I_PIC ****/
+        if(i4_pic_disp_order_no == 0)
+        {
+            ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = I_PIC;
+
+            /* Update the prev_pic_details */
+            memcpy(&ps_pic_handling->s_prev_pic_details,
+                   &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+                   sizeof(pic_details_t));
+            /*
+             * In case of an I-frame depending on OPEN or CLOSED gop,
+             * the ref_pic_idx changes
+             */
+            if((!i4_is_gop_closed) && (i4_is_first_gop == 0))
+            {
+                if((i4_p_frms_in_prd <= 1) && (i4_b_in_incomp_subgop == 0))
+                {
+                    i4_ref_pic_idx++;
+                }
+                /*
+                 * From the 2nd gop onwards, the I and first P frame are
+                 * separated by the num_b_in_incomp_subgop
+                 */
+                else
+                {
+                    i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+                }
+
+                ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                                ps_pic_handling->i4_b_in_incomp_subgop;
+            }
+            else
+            {
+                i4_ref_pic_idx++;
+            }
+
+            i4_b_count_in_gop = 0;
+            i4_p_count_in_gop = 0;
+            i4_b_count_in_subgop = 0;
+
+        }
+        /**** P_PIC ****/
+        else
+        {
+            ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC;
+            /* Store Pic type */
+            e_previous_pic_type = P_PIC;
+
+            /* Update the prev_pic_details */
+            memcpy(&ps_pic_handling->s_prev_pic_details,
+                   &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+                   sizeof(pic_details_t));
+
+            i4_p_count_in_gop++;
+            ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+
+            /*
+             * In case of a P-frame depending on OPEN or CLOSED gop, the
+             * ref_pic_idx changes
+             */
+            if(i4_is_gop_closed && (i4_p_count_in_gop == i4_p_frms_in_prd))
+            {
+                /*
+                 * For the last P_PIC in a gop, if extra_p or incomp_b are
+                 * present, the number of such pics between this and the next
+                 * ref_pic is (i4_b_in_incomp_subgop + 1)
+                 */
+                if((i4_p_count_in_gop > 1)
+                                && (i4_b_in_incomp_subgop || i4_extra_p))
+                {
+                    i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+                }
+                else
+                {
+                    i4_ref_pic_idx += i4_inter_frm_int;
+                }
+            }
+            else
+            {
+                i4_ref_pic_idx += i4_inter_frm_int;
+            }
+        }
+
+        i4_ref_pic_idx %= (i4_max_inter_frm_int + 1); /* wrap within the stack */
+    }
+
+    /* Update those variables working on the input frames  */
+    i4_pic_disp_order_no++;
+    i4_buf_pic_no++;
+
+    /* For any gop (tests the state copy, i.e. the pre-increment disp order) */
+    if(ps_pic_handling->i4_pic_disp_order_no
+                    == (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed)
+                        * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+    {
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[i] =
+                            ps_pic_handling->i4_frms_in_cur_gop[i];
+        }
+
+        if((!i4_is_gop_closed) && (i4_is_first_gop)
+                        && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC]
+                                        > ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[B_PIC] =
+                            ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+                                            - ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+        }
+    }
+
+    /* End of GOP updates */
+    if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
+    {
+        /* Now, the end of gop updates */
+        i4_pic_disp_order_no = 0;
+        i4_buf_pic_no = 0;
+        i4_is_first_gop = 0;
+        ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+        if(i4_is_gop_closed)
+        {
+            ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                            ps_pic_handling->i4_b_in_incomp_subgop;
+        }
+
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_frms_in_cur_gop[i] =
+                            ps_pic_handling->i4_frms_in_gop[i];
+        }
+    }
+
+    /* Updating the vars which work on the encoded pics */
+    /* For the first gop (state copies are still the values from entry) */
+    if(((ps_pic_handling->i4_is_first_gop)
+                    && (ps_pic_handling->i4_pic_disp_order_no
+                                    == (i4_max_inter_frm_int - 1)))
+                    || (i4_intra_frm_int == 1))
+    {
+        ps_pic_handling->i4_coded_pic_no = 0;
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    /* Update the state struct with the modifiable local vars */
+    ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no;
+    ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+    ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx;
+    ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx;
+    ps_pic_handling->i4_is_first_gop = i4_is_first_gop;
+    ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop;
+    ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop;
+    ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+    ps_pic_handling->e_previous_pic_type = e_previous_pic_type;
+    ps_pic_handling->i4_force_I_frame = i4_force_I_frame;
+}
+
+/*******************************************************************************
+ * @brief Hands out the picture id, display order number and picture type of
+ *        the entry at the current position (i4_stack_count) of the pic stack
+ *
+ * While the stack count is negative nothing is read from the stack and the
+ * outputs are -1 / -1 / BUF_PIC. Otherwise a copy of the stack entry is taken,
+ * a pending force-I-frame request is consumed when that entry is an I_PIC and,
+ * for open gops once i4_first_gop_encoded is set, the display order number is
+ * remapped before being returned.
+ ******************************************************************************/
+void irc_get_pic_from_stack(pic_handling_t *ps_pic_handling,
+                            WORD32 *pi4_pic_id,
+                            WORD32 *pi4_pic_disp_order_no,
+                            picture_type_e *pe_pic_type)
+{
+    pic_details_t s_cur_pic;
+
+    /* Nothing to hand out yet: report a buffered picture with invalid ids */
+    if(ps_pic_handling->i4_stack_count < 0)
+    {
+        *pi4_pic_id = -1;
+        *pi4_pic_disp_order_no = -1;
+        *pe_pic_type = BUF_PIC;
+        return;
+    }
+
+    /* Work on a private copy; the stack entry itself is not modified here */
+    s_cur_pic = ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count];
+
+    /* A pending force I frame request is consumed by an I_PIC */
+    if((1 == ps_pic_handling->i4_force_I_frame)
+                    && (I_PIC == s_cur_pic.e_pic_type))
+    {
+        /* Flag to signal change in remaining bits */
+        ps_pic_handling->i4_forced_I_frame_cur_frame = 1;
+        ps_pic_handling->i4_force_I_frame = 0;
+
+        /*
+         * Count of the pictures in the new gop whose temporal reference has
+         * to be modified
+         */
+        ps_pic_handling->i4_mod_temp_ref_cnt =
+                        ps_pic_handling->i4_b_in_incomp_subgop + 1;
+        ps_pic_handling->i4_first_gop_encoded = 1;
+    }
+
+    /*
+     * In MPEG2, the temporal reference of the first displayed frame in a gop
+     * is 0. With an OPEN_GOP the B_PICs of the last subgop of a gop may be
+     * coded as a part of the next gop, so the display order number is shifted
+     * and wrapped to give an indication of the temporal reference
+     */
+    if((!ps_pic_handling->i4_is_gop_closed)
+                    && (ps_pic_handling->i4_first_gop_encoded))
+    {
+        WORD32 i4_shifted_order = s_cur_pic.i4_pic_disp_order_no
+                        + ps_pic_handling->i4_b_in_incomp_subgop;
+        WORD32 i4_wrap_len;
+
+        if(0 == ps_pic_handling->i4_mod_temp_ref_cnt)
+        {
+            i4_wrap_len = ps_pic_handling->i4_prev_intra_frame_interval;
+        }
+        else
+        {
+            /*
+             * Due to the forced I frame that gop holds only
+             * i4_frames_in_fif_gop frames
+             */
+            i4_wrap_len = ps_pic_handling->i4_frames_in_fif_gop;
+            ps_pic_handling->i4_mod_temp_ref_cnt--;
+        }
+
+        s_cur_pic.i4_pic_disp_order_no = i4_shifted_order % i4_wrap_len;
+    }
+
+    /* Giving this to the Codec */
+    *pi4_pic_id = s_cur_pic.i4_pic_id;
+    *pi4_pic_disp_order_no = s_cur_pic.i4_pic_disp_order_no;
+    *pe_pic_type = s_cur_pic.e_pic_type;
+}
+
+/*******************************************************************************
+ * @brief Updates the picture handling state whenever there is a change in the
+ *        intra or inter frame interval
+ *
+ * @param ps_pic_handling   picture handling state, updated in place
+ * @param i4_intra_frm_int  requested intra frame interval; a value <= 0 keeps
+ *                          the interval currently stored in the state
+ * @param i4_inter_frm_int  requested inter frame interval; a value <= 0 or
+ *                          above i4_max_inter_frm_int keeps the interval
+ *                          currently stored in the state
+ * @param i4_gop_boundary   0 when the change happens inside a gop (the current
+ *                          gop becomes a mixed gop), non-zero when it happens
+ *                          at a gop boundary
+ ******************************************************************************/
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+                                  WORD32 i4_intra_frm_int,
+                                  WORD32 i4_inter_frm_int,
+                                  WORD32 i4_gop_boundary)
+{
+    /* Declarations */
+    WORD32 i4_is_gop_closed;
+    WORD32 i, i4_prev_inter_frm_int, i4_max_inter_frm_int, i4_pic_disp_order_no;
+    WORD32 i4_b_in_incomp_subgop, i4_extra_p,
+           i4_b_in_incomp_subgop_mix_gop,i4_extra_p_mix_gop;
+    WORD32 i4_pb_frms_till_prev_p;
+    WORD32 ai4_diff_in_frms[MAX_PIC_TYPE];
+
+    /* Initialize the local vars from the state struct */
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    i4_prev_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+    i4_extra_p = ps_pic_handling->i4_extra_p;
+    i4_b_in_incomp_subgop_mix_gop =
+                    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+    i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p_mix_gop;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    /* Frames covered by the sub-gops completed with the old interval */
+    i4_pb_frms_till_prev_p = (ps_pic_handling->i4_p_count_in_gop
+                              * i4_prev_inter_frm_int);
+
+    /* Check for the validity of the intra_frm_int */
+    if(i4_intra_frm_int <= 0)
+    {
+        i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    }
+    /*
+     * Check for the validity of the inter_frm_int.
+     * Zero is rejected as well: find_pic_distbn_in_gop() divides by the
+     * inter frame interval, so letting 0 through would divide by zero.
+     */
+    if((i4_inter_frm_int > i4_max_inter_frm_int) || (i4_inter_frm_int <= 0))
+    {
+        i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    }
+
+    /* Keep a copy of the older frms_in_gop */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ai4_diff_in_frms[i] = ps_pic_handling->i4_frms_in_cur_gop[i];
+    }
+
+    /* Update all the variables which are calculated from the inter_frm_int */
+
+    /* Get the new pic distribution in the gop */
+    find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_inter_frm_int, i4_is_gop_closed,
+                           &i4_b_in_incomp_subgop, &i4_extra_p);
+
+    /* Find the other related variables */
+    if(i4_gop_boundary == 0)
+    {
+        /*
+         * Since, the inter frame interval has changed between a gop the
+         * current gop will be a mixed gop. So, we need to find the values of
+         * the related variables
+         */
+        find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_cur_gop,
+                               (i4_intra_frm_int - i4_pb_frms_till_prev_p),
+                               i4_inter_frm_int, i4_is_gop_closed,
+                               &i4_b_in_incomp_subgop_mix_gop,
+                               &i4_extra_p_mix_gop);
+
+        /* Add back the P and B pictures already counted in this gop */
+        ps_pic_handling->i4_frms_in_cur_gop[P_PIC] +=
+                        ps_pic_handling->i4_p_count_in_gop;
+        ps_pic_handling->i4_frms_in_cur_gop[B_PIC] +=
+                        ps_pic_handling->i4_b_count_in_gop;
+    }
+    else
+    {
+        /*
+         * Since, the inter_frm_interval has changed at a gop boundary, the
+         * new gop will have all the subgops with the new inter_frm_interval
+         */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_frms_in_cur_gop[i] =
+                            ps_pic_handling->i4_frms_in_gop[i];
+        }
+
+        i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop;
+        i4_extra_p_mix_gop = i4_extra_p;
+    }
+
+    /* For bit-allocation the rem_frms_in_gop need to be updated */
+    /*
+     * Checks needed:
+     * 1) If the encoding is happening on the same gop as that of the buffering.
+     * Note that the comparison uses the mix-gop value still held in the state
+     * struct, i.e. the one from before this update.
+     */
+    if(ps_pic_handling->i4_pic_disp_order_no
+                    >= (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed)
+                       * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+    {
+        /* Apply only the difference between the new and old distribution */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[i] +=
+                            (ps_pic_handling->i4_frms_in_cur_gop[i]
+                                            - ai4_diff_in_frms[i]);
+        }
+    }
+
+    /* Update the vars which will affect the proper filling of the pic_stack */
+    if(i4_pic_disp_order_no == 0) /*Check if redundant*/
+    {
+        ps_pic_handling->i4_buf_pic_no = 0;
+    }
+    else
+    {
+        ps_pic_handling->i4_buf_pic_no = 1;
+    }
+
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Update the state struct with the new inter_frm_int */
+    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_b_in_incomp_subgop = i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p = i4_extra_p;
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                    i4_b_in_incomp_subgop_mix_gop;
+    ps_pic_handling->i4_extra_p_mix_gop = i4_extra_p_mix_gop;
+
+}
+
+/* *****************************************************************************
+ * @brief Splits a gop into I, P and B frame counts from the intra/inter frame
+ *        intervals, and reports the B frames of the incomplete sub-gop and
+ *        the extra P frame flag
+ *
+ * i4_inter_frm_int is used as a divisor whenever i4_intra_frm_int != 1, so it
+ * must be non-zero in that case.
+ ******************************************************************************/
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+                                   WORD32 i4_intra_frm_int,
+                                   WORD32 i4_inter_frm_int,
+                                   WORD32 i4_is_gop_closed,
+                                   WORD32 *pi4_b_in_incomp_subgop,
+                                   WORD32 *pi4_extra_p)
+{
+    WORD32 i4_num_p;
+    WORD32 i4_num_b;
+
+    /* Exactly one I frame is counted for the gop */
+    i4_frms_in_gop[I_PIC] = 1;
+
+    /* Intra interval of 1: all I frames, nothing else to distribute */
+    if(1 == i4_intra_frm_int)
+    {
+        i4_frms_in_gop[P_PIC] = 0;
+        i4_frms_in_gop[B_PIC] = 0;
+        *pi4_b_in_incomp_subgop = 0;
+        *pi4_extra_p = 0;
+        return;
+    }
+
+    if(i4_is_gop_closed)
+    {
+        /* Frames left once the I frame and the closing P frame are set aside */
+        WORD32 i4_span = i4_intra_frm_int - 2;
+
+        i4_num_p = (i4_span / i4_inter_frm_int) + 1;
+
+        /* Extra P is flagged when the span is a whole number of sub-gops */
+        *pi4_extra_p = ((i4_span % i4_inter_frm_int) == 0) ? 1 : 0;
+    }
+    else
+    {
+        i4_num_p = (i4_intra_frm_int - 1) / i4_inter_frm_int;
+        *pi4_extra_p = 0;
+    }
+
+    /* Whatever is neither I nor P is a B frame */
+    i4_num_b = i4_intra_frm_int - 1 - i4_num_p;
+
+    i4_frms_in_gop[P_PIC] = i4_num_p;
+    i4_frms_in_gop[B_PIC] = i4_num_b;
+
+    /* B frames left over after the full sub-gops have taken theirs */
+    *pi4_b_in_incomp_subgop = i4_num_b - ((i4_inter_frm_int - 1)
+                    * ((i4_intra_frm_int - 1) / i4_inter_frm_int));
+}
+
+/* Returns the intra frame interval currently stored in the state */
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+
+    return i4_intra_frm_int;
+}
+
+/* Returns the inter frame interval currently stored in the state */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+
+    return i4_inter_frm_int;
+}
+
+/* Copies the whole i4_rem_frms_in_gop array of the state to the caller */
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                      WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE])
+{
+    const void *pv_src = ps_pic_handling->i4_rem_frms_in_gop;
+
+    memcpy(ai4_rem_frms_in_gop, pv_src,
+           sizeof(ps_pic_handling->i4_rem_frms_in_gop));
+}
+
+/* Returns the i4_frames_in_fif_gop value stored in the state */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_frames_in_fif_gop = ps_pic_handling->i4_frames_in_fif_gop;
+
+    return i4_frames_in_fif_gop;
+}
+
+/* Copies the whole i4_frms_in_cur_gop array of the state to the caller */
+void irc_pic_type_get_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                  WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    const void *pv_src = ps_pic_handling->i4_frms_in_cur_gop;
+
+    memcpy(ai4_frms_in_gop, pv_src,
+           sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+}
+
+/* Returns the display order number currently stored in the state */
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    return i4_pic_disp_order_no;
+}
+
+/* Raises the force I frame request flag in the state */
+void irc_set_force_I_frame_flag(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_requested = 1;
+
+    ps_pic_handling->i4_force_I_frame = i4_requested;
+}
+/* Returns the i4_forced_I_frame_cur_frame flag stored in the state */
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_flag = ps_pic_handling->i4_forced_I_frame_cur_frame;
+
+    return i4_flag;
+}
+/* Clears the i4_forced_I_frame_cur_frame flag stored in the state */
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_cleared = 0;
+
+    ps_pic_handling->i4_forced_I_frame_cur_frame = i4_cleared;
+}
+
+/******************************************************************************/
+/* Functions that work on the encoded frames */
+/******************************************************************************/
+
+/******************************************************************************
+ Function Name   : irc_update_pic_handling
+ Description     : Will be called only for the frames to be encoded.
+                   Advances the stack read position (wrapping after
+                   i4_max_inter_frm_int), takes the picture out of
+                   i4_rem_frms_in_gop and, once no I, P or B frames remain,
+                   reloads the remaining counts from i4_frms_in_cur_gop and
+                   flags the last frame of the gop.
+ *****************************************************************************/
+void irc_update_pic_handling(pic_handling_t *ps_pic_handling,
+                             picture_type_e e_pic_type)
+{
+    WORD32 i4_type;
+
+    /* Move to the next stack slot, wrapping back to slot 0 past the end */
+    if(++ps_pic_handling->i4_stack_count
+                    == (ps_pic_handling->i4_max_inter_frm_int + 1))
+    {
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    /* One frame less of this type is left in the gop */
+    ps_pic_handling->i4_rem_frms_in_gop[e_pic_type]--;
+
+    /* Assumption : Rem_frms_in_gop needs to be taken care of, for every change in frms */
+    ps_pic_handling->i4_last_frm_in_gop = 0;
+
+    /* Frames of some type are still pending: the gop is not over yet */
+    if((ps_pic_handling->i4_rem_frms_in_gop[I_PIC] > 0)
+                    || (ps_pic_handling->i4_rem_frms_in_gop[P_PIC] > 0)
+                    || (ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0))
+    {
+        return;
+    }
+
+    /* Gop exhausted: reload the remaining counts from the current gop */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        ps_pic_handling->i4_rem_frms_in_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i4_type];
+    }
+
+    ps_pic_handling->i4_last_frm_in_gop = 1;
+    ps_pic_handling->i4_first_gop_encoded = 1;
+}
+
+/* Returns the i4_last_frm_in_gop flag stored in the state */
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_last_frm_in_gop = ps_pic_handling->i4_last_frm_in_gop;
+
+    return i4_last_frm_in_gop;
+}
+
+/******************************************************************************
+ Function Name   : irc_skip_encoded_frame
+ Description     : Called for the picture at the current stack position
+                   (i4_stack_count) when it is skipped.
+                   If it's a B_PIC, or the slot following it is already
+                   i4_ref_pic_idx, nothing is done.
+                   If it's a reference picture (I_PIC or P_PIC), the stack
+                   entries from the skipped picture up to the B_PIC before the
+                   last one are each copied one slot forward (the stack is
+                   circular with i4_max_inter_frm_int + 1 slots). The slot
+                   right after the skipped picture then receives the display
+                   order number and pic id of the last B_PIC of the subgop,
+                   with its pic_type set to P_PIC.
+                   Finally one remaining B_PIC (if any) is re-accounted as a
+                   P_PIC in i4_rem_frms_in_gop.
+ *****************************************************************************/
+void irc_skip_encoded_frame(pic_handling_t *ps_pic_handling,
+                            picture_type_e e_pic_type)
+{
+    pic_details_t s_pic_details;
+    WORD32 i4_stack_count, i4_next_ref_pic_idx, i4_pic_idx;
+    WORD32 i4_max_inter_frm_int, i4_last_b_pic_idx, i4_first_b_pic_idx;
+    WORD32 i4_next_pic_idx;
+
+    /* State variables used to initialize the local vars (Not to be changed) */
+    i4_stack_count = ps_pic_handling->i4_stack_count;
+    i4_next_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+
+    i4_next_pic_idx = ((i4_stack_count + 1) % (i4_max_inter_frm_int + 1)); /* slot after the skipped pic */
+
+    /*
+     * Check what is the encoded frm_type
+     * Changing a B_PIC to a ref_pic is not reqd if
+     * there are no B_PICs referring from the skipped ref_pic, i.e. when the
+     * slot after the skipped picture is already the next reference slot
+     */
+    if(((e_pic_type == P_PIC) || (e_pic_type == I_PIC))
+                    && (i4_next_pic_idx != i4_next_ref_pic_idx))
+    {
+        /* Go to the last B_PIC before the next_ref_pic (with wrap-around) */
+        if(i4_next_ref_pic_idx == 0)
+        {
+            i4_last_b_pic_idx = i4_max_inter_frm_int;
+        }
+        else
+        {
+            i4_last_b_pic_idx = (i4_next_ref_pic_idx - 1);
+        }
+
+        /* Keep a copy of the last B_PIC pic_details; its slot gets overwritten by the shift below */
+        memcpy(&s_pic_details,
+               &ps_pic_handling->as_pic_stack[i4_last_b_pic_idx],
+               sizeof(pic_details_t));
+
+        i4_pic_idx = i4_last_b_pic_idx;
+        i4_first_b_pic_idx = (i4_stack_count + 1) % (i4_max_inter_frm_int + 1);
+
+        /*
+         * All the B_PICs other than the last one, need to be shifted one place
+         * in the stack. The walk goes backwards from the last B_PIC slot and
+         * stops once the slot of the skipped picture has been copied forward.
+         * The second condition does not change inside the loop: with a single
+         * B_PIC in the subgop (first == last) nothing is shifted at all.
+         */
+        while((i4_pic_idx != i4_stack_count)
+                        && (i4_first_b_pic_idx != i4_last_b_pic_idx))
+        {
+            /* Step one slot back, wrapping from slot 0 to the last slot */
+            if(i4_pic_idx == 0)
+            {
+                i4_pic_idx = i4_max_inter_frm_int;
+            }
+            else
+            {
+                i4_pic_idx--;
+            }
+
+            memcpy(&ps_pic_handling->as_pic_stack[(i4_pic_idx + 1)
+                                   % (i4_max_inter_frm_int + 1)],
+                   &ps_pic_handling->as_pic_stack[i4_pic_idx],
+                   sizeof(pic_details_t));
+
+        }
+
+        /*
+         * Copy the last B_PIC pic_details to the first B_PIC place and change
+         * its pic type to the ref_PIC
+         */
+        /*
+         * e_ref_pic_type
+         * NOTE(review): the type written is always P_PIC, also when the
+         * skipped picture (e_pic_type) is an I_PIC - confirm this is intended
+         */
+        ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].e_pic_type = P_PIC;
+
+        ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_disp_order_no =
+                        s_pic_details.i4_pic_disp_order_no;
+        ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_id =
+                        s_pic_details.i4_pic_id;
+
+        /*
+         * Change the rem_frms_in_gop so that the update works properly: one
+         * pending B_PIC is now counted as a P_PIC
+         */
+        if(ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0)
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[B_PIC]--;
+            ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+        }
+    }
+
+}
+
+/******************************************************************************
+ Function Name   : irc_flush_frame_from_pic_stack
+ Description     : After a flush no valid frames follow, so the last frame
+                   cannot stay a B_PIC: there would be no reference frame for
+                   it (input is in display order).
+
+                   The details of the last added picture (s_prev_pic_details)
+                   decide what happens:
+                   - B_PIC: its display order number and pic id are written to
+                     the next reference slot with pic_type P_PIC, and its own
+                     slot (i4_prev_b_pic_idx) is marked as invalid.
+                   - otherwise: the next reference slot is marked as invalid
+                     and, unless the inter frame interval is 1, so is the slot
+                     at i4_b_pic_idx.
+                   An invalid slot carries MAX_PIC_TYPE and ids of -1, which
+                   lets the codec know that all buffered frames are flushed.
+ *****************************************************************************/
+void irc_flush_frame_from_pic_stack(pic_handling_t *ps_pic_handling)
+{
+    /* Snapshot of the last entered pic_details (not to be modified here) */
+    pic_details_t s_last_added = ps_pic_handling->s_prev_pic_details;
+
+    /* Slot of the next reference picture in display order */
+    pic_details_t *ps_ref_slot =
+                    &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_ref_pic_idx];
+
+    if(B_PIC == s_last_added.e_pic_type)
+    {
+        pic_details_t *ps_last_b_slot =
+                        &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_prev_b_pic_idx];
+
+        /* Promote the last B_PIC: it takes the next reference slot as P_PIC */
+        ps_ref_slot->i4_pic_disp_order_no = s_last_added.i4_pic_disp_order_no;
+        ps_ref_slot->i4_pic_id = s_last_added.i4_pic_id;
+        ps_ref_slot->e_pic_type = P_PIC;
+
+        /* Its former slot now tells the codec that the stack is drained */
+        ps_last_b_slot->e_pic_type = MAX_PIC_TYPE;
+        ps_last_b_slot->i4_pic_id = -1;
+        ps_last_b_slot->i4_pic_disp_order_no = -1;
+        return;
+    }
+
+    /* Last picture was not a B_PIC: invalidate the next slot(s) instead */
+    ps_ref_slot->e_pic_type = MAX_PIC_TYPE;
+    ps_ref_slot->i4_pic_id = -1;
+    ps_ref_slot->i4_pic_disp_order_no = -1;
+
+    if(1 != ps_pic_handling->i4_inter_frm_int)
+    {
+        pic_details_t *ps_b_slot =
+                        &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_b_pic_idx];
+
+        ps_b_slot->e_pic_type = MAX_PIC_TYPE;
+        ps_b_slot->i4_pic_id = -1;
+        ps_b_slot->i4_pic_disp_order_no = -1;
+    }
+}
+
+/******************************************************************************
+ Function Name   : irc_add_pic_to_stack_re_enc
+ Description     : In case of a re-enc, we can assume the pictures to be coming
+                   in the encode order.
+                   In case of re-encoder basically, there are 3 problematic cases.
+                   1)Inter_frm_int is not known to start with
+                   2)Inter_frm_int can keep changing
+                   3)Intra_frm_int set by the application and that actually in the
+                    decoded bitstream may be different
+ Inputs          : ps_pic_handling - picture handling state (updated in place)
+                   i4_enc_pic_id   - id stored with the stack entry
+                   e_pic_type      - picture type as seen in the input
+ Returns         : 0 on success, -1 when the number of B_PICs in the current
+                   subgop exceeds i4_max_inter_frm_int (in which case nothing
+                   is added to the stack)
+ *****************************************************************************/
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_t *ps_pic_handling,
+                                   WORD32 i4_enc_pic_id,
+                                   picture_type_e e_pic_type)
+{
+    WORD32 i4_b_count_in_subgop;
+    WORD32 i4_max_inter_frm_int, i4_inter_frm_int, i4_intra_frm_int;
+    WORD32 i4_pic_disp_order_no;
+    WORD32 i4_is_gop_closed;
+    picture_type_e e_out_pic_type;
+    WORD32 i4_b_in_incomp_subgop;
+
+    /* Check if a change in intra_frm_int call has been made */
+    if(ps_pic_handling->i4_change_in_intra_frm_int == 1)
+    {
+        irc_update_pic_distbn(ps_pic_handling,
+                              ps_pic_handling->i4_new_intra_frm_int,
+                              ps_pic_handling->i4_inter_frm_int, 1);
+        ps_pic_handling->i4_change_in_intra_frm_int = 0;
+    }
+
+    /* Check if a change in inter_frm_int call has been made */
+    if(ps_pic_handling->i4_change_in_inter_frm_int == 1)
+    {
+        irc_update_pic_distbn(ps_pic_handling,
+                              ps_pic_handling->i4_intra_frm_int,
+                              ps_pic_handling->i4_new_inter_frm_int, 1);
+
+        ps_pic_handling->i4_change_in_inter_frm_int = 0;
+    }
+
+    /* Initialize the local vars with the state vars (after any pending update above) */
+    i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+
+    e_out_pic_type = e_pic_type;
+
+    /* Initially the rate_control assumes an IPP sequence */
+    if(e_pic_type == B_PIC)
+    {
+        /* Update the number of B_PICs in a subgop */
+        i4_b_count_in_subgop++;
+
+        /* More B_PICs than the configured maximum can hold: reject the picture */
+        if(i4_b_count_in_subgop > i4_max_inter_frm_int)
+        {
+            return (-1);
+        }
+
+        /* If the number of B_PICs exceed the set inter_frm_int then
+         change the inter_frm_int (mid-gop update, gop_boundary = 0) */
+        if(i4_b_count_in_subgop > (i4_inter_frm_int - 1))
+        {
+            i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+            irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+                                  i4_inter_frm_int, 0);
+        }
+    }
+    else if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+    {
+        /* If the B_PICs in the prev subgop were fewer than the current
+         * (inter_frm_int-1) and none of these conditions occur, it'll mean the
+         *  decrease in the inter_frm_int
+         *    1)End of a GOP
+         *    2)Beginning of an OPEN_GOP
+         */
+        if((i4_b_count_in_subgop < (i4_inter_frm_int - 1))
+                        && !((!i4_is_gop_closed)
+                                        && (i4_b_count_in_subgop
+                                                        >= i4_b_in_incomp_subgop))
+                        && !((i4_pic_disp_order_no
+                                        + (i4_inter_frm_int - 1
+                                                        - i4_b_count_in_subgop))
+                                        > i4_intra_frm_int))
+        {
+            i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+            irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+                                  i4_inter_frm_int, 0);
+        }
+
+        /* Reset the number of B_PICs in a subgop */
+        i4_b_count_in_subgop = 0;
+    }
+
+    /* Update of the frame level vars (local copy only; written back at the end) */
+    i4_pic_disp_order_no++;
+
+    /* End of gop condition
+     *Two cases can arise :
+     *1) The intra_frm_int set by the application is greater than the actual
+     *   bitstream intra_frm_int (i.e. we will get an I frame before
+     *   pic_disp_order_no goes to intra_frm_int)
+     *2) The intra_frm_int set by the application is smaller than the actual bitstream intra_frm_int
+     *   (i.e. we won't get an I_PIC even if pic_disp_order_no goes to
+     *   intra_frm_int) Constraints :
+     *    1) I_PIC cannot be changed to B_PIC
+     *    2) B_PIC cannot be changed to I_PIC
+     */
+    if(i4_pic_disp_order_no >= i4_intra_frm_int)
+    {
+        if(e_pic_type != B_PIC)
+        {
+            e_out_pic_type = I_PIC;
+        }
+        else
+        {
+            /* A B_PIC past the expected gop end stays a B_PIC; the gop counts grow by one */
+            e_out_pic_type = B_PIC;
+            ps_pic_handling->i4_rem_frms_in_gop[B_PIC]++;
+            ps_pic_handling->i4_frms_in_cur_gop[B_PIC]++;
+            ps_pic_handling->i4_frms_in_gop[B_PIC]++;
+        }
+    }
+    else
+    {
+        if((e_pic_type == I_PIC) && (!ps_pic_handling->i4_is_first_gop))
+        {
+            /* An I_PIC arriving before the expected gop end is handled as a P_PIC */
+            e_out_pic_type = P_PIC;
+            ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+            ps_pic_handling->i4_frms_in_cur_gop[P_PIC]++;
+            ps_pic_handling->i4_frms_in_gop[P_PIC]++;
+        }
+        else
+        {
+            e_out_pic_type = e_pic_type;
+        }
+    }
+
+    /* Update the frm_vars at the end of the gop */
+    if(i4_pic_disp_order_no
+                    == (ps_pic_handling->i4_frms_in_cur_gop[P_PIC]
+                                    + ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+                                    + 1))
+    {
+        i4_pic_disp_order_no = 0;
+        ps_pic_handling->i4_is_first_gop = 0;
+    }
+
+    /* Update the vars working on the encoded pics (first picture of the first gop) */
+    if((ps_pic_handling->i4_is_first_gop)
+                    && (ps_pic_handling->i4_stack_count == -1))
+    {
+        ps_pic_handling->i4_coded_pic_no = 0;
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    /*
+     * Add the pic_details to the pic_stack.
+     * The display order number stored is the one still held in the state
+     * struct, i.e. the value from before this call's increment/reset of the
+     * local copy is written back below.
+     */
+    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].e_pic_type =
+                    e_out_pic_type;
+    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no =
+                    ps_pic_handling->i4_pic_disp_order_no;
+    ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_id =
+                    i4_enc_pic_id;
+
+    /* Writing back those values which need to be updated */
+    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+    ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+
+    return (0);
+}
diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h
new file mode 100755
index 0000000..1af5424
--- /dev/null
+++ b/encoder/irc_picture_type.h
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _PIC_HANDLING_H_
+#define _PIC_HANDLING_H_
+
+/*
+ * Basic Understanding:
+ * irc_add_pic_to_stack(_re_enc):
+ * These functions convert the input (or display) order to encoding order.
+ * irc_get_pic_from_stack:
+ * Returns the pic id, display order number and picture type of the entry at
+ * the current stack position (BUF_PIC with both numbers set to -1 while the
+ * stack count is negative).
+ */
+typedef struct pic_handling_t *pic_handling_handle; /* opaque handle to the picture handling state */
+
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_pic_handling(pic_handling_handle ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed);
+
+void irc_add_pic_to_stack(pic_handling_handle ps_pic_handling,
+                          WORD32 i4_enc_pic_id);
+
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_handle ps_pic_handling,
+                                   WORD32 i4_enc_pic_id,
+                                   picture_type_e e_pic_type); /* returns 0 on success, -1 on too many B_PICs in a subgop */
+
+void irc_get_pic_from_stack(pic_handling_handle ps_pic_handling,
+                            WORD32 *pi4_pic_id,
+                            WORD32 *pi4_pic_disp_order_no,
+                            picture_type_e *pe_pic_type);
+
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling);
+
+void irc_flush_frame_from_pic_stack(pic_handling_handle ps_pic_handling);
+
+/* NITT TBR The below two functions should be made a single function */
+void irc_skip_encoded_frame(pic_handling_handle ps_pic_handling,
+                            picture_type_e e_pic_type);
+
+void irc_update_pic_handling(pic_handling_handle ps_pic_handling,
+                             picture_type_e e_pic_type);
+
+/*
+ * Function returns the number of frames that have been encoded in the GOP in
+ * which the forced I frame takes effect
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_handle ps_pic_handling);
+
+void irc_set_force_I_frame_flag(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+/* Normal get functions: each returns or copies out a value held in the state */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_handle ps_pic_handling);
+
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_handle ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int);
+
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_handle ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int);
+
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_handle ps_pic_handling,
+                                      WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]); /* copies the remaining frame counts */
+
+void irc_pic_type_get_frms_in_gop(pic_handling_handle ps_pic_handling,
+                                  WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]); /* copies the frame counts of the current gop */
+
+#endif /* _PIC_HANDLING_H_ */
+
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
new file mode 100755
index 0000000..6c6586e
--- /dev/null
+++ b/encoder/irc_rate_control_api.c
@@ -0,0 +1,1600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "irc_trace_support.h"
+
+#define DEV_Q   4       /* Q format (shift) of the deviation range factors */
+#define HI_DEV_FCTR     22  /* upper factor: 22/16, i.e. ~1.4 in Q4 */
+#define LO_DEV_FCTR     12  /* lower factor: 12/16, i.e. 0.75 in Q4 */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32)(Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define GET_LO_DEV_QP(Qprev) ((((WORD32)(Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? (lo_d) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
+
+/*****************************************************************************/
+/* Restricts the quantization parameter variation within delta */
+/*****************************************************************************/
+/* static WORD32 restrict_swing(WORD32 cur_qp, WORD32 prev_qp, WORD32 delta_qp)
+ {
+ if((cur_qp) - (prev_qp) > (delta_qp)) (cur_qp) = (prev_qp) + (delta_qp) ;
+ if((prev_qp) - (cur_qp) > (delta_qp)) (cur_qp) = (prev_qp) - (delta_qp) ;
+ return cur_qp;
+ }*/
+
+/*****************************************************************************
+ Function Name : irc_rate_control_num_fill_use_free_memtab
+ Description   : Walks the memtabs of the RC api state and of every sub-module
+ Inputs        : pps_rate_control_api - pointer to the RC api handle
+                 ps_memtab            - memtab array, indexed from 0
+                 e_func_type          - operation forwarded to each sub-module
+ Returns       : number of memtab entries covered by this module
+ *****************************************************************************/
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    static rate_control_api_t s_dummy_rc_api;
+    WORD32 i4_num_memtabs = 0, i4_pic_type;
+
+    /*
+     * No state memory is available for GET_NUM_MEMTAB and FILL_MEMTAB, so the
+     * handle is pointed at a static dummy to keep the dereferences below valid
+     */
+    if((e_func_type == GET_NUM_MEMTAB) || (e_func_type == FILL_MEMTAB))
+        *pps_rate_control_api = &s_dummy_rc_api;
+
+    /* Entry 0 describes the rate control api state structure itself */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(ps_memtab + i4_num_memtabs, sizeof(rate_control_api_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_memtab, (void**)pps_rate_control_api, e_func_type);
+    }
+    i4_num_memtabs += 1;
+
+    /* Each lower module handles its own entries and reports their count */
+    i4_num_memtabs += irc_ba_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_bit_allocation,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    i4_num_memtabs += irc_cbr_buffer_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_cbr_buffer,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    i4_num_memtabs += irc_est_sad_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_est_sad,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    i4_num_memtabs += irc_mbrc_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_mb_rate_control,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    i4_num_memtabs += irc_vbr_vbv_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_vbr_storage_vbv,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    /* One rd model handle per index below MAX_PIC_TYPE */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        i4_num_memtabs += irc_rd_model_num_fill_use_free_memtab(
+                        &(*pps_rate_control_api)->aps_rd_model[i4_pic_type],
+                        ps_memtab + i4_num_memtabs, e_func_type);
+    }
+    i4_num_memtabs += irc_pic_handling_num_fill_use_free_memtab(
+                    &(*pps_rate_control_api)->ps_pic_handling,
+                    ps_memtab + i4_num_memtabs, e_func_type);
+
+    return (i4_num_memtabs);
+}
+
+/*****************************************************************************
+ Function Name : irc_initialise_rate_control
+ Description   : Initialise the rate control structure
+ Inputs        : ps_rate_control_api   - api struct
+                 e_rate_control_type   - VBR, CBR (NLDRC/LDRC), VBR_STREAMING
+                 u1_is_mb_level_rc_on  - enabling mb level RC
+                 u4_avg_bit_rate       - bit rate to be achieved across the
+                                         entire file size
+                 pu4_peak_bit_rate     - max possible drain rates (array)
+                 u4_frame_rate         - number of frames in 1000 seconds
+                 u4_intra_frame_interval - num frames between two I frames
+                 pu1_init_qp           - init_qp for I,P,B
+ *****************************************************************************/
+void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
+                                 rc_type_e e_rate_control_type,
+                                 UWORD8 u1_is_mb_level_rc_on,
+                                 UWORD32 u4_avg_bit_rate,
+                                 UWORD32 *pu4_peak_bit_rate,
+                                 UWORD32 u4_min_bit_rate,
+                                 UWORD32 u4_frame_rate,
+                                 UWORD32 u4_max_delay,
+                                 UWORD32 u4_intra_frame_interval,
+                                 UWORD8 *pu1_init_qp,
+                                 UWORD32 u4_max_vbv_buff_size,
+                                 WORD32 i4_max_inter_frm_int,
+                                 WORD32 i4_is_gop_closed,
+                                 UWORD8 *pu1_min_max_qp,
+                                 WORD32 i4_use_est_intra_sad,
+                                 UWORD32 u4_src_ticks,
+                                 UWORD32 u4_tgt_ticks)
+{
+    WORD32 i;
+    UWORD32 u4_frms_in_delay_prd = (u4_frame_rate * u4_max_delay) / 1000000;
+    ps_rate_control_api->e_rc_type = e_rate_control_type;
+    ps_rate_control_api->u1_is_mb_level_rc_on = u1_is_mb_level_rc_on;
+
+    trace_printf((const WORD8*)"RC type = %d\n", e_rate_control_type);
+
+    /* Set the avg_bitrate_changed flag for each pic_type to 0 */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[i] = 0;
+    }
+
+    /* Initialize the pic_handling module */
+    irc_init_pic_handling(ps_rate_control_api->ps_pic_handling,
+                          (WORD32)u4_intra_frame_interval, i4_max_inter_frm_int,
+                          i4_is_gop_closed);
+
+    /*** Initialize the rate control modules  ***/
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        /* Initialize the model parameter structures */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            irc_init_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i],
+                                     MAX_FRAMES_MODELLED);
+        }
+
+        /* Initialize the buffer mechanism */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            /* Both peak bit rates are expected to be equal in these modes */
+            if(pu4_peak_bit_rate[0] != pu4_peak_bit_rate[1])
+            {
+                trace_printf((const WORD8*)"For VBR_STORAGE and VBR_STORAGE_DVD_COMP the peak bit rates should be same\n");
+            }
+            irc_init_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                             (WORD32)pu4_peak_bit_rate[0],
+                             (WORD32)u4_frame_rate,
+                             (WORD32)u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            UWORD32 u4_avg_bit_rate_copy[MAX_NUM_DRAIN_RATES];
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                u4_avg_bit_rate_copy[i] = u4_avg_bit_rate;
+            }
+            /* In case of CBR the num pics in delay is ignored */
+            for(i = 0; i < MAX_PIC_TYPE; i++)
+                au4_num_pics_in_delay_prd[i] = 0;
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)u4_avg_bit_rate_copy,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            irc_init_vbv_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                  u4_intra_frame_interval, u4_src_ticks,
+                                  u4_tgt_ticks, u4_frms_in_delay_prd);
+
+            /* Get the number of pics of each type in delay period */
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)pu4_peak_bit_rate,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+
+        /* Initialize the SAD estimation module */
+        irc_init_est_sad(ps_rate_control_api->ps_est_sad, i4_use_est_intra_sad);
+
+        /* Initialize the bit allocation module according to VBR or CBR */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type == VBR_STREAMING)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       VBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       CBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+
+        /* u1_scd_detected is set to 1 when a scene change is detected */
+        ps_rate_control_api->u1_scd_detected = 0;
+    }
+
+    /* Initialize the init_qp */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_init_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_prev_frm_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_min_max_qp[(i << 1)] =
+                        pu1_min_max_qp[(i << 1)];
+        ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] = pu1_min_max_qp[(i
+                        << 1) + 1];
+    }
+
+    /* Initialize the is_first_frm_encoded */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+    }
+    ps_rate_control_api->u1_is_first_frm = 1;
+
+    /* Delayed-impact control for a change made to the peak bit rate */
+    ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = 0;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_rate_control_api->au4_new_peak_bit_rate[i] = pu4_peak_bit_rate[i];
+    }
+
+    /* Initialize the mb level rate control module */
+    irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+    /*
+     * Bits per frame = avg_bit_rate * 1000 / frame_rate. The product wraps in
+     * 32 bits once the bit rate exceeds ~4.29 Mbps, so the quotient and the
+     * remainder are scaled separately (same floor result, no wide product).
+     */
+    ps_rate_control_api->i4_prev_frm_est_bits =
+                    (u4_avg_bit_rate / u4_frame_rate) * 1000
+                    + ((u4_avg_bit_rate % u4_frame_rate) * 1000)
+                    / u4_frame_rate;
+
+    ps_rate_control_api->prev_ref_pic_type = I_PIC;
+}
+
+/******************************************************************************
+ *Description   : passes i4_enc_pic_id to irc_add_pic_to_stack (pic handling)
+ ******************************************************************************/
+void irc_add_picture_to_stack(rate_control_api_t *rate_control_api,
+                              WORD32 i4_enc_pic_id)
+{
+    /* Call the routine to add the pic to stack in encode order */
+    irc_add_pic_to_stack(rate_control_api->ps_pic_handling, i4_enc_pic_id);
+}
+
+void irc_add_picture_to_stack_re_enc(rate_control_api_t *rate_control_api,
+                                     WORD32 i4_enc_pic_id,
+                                     picture_type_e e_pic_type)
+{
+    /*
+     * Re-encoder path: pictures arrive already in encode order, so they are not
+     * buffered; the id and the caller-given type are passed to pic handling
+     */
+    irc_add_pic_to_stack_re_enc(rate_control_api->ps_pic_handling,
+                                i4_enc_pic_id, e_pic_type);
+}
+
+/*******************************************************************************
+ Description   : Queries pic id, display order no and pic type via pic handling
+ ******************************************************************************/
+void irc_get_picture_details(rate_control_handle rate_control_api,
+                             WORD32 *pi4_pic_id,
+                             WORD32 *pi4_pic_disp_order_no,
+                             picture_type_e *pe_pic_type)
+{
+    /* The three output pointers are passed on to irc_get_pic_from_stack */
+    irc_get_pic_from_stack(rate_control_api->ps_pic_handling, pi4_pic_id,
+                           pi4_pic_disp_order_no, pe_pic_type);
+}
+
+/*******************************************************************************
+ *  Description   : Returns the frame level qp for the given picture type
+ ******************************************************************************/
+UWORD8 irc_get_frame_level_qp(rate_control_api_t *ps_rate_control_api,
+                              picture_type_e e_pic_type,
+                              WORD32 i4_ud_max_bits)
+{
+    UWORD8 u1_frame_qp, i;
+
+    if((ps_rate_control_api->e_rc_type != VBR_STORAGE)
+                    && (ps_rate_control_api->e_rc_type != VBR_STORAGE_DVD_COMP)
+                    && (ps_rate_control_api->e_rc_type != CBR_NLDRC)
+                    && (ps_rate_control_api->e_rc_type != CONST_QP)
+                    && (ps_rate_control_api->e_rc_type != VBR_STREAMING))
+    {
+        trace_printf((const WORD8*)(const WORD8*)" Only VBR,NLDRC and CONST QP supported for now \n");
+        return (0);
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD8 u1_is_first_frm_coded = 1;
+
+        /* The model is used only once enough picture types have been coded: */
+        /* I and P, or I alone when the intra frame interval is 1 */
+        if((ps_rate_control_api->au1_is_first_frm_coded[I_PIC]
+                        && ps_rate_control_api->au1_is_first_frm_coded[P_PIC])
+                        || ((irc_pic_type_get_intra_frame_interval(
+                                        ps_rate_control_api->ps_pic_handling)
+                                        == 1)
+                                        && (ps_rate_control_api->au1_is_first_frm_coded[I_PIC])))
+        {
+            if(e_pic_type != B_PIC)
+                u1_is_first_frm_coded = 1;
+            else
+            {
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    u1_is_first_frm_coded &=
+                                    ps_rate_control_api->au1_is_first_frm_coded[i];
+                }
+            }
+        }
+        else
+        {
+            u1_is_first_frm_coded = 0;
+        }
+
+        if(u1_is_first_frm_coded)
+        {
+            WORD32 i4_cur_est_texture_bits, i4_cur_est_header_bits;
+            WORD32 i4_cur_est_bits;
+            UWORD32 u4_estimated_sad;
+
+            /* On a forced I frame, update the remaining bits in the period */
+            if(irc_get_forced_I_frame_cur_frm_flag(
+                            ps_rate_control_api->ps_pic_handling) == 1)
+            {
+                irc_ba_change_rem_bits_in_prd_at_force_I_frame(
+                                ps_rate_control_api->ps_bit_allocation,
+                                ps_rate_control_api->ps_pic_handling);
+                irc_reset_forced_I_frame_cur_frm_flag(
+                                ps_rate_control_api->ps_pic_handling);
+            }
+
+            /* Get the estimated texture bits allocated for the current frame*/
+            i4_cur_est_texture_bits = irc_ba_get_cur_frm_est_texture_bits(
+                            ps_rate_control_api->ps_bit_allocation,
+                            ps_rate_control_api->aps_rd_model,
+                            ps_rate_control_api->ps_est_sad,
+                            ps_rate_control_api->ps_pic_handling, e_pic_type);
+
+            /* Get the estimated header bits*/
+            i4_cur_est_header_bits = irc_ba_get_cur_frm_est_header_bits(
+                            ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+            /* Total estimated bits */
+            i4_cur_est_bits = i4_cur_est_header_bits + i4_cur_est_texture_bits;
+
+            trace_printf((const WORD8*)"ft %d, etb = %d, eb %d, ", e_pic_type,
+                         i4_cur_est_texture_bits, i4_cur_est_bits);
+
+            /* Threshold the estimated bits based on the buffer fullness*/
+            if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+            {
+                WORD32 i4_cur_frm_max_bit_possible;
+                i4_cur_frm_max_bit_possible = irc_get_max_target_bits(
+                                ps_rate_control_api->ps_vbr_storage_vbv);
+
+                if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+                {
+                    /* Assuming header would consume the same amount of bits */
+                    i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+                                    - i4_cur_est_header_bits;
+                }
+            }
+            else if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+            {
+                WORD32 i4_rem_bits_in_gop, i4_rem_frms_in_gop, i;
+                WORD32 i4_cur_frm_max_bit_possible,
+                                ai4_rem_frms_in_gop[MAX_PIC_TYPE];
+                irc_pic_type_get_rem_frms_in_gop(
+                                ps_rate_control_api->ps_pic_handling,
+                                ai4_rem_frms_in_gop);
+                i4_rem_bits_in_gop = irc_get_rem_bits_in_period(
+                                ps_rate_control_api);
+                i4_rem_frms_in_gop = 0;
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                    i4_rem_frms_in_gop += ai4_rem_frms_in_gop[i];
+
+                /* Threshold the bits based on estimated buffer fullness */
+                i4_cur_frm_max_bit_possible = irc_get_max_tgt_bits_dvd_comp(
+                                ps_rate_control_api->ps_vbr_storage_vbv,
+                                i4_rem_bits_in_gop, i4_rem_frms_in_gop,
+                                e_pic_type);
+
+                if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+                {
+                    /* Assuming header would consume the same amount of bits */
+                    i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+                                    - i4_cur_est_header_bits;
+
+                }
+            }
+            else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+            {
+                WORD32 i4_cur_frm_bits_acc_buffer =
+                                irc_cbr_buffer_constraint_check(
+                                                ps_rate_control_api->ps_cbr_buffer,
+                                                i4_cur_est_bits, e_pic_type);
+
+                /* Assuming the header would consume the same amount of bits */
+                i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+                                - i4_cur_est_header_bits;
+
+            }
+            else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+            {
+                WORD32 i4_cur_frm_bits_acc_buffer =
+                                irc_vbr_stream_buffer_constraint_check(
+                                                ps_rate_control_api->ps_cbr_buffer,
+                                                i4_cur_est_bits, e_pic_type);
+
+                /* Assuming the header would consume the same amount of bits */
+                i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+                                - i4_cur_est_header_bits;
+            }
+
+            trace_printf((const WORD8*)"emtb = %d, ", i4_cur_est_texture_bits);
+
+            /*
+             * If the estimated texture bits fall below zero (attributed to
+             * buffer underflow in the original note), clamp the estimated
+             * texture bits to zero
+             */
+            if(i4_cur_est_texture_bits < 0)
+                i4_cur_est_texture_bits = 0;
+
+            ps_rate_control_api->i4_prev_frm_est_bits = (i4_cur_est_texture_bits
+                            + i4_cur_est_header_bits);
+
+            /* Clip est_texture_bits to the user-defined max (non-I pics only) */
+            if((i4_cur_est_texture_bits
+                            > (i4_ud_max_bits - i4_cur_est_header_bits))
+                            && (e_pic_type != I_PIC))
+            {
+                i4_cur_est_texture_bits = (i4_ud_max_bits
+                                - i4_cur_est_header_bits);
+                trace_printf((const WORD8*)"udcb = %d, ",
+                             i4_ud_max_bits - i4_cur_est_header_bits);
+            }
+
+            /* Calculate the estimated SAD for corresponding frame*/
+            u4_estimated_sad = irc_get_est_sad(ps_rate_control_api->ps_est_sad,
+                                               e_pic_type);
+
+            /* Query the model for the Qp for the corresponding frame*/
+
+            /*
+             * The check is because the model gives a negative QP when the
+             * i4_cur_est_texture_bits is less than or equal to 0
+             * [This is a bug in the model]. As a temporary fix, the frame QP
+             * is being set to the max QP allowed
+             */
+            if(i4_cur_est_texture_bits > 0)
+            {
+                u1_frame_qp = irc_find_qp_for_target_bits(
+                                ps_rate_control_api->aps_rd_model[e_pic_type],
+                                i4_cur_est_texture_bits,
+                                u4_estimated_sad,
+                                ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1)],
+                                ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1) + 1]);
+            }
+            else
+            {
+                u1_frame_qp = ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                << 1) + 1];
+            }
+
+            trace_printf((const WORD8*)"ehb %d, etb %d, fqp %d, es %d, eb %d, ",
+                         i4_cur_est_header_bits, i4_cur_est_texture_bits,
+                         u1_frame_qp, u4_estimated_sad, i4_cur_est_bits);
+
+            /* Restrict the QP swing unless the avg bit rate has just changed */
+            if(ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] == 0)
+            {
+                WORD32 prev_qp;
+                WORD32 hi_dev_qp, lo_dev_qp;
+                /* Start from the QP of the previous reference picture type */
+                prev_qp = ps_rate_control_api->au1_prev_frm_qp[ps_rate_control_api->prev_ref_pic_type];
+
+                if(ps_rate_control_api->prev_ref_pic_type != e_pic_type)
+                {
+                    if(e_pic_type == I_PIC)
+                    {
+                        /*
+                         * Constrain I-frame QP to be within specified limit of
+                         * prev_ref_qp/Kp
+                         */
+                        prev_qp = (P_TO_I_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else if(e_pic_type == P_PIC)
+                    {
+                        /*
+                         * Constrain P-frame QP to be within specified limit of
+                         * Kp*prev_ref_qp
+                         */
+                        prev_qp = (I_TO_P_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else if(ps_rate_control_api->prev_ref_pic_type == P_PIC)
+                    {
+                        /* current frame is B-pic */
+                        /* Constrain B-frame QP to be within specified limit of
+                         * prev_ref_qp/Kb
+                         */
+                        prev_qp = (P_TO_B_RATIO * prev_qp + (1 << (K_Q - 1)))
+                                        >> (K_Q);
+                    }
+                    else /* remaining case: prev_ref_pic_type == I_PIC */
+                    {
+                        /* current frame is B-pic */
+                        /*
+                         * Constrain B-frame QP using both ratios in sequence
+                         * (I to P, then P to B), hence the doubled Q shift
+                         */
+                        prev_qp = (P_TO_B_RATIO * I_TO_P_RATIO * prev_qp
+                                        + (1 << (K_Q + K_Q - 1)))
+                                        >> (K_Q + K_Q);
+                    }
+                }
+
+                hi_dev_qp = GET_HI_DEV_QP(prev_qp);
+                /*
+                 * For lower QPs, due to the scale factor and fixed point
+                 * arithmetic, hi_dev_qp can equal prev_qp; the QP would then
+                 * get stuck at the lowest value and never be able to rise.
+                 * To avoid this, hi_dev_qp is made one more than prev_qp in
+                 * that case
+                 */
+                if(prev_qp == hi_dev_qp)
+                {
+                    hi_dev_qp += 1;
+                }
+                lo_dev_qp = GET_LO_DEV_QP(prev_qp);
+                u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp, lo_dev_qp);
+            }
+            else
+            {
+                ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] = 0;
+            }
+        }
+        else
+        {
+            /*
+             * The u1_is_first_frm_coded gets reset
+             *  a) at start of sequence
+             *  b) whenever there is a scene change.
+             *     In both cases there is no estimate for the current frame,
+             *     so a stored QP is returned instead of a model QP. After a
+             *     scene change that is u1_frm_qp_after_scd; the scene changed
+             *     VOP is expected to have over consumed, which makes skips of
+             *     future frames likely. For the init
+             *     case, the previous frame QP is initialized with the init qp
+             */
+            if((ps_rate_control_api->u1_scd_detected)
+                            && (ps_rate_control_api->e_rc_type != CONST_QP))
+            {
+                /*
+                 * If scene change is detected, I frame Qp would have been
+                 * updated
+                 */
+                 /* Use a QP calculated in the prev update fxn */
+                u1_frame_qp = ps_rate_control_api->u1_frm_qp_after_scd;
+            }
+            else
+            {
+                u1_frame_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+            }
+        }
+    }
+    else
+    {
+        u1_frame_qp = ps_rate_control_api->au1_init_qp[e_pic_type];
+    }
+
+    trace_printf((const WORD8*)"fqp %d\n", u1_frame_qp);
+
+    return (u1_frame_qp);
+}
+
+/*******************************************************************************
+ *Function Name : irc_get_buffer_status
+ *Description   : Queries the buffer model that matches the active rc type
+ *Outputs       : 0 = normal, 1 = underflow, 2= overflow
+ *Returns       : vbv_buf_status_e
+ ******************************************************************************/
+vbv_buf_status_e irc_get_buffer_status(rate_control_api_t *ps_rate_control_api,
+                                       WORD32 i4_total_frame_bits,
+                                       picture_type_e e_pic_type,
+                                       WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    vbv_buf_status_e e_status = VBV_NORMAL;
+
+    switch(ps_rate_control_api->e_rc_type)
+    {
+        case VBR_STORAGE_DVD_COMP:
+            /* Status comes from the VBR storage VBV module */
+            e_status = irc_get_vbv_buffer_status(
+                            ps_rate_control_api->ps_vbr_storage_vbv,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow);
+            trace_printf((const WORD8*)"e_buf_status = %d\n", e_status);
+            break;
+
+        case VBR_STORAGE:
+            /* No underflow in this mode: report the max VBV buffer size */
+            *pi4_num_bits_to_prevent_vbv_underflow = irc_get_max_vbv_buf_size(
+                            ps_rate_control_api->ps_vbr_storage_vbv);
+            e_status = VBV_NORMAL;
+            break;
+
+        case CBR_NLDRC:
+        case VBR_STREAMING:
+            /* Both rc types query the CBR buffer module the same way */
+            e_status = irc_get_cbr_buffer_status(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow, e_pic_type);
+            break;
+
+        default:
+            /* Any other rc type keeps VBV_NORMAL; the output is untouched */
+            break;
+    }
+    return e_status;
+}
+
+/*******************************************************************************
+ Function Name : irc_update_pic_handling_state
+ Description   : Forwards the given picture type to the picture handling
+                 module through irc_update_pic_handling()
+                 NOTE(review): the original description was truncated ("If the
+                 forward path and the backward path of rate control"). This is
+                 presumably meant for callers that update picture handling
+                 themselves and then pass i4_is_pic_handling_done to
+                 irc_update_frame_level_info - confirm against the caller
+ ******************************************************************************/
+void irc_update_pic_handling_state(rate_control_api_t *ps_rate_control_api,
+                                   picture_type_e e_pic_type)
+{
+    irc_update_pic_handling(ps_rate_control_api->ps_pic_handling, e_pic_type);
+}
+
+/******************************************************************************
+ Function Name : irc_update_frame_level_info
+ Description   : Updates the frame level information into the rate control
+                 structure once a frame has been coded: picture handling,
+                 bit allocation, buffer model, scene change handling, RD
+                 model, MB level model and SAD estimation
+ Inputs        : e_pic_type          - type of the coded picture; treated as
+                                       I_PIC from the scene change handling
+                                       onwards when u1_is_scd is honoured
+                 pi4_mb_type_sad, pi4_mb_type_tex_bits, pi4_tot_mb_type_qp,
+                 pi4_tot_mb_in_type  - arrays of MAX_MB_TYPE entries that are
+                                       summed into frame totals
+                 i4_total_frame_bits, i4_model_updation_hdr_bits
+                                     - handed to the bit allocation module;
+                                       the total also updates the buffer
+                 i4_avg_activity     - scales the SADs when MB level RC is on
+                                       (0 is treated as 1 there)
+                 u1_is_scd           - scene change flag; ignored when the
+                                       inter frame interval is above 1
+                 i4_is_it_a_skip     - when set, the frame statistics are not
+                                       used; only the buffer status and
+                                       prev_ref_pic_type are updated
+                 i4_intra_frm_cost   - fed to the intra SAD estimate
+                 i4_is_pic_handling_done
+                                     - when set, pic handling is not updated
+                                       here
+ ******************************************************************************/
+void irc_update_frame_level_info(rate_control_api_t *ps_rate_control_api,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_mb_type_sad,
+                                 WORD32 i4_total_frame_bits,
+                                 WORD32 i4_model_updation_hdr_bits,
+                                 WORD32 *pi4_mb_type_tex_bits,
+                                 WORD32 *pi4_tot_mb_type_qp,
+                                 WORD32 *pi4_tot_mb_in_type,
+                                 WORD32 i4_avg_activity,
+                                 UWORD8 u1_is_scd,
+                                 WORD32 i4_is_it_a_skip,
+                                 WORD32 i4_intra_frm_cost,
+                                 WORD32 i4_is_pic_handling_done)
+{
+    UWORD8 u1_num_skips = 0;
+    WORD32 i;
+    UWORD32 u4_frame_sad = 0;
+    WORD32 i4_tot_texture_bits = 0;
+    WORD32 i4_tot_mbs = 0;
+    WORD32 i4_avg_qp = 0;
+
+    /* SCD not supported in case of IPB encoder */
+    if(u1_is_scd && (irc_pic_type_get_inter_frame_interval(
+                                    ps_rate_control_api->ps_pic_handling) > 1))
+    {
+        u1_is_scd = 0;
+    }
+    trace_printf((const WORD8*)"i4_total_frame_bits %d\n", i4_total_frame_bits);
+
+    if(!i4_is_it_a_skip && !i4_is_pic_handling_done)
+    {
+        /* Update the pic_handling struct */
+        irc_update_pic_handling(ps_rate_control_api->ps_pic_handling,
+                                e_pic_type);
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        if(!i4_is_it_a_skip)
+        {
+            WORD32 i4_new_period_flag;
+            /******************************************************************
+             Calculate the total values from the individual values
+             ******************************************************************/
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                u4_frame_sad += pi4_mb_type_sad[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_texture_bits += pi4_mb_type_tex_bits[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_avg_qp += pi4_tot_mb_type_qp[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_mbs += pi4_tot_mb_in_type[i];
+
+            /*
+             * Calculate the average QP. A frame that reports no MBs cannot
+             * be averaged (the division would be by zero), so fall back to
+             * the QP last recorded for this picture type
+             */
+            if(i4_tot_mbs != 0)
+            {
+                i4_avg_qp /= i4_tot_mbs;
+            }
+            else
+            {
+                i4_avg_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+            }
+
+            if(ps_rate_control_api->u1_is_mb_level_rc_on)
+            {
+                /*
+                 * The model needs to take into consideration the average
+                 * activity of the entire frame while estimating the QP. Thus
+                 * the frame sad values are scaled by the average activity
+                 * before updating it into the model.
+                 */
+                if(!i4_avg_activity)
+                    i4_avg_activity = 1;
+                i4_intra_frm_cost *= i4_avg_activity;
+                u4_frame_sad *= i4_avg_activity;
+            }
+
+            /******************************************************************
+             Update the bit allocation module
+             NOTE: For bit allocation module, the pic_type should not be
+             modified to that of 'I', in case of a SCD.
+             ******************************************************************/
+            i4_new_period_flag = irc_is_last_frame_in_gop(
+                            ps_rate_control_api->ps_pic_handling);
+            irc_ba_update_cur_frm_consumed_bits(
+                            ps_rate_control_api->ps_bit_allocation,
+                            ps_rate_control_api->ps_pic_handling,
+                            i4_total_frame_bits, i4_model_updation_hdr_bits,
+                            e_pic_type, u1_is_scd, i4_new_period_flag);
+
+            if(1 == i4_new_period_flag
+                            && ((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                                            || (ps_rate_control_api->e_rc_type
+                                                            == VBR_STORAGE_DVD_COMP)))
+            {
+                irc_ba_check_and_update_bit_allocation(
+                                ps_rate_control_api->ps_bit_allocation,
+                                ps_rate_control_api->ps_pic_handling,
+                                irc_get_cur_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_bits_per_tgt_frm(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                i4_total_frame_bits);
+            }
+        }
+
+        /**********************************************************************
+         Update the buffer status
+         *********************************************************************/
+        /*
+         * This update is done after overflow and underflow handling to
+         *  account for the actual bits dumped
+         */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_update_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                               i4_total_frame_bits);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+            /*
+             * The picture counts are read before the streaming parameters
+             * are advanced for the current picture
+             */
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+
+            irc_update_vbr_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                    e_pic_type);
+
+            irc_change_cbr_vbv_num_pics_in_delay_period(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            au4_num_pics_in_delay_prd);
+
+            /*
+             * If the change_in_peak_bitrate flag is set, after the delay period
+             * update the peak_bitrate and the buffer parameters
+             */
+            if(!ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+            {
+                irc_ba_change_ba_peak_bit_rate(
+                                ps_rate_control_api->ps_bit_allocation,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+                irc_change_cbr_vbv_bit_rate(
+                                ps_rate_control_api->ps_cbr_buffer,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+            }
+            if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+                ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change--;
+        }
+
+        if(!i4_is_it_a_skip)
+        {
+            /*******************************************************************
+             Handle the SCENE CHANGE DETECTED
+             1) Make the picture type as I, so that updation happens as if it is
+                an I frame
+             2) Reset model, SAD and flag to restart the estimation process
+             ******************************************************************/
+            if(u1_is_scd)
+            {
+                WORD32 i4_frm_qp_after_scd;
+                UWORD32 u4_prev_I_frm_sad;
+
+                e_pic_type = I_PIC;
+
+                /* Scale scd qp based on SCD Frm sad and previous I Frm sad */
+                /* frm_qp_after_scd = (avg_qp * cur_frm_sad)/prev_I_frm_sad */
+
+                /*
+                 * QP for the next frame should take care of
+                 * 1) due to scene change, the current picture has consumed more
+                 *      bits
+                 * 2) relative complexity of the previous scene and the current
+                 *     scene
+                 */
+
+                /* Get the intra SAD for the previous scene */
+                u4_prev_I_frm_sad = irc_get_est_sad(
+                                ps_rate_control_api->ps_est_sad, I_PIC);
+
+                /*
+                 * Scale the QP based on the SAD ratio of the current pic and
+                 * previous scene intra SAD
+                 */
+                X_PROD_Y_DIV_Z(i4_avg_qp, u4_frame_sad, u4_prev_I_frm_sad,
+                               i4_frm_qp_after_scd);
+
+                /* Limit the next frame qp by 50% across both the sides */
+                if(i4_frm_qp_after_scd > ((i4_avg_qp * 3) >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp * 3) >> 1;
+                }
+                else if(i4_frm_qp_after_scd < (i4_avg_qp >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp >> 1);
+                }
+
+                /*
+                 * Ensure that the next frame QP is within the min_max limit of
+                 * QP allowed
+                 */
+                if(i4_frm_qp_after_scd
+                                > ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1) + 1])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1) + 1];
+                }
+                else if(i4_frm_qp_after_scd
+                                < ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1)])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1)];
+                }
+
+                /* Update the state var */
+                ps_rate_control_api->u1_frm_qp_after_scd =
+                                (UWORD8)i4_frm_qp_after_scd;
+
+                /* re-set model */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    irc_reset_frm_rc_rd_model(
+                                    ps_rate_control_api->aps_rd_model[i]);
+                }
+
+                /* Reset the SAD estimation module */
+                irc_reset_est_sad(ps_rate_control_api->ps_est_sad);
+
+                /* Reset flag */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+                }
+
+                /* Reset the MB Rate control */
+                irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+                /*Set u1_scd_detected flag*/
+                ps_rate_control_api->u1_scd_detected = 1;
+
+                /*
+                 * Record the average QP of the SCD picture as the previous
+                 * QP of the I picture type
+                 */
+                ps_rate_control_api->au1_prev_frm_qp[I_PIC] = (UWORD8)i4_avg_qp;
+
+                trace_printf((const WORD8*)"SCD DETECTED\n");
+            }
+            else
+            {
+                ps_rate_control_api->u1_scd_detected = 0;
+                /**************************************************************
+                 Update the Qp used by the current frame
+                 **************************************************************/
+                ps_rate_control_api->au1_prev_frm_qp[e_pic_type] =
+                                (UWORD8)i4_avg_qp;
+            }
+
+            /********************************************************************
+             Update the model of the corresponding picture type
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            /*
+             * For very simple sequences no bits are consumed by texture. These
+             * frames do not add any information to the model and so not added
+             */
+            if(i4_tot_texture_bits && u4_frame_sad)
+            {
+                irc_add_frame_to_rd_model(
+                                ps_rate_control_api->aps_rd_model[e_pic_type],
+                                i4_tot_texture_bits, (UWORD8)i4_avg_qp,
+                                u4_frame_sad, u1_num_skips);
+
+                /*
+                 * At least one proper frame is added into the model. Until
+                 * that keep using the initial QP
+                 */
+                ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 1;
+            }
+
+            if(i4_avg_activity)
+            {
+                /* Update the mb_level model */
+                irc_mb_update_frame_level(
+                                ps_rate_control_api->ps_mb_rate_control,
+                                i4_avg_activity);
+            }
+
+            /******************************************************************
+             Update the sad estimation module
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            if(u4_frame_sad)
+            {
+                irc_update_actual_sad(ps_rate_control_api->ps_est_sad,
+                                      u4_frame_sad, e_pic_type);
+
+                irc_update_actual_sad_for_intra(ps_rate_control_api->ps_est_sad,
+                                                i4_intra_frm_cost);
+            }
+
+            /*
+             * Update the variable which denotes that a frame has been
+             * encountered
+             */
+            ps_rate_control_api->u1_is_first_frm = 0;
+
+        }
+    }
+
+    /* Store the prev encoded picture type for restricting Qp swing */
+    if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+    {
+        ps_rate_control_api->prev_ref_pic_type = e_pic_type;
+    }
+
+    trace_printf((const WORD8*)"ft %d,hb %d,tb %d,qp %d,fs %d\n", e_pic_type,
+                 i4_model_updation_hdr_bits, i4_tot_texture_bits, i4_avg_qp,
+                 u4_frame_sad);
+
+    return;
+}
+
+/*******************************************************************************
+ MB Level API functions
+ ******************************************************************************/
+
+/******************************************************************************
+ Function Name : irc_init_mb_rc_frame_level
+ Description   : Hands the frame QP to the MB rate control module through
+                 irc_mb_init_frame_level(), i.e. sets up the frame level
+                 details that the MB level rate control works from
+ ******************************************************************************/
+
+void irc_init_mb_rc_frame_level(rate_control_api_t *ps_rate_control_api,
+                                UWORD8 u1_frame_qp)
+{
+    irc_mb_init_frame_level(ps_rate_control_api->ps_mb_rate_control,
+                            u1_frame_qp);
+}
+
+/******************************************************************************
+ Function Name : irc_get_mb_level_qp
+ Description   : Get the mb level qp
+                 pi4_mb_qp[0] - QP used as feedback for the rate control
+                 pi4_mb_qp[1] - QP used for quantising the MB
+                 With MB level RC on, the values come from the MB rate control
+                 module and pi4_mb_qp[1] is limited to the min/max QP of the
+                 picture type; otherwise both are the frame level QP
+ *****************************************************************************/
+void irc_get_mb_level_qp(rate_control_api_t *ps_rate_control_api,
+                         WORD32 i4_cur_mb_activity,
+                         WORD32 *pi4_mb_qp,
+                         picture_type_e e_pic_type)
+{
+    if(!ps_rate_control_api->u1_is_mb_level_rc_on)
+    {
+        /* MB level RC is off: every MB uses the frame level QP */
+        WORD32 i4_frm_qp = irc_get_frm_level_qp(
+                        ps_rate_control_api->ps_mb_rate_control);
+
+        pi4_mb_qp[0] = i4_frm_qp;
+        pi4_mb_qp[1] = i4_frm_qp;
+    }
+    else
+    {
+        /* au1_min_max_qp holds a (min, max) pair per picture type */
+        const WORD32 i4_min_idx = e_pic_type << 1;
+        const WORD32 i4_max_idx = i4_min_idx + 1;
+
+        irc_get_mb_qp(ps_rate_control_api->ps_mb_rate_control,
+                      i4_cur_mb_activity, pi4_mb_qp);
+
+        /* Raise the quantisation QP to the minimum first ... */
+        if(pi4_mb_qp[1] < ps_rate_control_api->au1_min_max_qp[i4_min_idx])
+        {
+            pi4_mb_qp[1] = ps_rate_control_api->au1_min_max_qp[i4_min_idx];
+        }
+
+        /* ... then cap it at the maximum */
+        if(pi4_mb_qp[1] > ps_rate_control_api->au1_min_max_qp[i4_max_idx])
+        {
+            pi4_mb_qp[1] = ps_rate_control_api->au1_min_max_qp[i4_max_idx];
+        }
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_get_bits_to_stuff
+ Description   : Gets the bits to stuff to prevent underflow of the encoder
+                 buffer, as computed by the CBR buffer module
+ *****************************************************************************/
+WORD32 irc_get_bits_to_stuff(rate_control_api_t *ps_rate_control_api,
+                             WORD32 i4_tot_consumed_bits,
+                             picture_type_e e_pic_type)
+{
+    /* The CBR buffer module owns the stuffing computation */
+    return irc_get_cbr_bits_to_stuff(ps_rate_control_api->ps_cbr_buffer,
+                                     i4_tot_consumed_bits, e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_get_prev_frm_est_bits
+ Description   : Returns the i4_prev_frm_est_bits value stored in the rate
+                 control state (previous frame estimated bits)
+ *****************************************************************************/
+WORD32 irc_get_prev_frm_est_bits(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_est_bits = ps_rate_control_api->i4_prev_frm_est_bits;
+
+    return i4_est_bits;
+}
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+         accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+/*
+ * Registers a new inter frame interval with the picture handling module.
+ * No other rate control module is touched by this call.
+ */
+void irc_change_inter_frm_int_call(rate_control_api_t *ps_rate_control_api,
+                                   WORD32 i4_inter_frm_int)
+{
+    irc_pic_handling_register_new_inter_frm_interval(
+                    ps_rate_control_api->ps_pic_handling, i4_inter_frm_int);
+}
+
+/*
+ * Registers a new intra frame interval with the picture handling module and,
+ * for VBR_STREAMING only, with the VBR streaming parameters as well.
+ */
+void irc_change_intra_frm_int_call(rate_control_api_t *ps_rate_control_api,
+                                   WORD32 i4_intra_frm_int)
+{
+    irc_pic_handling_register_new_int_frm_interval(
+                    ps_rate_control_api->ps_pic_handling, i4_intra_frm_int);
+
+    /* Only the streaming mode keeps its own copy of the interval */
+    if(ps_rate_control_api->e_rc_type != VBR_STREAMING)
+    {
+        return;
+    }
+
+    irc_change_vsp_ifi(&ps_rate_control_api->s_vbr_str_prms,
+                       i4_intra_frm_int);
+}
+
+/****************************************************************************
+ Function Name : irc_change_avg_bit_rate
+ Description   : Whenever the average bit rate changes, the excess/deficit
+                 bits between the new bit rate and the old one are
+                 re-distributed in the bit allocation module. For CBR_NLDRC
+                 the CBR buffer is switched to the new rate as well
+ *****************************************************************************/
+void irc_change_avg_bit_rate(rate_control_api_t *ps_rate_control_api,
+                             UWORD32 u4_average_bit_rate)
+{
+    int idx;
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        /*
+         * Bit Allocation Module: distribute the excess/deficit bits between
+         * the old and the new bit rate to all the remaining frames. The frame
+         * rate is passed back unchanged
+         */
+        irc_ba_change_remaining_bits_in_period(
+                        ps_rate_control_api->ps_bit_allocation,
+                        ps_rate_control_api->ps_pic_handling,
+                        u4_average_bit_rate,
+                        irc_ba_get_frame_rate(
+                                        ps_rate_control_api->ps_bit_allocation),
+                        (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+    }
+
+    if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        /* The CBR buffer takes one rate per drain rate: replicate the value */
+        UWORD32 au4_drain_rates[MAX_NUM_DRAIN_RATES];
+
+        for(idx = 0; idx < MAX_NUM_DRAIN_RATES; idx++)
+        {
+            au4_drain_rates[idx] = u4_average_bit_rate;
+        }
+        irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                    (WORD32 *)(au4_drain_rates));
+    }
+
+    /*
+     * The "bitrate changed" flags are raised only for a change made at run
+     * time, i.e. once a frame has been encountered (u1_is_first_frm cleared).
+     * RC init is done during create and this call may also be made just
+     * before the first process call; that case must not be flagged. Without
+     * this check the model has not stabilized and the bitrate has changed
+     * before the first frame, so the qp would not be restricted and could go
+     * to very bad values after the init qp when swing is disabled.
+     * The check is not exact: it misbehaves if the bitrate is changed within
+     * the first couple of frames, and it is assumed the user does not do so
+     */
+    if(!ps_rate_control_api->u1_is_first_frm)
+    {
+        for(idx = 0; idx < MAX_PIC_TYPE; idx++)
+        {
+            ps_rate_control_api->au1_avg_bitrate_changed[idx] = 1;
+        }
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_frame_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 frame rate: the buffer model of the active rate control type
+                 is updated first, then the bit allocation module. Nothing is
+                 done for CONST_QP
+ *****************************************************************************/
+void irc_change_frame_rate(rate_control_api_t *ps_rate_control_api,
+                           UWORD32 u4_frame_rate,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_frms_in_delay_prd;
+
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        return;
+    }
+
+    /* Frames in the delay period at the new frame rate */
+    u4_frms_in_delay_prd = ((u4_frame_rate
+                    * irc_get_cbr_buffer_delay(
+                                    ps_rate_control_api->ps_cbr_buffer))
+                    / 1000000);
+
+    if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                    || (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+    {
+        irc_change_vbr_vbv_frame_rate(ps_rate_control_api->ps_vbr_storage_vbv,
+                                      u4_frame_rate);
+    }
+    else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        irc_change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer,
+                                          u4_frame_rate);
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        UWORD32 au4_pics_in_dly_prd[MAX_PIC_TYPE];
+
+        /* Refresh the streaming parameters ... */
+        irc_change_vsp_tgt_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                 u4_tgt_ticks);
+        irc_change_vsp_src_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                 u4_src_ticks);
+        irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+                            u4_frms_in_delay_prd);
+
+        /* ... and pass the resulting picture counts on to the CBR buffer */
+        irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms,
+                                        au4_pics_in_dly_prd);
+        irc_change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer,
+                                          u4_frame_rate);
+        irc_change_cbr_vbv_num_pics_in_delay_period(
+                        ps_rate_control_api->ps_cbr_buffer,
+                        au4_pics_in_dly_prd);
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+}
+
+/****************************************************************************
+ Function Name : irc_change_frm_rate_for_bit_alloc
+ Description   : Does the necessary changes only in the bit allocation module
+                 (and, for the VBR storage modes, in the max bits per target
+                 frame of the storage VBV) whenever there is a change in
+                 frame rate. Nothing is done for CONST_QP
+ *****************************************************************************/
+void irc_change_frm_rate_for_bit_alloc(rate_control_api_t *ps_rate_control_api,
+                                       UWORD32 u4_frame_rate)
+{
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        return;
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+
+    if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                    || (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+    {
+        irc_change_vbr_max_bits_per_tgt_frm(
+                        ps_rate_control_api->ps_vbr_storage_vbv,
+                        u4_frame_rate);
+    }
+}
+
+/*
+ * Installs a new initial QP per picture type. pu1_init_qp is read for
+ * MAX_PIC_TYPE entries; each value is stored both as the init QP and as the
+ * previous frame QP of that picture type.
+ */
+void irc_change_init_qp(rate_control_api_t *ps_rate_control_api,
+                        UWORD8 *pu1_init_qp)
+{
+    WORD32 i4_pic_type;
+
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        UWORD8 u1_qp = pu1_init_qp[i4_pic_type];
+
+        ps_rate_control_api->au1_init_qp[i4_pic_type] = u1_qp;
+        ps_rate_control_api->au1_prev_frm_qp[i4_pic_type] = u1_qp;
+    }
+}
+
+/*
+ * Installs new QP limits. pu1_min_max_qp holds one (min, max) pair per
+ * picture type, i.e. 2 * MAX_PIC_TYPE entries, laid out exactly like
+ * au1_min_max_qp in the rate control state.
+ */
+void irc_change_min_max_qp(rate_control_api_t *ps_rate_control_api,
+                           UWORD8 *pu1_min_max_qp)
+{
+    WORD32 i4_entry;
+
+    /* Source and destination share one layout, so copy entry by entry */
+    for(i4_entry = 0; i4_entry < (MAX_PIC_TYPE << 1); i4_entry++)
+    {
+        ps_rate_control_api->au1_min_max_qp[i4_entry] =
+                        pu1_min_max_qp[i4_entry];
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_peak_bit_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 peak bit rate
+                 - VBR_STORAGE / VBR_STORAGE_DVD_COMP: applied immediately to
+                   the storage VBV and the bit allocation module
+                 - VBR_STREAMING: applied immediately before the first frame
+                   is encoded; afterwards the new rates are stored and take
+                   effect after the delay period
+                 - other rate control types: nothing is changed
+ Inputs        : pu4_peak_bit_rate - MAX_NUM_DRAIN_RATES peak bit rates
+ Returns       : RC_OK, or RC_BENIGN_ERR when a VBR_STREAMING change arrives
+                 while an earlier one is still waiting to take effect (the
+                 new rates are stored all the same)
+ *****************************************************************************/
+WORD32 irc_change_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                                UWORD32 *pu4_peak_bit_rate)
+{
+    WORD32 i4_ret_val = RC_OK;
+    int i;
+
+    /*
+     * Buffer Mechanism Module: Re-initialize the number of bits consumed per
+     * frame
+     */
+    if(ps_rate_control_api->e_rc_type == VBR_STORAGE
+                    || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+    {
+        /* Send the new peak bit rate and the old frame rate */
+        irc_change_vbr_vbv_bit_rate(ps_rate_control_api->ps_vbr_storage_vbv,
+                                    pu4_peak_bit_rate[0]);
+        irc_ba_change_ba_peak_bit_rate(ps_rate_control_api->ps_bit_allocation,
+                                       (WORD32 *)pu4_peak_bit_rate);
+
+        for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        {
+            ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                            pu4_peak_bit_rate[i];
+        }
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+        {
+            /*
+             * Means that change in peak bit rate has been made twice before the
+             * previous change could take effect
+             */
+            i4_ret_val = RC_BENIGN_ERR;
+        }
+        /*
+         * If the change happens before encoding the first frame make the
+         * effect immediately else delay the effect
+         */
+        if(ps_rate_control_api->u1_is_first_frm)
+        {
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+            irc_ba_change_ba_peak_bit_rate(
+                            ps_rate_control_api->ps_bit_allocation,
+                            (WORD32 *)pu4_peak_bit_rate);
+            irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                        (WORD32 *)pu4_peak_bit_rate);
+        }
+        else
+        {
+            /*
+             * Scratch output for irc_get_vsp_num_pics_in_dly_prd(); only the
+             * return value is used here. It is sized per picture type like
+             * at every other call site of that function (it was sized by
+             * MAX_NUM_DRAIN_RATES, which is unrelated to its contents)
+             */
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+            /*
+             * Else store the number of frames after which the effect should
+             * happen and then update the peak bitrate
+             */
+            ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change =
+                            irc_get_vsp_num_pics_in_dly_prd(
+                                            &ps_rate_control_api->s_vbr_str_prms,
+                                            au4_num_pics_in_delay_prd);
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+        }
+    }
+
+    return (i4_ret_val);
+}
+
+/*
+ * Applies a new buffer delay to the buffer model of the active rate control
+ * type:
+ *  - CBR_NLDRC     : the CBR buffer delay is changed directly
+ *  - VBR_STREAMING : the frames-in-delay-period of the VBR streaming
+ *                    parameters is recomputed from the current frame rate and
+ *                    the resulting per picture type counts are passed on to
+ *                    the CBR buffer
+ *  - other types   : nothing is changed
+ *
+ * The second branch used to test for VBR_STORAGE / VBR_STORAGE_DVD_COMP even
+ * though it only touches s_vbr_str_prms and ps_cbr_buffer, which the rest of
+ * this file drives for VBR_STREAMING only (see irc_change_frame_rate and
+ * irc_update_frame_level_info). It now tests for VBR_STREAMING.
+ */
+void irc_change_buffer_delay(rate_control_api_t *ps_rate_control_api,
+                             UWORD32 u4_buffer_delay)
+{
+    UWORD32 u4_frms_in_delay_prd = ((irc_ba_get_frame_rate(
+                    ps_rate_control_api->ps_bit_allocation) * u4_buffer_delay)
+                    / 1000000);
+
+    /* Initialize the rate control modules */
+    if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        irc_change_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer,
+                                    u4_buffer_delay);
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+                            u4_frms_in_delay_prd);
+
+        /* Get the number of pics of each type in delay period */
+        irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms,
+                                        au4_num_pics_in_delay_prd);
+
+        irc_change_cbr_vbv_num_pics_in_delay_period(
+                        ps_rate_control_api->ps_cbr_buffer,
+                        au4_num_pics_in_delay_prd);
+    }
+}
+
+/* Getter functions to get the current rate control parameters */
+UWORD32 irc_get_frame_rate(rate_control_api_t *ps_rate_control_api)
+{
+    /* The frame rate is read back from the bit allocation module */
+    bit_allocation_handle ps_bit_alloc;
+
+    ps_bit_alloc = ps_rate_control_api->ps_bit_allocation;
+    return irc_ba_get_frame_rate(ps_bit_alloc);
+}
+
+/* Returns the bit rate reported by the bit allocation module */
+UWORD32 irc_get_bit_rate(rate_control_api_t *ps_rate_control_api)
+{
+    bit_allocation_handle ps_bit_alloc;
+
+    ps_bit_alloc = ps_rate_control_api->ps_bit_allocation;
+    return irc_ba_get_bit_rate(ps_bit_alloc);
+}
+
+/*
+ * Returns entry i4_index of au4_new_peak_bit_rate, i.e. the peak bit rate
+ * stored by the last peak-bit-rate change request. The index is used as is;
+ * no range check is performed here.
+ */
+UWORD32 irc_get_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                              WORD32 i4_index)
+{
+    const UWORD32 *pu4_peak_rates = ps_rate_control_api->au4_new_peak_bit_rate;
+
+    return pu4_peak_rates[i4_index];
+}
+
+/* Returns the intra frame interval reported by the picture handling module */
+UWORD32 irc_get_intra_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    return irc_pic_type_get_intra_frame_interval(ps_pic_handling);
+}
+
+/* Returns the inter frame interval reported by the picture handling module */
+UWORD32 irc_get_inter_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    return irc_pic_type_get_inter_frame_interval(ps_pic_handling);
+}
+
+/* Returns the rate control algorithm type stored in the RC state */
+rc_type_e irc_get_rc_type(rate_control_api_t *ps_rate_control_api)
+{
+    rc_type_e e_rc_type = ps_rate_control_api->e_rc_type;
+
+    return e_rc_type;
+}
+
+/*
+ * Returns the value produced by X_PROD_Y_DIV_Z for the current bit rate,
+ * the constant 1000 and the current frame rate.
+ */
+WORD32 irc_get_bits_per_frame(rate_control_api_t *ps_rate_control_api)
+{
+    /* Receives the result of the X_PROD_Y_DIV_Z computation below */
+    WORD32 i4_bits_per_frm;
+
+    /*
+     * Operands: bit rate, 1000 and frame rate (bit rate and frame rate are
+     * both read from the bit allocation module). Presumably the macro stores
+     * (X * Y) / Z in its last argument, i.e. bit_rate * 1000 / frame_rate -
+     * TODO confirm against the macro definition.
+     */
+    X_PROD_Y_DIV_Z(irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                   (UWORD32)1000,
+                   irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation),
+                   i4_bits_per_frm);
+
+    return (i4_bits_per_frm);
+}
+
+/* Returns the buffer delay reported by the CBR buffer module */
+UWORD32 irc_get_max_delay(rate_control_api_t *ps_rate_control_api)
+{
+    cbr_buffer_handle ps_cbr_buffer = ps_rate_control_api->ps_cbr_buffer;
+
+    return irc_get_cbr_buffer_delay(ps_cbr_buffer);
+}
+
+/* Returns the display order number reported by the picture handling module */
+UWORD32 irc_get_seq_no(rate_control_api_t *ps_rate_control_api)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    return irc_pic_type_get_disp_order_no(ps_pic_handling);
+}
+
+/*
+ * Returns the total number of frames remaining, obtained by summing the
+ * per-picture-type remaining-frame counts fetched from the picture handling
+ * module.
+ */
+UWORD32 irc_get_rem_frames_in_gop(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 ai4_rem_frms[MAX_PIC_TYPE];
+    UWORD32 u4_total_rem_frms = 0;
+    WORD32 i4_pic_type = MAX_PIC_TYPE;
+
+    /* Fetch the remaining frame count for every picture type */
+    irc_pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling,
+                                     ai4_rem_frms);
+
+    /* Accumulate the counts over all picture types */
+    while(i4_pic_type-- > 0)
+    {
+        u4_total_rem_frms += ai4_rem_frms[i4_pic_type];
+    }
+
+    return u4_total_rem_frms;
+}
+
+/****************************************************************************
+ Function Name : irc_flush_buf_frames
+ Description   : API call to flush the buffered up frames. Forwards to
+                 irc_flush_frame_from_pic_stack() on the picture handling
+                 module.
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_api_t *ps_rate_control_api)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    irc_flush_frame_from_pic_stack(ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_post_encode_frame_skip
+ Description   : API call made for a frame of type e_pic_type that is
+                 skipped; forwards to irc_skip_encoded_frame() on the
+                 picture handling module.
+ *****************************************************************************/
+void irc_post_encode_frame_skip(rate_control_api_t *ps_rate_control_api,
+                                picture_type_e e_pic_type)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    irc_skip_encoded_frame(ps_pic_handling, e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_force_I_frame
+ Description   : API call to force an I frame. Sets the force-I-frame flag
+                 in the picture handling module via
+                 irc_set_force_I_frame_flag().
+ *****************************************************************************/
+void irc_force_I_frame(rate_control_api_t *ps_rate_control_api)
+{
+    pic_handling_handle ps_pic_handling;
+
+    ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+    irc_set_force_I_frame_flag(ps_pic_handling);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_rem_bits_in_period
+ * Description   : API call to get the remaining bits in the period, as
+ *                 computed by the bit allocation module from the picture
+ *                 handling state.
+ * *****************************************************************************/
+WORD32 irc_get_rem_bits_in_period(rate_control_api_t *ps_rate_control_api)
+{
+    bit_allocation_handle ps_bit_alloc = ps_rate_control_api->ps_bit_allocation;
+    pic_handling_handle ps_pic_handling = ps_rate_control_api->ps_pic_handling;
+
+    return irc_ba_get_rem_bits_in_period(ps_bit_alloc, ps_pic_handling);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_vbv_buf_fullness
+ * Description   : API call to get VBV buffer fullness. Returns the value of
+ *                 irc_get_cur_vbv_buf_size() for the VBR storage VBV.
+ ******************************************************************************/
+WORD32 irc_get_vbv_buf_fullness(rate_control_api_t *ps_rate_control_api)
+{
+    vbr_storage_vbv_handle ps_vbv = ps_rate_control_api->ps_vbr_storage_vbv;
+
+    return irc_get_cur_vbv_buf_size(ps_vbv);
+}
+
+/*
+ * Returns the VBV buffer size: taken from the CBR buffer module for
+ * CBR_NLDRC and VBR_STREAMING, and from the VBR storage VBV (maximum size)
+ * for every other RC type.
+ */
+WORD32 irc_get_vbv_buf_size(rate_control_api_t *ps_rate_control_api)
+{
+    rc_type_e e_rc_type = ps_rate_control_api->e_rc_type;
+
+    if((CBR_NLDRC != e_rc_type) && (VBR_STREAMING != e_rc_type))
+    {
+        return irc_get_max_vbv_buf_size(
+                        ps_rate_control_api->ps_vbr_storage_vbv);
+    }
+
+    return irc_get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+}
+
+/*
+ * Returns the value of irc_vbv_get_vbv_buf_fullness() for the VBR storage
+ * VBV and the given number of bits (u4_bits).
+ */
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_api_t *ps_rate_control_api,
+                                         UWORD32 u4_bits)
+{
+    vbr_storage_vbv_handle ps_vbv = ps_rate_control_api->ps_vbr_storage_vbv;
+
+    return irc_vbv_get_vbv_buf_fullness(ps_vbv, u4_bits);
+}
+
+/*
+ * Passes the average activity (i4_avg_activity) to the MB level rate
+ * control module through irc_mb_update_frame_level().
+ */
+void irc_set_avg_mb_act(rate_control_api_t *ps_rate_control_api,
+                        WORD32 i4_avg_activity)
+{
+    mb_rate_control_handle ps_mb_rc = ps_rate_control_api->ps_mb_rate_control;
+
+    irc_mb_update_frame_level(ps_mb_rc, i4_avg_activity);
+}
diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h
new file mode 100755
index 0000000..0173037
--- /dev/null
+++ b/encoder/irc_rate_control_api.h
@@ -0,0 +1,188 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_H_
+#define _RATE_CONTROL_API_H_
+
+#define RC_OK            0
+#define RC_FAIL         -1
+#define RC_BENIGN_ERR   -2
+
+/* This file should only contain RC API function declarations */
+
+typedef struct rate_control_api_t *rate_control_handle;
+
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type);
+
+void irc_initialise_rate_control(rate_control_handle ps_rate_control_api,
+                                 rc_type_e e_rate_control_type,
+                                 UWORD8 u1_is_mb_level_rc_on,
+                                 UWORD32 u4_avg_bit_rate,
+                                 UWORD32 *pu4_peak_bit_rate,
+                                 UWORD32 u4_min_bit_rate,
+                                 UWORD32 u4_frame_rate,
+                                 UWORD32 u4_max_delay,
+                                 UWORD32 u4_intra_frame_interval,
+                                 UWORD8 *pu1_init_qp,
+                                 UWORD32 u4_max_vbv_buff_size,
+                                 WORD32 i4_max_inter_frm_int,
+                                 WORD32 i4_is_gop_closed,
+                                 UWORD8 *pu1_min_max_qp,
+                                 WORD32 i4_use_est_intra_sad,
+                                 UWORD32 u4_src_ticks,
+                                 UWORD32 u4_tgt_ticks);
+
+/*****************************************************************************
+ Process level API functions (FRAME LEVEL)
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_handle ps_rate_control_api);
+
+void irc_post_encode_frame_skip(rate_control_handle ps_rate_control_api,
+                                picture_type_e e_pic_type);
+
+void irc_add_picture_to_stack(rate_control_handle rate_control_api,
+                              WORD32 i4_enc_pic_id);
+
+void irc_add_picture_to_stack_re_enc(rate_control_handle rate_control_api,
+                                     WORD32 i4_enc_pic_id,
+                                     picture_type_e e_pic_type);
+
+void irc_get_picture_details(rate_control_handle rate_control_api,
+                             WORD32 *pi4_pic_id,
+                             WORD32 *pi4_pic_disp_order_no,
+                             picture_type_e *pe_pic_type);
+
+/* Gets the frame level Qp */
+UWORD8 irc_get_frame_level_qp(rate_control_handle rate_control_api,
+                              picture_type_e pic_type,
+                              WORD32 i4_max_frm_bits);
+
+vbv_buf_status_e irc_get_buffer_status(rate_control_handle rate_control_api,
+                                       WORD32 i4_total_frame_bits,
+                                       picture_type_e e_pic_type,
+                                       WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+WORD32 irc_get_prev_frm_est_bits(rate_control_handle ps_rate_control_api);
+
+void irc_update_pic_handling_state(rate_control_handle ps_rate_control_api,
+                                   picture_type_e e_pic_type);
+
+void irc_update_frame_level_info(rate_control_handle ps_rate_control_api,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_mb_type_sad,
+                                 WORD32 i4_total_frame_bits,
+                                 WORD32 i4_model_updation_hdr_bits,
+                                 WORD32 *pi4_mb_type_tex_bits,
+                                 WORD32 *pi4_tot_mb_type_qp,
+                                 WORD32 *pi4_tot_mb_in_type,
+                                 WORD32 i4_avg_activity,
+                                 UWORD8 u1_is_scd,
+                                 WORD32 i4_is_it_a_skip,
+                                 WORD32 i4_intra_frm_cost,
+                                 WORD32 i4_is_pic_handling_done);
+
+/*****************************************************************************
+ MB LEVEL API (just wrapper functions)
+ *****************************************************************************/
+
+void irc_init_mb_rc_frame_level(rate_control_handle ps_rate_control_api,
+                                UWORD8 u1_frame_qp);/* Current frame qp*/
+
+void irc_get_mb_level_qp(rate_control_handle ps_rate_control_api,
+                         WORD32 i4_cur_mb_activity,
+                         WORD32 *pi4_mb_qp,
+                         picture_type_e e_pic_type);
+
+WORD32 irc_get_bits_to_stuff(rate_control_handle ps_rate_control_api,
+                             WORD32 i4_tot_consumed_bits,
+                             picture_type_e e_pic_type);
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+ accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+void irc_change_inter_frm_int_call(rate_control_handle ps_rate_control_api,
+                                   WORD32 i4_inter_frm_int);
+
+void irc_change_intra_frm_int_call(rate_control_handle ps_rate_control_api,
+                                   WORD32 i4_intra_frm_int);
+
+void irc_change_avg_bit_rate(rate_control_handle ps_rate_control_api,
+                             UWORD32 u4_average_bit_rate);
+
+void irc_change_frame_rate(rate_control_handle ps_rate_control_api,
+                           UWORD32 u4_frame_rate,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_target_ticks);
+
+void irc_change_frm_rate_for_bit_alloc(rate_control_handle ps_rate_control_api,
+                                       UWORD32 u4_frame_rate);
+
+void irc_change_init_qp(rate_control_handle ps_rate_control_api,
+                        UWORD8 *init_qp);
+
+WORD32 irc_change_peak_bit_rate(rate_control_handle ps_rate_control_api,
+                                UWORD32 *u4_peak_bit_rate);
+
+void irc_change_buffer_delay(rate_control_handle ps_rate_control_api,
+                             UWORD32 u4_buffer_delay);
+
+void irc_force_I_frame(rate_control_handle ps_rate_control_api);
+
+void irc_change_min_max_qp(rate_control_handle ps_rate_control_api,
+                           UWORD8 *u1_min_max_qp);
+
+/********************************************************************************
+ Getter functions
+ For getting the current state of the rate control structures
+ ********************************************************************************/
+
+UWORD32 irc_get_frame_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_bit_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_intra_frame_interval(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_inter_frame_interval(rate_control_handle ps_rate_control_api);
+
+rc_type_e irc_get_rc_type(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_bits_per_frame(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_peak_bit_rate(rate_control_handle ps_rate_control_api,
+                              WORD32 i4_index);
+
+UWORD32 irc_get_max_delay(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_seq_no(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_rem_bits_in_period(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_fullness(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_size(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_handle ps_rate_control_api,
+                                         UWORD32 u4_bits);
+#endif
diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h
new file mode 100755
index 0000000..ba39e7f
--- /dev/null
+++ b/encoder/irc_rate_control_api_structs.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_STRUCTS_H_
+#define _RATE_CONTROL_API_STRUCTS_H_
+
+/*
+ * The following definitions were present in irc_cntrl_param.h, moved to this
+ * file as it is used by irc_rate_control_api.c
+ */
+
+/* num_frm_in_period = BIT_ALLOC_PERIOD*intra_frame_interval */
+#define VBR_BIT_ALLOC_PERIOD 3
+#define CBR_BIT_ALLOC_PERIOD 1
+
+/* Rate control state structure */
+typedef struct rate_control_api_t
+{
+    /* RC Algorithm */
+    rc_type_e e_rc_type;
+
+    /* Whether MB level rc is enabled or not */
+    UWORD8 u1_is_mb_level_rc_on;
+
+    /* Picture handling struct */
+    pic_handling_handle ps_pic_handling;
+
+    /* Model struct for I and P frms (one handle per picture type) */
+    rc_rd_model_handle aps_rd_model[MAX_PIC_TYPE];
+
+    /* VBR storage VBV structure */
+    vbr_storage_vbv_handle ps_vbr_storage_vbv;
+
+    /* Calculate the estimated SAD */
+    est_sad_handle ps_est_sad;
+
+    /* Allocation of bits for each frame */
+    bit_allocation_handle ps_bit_allocation;
+
+    /* Init Qp(also used for Const Qp scenarios) */
+    UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+    /* MB Level rate control state structure */
+    mb_rate_control_handle ps_mb_rate_control;
+
+    /*
+     * One entry per picture type; presumably flags whether the first frame
+     * of that type has been coded - TODO confirm against the users
+     */
+    UWORD8 au1_is_first_frm_coded[MAX_PIC_TYPE];
+
+    /* One entry per picture type; presumably the QP of the previous frame */
+    UWORD8 au1_prev_frm_qp[MAX_PIC_TYPE];
+
+    /*
+     * CBR buffer module handle. Queried for the buffer delay, and for the
+     * buffer size when the RC type is CBR_NLDRC or VBR_STREAMING.
+     */
+    cbr_buffer_handle ps_cbr_buffer;
+
+    /* Presumably set when a scene change (SCD) is detected - TODO confirm */
+    UWORD8 u1_scd_detected;
+
+    /* Presumably the frame QP to use after a scene change - TODO confirm */
+    UWORD8 u1_frm_qp_after_scd;
+
+    /*
+     * One entry per picture type; presumably marks that the average bit
+     * rate was changed - TODO confirm
+     */
+    UWORD8 au1_avg_bitrate_changed[MAX_PIC_TYPE];
+
+    /*
+     * Checked when the peak bit rate is changed: while set, the change is
+     * applied immediately, otherwise it is delayed
+     */
+    UWORD8 u1_is_first_frm;
+
+    /*
+     * Two entries per picture type; presumably the min and max QP -
+     * TODO confirm the ordering
+     */
+    UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
+
+    /* Presumably the estimated bits of the previous frame - TODO confirm */
+    WORD32 i4_prev_frm_est_bits;
+
+    /* VBR storage params; updated when the buffer delay changes */
+    vbr_str_prms_t s_vbr_str_prms;
+
+    /* Store the values which are to be impacted after a delay */
+    UWORD32 u4_frms_in_delay_prd_for_peak_bit_rate_change;
+
+    /* Peak bit rates from the last change request (see the delay above) */
+    UWORD32 au4_new_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+
+    /* Presumably the type of the previous reference picture - TODO confirm */
+    picture_type_e prev_ref_pic_type;
+
+} rate_control_api_t;
+
+#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
+
diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c
new file mode 100755
index 0000000..f5c0737
--- /dev/null
+++ b/encoder/irc_rd_model.c
@@ -0,0 +1,565 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/****************************************************************************/
+/* File Name         : irc_rd_model.c                                       */
+/*                                                                          */
+/* Description       : Implements all the functions to model the            */
+/*                     Rate Distortion Behaviour of the Codec over the Last */
+/*                     Few Frames.                                          */
+/*                                                                          */
+/* List of Functions : irc_update_frame_rd_model                            */
+/*                     estimate_mpeg2_qp_for_resbits                        */
+/*                                                                          */
+/* Issues / Problems : None                                                 */
+/*                                                                          */
+/* Revision History  :                                                      */
+/*        DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*        21 06 2006   Sarat           Initial Version                      */
+/****************************************************************************/
+
+/* System include files */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "math.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_rd_model_struct.h"
+
+
+/****************************************************************************
+ Function Name : irc_rd_model_num_fill_use_free_memtab
+ Description   : Memory-table helper for the RD model state. For every
+                 request type other than GET_NUM_MEMTAB it fills the memtab
+                 entry describing one rc_rd_model_t and calls
+                 use_or_fill_base() for it.
+ Inputs        : pps_rc_rd_model - address of the RD model state pointer
+                 ps_memtab       - memtab entries (not touched for
+                                   GET_NUM_MEMTAB)
+                 e_func_type     - requested operation
+ Return        : Number of memtab entries used by this module (always 1)
+ *****************************************************************************/
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_t **pps_rc_rd_model,
+                                             itt_memtab_t *ps_memtab,
+                                             ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state used while no real state memory is available */
+    static rc_rd_model_t s_rc_rd_model_temp;
+    WORD32 i4_num_memtabs = 0;
+
+    /*
+     * Hack for the alloc phase, during which we don't have any state memory:
+     * point the caller's handle at the static stand-in so that it can be
+     * dereferenced safely.
+     */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_rc_rd_model = &s_rc_rd_model_temp;
+    }
+
+    /* Memtab entry for the RD model state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_state_memtab = &ps_memtab[i4_num_memtabs];
+
+        fill_memtab(ps_state_memtab, sizeof(rc_rd_model_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(ps_state_memtab, (void**)pps_rc_rd_model,
+                         e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+/*
+ * Initialises the RD model: records the caller-supplied maximum number of
+ * frames to model, clears the frame counters and zeroes the three
+ * "linear without intercept" model coefficients.
+ */
+void irc_init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model,
+                              UWORD8 u1_max_frames_modelled)
+{
+    /* Caller-supplied limit */
+    ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled;
+
+    /* No frames in the model yet */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* All model coefficients start at zero */
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+}
+
+/*
+ * Resets the RD model: clears the frame counters and zeroes the three
+ * "linear without intercept" model coefficients. The configured maximum
+ * number of frames to model (u1_max_frms_to_model) is left untouched.
+ */
+void irc_reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    /* Drop the frame history */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* Clear the model coefficients */
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+}
+
+/****************************************************************************
+ Function Name : find_model_coeffs
+ Description   : Computes the coefficients of the "linear without intercept"
+                 model from the frame stored just before
+                 ps_rd_model->u1_curr_frm_counter:
+                     a = (bits * avg_qp) / sad   (0 when sad is 0)
+                 and writes {b = 0, a, c = 0} to pmc_model_coeff_lin_wo_int
+                 at indices 0, 1 and 2 respectively.
+                 Regression sums over the frames listed in pi1_frame_index
+                 are also accumulated and averaged, but they are not read
+                 afterwards in this function.
+ Inputs        : pi4_res_bits, pi4_sad_h264, pui_avg_mpeg2_qp - per-frame
+                     data, indexed by the entries of pi1_frame_index
+                 pu1_num_skips   - not used by the code below
+                 u1_num_frms     - number of entries of pi1_frame_index to scan
+                 u1_model_used   - returned unchanged
+                 pi1_frame_index - frame slots to use; -1 entries are skipped
+                 pmc_model_coeff, pmc_model_coeff_lin - not written here
+                 pmc_model_coeff_lin_wo_int - output, 3 entries
+                 ps_rd_model     - RD model state (source of the previous
+                                   frame's data)
+ Return        : u1_model_used, unchanged
+ *****************************************************************************/
+static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits,
+                                UWORD32 *pi4_sad_h264,
+                                UWORD8 *pu1_num_skips,
+                                UWORD8 *pui_avg_mpeg2_qp,
+                                UWORD8 u1_num_frms,
+                                UWORD8 u1_model_used,
+                                WORD8 *pi1_frame_index,
+                                model_coeff *pmc_model_coeff,
+                                model_coeff *pmc_model_coeff_lin,
+                                model_coeff *pmc_model_coeff_lin_wo_int,
+                                rc_rd_model_t *ps_rd_model)
+{
+    UWORD32 i;
+    UWORD8 u1_num_frms_used = 0;
+    UWORD8 u1_frm_indx;
+
+    /*
+     * These parameters are only referenced through UNUSED() when neither
+     * model is enabled.
+     * NOTE(review): if UNUSED() expands to a statement, the declarations
+     * that follow it come after a statement - confirm this is acceptable
+     * for the compilers in use.
+     */
+#if !(ENABLE_QUAD_RC_MODEL||ENABLE_LIN_MODEL_WITH_INTERCEPT)
+    UNUSED(pu1_num_skips);
+    UNUSED(pmc_model_coeff);
+    UNUSED(pmc_model_coeff_lin);
+#endif
+    /* Accumulators for the regression sums, with x = sad / qp, y = bits */
+    float sum_y = 0;
+    float sum_x_y = 0;
+    float sum_x2_y = 0;
+    float sum_x = 0;
+    float sum_x2 = 0;
+    float sum_x3 = 0;
+    float sum_x4 = 0;
+
+    float x0, y0;
+    float model_coeff_a = 0.0, model_coeff_b = 0.0, model_coeff_c = 0.0;
+
+    /* Accumulate the sums over every valid entry of the frame index list */
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        /* -1 marks an unused entry */
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+
+        /* NOTE(review): a stored QP of 0 makes x0 a division by zero */
+        y0 = (float)(pi4_res_bits[u1_frm_indx]);
+        x0 = (float)(pi4_sad_h264[u1_frm_indx]
+                        / (float)pui_avg_mpeg2_qp[u1_frm_indx]);
+
+        sum_y += y0;
+        sum_x_y += x0 * y0;
+        sum_x2_y += x0 * x0 * y0;
+        sum_x += x0;
+        sum_x2 += x0 * x0;
+        sum_x3 += x0 * x0 * x0;
+        sum_x4 += x0 * x0 * x0 * x0;
+        u1_num_frms_used++;
+    }
+
+    /*
+     * Turn the sums into averages.
+     * NOTE(review): u1_num_frms_used is 0 when no valid entry was found,
+     * and none of these averages is read again below.
+     */
+    sum_y /= u1_num_frms_used;
+    sum_x_y /= u1_num_frms_used;
+    sum_x2_y /= u1_num_frms_used;
+    sum_x /= u1_num_frms_used;
+    sum_x2 /= u1_num_frms_used;
+    sum_x3 /= u1_num_frms_used;
+    sum_x4 /= u1_num_frms_used;
+
+    {
+        UWORD8 u1_curr_frame_index;
+        UWORD8 u1_avgqp_prvfrm;
+        UWORD32 u4_prevfrm_bits, u4_prevfrm_sad;
+
+        /* Step back one slot from the current counter, wrapping around */
+        u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+
+        u1_avgqp_prvfrm = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+        u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_curr_frame_index];
+        u4_prevfrm_sad = ps_rd_model->pi4_sad[u1_curr_frame_index];
+
+        /*
+         * a = bits * qp / sad, guarded against a zero SAD.
+         * NOTE(review): bits * qp is evaluated in 32-bit unsigned arithmetic
+         * before the conversion to float, so a very large product wraps.
+         */
+        if(0 != u4_prevfrm_sad)
+            model_coeff_a = (float)(u4_prevfrm_bits * u1_avgqp_prvfrm)
+                            / u4_prevfrm_sad;
+        else
+            model_coeff_a = 0;
+
+        model_coeff_b = 0;
+        model_coeff_c = 0;
+
+        /* Output layout: [0] = b, [1] = a, [2] = c */
+        pmc_model_coeff_lin_wo_int[0] = model_coeff_b;
+        pmc_model_coeff_lin_wo_int[1] = model_coeff_a;
+        pmc_model_coeff_lin_wo_int[2] = model_coeff_c;
+    }
+
+    return u1_model_used;
+}
+
+/****************************************************************************
+ Function Name : irc_update_frame_rd_model
+ Description   : Rebuilds the RD model coefficients from the stored frame
+                 data.
+                 1. Walks backwards through the frame ring buffer, starting
+                    at the slot just before u1_curr_frm_counter, and picks
+                    up to MAX_ACTIVE_FRAMES frames (at most 2 per QP value;
+                    frames with skips are rejected, except the first frame
+                    visited, which is always kept).
+                 2. Scans the remaining stored frames for pivot points whose
+                    QP lies below / above the range covered in step 1 and
+                    appends them to the list.
+                 3. Calls find_model_coeffs() with the list and stores the
+                    returned "linear without intercept" coefficients in the
+                    model state.
+ Inputs        : ps_rd_model - RD model state (read and updated)
+ *****************************************************************************/
+static void irc_update_frame_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    WORD8 pi1_frame_index[MAX_FRAMES_MODELLED];
+
+    UWORD8 u1_num_skips_temp;
+    UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp;
+    UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame;
+    UWORD32 u4_num_skips;
+
+    UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp;
+    UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx;
+    UWORD8 pu1_num_frames[MPEG2_QP_ELEM];
+    model_coeff model_coeff_array[3], model_coeff_array_lin[3],
+                    model_coeff_array_lin_wo_int[3];
+    UWORD32 i;
+    UWORD8 u1_curr_frame_index;
+
+    u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+    ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+    /* Step back one slot from the current counter, wrapping around */
+    if(0 == u1_curr_frame_index)
+        u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+    else
+        u1_curr_frame_index--;
+
+    /************************************************************************/
+    /* Rearrange data to be fed into a Linear Regression Module             */
+    /* Module finds a,b,c such that                                         */
+    /*      y = ax + bx^2 + c                                               */
+    /************************************************************************/
+    /* u4_num_skips stays 0 for the whole function */
+    u4_num_skips = 0;
+    u1_num_frms_input = 0;
+    memset(pu1_num_frames, 0, sizeof(pu1_num_frames));
+    /* -1 marks an unused entry of the frame index list */
+    memset(pi1_frame_index, -1, sizeof(pi1_frame_index));
+    u1_min_mpeg2_qp = MAX_MPEG2_QP;
+    u1_max_mpeg2_qp = 0;
+
+    u1_num_active_frames = ps_rd_model->u1_num_frms_in_model;
+    if(u1_num_active_frames > MAX_ACTIVE_FRAMES)
+    {
+        u1_num_active_frames = MAX_ACTIVE_FRAMES;
+    }
+
+    /************************************************************************/
+    /* Choose the set of Points to be used for MSE fit of Quadratic model   */
+    /* Points chosen are spread across the Qp range. Max of 2 points are    */
+    /* chosen for a Qp.                                                     */
+    /************************************************************************/
+    for(i = 0; i < u1_num_active_frames; i++)
+    {
+        u1_reject_frame = 0;
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+
+        if((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+            u1_reject_frame = 1;
+        if((1 == u4_num_skips) && (u1_num_skips_temp > 1))
+            u1_reject_frame = 1;
+        /* NOTE(review): assumes the stored QP is < MPEG2_QP_ELEM - confirm */
+        if(pu1_num_frames[u1_avg_mpeg2_qp_temp] >= 2)
+            u1_reject_frame = 1;
+
+        /* The first frame visited is always used */
+        if(0 == i)
+            u1_reject_frame = 0;
+
+        if(0 == u1_reject_frame)
+        {
+            pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index;
+            pu1_num_frames[u1_avg_mpeg2_qp_temp] += 1;
+
+            if(u1_min_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+                u1_min_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+            if(u1_max_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+                u1_max_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+
+            u1_num_frms_input++;
+        }
+
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /************************************************************************/
+    /* Add Pivot Points to the Data set to be used for finding Quadratic    */
+    /* Model Coeffs. These will help in constraining the shape of Quadratic */
+    /* so that it does not adapt too much to the Local deviations.          */
+    /************************************************************************/
+    u1_min2_mpeg2_qp = u1_min_mpeg2_qp;
+    u1_max2_mpeg2_qp = u1_max_mpeg2_qp;
+    u1_min_qp_frame_indx = INVALID_FRAME_INDEX;
+    u1_max_qp_frame_indx = INVALID_FRAME_INDEX;
+
+    /* Loop running over the remaining Stored Frame Level Data
+     to find frames of MinQp and MaxQp */
+    for(; i < ps_rd_model->u1_num_frms_in_model; i++)
+    {
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+
+        /* Frames failing the skip criteria are not considered as pivots */
+        if(!(((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+                        || ((1 == u4_num_skips) && (u1_num_skips_temp > 1))))
+        {
+            if(u1_min2_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+            {
+                u1_min2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_min_qp_frame_indx = u1_curr_frame_index;
+            }
+            if(u1_max2_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+            {
+                u1_max2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_max_qp_frame_indx = u1_curr_frame_index;
+            }
+        }
+
+        /*
+         * Always move on to the previous slot, also for rejected frames;
+         * otherwise a single rejected frame would be re-examined for all
+         * the remaining iterations.
+         */
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /* Add the Chosen Points to the regression data set */
+    if(INVALID_FRAME_INDEX != u1_min_qp_frame_indx)
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+    if(INVALID_FRAME_INDEX != u1_max_qp_frame_indx)
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+
+    /***** Call the Module to Return the Coeffs for the Fed Data *****/
+    ps_rd_model->u1_model_used = find_model_coeffs(ps_rd_model->pi4_res_bits,
+                                                   ps_rd_model->pi4_sad,
+                                                   ps_rd_model->pu1_num_skips,
+                                                   ps_rd_model->pu1_avg_qp,
+                                                   u1_num_frms_input,
+                                                   ps_rd_model->u1_model_used,
+                                                   pi1_frame_index,
+                                                   model_coeff_array,
+                                                   model_coeff_array_lin,
+                                                   model_coeff_array_lin_wo_int,
+                                                   ps_rd_model);
+
+    /* Returned layout: [0] = b, [1] = a, [2] = c */
+    ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0];
+    ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1];
+    ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2];
+}
+
+/* Bits estimate of the linear model: coeff_a * (sad / qp) */
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_t *ps_rd_model,
+                                 UWORD32 u4_estimated_sad,
+                                 UWORD8 u1_avg_qp)
+{
+    /* Integer quotient (truncated); u1_avg_qp == 0 is not guarded here */
+    UWORD32 u4_sad_by_qp = u4_estimated_sad / u1_avg_qp;
+    float fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int
+                    * (float)u4_sad_by_qp;
+    return (UWORD32)fl_num_bits;
+}
+
+/* Inverts the linear model to get the Qp that spends u4_target_res_bits on
+ * a frame of u4_estimated_sad, limited to [u1_min_qp, u1_max_qp] */
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_t *ps_rd_model,
+                                   UWORD32 u4_target_res_bits,
+                                   UWORD32 u4_estimated_sad,
+                                   UWORD8 u1_min_qp,
+                                   UWORD8 u1_max_qp)
+{
+    float fl_sad_by_qp;
+    float fl_qp = 255;
+
+    /* Record PREV_FRAME_MODEL as the model used for this Qp */
+    ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+    /* bits = a * (sad / qp)  =>  (sad / qp) = bits / a */
+    fl_sad_by_qp = (float)u4_target_res_bits
+                    / ps_rd_model->model_coeff_a_lin_wo_int;
+
+    /* qp = sad / (sad / qp); a zero ratio keeps the 255 set above */
+    if(0 != fl_sad_by_qp)
+    {
+        fl_qp = u4_estimated_sad / fl_sad_by_qp;
+    }
+
+    /* Saturate at 255 before applying the caller supplied limits */
+    fl_qp = (fl_qp > 255) ? 255 : fl_qp;
+
+    /* Lower limit first, then upper limit, as two independent steps */
+    fl_qp = (fl_qp < u1_min_qp) ? u1_min_qp : fl_qp;
+    fl_qp = (fl_qp > u1_max_qp) ? u1_max_qp : fl_qp;
+
+    /* Add 0.5 and truncate to get the integer Qp */
+    return (UWORD8)(fl_qp + 0.5);
+}
+
+void irc_add_frame_to_rd_model(rc_rd_model_t *ps_rd_model,
+                               UWORD32 i4_res_bits,
+                               UWORD8 u1_avg_mp2qp,
+                               UWORD32 i4_sad_h264,
+                               UWORD8 u1_num_skips)
+{
+    /* Slot of the history arrays that receives the present frame's data */
+    const UWORD8 u1_slot = ps_rd_model->u1_curr_frm_counter;
+
+    ps_rd_model->pu1_avg_qp[u1_slot] = u1_avg_mp2qp;
+    ps_rd_model->pu1_num_skips[u1_slot] = u1_num_skips;
+    ps_rd_model->pi4_sad[u1_slot] = i4_sad_h264;
+    ps_rd_model->pi4_res_bits[u1_slot] = i4_res_bits;
+
+    /* Advance the write position, wrapping at MAX_FRAMES_MODELLED */
+    ps_rd_model->u1_curr_frm_counter++;
+    if(MAX_FRAMES_MODELLED == ps_rd_model->u1_curr_frm_counter)
+        ps_rd_model->u1_curr_frm_counter = 0;
+
+    /* Count the frame until u1_max_frms_to_model is reached */
+    if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model)
+        ps_rd_model->u1_num_frms_in_model++;
+
+    irc_update_frame_rd_model(ps_rd_model);
+}
+
+/*****************************************************************************
+ *Function Name : irc_calc_per_frm_bits
+ *Description   :
+ *Inputs        : ps_rd_model
+ *                  -  pointer to the array of RD models, one per pic-type
+ *                pu2_num_pics_of_a_pic_type
+ *                  -  N1, N2,...Nk
+ *                pu1_update_pic_type_model
+ *                  -  flag which tells whether or not to update model
+ *                     coefficients of a particular pic-type
+ *                u1_num_pic_types
+ *                  - value of k
+ *                pu4_num_skip_of_a_pic_type
+ *                  - the number of skips of that pic-type. It "may" be used to
+ *                    update the model coefficients at a later point. Right now
+ *                    it is not being used at all.
+ *                u1_base_pic_type
+ *                  - base pic type index wrt which alpha & beta are calculated
+ *                pfl_gamma
+ *                  - gamma_i = beta_i / alpha_i
+ *                pfl_eta
+ *                  -
+ *                u1_curr_pic_type
+ *                  - the current pic-type for which the targetted bits need to
+ *                    be computed
+ *                u4_bits_for_sub_gop
+ *                 - the number of bits to be consumed for the remaining part of
+ *                   sub-gop
+ *                u4_curr_estimated_sad
+ *                 -
+ *                pu1_curr_pic_type_qp
+ *                  -  output of this function
+ *****************************************************************************/
+
+WORD32 irc_calc_per_frm_bits(rc_rd_model_t *ps_rd_model,
+                             UWORD16 *pu2_num_pics_of_a_pic_type,
+                             UWORD8 *pu1_update_pic_type_model,
+                             UWORD8 u1_num_pic_types,
+                             UWORD32 *pu4_num_skip_of_a_pic_type,
+                             UWORD8 u1_base_pic_type,
+                             float *pfl_gamma,
+                             float *pfl_eta,
+                             UWORD8 u1_curr_pic_type,
+                             UWORD32 u4_bits_for_sub_gop,
+                             UWORD32 u4_curr_estimated_sad,
+                             UWORD8 *pu1_curr_pic_type_qp)
+{
+    /* Model of the pic-type whose bits and Qp are being computed */
+    rc_rd_model_t *ps_curr_model = ps_rd_model + u1_curr_pic_type;
+    rc_rd_model_t *ps_model;
+    UWORD8 u1_pic_type;
+
+    /* Weighted sum of the linear coefficients of all the pic-types */
+    model_coeff eff_A = 0.0;
+
+    /* (sad / qp) terms: the base value and the one of the current frame */
+    float fl_sad_by_qp_base;
+    float fl_sad_by_qp_curr_frm;
+
+    /* Bits and Qp estimated for the current frame */
+    float fl_bits_for_curr_frm;
+    float fl_qp_curr_frm;
+
+    UNUSED(pu4_num_skip_of_a_pic_type);
+    UNUSED(u1_base_pic_type);
+
+    /*
+     * Part 1: refresh the model coefficients of every pic-type that has a
+     * non-zero picture count and is flagged for an update
+     */
+    for(u1_pic_type = 0; u1_pic_type < u1_num_pic_types; u1_pic_type++)
+    {
+        if(0 == pu2_num_pics_of_a_pic_type[u1_pic_type])
+            continue;
+
+        if(1 != pu1_update_pic_type_model[u1_pic_type])
+            continue;
+
+        irc_update_frame_rd_model(&ps_rd_model[u1_pic_type]);
+    }
+
+    /*
+     * Part 2: solve the equation over the models of all the pic-types.
+     * The combined model is always the linear model without an intercept:
+     * eff_A accumulates (eta + N - 1) * a * gamma of every pic-type
+     */
+    for(u1_pic_type = 0; u1_pic_type < u1_num_pic_types; u1_pic_type++)
+    {
+        ps_model = ps_rd_model + u1_pic_type;
+
+        eff_A += ((pfl_eta[u1_pic_type]
+                   + pu2_num_pics_of_a_pic_type[u1_pic_type] - 1)
+                   * ps_model->model_coeff_a_lin_wo_int
+                   * pfl_gamma[u1_pic_type]);
+    }
+
+    /* NOTE(review): eff_A == 0 is not guarded before this division */
+    fl_sad_by_qp_base = u4_bits_for_sub_gop / eff_A;
+
+    /* Scale the base value by gamma and eta of the current pic-type */
+    fl_sad_by_qp_curr_frm = fl_sad_by_qp_base
+                    * pfl_gamma[u1_curr_pic_type]
+                    * pfl_eta[u1_curr_pic_type];
+
+    /* bits = a * (sad / qp) with the coefficient of the current pic-type */
+    fl_bits_for_curr_frm = ps_curr_model->model_coeff_a_lin_wo_int
+                    * fl_sad_by_qp_curr_frm;
+
+    /*
+     * Store the model that was finally used to calculate Qp, so that the
+     * same model is used in further calculations for this picture
+     */
+    ps_curr_model->u1_model_used = PREV_FRAME_MODEL;
+
+    /* Qp = sad / (sad / qp); any ratio that is not positive gives 255 */
+    if(fl_sad_by_qp_curr_frm > 0)
+    {
+        fl_qp_curr_frm = (float)u4_curr_estimated_sad
+                        / fl_sad_by_qp_curr_frm;
+    }
+    else
+    {
+        fl_qp_curr_frm = 255;
+    }
+
+    /* Saturate the Qp at 255 */
+    if(fl_qp_curr_frm > 255)
+    {
+        fl_qp_curr_frm = 255;
+    }
+
+    /* The Qp is rounded by adding 0.5 and converting to UWORD8 */
+    *pu1_curr_pic_type_qp = (fl_qp_curr_frm + 0.5);
+
+    /* Bits for the frame, rounded in the same way */
+    return (WORD32)(fl_bits_for_curr_frm + 0.5);
+}
+
+/* Returns coefficient 'a' of the linear model without intercept */
+model_coeff irc_get_linear_coefficient(rc_rd_model_t *ps_rd_model)
+{
+    return ps_rd_model->model_coeff_a_lin_wo_int;
+}
+
diff --git a/encoder/irc_rd_model.h b/encoder/irc_rd_model.h
new file mode 100755
index 0000000..8be31c1
--- /dev/null
+++ b/encoder/irc_rd_model.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Name         : irc_rd_model.h                                        */
+/*                                                                           */
+/* Description       : Implements all the Functions to Model the             */
+/*                     Rate Distortion Behaviour of the Codec over the Last  */
+/*                     Few Frames.                                           */
+/*                                                                           */
+/* List of Functions : irc_update_frame_rd_model                             */
+/*                     estimate_mpeg2_qp_for_resbits                         */
+/*                     update_mb_rd_model                                    */
+/*                     find_model_coeffs                                     */
+/*                     refine_set_of_points                                  */
+/*                     init_mb_rd_model                                      */
+/*                     irc_add_frame_to_rd_model                             */
+/*                     irc_find_qp_for_target_bits                           */
+/*                                                                           */
+/*                                                                           */
+/* Issues / Problems : None                                                  */
+/*                                                                           */
+/* Revision History  :                                                       */
+/*        DD MM YYYY   Author(s)       Changes (Describe the changes made)   */
+/*        21 06 2006   Sarat           Initial Version                       */
+/*****************************************************************************/
+
+#ifndef RC_RD_MODEL
+#define RC_RD_MODEL
+
+#define MAX_FRAMES_MODELLED 16 /* Size of the per-frame history arrays */
+
+typedef float model_coeff; /* Type of the RD model coefficients */
+typedef struct rc_rd_model_t *rc_rd_model_handle; /* Handle to the state */
+
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_handle *pps_rc_rd_model,
+                                             itt_memtab_t *ps_memtab,
+                                             ITT_FUNC_TYPE_E e_func_type);
+/* Interface Functions */
+/* Initialise the rate distortion model */
+void irc_init_frm_rc_rd_model(rc_rd_model_handle ps_rd_model,
+                              UWORD8 u1_max_frames_modelled);
+
+/* Reset the rate distortion model */
+void irc_reset_frm_rc_rd_model(rc_rd_model_handle ps_rd_model);
+
+/* Returns the Qp for the given bits and SAD, within [u1_min_qp, u1_max_qp] */
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_handle ps_rd_model,
+                                   UWORD32 u4_target_res_bits,
+                                   UWORD32 u4_estimated_sad,
+                                   UWORD8 u1_min_qp,
+                                   UWORD8 u1_max_qp);
+
+/* Stores the statistics of an encoded frame and updates the model */
+void irc_add_frame_to_rd_model(rc_rd_model_handle ps_rd_model,
+                               UWORD32 i4_res_bits,
+                               UWORD8 u1_avg_mp2qp,
+                               UWORD32 i4_sad_h264,
+                               UWORD8 u1_num_skips);
+/* Estimates the bits for the given SAD and average Qp (linear model) */
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_handle ps_rd_model,
+                                 UWORD32 u4_estimated_sad,
+                                 UWORD8 u1_avg_qp);
+
+/* Get the Linear model coefficient */
+model_coeff irc_get_linear_coefficient(rc_rd_model_handle ps_rd_model);
+/* Returns the bits for the current frame; its Qp goes to the last argument */
+WORD32 irc_calc_per_frm_bits(rc_rd_model_handle ps_rd_model,
+                             UWORD16 *pu2_num_pics_of_a_pic_type,
+                             UWORD8 *pu1_update_pic_type_model,
+                             UWORD8 u1_num_pic_types,
+                             UWORD32 *pu4_num_skip_of_a_pic_type,
+                             UWORD8 u1_base_pic_type,
+                             float *pfl_gamma,
+                             float *pfl_eta,
+                             UWORD8 u1_curr_pic_type,
+                             UWORD32 u4_bits_for_sub_gop,
+                             UWORD32 u4_curr_estimated_sad,
+                             UWORD8 *pu1_curr_pic_type_qp);
+#endif
+
diff --git a/encoder/irc_rd_model_struct.h b/encoder/irc_rd_model_struct.h
new file mode 100755
index 0000000..dc4c0ea
--- /dev/null
+++ b/encoder/irc_rd_model_struct.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef RC_RD_MODEL_STRUCT
+#define RC_RD_MODEL_STRUCT
+
+/* Enable or disable the QUAD model and the linear model with intercept */
+#define ENABLE_QUAD_RC_MODEL       0
+#define ENABLE_LIN_MODEL_WITH_INTERCEPT  0
+
+/* Number of elements for QP */
+#define MPEG2_QP_ELEM       (MAX_MPEG2_QP + 1)
+
+
+#if ENABLE_QUAD_RC_MODEL
+#define QUAD                       1
+#define MIN_FRAMES_FOR_QUAD_MODEL  5
+#endif
+
+#define MAX_ACTIVE_FRAMES          16
+#define MIN_FRAMES_FOR_LIN_MODEL   3
+#define INVALID_FRAME_INDEX        255 /* Marks a frame index as not chosen */
+/* Thresholds are a mantissa (_SM) over a power-of-two exponent (_E) */
+#define UP_THR_SM           1  /* (1 / pow(2,4)) = 0.0625 */
+#define UP_THR_E            4
+
+#define LO_THR_SM           368  /* (368.64 / pow(2,14)) = 0.0225 */
+#define LO_THR_E            14
+
+#define LIN_DEV_THR_SM     1  /* (1 / pow(2,2)) = 0.25 */
+#define LIN_DEV_THR_E      2
+
+#define PREV_FRAME_MODEL    2 /* Value stored in u1_model_used */
+
+/* Q Factors used for fixed point calculation */
+#define Q_FORMAT_GAMMA  8
+#define Q_FORMAT_ETA    8
+
+typedef struct rc_rd_model_t
+{
+    UWORD8 u1_curr_frm_counter; /* History slot the next frame is written to */
+    UWORD8 u1_num_frms_in_model; /* Frames counted so far, up to the limit */
+    UWORD8 u1_max_frms_to_model; /* Limit for u1_num_frms_in_model */
+    UWORD8 u1_model_used; /* Identifier of the model in use */
+
+    UWORD32 pi4_res_bits[MAX_FRAMES_MODELLED]; /* Bits of each stored frame */
+    UWORD32 pi4_sad[MAX_FRAMES_MODELLED]; /* SAD of each stored frame */
+
+    UWORD8 pu1_num_skips[MAX_FRAMES_MODELLED]; /* Skips of each stored frame */
+    UWORD8 pu1_avg_qp[MAX_FRAMES_MODELLED]; /* Avg Qp of each stored frame */
+    UWORD8 au1_num_frames[MPEG2_QP_ELEM]; /* NOTE(review): presumably frames per Qp - confirm */
+    /* Linear model without intercept; bits are estimated as a * (sad / qp) */
+    model_coeff model_coeff_a_lin_wo_int;
+    model_coeff model_coeff_b_lin_wo_int;
+    model_coeff model_coeff_c_lin_wo_int;
+} rc_rd_model_t;
+
+#endif /* RC_RD_MODEL_STRUCT */
diff --git a/encoder/irc_trace_support.h b/encoder/irc_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/irc_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  irc_trace_support.h
+*
+* @brief
+*  This file contains extern declarations of routines that could be helpful
+*  for debugging purposes.
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures                                                                */
+/*****************************************************************************/
+
+typedef struct
+{
+    WORD8 * pu1_buf; /* NOTE(review): presumably the trace buffer - confirm */
+    WORD32 i4_offset; /* NOTE(review): presumably the write offset - confirm */
+    WORD32 i4_max_size; /* NOTE(review): presumably the capacity - confirm */
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations                                              */
+/*****************************************************************************/
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+
+int trace_printf(const WORD8 *format, ...); /* Variadic, printf-like format */
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c
new file mode 100755
index 0000000..23e9959
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.c
@@ -0,0 +1,368 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_trace_support.h"
+
+#define MAX(x, y)  ((x) > (y) ? (x) : (y)) /* Selected argument is evaluated twice */
+
+typedef struct vbr_storage_vbv_t
+{
+    WORD32 i4_max_buf_size; /* Size of the VBV buffer */
+    WORD32 i4_cur_buf_size; /* Current buffer level; starts at the max size */
+    WORD32 i4_max_bits_inflow_per_frm_period; /* From bit rate and frame rate */
+    WORD32 i4_max_bits_per_tgt_frm; /* From bit rate and target frame rate */
+    /* Storing input variables */
+    WORD32 i4_max_bit_rate;
+    WORD32 i4_max_frame_rate;
+    /* Error bits calculation module */
+    error_bits_handle ps_error_bits;
+
+} vbr_storage_vbv_t;
+
+/* Adds i4_input to *pi4_accumulator, saturating instead of wrapping around */
+static void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input)
+{
+    const WORD32 i4_acc = *pi4_accumulator;
+    WORD32 i4_sum;
+
+    /* Compare the input against the room left towards the nearer limit */
+    if((i4_acc > 0) && (i4_input > ((int)0x7fffffff - i4_acc)))
+        i4_sum = 0x7fffffff;
+    else if((i4_acc < 0) && (i4_input < ((int)0x80000000 - i4_acc)))
+        i4_sum = 0x80000000;
+    else
+        i4_sum = i4_acc + i4_input;
+
+    /* Write back the saturated or the exact sum */
+    *pi4_accumulator = i4_sum;
+}
+
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_t **pps_vbr_storage_vbv,
+                                            itt_memtab_t *ps_memtab,
+                                            ITT_FUNC_TYPE_E e_func_type)
+{
+    WORD32 i4_mem_tab_idx = 0;
+    static vbr_storage_vbv_t s_vbr_storage_vbv_temp;
+
+    /*
+     * For GET_NUM_MEMTAB and FILL_MEMTAB there is no state memory yet, so the
+     * handle is pointed at a static dummy; ps_error_bits is addressed below
+     */
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+        (*pps_vbr_storage_vbv) = &s_vbr_storage_vbv_temp;
+
+    /* Memtab of the VBV state structure itself; it is always counted */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(vbr_storage_vbv_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_vbr_storage_vbv,
+                         e_func_type);
+    }
+    i4_mem_tab_idx++;
+
+    i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+                    &pps_vbr_storage_vbv[0]->ps_error_bits,
+                    &ps_memtab[i4_mem_tab_idx], e_func_type);
+    return (i4_mem_tab_idx); /* Own memtab plus those of the error bits */
+}
+
+void irc_init_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                      WORD32 i4_max_bit_rate,
+                      WORD32 i4_frm_rate,
+                      WORD32 i4_max_vbv_buff_size)
+{
+    /* The buffer level starts at the full buffer size */
+    ps_vbr_storage_vbv->i4_cur_buf_size = ps_vbr_storage_vbv->i4_max_buf_size =
+                    i4_max_vbv_buff_size;
+
+    /* Bits that flow into the decoder buffer between two frames */
+    X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, i4_frm_rate,
+                   ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* Initialise the error bits module with the same rates */
+    irc_init_error_bits(ps_vbr_storage_vbv->ps_error_bits, i4_frm_rate,
+                        i4_max_bit_rate);
+
+    /* Keep the inputs for the later rate change calls */
+    ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate;
+    ps_vbr_storage_vbv->i4_max_bit_rate = i4_max_bit_rate;
+
+    /* The per target frame limit starts equal to the per period inflow */
+    ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm =
+                    ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+}
+
+/* Advances the VBV buffer level by one frame period and one decoded frame */
+void irc_update_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                        WORD32 i4_total_bits_decoded)
+{
+    WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /*
+     * In the time between two decoded frames the buffer fills by the per
+     * frame period inflow, corrected by the error bits
+     */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+
+    /* The level cannot exceed the buffer size */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    /* The decoder then removes the bits of the frame it decodes */
+    ps_vbr_storage_vbv->i4_cur_buf_size = i4_level - i4_total_bits_decoded;
+
+    /* Update the error bits state */
+    irc_update_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+}
+
+/* Maximum bits the next frame may consume without a VBV underflow */
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    WORD32 i4_fill_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+
+    /* Inflow of one frame period, corrected by the error bits */
+    i4_fill_bits += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+
+    /* Buffer level at the time the next frame is decoded */
+    overflow_avoided_summation(&i4_level, i4_fill_bits);
+
+    /*
+     * That level, capped at the buffer size, is the most the decoder can
+     * consume for the next frame without underflow
+     */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+        return ps_vbr_storage_vbv->i4_max_buf_size;
+
+    return i4_level;
+}
+
+/****************************************************************************
+ Function Name : irc_get_vbv_buffer_status
+ Description   : Gets the state of VBV buffer
+ Inputs        : Rate control API , header and texture bits
+ Outputs       : 0 = normal, 1 = underflow, 2= overflow
+ Returns       : vbv_buf_status_e
+ *****************************************************************************/
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                           WORD32 i4_total_frame_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    const WORD32 i4_error_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /*
+     * Level just before the frame is decoded: the current level plus one
+     * frame period of inflow, corrected by the fixed point error bits and
+     * capped at the buffer size
+     */
+    overflow_avoided_summation(
+                    &i4_level,
+                    (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                                    + i4_error_bits));
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* A frame larger than this level would underflow the buffer */
+    pi4_num_bits_to_prevent_vbv_underflow[0] = i4_level;
+
+    /* Level just after the frame is decoded */
+    i4_level -= i4_total_frame_bits;
+
+    /* Classify the level that is left */
+    if(i4_level < 0)
+        return VBV_UNDERFLOW;
+
+    /* The level was capped above, so this needs negative frame bits */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+        return VBV_OVERFLOW;
+
+    /* Less than a quarter of the buffer is left */
+    if(i4_level < (ps_vbr_storage_vbv->i4_max_buf_size >> 2))
+        return VBR_CAUTION;
+
+    /* Anything else is the normal operating range */
+    return VBV_NORMAL;
+}
+
+/* Returns 0 when the buffer level is below half the buffer size, else 1 */
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_half_buf_size = ps_vbr_storage_vbv->i4_max_buf_size >> 1;
+
+    if(ps_vbr_storage_vbv->i4_cur_buf_size < i4_half_buf_size)
+    {
+        return 0;
+    }
+
+    return 1;
+}
+
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_buf_size; /* Size of the VBV buffer */
+}
+
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_cur_buf_size; /* Current buffer level */
+}
+
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period; /* Stored inflow */
+}
+
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    return ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm; /* Stored limit */
+}
+
+/* Buffer level that would remain after a frame of u4_bits is decoded */
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                    UWORD32 u4_bits)
+{
+    WORD32 i4_fill_bits = irc_get_error_bits(
+                    ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /* Fill by one frame period of inflow, corrected by the error bits */
+    i4_fill_bits += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+    overflow_avoided_summation(&i4_level, i4_fill_bits);
+
+    /* The level cannot exceed the buffer size */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    /* Remove the bits of the frame */
+    i4_level -= u4_bits;
+
+    return i4_level;
+}
+
+/* Bits the next frame may use while keeping the buffer at a wanted level */
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                     WORD32 i4_rem_bits_in_gop,
+                                     WORD32 i4_rem_frms_in_gop,
+                                     picture_type_e e_pic_type)
+{
+    const WORD32 i4_buf_size = ps_vbr_storage_vbv->i4_max_buf_size;
+    /* Fullness band: 10% to 95% of the buffer size, computed in WORD32 */
+    const WORD32 i4_lo_level = 10 * i4_buf_size / 100;
+    const WORD32 i4_hi_level = 95 * i4_buf_size / 100;
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    WORD32 i4_wanted_level;
+
+    /* Sanitise the GOP counters: no negative bits, at least one frame */
+    i4_rem_bits_in_gop = (i4_rem_bits_in_gop < 0) ? 0 : i4_rem_bits_in_gop;
+    i4_rem_frms_in_gop = (i4_rem_frms_in_gop <= 0) ? 1 : i4_rem_frms_in_gop;
+
+    if(e_pic_type == I_PIC)
+    {
+        /* An I picture may take the buffer down to the lower level */
+        i4_wanted_level = i4_lo_level;
+    }
+    else
+    {
+        /* Move a 1/remaining-frames share towards the upper level */
+        i4_wanted_level = i4_level
+                        + (i4_hi_level - i4_rem_bits_in_gop / i4_rem_frms_in_gop
+                                        - i4_level) / i4_rem_frms_in_gop;
+    }
+
+    /* Level before the next frame: one period of inflow, capped at the size */
+    i4_level += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+    if(i4_level > i4_buf_size)
+        i4_level = i4_buf_size;
+
+    /* Bits that can be spent while staying at or above the wanted level */
+    return MAX(0, (i4_level - i4_wanted_level));
+}
+
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                   WORD32 i4_frm_rate)
+{
+    /*
+     * Recompute, from the stored max bit rate and the new frame rate, the max
+     * number of bits that flow into the decoder in the interval of two frames
+     */
+    X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_frm_rate,
+                   ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* Pass the new frame rate on to the error bits module */
+    irc_change_frm_rate_in_error_bits(ps_vbr_storage_vbv->ps_error_bits,
+                                      i4_frm_rate);
+    /* Remember the new frame rate for later bit rate changes */
+    ps_vbr_storage_vbv->i4_max_frame_rate = i4_frm_rate;
+}
+
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                 WORD32 i4_max_bit_rate)
+{
+    /*
+     * Recompute, from the new max bit rate and the stored frame rate, the max
+     * number of bits that flow into the decoder in the interval of two frames
+     */
+    X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, ps_vbr_storage_vbv->i4_max_frame_rate,
+                   ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* Pass the new bit rate on to the error bits module */
+    irc_change_bitrate_in_error_bits(ps_vbr_storage_vbv->ps_error_bits,
+                                     i4_max_bit_rate);
+
+    /* Remember the new bit rate for later frame rate changes */
+    ps_vbr_storage_vbv->i4_max_bit_rate = i4_max_bit_rate;
+}
+
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                         WORD32 i4_tgt_frm_rate)
+{
+    /*
+     * Recompute the max bits per target frame from the stored max bit rate
+     * and the new target frame rate; the per period inflow is not changed
+     */
+    X_PROD_Y_DIV_Z(ps_vbr_storage_vbv->i4_max_bit_rate, 1000, i4_tgt_frm_rate,
+                   ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm);
+
+}
diff --git a/encoder/irc_vbr_storage_vbv.h b/encoder/irc_vbr_storage_vbv.h
new file mode 100755
index 0000000..c53c66d
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STORAGE_VBV_H_
+#define _VBR_STORAGE_VBV_H_
+/******************************************************************************
+VBR STORAGE (VBV):
+Max. buffer filling rate: Rmax
+Max. buffer size: Bmax (as specified by level and profile)
+Current Buffer Level: Bcur
+Frame Rate: F
+
+For a storage scenario, the initial buffer size is assumed to be max. For every
+frame the Maximum bits filled in to the buffer is given by Rmaxfrm = Rmax/F. If
+the buffer overflows then the buffer is thresholded to the max buffer size.
+
+               (overflow)
+   B(0)            /|
+---|--------------/-|------------------------------ Bmax
+   |             /  |
+   |          /|/   |
+   |  /|     /      |
+   | / |  /|/       |
+   |/  | /          | /|
+       |/           |/ |
+                       |
+                       |
+-----------------------|---------------------------
+   |<->|               |
+(1/F)=>1/frame_rate (underflow)
+
+
+   B"(i) - Bits in buffer just before decoding a frame.
+   B'(i) - Bits in buffer just after decoding a frame.
+
+
+   B(0) (initBuffer size) = Bmax.
+   B'(i) = B"(i) - bits_decoded
+   B"(i) = Min( Bmax, B'(i-1) + Rmaxfrm)
+
+Overflow Scenario: In the VBR case we have only a max filling rate (or input bit rate), so
+buffer overflow is not an issue (the buffer filling rate can be reduced to any value
+below this rate).
+
+Underflow Scenario: B'(i) should always be > 0. If not, the buffer underflows. To
+prevent this condition the number of bits that need to be decoded must not exceed B"(i),
+which is equal to Min( Bmax, B'(i-1) + Rmaxfrm).
+****************************************************************************************/
+
+typedef struct vbr_storage_vbv_t* vbr_storage_vbv_handle;
+
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_handle *pps_vbr_storage_vbv,
+                                            itt_memtab_t *ps_memtab,
+                                            ITT_FUNC_TYPE_E e_func_type);
+
+/* Initialises the vbv buffer status */
+void irc_init_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                      WORD32 max_bit_rate, /* In bits/sec*/
+                      WORD32 max_frm_rate, /* In frames/1000 sec*/
+                      WORD32 i4_max_vbv_buff_size); /* in bits*/
+
+/* Updates the buffer after decoding a frame */
+void irc_update_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                        WORD32 i4_total_bits_decoded);
+
+/* gets the max_number of bits that can be decoded out of the VBV without underflow */
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Queries the VBV buffer for the buffer status */
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                           WORD32 i4_total_frame_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                    UWORD32 u4_bits);
+
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                     WORD32 i4_rem_bits_in_gop,
+                                     WORD32 i4_rem_frms_in_gop,
+                                     picture_type_e e_pic_type);
+
+/* Changing input values at run time */
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                 WORD32 i4_max_bit_rate);
+
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                   WORD32 i4_frm_rate);
+
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+                                         WORD32 i4_tgt_frm_rate);
+#endif
+
diff --git a/encoder/irc_vbr_str_prms.c b/encoder/irc_vbr_str_prms.c
new file mode 100755
index 0000000..29055c2
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.c
@@ -0,0 +1,199 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_vbr_str_prms.h"
+
+/******************************************************************************
+ Function Name   : irc_init_vbv_str_prms
+ Description     : Initializes and calculates the number of I frame and P frames
+                   in the delay period
+ Return Values   : void
+ *****************************************************************************/
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                           UWORD32 u4_intra_frm_interval,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks,
+                           UWORD32 u4_frms_in_delay_period)
+{
+    /* Delay period and intra period, each expressed in ticks */
+    UWORD32 u4_delay_prd_ticks = u4_frms_in_delay_period * u4_src_ticks;
+    UWORD32 u4_intra_prd_ticks = u4_intra_frm_interval * u4_tgt_ticks;
+    UWORD32 u4_num_i_frms, u4_num_p_frms;
+
+    /* Keep a copy of the inputs these counts are derived from */
+    p_vbr_str_prms->u4_intra_frame_int = u4_intra_frm_interval;
+    p_vbr_str_prms->u4_src_ticks = u4_src_ticks;
+    p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks;
+    p_vbr_str_prms->u4_frms_in_delay_prd = u4_frms_in_delay_period;
+
+    /*
+     * Number of I frames in the delay period = ceil(delay period / intra
+     * period). This value, along with the drain rates for the corresponding
+     * picture types, will be used to calculate the buffer sizes.
+     */
+    u4_num_i_frms = u4_delay_prd_ticks / u4_intra_prd_ticks;
+    if((u4_delay_prd_ticks % u4_intra_prd_ticks) != 0)
+    {
+        /* The division left a remainder: round the quotient up */
+        u4_num_i_frms++;
+    }
+    /* Every other frame of the delay period is counted as a P frame */
+    u4_num_p_frms = u4_frms_in_delay_period - u4_num_i_frms;
+
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC] = u4_num_i_frms;
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC] = u4_num_p_frms;
+
+    /* Tick position reached after that many whole intra periods */
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_intra_prd_ticks
+                    * u4_num_i_frms;
+
+    /* Restart the picture number and the running source tick position */
+    p_vbr_str_prms->u4_pic_num = 0;
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+}
+
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+                                       UWORD32 *pu4_num_pics_in_delay_prd)
+{
+    const UWORD32 *pu4_stored = p_vbr_str_prms->u4_num_pics_in_delay_prd;
+    /* Copy out the I and P counts; the return value is the period length */
+    pu4_num_pics_in_delay_prd[I_PIC] = pu4_stored[I_PIC];
+    pu4_num_pics_in_delay_prd[P_PIC] = pu4_stored[P_PIC];
+    return (p_vbr_str_prms->u4_frms_in_delay_prd);
+}
+
+/******************************************************************************
+ Function Name   : irc_update_vbr_str_prms
+ Description     : update the number of I frames and P/B frames in the delay period
+                   for buffer size calculations
+ *****************************************************************************/
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                             picture_type_e e_pic_type)
+{
+    UWORD32 *pu4_num_pics = p_vbr_str_prms->u4_num_pics_in_delay_prd;
+    UWORD32 u4_delay_prd_end;
+
+    /*
+     * The picture just encoded leaves the delay period: decrement the count
+     * of its type (anything other than I_PIC is accounted as a P picture).
+     * These counts feed the per-frame buffer size calculation.
+     */
+    pu4_num_pics[(e_pic_type == I_PIC) ? I_PIC : P_PIC]--;
+
+    /* End of the delay period, measured from the current position */
+    u4_delay_prd_end = p_vbr_str_prms->u4_cur_pos_in_src_ticks
+                    + (p_vbr_str_prms->u4_frms_in_delay_prd
+                       * p_vbr_str_prms->u4_src_ticks);
+
+    if(u4_delay_prd_end < p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks)
+    {
+        /* Next I frame is still beyond the delay period: a P frame enters */
+        pu4_num_pics[P_PIC]++;
+    }
+    else
+    {
+        /*
+         * Next I frame falls within the delay period: count it, move the
+         * intra position one intra period further (relative to the current
+         * position) and restart the picture number and tick position.
+         */
+        pu4_num_pics[I_PIC]++;
+        p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks =
+                        (p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks
+                         - p_vbr_str_prms->u4_cur_pos_in_src_ticks)
+                        + (p_vbr_str_prms->u4_intra_frame_int
+                           * p_vbr_str_prms->u4_tgt_ticks);
+        p_vbr_str_prms->u4_pic_num = 0;
+        p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+    }
+    /* Step to the next picture */
+    p_vbr_str_prms->u4_pic_num++;
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks += p_vbr_str_prms->u4_src_ticks;
+}
+
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                               UWORD32 *pu4_src_ticks,
+                               UWORD32 *pu4_tgt_ticks)
+{
+    *pu4_src_ticks = p_vbr_str_prms->u4_src_ticks; /* out: stored source ticks */
+    *pu4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks; /* out: stored target ticks */
+}
+
+/*******************************************************************************
+ Function Name   : irc_change_vsp_ifi / _tgt_ticks / _src_ticks / _fidp
+ Description     : Takes in changes of Intra frame interval, source and target
+                   ticks and recalculates the position of the  next I frame
+ ******************************************************************************/
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+                        UWORD32 u4_intra_frame_int)
+{   /* Re-runs the init with the new intra frame interval and the stored ticks / delay period; the init also zeroes u4_pic_num and u4_cur_pos_in_src_ticks */
+    irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int,
+                          p_vbr_str_prms->u4_src_ticks,
+                          p_vbr_str_prms->u4_tgt_ticks,
+                          p_vbr_str_prms->u4_frms_in_delay_prd);
+}
+
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_rem_intra_per_scaled;
+    UWORD32 u4_prev_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+
+    /*
+     * If the target frame rate is changed, recalculate the position of the next
+     * I frame based on the new target frame rate: the distance still to go is
+     * divided by the previous target ticks and multiplied by the new ones.
+     * LIMITATIONS :
+     * Currently no support is available for dynamic change in source frame rate
+     */
+    u4_rem_intra_per_scaled = ((p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks
+                    - p_vbr_str_prms->u4_cur_pos_in_src_ticks)
+                    / u4_prev_tgt_ticks) * u4_tgt_ticks;
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_rem_intra_per_scaled
+                    + p_vbr_str_prms->u4_cur_pos_in_src_ticks;
+    /* Persist the new value so later updates/re-inits stop using the old one */
+    p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks;
+}
+
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_src_ticks)
+{   /* Re-runs the init with the new source ticks and the stored interval / target ticks / delay period; the init also zeroes u4_pic_num and u4_cur_pos_in_src_ticks */
+    irc_init_vbv_str_prms(p_vbr_str_prms, p_vbr_str_prms->u4_intra_frame_int,
+                          u4_src_ticks, p_vbr_str_prms->u4_tgt_ticks,
+                          p_vbr_str_prms->u4_frms_in_delay_prd);
+}
+
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+                         UWORD32 u4_frms_in_delay_period)
+{   /* Re-runs the init with the new number of frames in the delay period and the stored interval / ticks; the init also zeroes u4_pic_num and u4_cur_pos_in_src_ticks */
+    irc_init_vbv_str_prms(p_vbr_str_prms, p_vbr_str_prms->u4_intra_frame_int,
+                          p_vbr_str_prms->u4_src_ticks,
+                          p_vbr_str_prms->u4_tgt_ticks,
+                          u4_frms_in_delay_period);
+}
diff --git a/encoder/irc_vbr_str_prms.h b/encoder/irc_vbr_str_prms.h
new file mode 100755
index 0000000..34301d8
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STR_PRMS_H_
+#define _VBR_STR_PRMS_H_
+
+typedef struct
+{
+    UWORD32 u4_num_pics_in_delay_prd[MAX_PIC_TYPE]; /* per picture type count for the delay period; irc_vbr_str_prms.c maintains the I_PIC and P_PIC entries */
+    UWORD32 u4_pic_num; /* zeroed by the init and whenever an I frame is added to the delay period; incremented on every update */
+    UWORD32 u4_intra_prd_pos_in_tgt_ticks; /* tick position the update compares against to decide when the next I frame enters the delay period */
+    UWORD32 u4_cur_pos_in_src_ticks; /* running position: advanced by u4_src_ticks per update, reset together with u4_pic_num */
+    UWORD32 u4_intra_frame_int; /* intra frame interval, set by irc_init_vbv_str_prms */
+    UWORD32 u4_src_ticks; /* source ticks, set by irc_init_vbv_str_prms */
+    UWORD32 u4_tgt_ticks; /* target ticks, set by irc_init_vbv_str_prms */
+    UWORD32 u4_frms_in_delay_prd; /* number of frames in the delay period, set by irc_init_vbv_str_prms */
+} vbr_str_prms_t;
+
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                           UWORD32 u4_intra_frm_interval,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks,
+                           UWORD32 u4_frms_in_delay_period);
+
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+                                       UWORD32 *pu4_num_pics_in_delay_prd);
+
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                               UWORD32 *pu4_src_ticks,
+                               UWORD32 *pu4_tgt_ticks);
+
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                             picture_type_e e_pic_type);
+
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+                        UWORD32 u4_intra_frame_int);
+
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_tgt_ticks);
+
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_src_ticks);
+
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+                         UWORD32 u4_frms_in_delay_period);
+
+#endif
+
diff --git a/encoder/ithread.h b/encoder/ithread.h
new file mode 100755
index 0000000..82170a5
--- /dev/null
+++ b/encoder/ithread.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : ithread.h                                            */
+/*                                                                           */
+/*  Description       : This file contains all the necessary structure and   */
+/*                      enumeration definitions needed for the Application   */
+/*                      Program Interface(API) of the                        */
+/*                      Thread Abstraction Layer                             */
+/*                                                                           */
+/*  List of Functions : ithread_get_handle_size()                            */
+/*                      ithread_get_mutex_lock_size()                        */
+/*                      ithread_create()                                     */
+/*                      ithread_exit()                                       */
+/*                      ithread_join()                                       */
+/*                      ithread_get_mutex_struct_size()                      */
+/*                      ithread_mutex_init()                                 */
+/*                      ithread_mutex_destroy()                              */
+/*                      ithread_mutex_lock()                                 */
+/*                      ithread_mutex_unlock()                               */
+/*                      ithread_yield()                                      */
+/*                      ithread_sleep()                                      */
+/*                      ithread_msleep()                                     */
+/*                      ithread_usleep()                                     */
+/*                      ithread_get_sem_struct_size()                        */
+/*                      ithread_sem_init()                                   */
+/*                      ithread_sem_post()                                   */
+/*                      ithread_sem_wait()                                   */
+/*                      ithread_sem_destroy()                                */
+/*                      ithread_set_affinity()                               */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         06 09 2012   Harish          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef _ITHREAD_H_
+#define _ITHREAD_H_
+
+UWORD32 ithread_get_handle_size(void);
+
+UWORD32 ithread_get_mutex_lock_size(void);
+
+WORD32  ithread_create(void *thread_handle, void *attribute, void *strt, void *argument);
+
+void    ithread_exit(void *val_ptr);
+
+WORD32  ithread_join(void *thread_id, void ** val_ptr);
+
+WORD32  ithread_get_mutex_struct_size(void);
+
+WORD32  ithread_mutex_init(void *mutex);
+
+WORD32  ithread_mutex_destroy(void *mutex);
+
+WORD32  ithread_mutex_lock(void *mutex);
+
+WORD32  ithread_mutex_unlock(void *mutex);
+
+void    ithread_yield(void);
+
+void    ithread_sleep(UWORD32 u4_time);
+
+void    ithread_msleep(UWORD32 u4_time_ms);
+
+void    ithread_usleep(UWORD32 u4_time_us);
+
+UWORD32 ithread_get_sem_struct_size(void);
+
+WORD32  ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value);
+
+WORD32  ithread_sem_post(void *sem);
+
+WORD32  ithread_sem_wait(void *sem);
+
+WORD32  ithread_sem_destroy(void *sem);
+
+WORD32  ithread_set_affinity(WORD32 core_id);
+#endif /* _ITHREAD_H_ */
diff --git a/encoder/iv2.h b/encoder/iv2.h
new file mode 100755
index 0000000..538bb1e
--- /dev/null
+++ b/encoder/iv2.h
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  iv2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface (API) of the
+* Ittiam Video codecs. This is version 2 of the Ittiam Video API.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IV2_H_
+#define _IV2_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+#define IV_MAX_RAW_COMPONENTS 4
+
+/*****************************************************************************/
+/* Typedefs                                                                  */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+
+/** Function status: IV_SUCCESS is 0x0 and IV_FAIL is 0x1 */
+typedef enum{
+    IV_STATUS_NA                                = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_SUCCESS                                  = 0x0,
+    IV_FAIL                                     = 0x1,
+}IV_STATUS_T;
+
+
+/** Defines the types of memory; as numbered, EXTERNAL types are 0x0-0x3 and INTERNAL types are the same values + 0x10 */
+typedef enum {
+    IV_NA_MEM_TYPE                              = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM        = 0x0,
+    IV_EXTERNAL_CACHEABLE_SCRATCH_MEM           = 0x1,
+    IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM     = 0x2,
+    IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM        = 0x3,
+    IV_INTERNAL_CACHEABLE_PERSISTENT_MEM        = 0x10,
+    IV_INTERNAL_CACHEABLE_SCRATCH_MEM           = 0x11,
+    IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM     = 0x12,
+    IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM        = 0x13,
+}IV_MEM_TYPE_T;
+
+/* The color formats used in video/image codecs; values are grouped as 0x0x (420), 0x1x (422), 0x2x (444P, 411P) and 0x3x (gray, RGB) */
+
+typedef enum {
+    IV_CHROMA_NA                            = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_YUV_420P                             = 0x0,
+    IV_YUV_420SP_UV                         = 0x1,
+    IV_YUV_420SP_VU                         = 0x2,
+
+    IV_YUV_422P                             = 0x10,
+    IV_YUV_422IBE                           = 0x11,
+    IV_YUV_422ILE                           = 0x12,
+
+    IV_YUV_444P                             = 0x20,
+    IV_YUV_411P                             = 0x21,
+
+    IV_GRAY                                 = 0x30,
+
+    IV_RGB_565                              = 0x31,
+    IV_RGB_24                               = 0x32,
+    IV_RGBA_8888                            = 0x33
+}IV_COLOR_FORMAT_T;
+
+/** Frame/Field coding types; values run contiguously from 0x0 to 0x11 */
+typedef enum {
+    IV_NA_FRAME                             = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_I_FRAME                              = 0x0,
+    IV_P_FRAME                              = 0x1,
+    IV_B_FRAME                              = 0x2,
+    IV_IDR_FRAME                            = 0x3,
+    IV_II_FRAME                             = 0x4, /* NOTE(review): the two-letter names (0x4-0xc) look like field-pair types - confirm */
+    IV_IP_FRAME                             = 0x5,
+    IV_IB_FRAME                             = 0x6,
+    IV_PI_FRAME                             = 0x7,
+    IV_PP_FRAME                             = 0x8,
+    IV_PB_FRAME                             = 0x9,
+    IV_BI_FRAME                             = 0xa,
+    IV_BP_FRAME                             = 0xb,
+    IV_BB_FRAME                             = 0xc,
+    IV_MBAFF_I_FRAME                        = 0xd,
+    IV_MBAFF_P_FRAME                        = 0xe,
+    IV_MBAFF_B_FRAME                        = 0xf,
+    IV_MBAFF_IDR_FRAME                      = 0x10,
+    IV_NOT_CODED_FRAME                      = 0x11,
+    IV_FRAMETYPE_DEFAULT                    = IV_I_FRAME /* alias: same value (0x0) as IV_I_FRAME */
+}IV_PICTURE_CODING_TYPE_T;
+
+/** Field type: top (0x0) or bottom (0x1) */
+typedef enum {
+    IV_NA_FLD                               = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_TOP_FLD                              = 0x0,
+    IV_BOT_FLD                              = 0x1,
+    IV_FLD_TYPE_DEFAULT                     = IV_TOP_FLD /* alias: same value (0x0) as IV_TOP_FLD */
+}IV_FLD_TYPE_T;
+
+/** Video content type progressive/interlaced etc; values run contiguously from 0x0 to 0x5 */
+typedef enum {
+    IV_CONTENTTYPE_NA                       = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_PROGRESSIVE                          = 0x0,
+    IV_INTERLACED                           = 0x1,
+    IV_PROGRESSIVE_FRAME                    = 0x2,
+    IV_INTERLACED_FRAME                     = 0x3,
+    IV_INTERLACED_TOPFIELD                  = 0x4,
+    IV_INTERLACED_BOTTOMFIELD               = 0x5,
+    IV_CONTENTTYPE_DEFAULT                  = IV_PROGRESSIVE, /* alias: same value (0x0) as IV_PROGRESSIVE */
+}IV_CONTENT_TYPE_T;
+
+/** Profile; two value ranges are used: 0x0-0x2 and 0x100-0x101 */
+typedef enum
+{
+    IV_PROFILE_NA                           = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_PROFILE_BASE                         = 0x0,
+    IV_PROFILE_MAIN                         = 0x1,
+    IV_PROFILE_HIGH                         = 0x2,
+
+
+    IV_PROFILE_SIMPLE                       = 0x100, /* NOTE(review): the 0x100 range looks like a separate profile family - confirm which codec uses it */
+    IV_PROFILE_ADVSIMPLE                    = 0x101,
+    IV_PROFILE_DEFAULT                      = IV_PROFILE_BASE, /* alias: same value (0x0) as IV_PROFILE_BASE */
+}IV_PROFILE_T;
+
+
+/** Architecture Enumeration; only the first two values are explicit, the rest are numbered by position */
+typedef enum
+{
+    ARCH_NA                 =   0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    ARCH_ARM_NONEON         =   0x0,
+    ARCH_ARM_A9Q,           /* implicit 1; each following entry is previous + 1, so inserting in the middle renumbers the later ones */
+    ARCH_ARM_A9A,
+    ARCH_ARM_A9,
+    ARCH_ARM_A7,
+    ARCH_ARM_A5,
+    ARCH_ARM_A15,
+    ARCH_ARM_NEONINTR,
+    ARCH_X86_GENERIC,
+    ARCH_X86_SSSE3,
+    ARCH_X86_SSE42,
+    ARCH_ARM_A53,
+    ARCH_ARM_A57,
+    ARCH_ARM_V8_NEON        /* implicit 13 */
+}IV_ARCH_T;
+
+/** SOC Enumeration; SOC_GENERIC is 0x0 and later entries are numbered by position */
+typedef enum
+{
+    SOC_NA                  = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    SOC_GENERIC             = 0x0,
+    SOC_HISI_37X            /* implicit 1 */
+}IV_SOC_T;
+
+
+/** API command type; base commands are 0x0-0x3 and IV_CMD_EXTENSIONS (0x100) is kept as the last entry */
+typedef enum {
+    IV_CMD_NA                           = 0x7FFFFFFF, /* INT32_MAX; presumably a "not applicable" marker - TODO confirm */
+    IV_CMD_GET_NUM_MEM_REC              = 0x0,
+    IV_CMD_FILL_NUM_MEM_REC             = 0x1,
+    IV_CMD_RETRIEVE_MEMREC              = 0x2,
+    IV_CMD_INIT                         = 0x3,
+    /* Do not add anything after the following entry */
+    IV_CMD_EXTENSIONS                   = 0x100
+}IV_API_COMMAND_TYPE_T;
+
+/*****************************************************************************/
+/* Structure Definitions                                                     */
+/*****************************************************************************/
+
+/** This structure defines the handle for the codec instance            */
+
+typedef struct{
+    /** Size of the structure (NOTE(review): presumably sizeof(iv_obj_t), filled in by whoever creates the handle - confirm) */
+    UWORD32                                     u4_size;
+    /** Pointer to the API function pointer table of the codec (untyped here) */
+    void                                        *pv_fxns;
+    /** Pointer to the handle of the codec (untyped here)               */
+    void                                        *pv_codec_handle;
+}iv_obj_t;
+
+/** This structure defines the memory record holder which will          *
+ * be used by the codec to communicate its memory requirements to the   *
+ * application through appropriate API functions                        */
+
+typedef struct {
+    /** Size of the structure                                           */
+    UWORD32                                     u4_size;
+    /** Pointer to the memory allocated by the application              */
+    void                                        *pv_base;
+    /** Size of the memory to be allocated                              */
+    UWORD32                                     u4_mem_size;
+    /** Alignment of the memory pointer                                 */
+    UWORD32                                     u4_mem_alignment;
+    /** Type of the memory to be allocated (an IV_MEM_TYPE_T value)     */
+    IV_MEM_TYPE_T                               e_mem_type;
+}iv_mem_rec_t;
+
+/** This structure defines attributes for the raw buffer; every per-component array has IV_MAX_RAW_COMPONENTS entries */
+typedef struct {
+    /** Size of the structure                                           */
+    UWORD32                                     u4_size;
+
+    /** Color format (an IV_COLOR_FORMAT_T value)                       */
+    IV_COLOR_FORMAT_T                           e_color_fmt;
+
+    /** Pointer to each component                                       */
+    void                                        *apv_bufs[IV_MAX_RAW_COMPONENTS];
+
+    /** Width of each component                                         */
+    UWORD32                                     au4_wd[IV_MAX_RAW_COMPONENTS];
+
+    /** Height of each component                                        */
+    UWORD32                                     au4_ht[IV_MAX_RAW_COMPONENTS];
+
+    /** Stride of each component                                        */
+    UWORD32                                     au4_strd[IV_MAX_RAW_COMPONENTS];
+
+}iv_raw_buf_t;
+
+/** This structure defines attributes for the bitstream buffer          */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                     u4_size;
+
+    /** Pointer to buffer                                               */
+    void                                        *pv_buf;
+
+    /** Number of valid bytes in the buffer                             */
+    UWORD32                                     u4_bytes;
+
+    /** Allocated size of the buffer (presumably in bytes, like u4_bytes - TODO confirm) */
+    UWORD32                                     u4_bufsize;
+
+}iv_bits_buf_t;
+/*****************************************************************************/
+/*  Get Number of Memory Records                                             */
+/*****************************************************************************/
+
+/** Input structure for the "get number of memory records" call        */
+typedef struct {
+    /** Size of the structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type                                                   */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+}iv_num_mem_rec_ip_t;
+
+/** Output structure for the "get number of memory records" call       */
+typedef struct{
+    /** Size of the structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Returned error code                                            */
+    UWORD32                                     u4_error_code;
+
+    /** Number of memory records that will be used by the codec        */
+    UWORD32                                     u4_num_mem_rec;
+}iv_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*  Fill Memory Records                                                      */
+/*****************************************************************************/
+
+/** Input structure for the "fill memory records" call                 */
+
+typedef struct {
+    /** Size of the structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type                                                   */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+
+    /** Number of memory records                                       */
+    UWORD32                                     u4_num_mem_rec;
+
+    /** Pointer to an array of memory record structures that the codec
+    fills with the details of its memory resource requirements */
+    iv_mem_rec_t                                *ps_mem_rec;
+
+    /** Maximum width for which the codec should request memory          */
+    UWORD32                                     u4_max_wd;
+
+    /** Maximum height for which the codec should request memory         */
+    UWORD32                                     u4_max_ht;
+
+    /** Maximum number of reference frames                               */
+    UWORD32                                     u4_max_ref_cnt;
+
+    /** Maximum number of reorder frames                                 */
+    UWORD32                                     u4_max_reorder_cnt;
+
+    /** Maximum level supported                                          */
+    UWORD32                                     u4_max_level;
+
+    /** Color format that the codec supports for input/output            */
+    IV_COLOR_FORMAT_T                           e_color_format;
+
+    /** Maximum search range to be used in the X direction                  */
+    UWORD32                                     u4_max_srch_rng_x;
+
+    /** Maximum search range to be used in the Y direction                  */
+    UWORD32                                     u4_max_srch_rng_y;
+
+}iv_fill_mem_rec_ip_t;
+
+
+/** Output structure for the "fill memory records" call                  */
+typedef struct{
+    /** Size of the structure                                            */
+    UWORD32                                     u4_size;
+
+    /** Returned error code                                              */
+    UWORD32                                     u4_error_code;
+
+    /** Number of memory record structures filled by the codec           */
+    UWORD32                                     u4_num_mem_rec;
+}iv_fill_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*  Retrieve Memory Records                                                  */
+/*****************************************************************************/
+
+/** Input structure for the "retrieve memory records" call                   */
+
+typedef struct {
+    /** Size of the structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type                                                   */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+
+    /** Array of structures that the codec fills with all memory requested earlier */
+    iv_mem_rec_t                                *ps_mem_rec;
+}iv_retrieve_mem_rec_ip_t;
+
+/** Output structure for the "retrieve memory records" call              */
+typedef struct{
+    /** Size of the structure                                            */
+    UWORD32                                     u4_size;
+
+    /** Returned error code                                              */
+    UWORD32                                     u4_error_code;
+
+    /** Number of memory record structures filled by the codec           */
+    UWORD32                                     u4_num_mem_rec_filled;
+}iv_retrieve_mem_rec_op_t;
+
+#endif /* _IV2_H_ */
+
diff --git a/encoder/ive2.h b/encoder/ive2.h
new file mode 100755
index 0000000..8cb0fd1
--- /dev/null
+++ b/encoder/ive2.h
@@ -0,0 +1,1445 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ive2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface (API) of the
+* Ittiam Video Encoders. This is version 2.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IVE2_H_
+#define _IVE2_H_
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+/** Maximum number of components in I/O buffers                             */
+#define IVE_MAX_IO_BUFFER_COMPONENTS   4
+
+/** Maximum number of reference pictures                                    */
+#define IVE_MAX_REF 16
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+/** Slice modes; this is the type of e_slice_mode in ive_init_ip_t */
+typedef enum
+{
+    IVE_SLICE_MODE_NA           = 0x7FFFFFFF, /* largest positive 32-bit value */
+    IVE_SLICE_MODE_NONE         = 0x0,
+
+    IVE_SLICE_MODE_BYTES        = 0x1,
+    IVE_SLICE_MODE_BLOCKS       = 0x2,
+}IVE_SLICE_MODE_T;
+
+/** Adaptive intra refresh (AIR) modes */
+typedef enum
+{
+    IVE_AIR_MODE_NA             = 0x7FFFFFFF, /* largest positive 32-bit value */
+    IVE_AIR_MODE_NONE           = 0x0,
+    IVE_AIR_MODE_CYCLIC         = 0x1,
+    IVE_AIR_MODE_RANDOM         = 0x2,
+    IVE_AIR_MODE_DISTORTION     = 0x3,
+}IVE_AIR_MODE_T;
+
+/** Rate control modes; this is the type of e_rc_mode in ive_init_ip_t */
+typedef enum
+{
+  IVE_RC_NA                     = 0x7FFFFFFF, /* largest positive 32-bit value */
+  IVE_RC_NONE                   = 0x0,
+  IVE_RC_STORAGE                = 0x1,
+  IVE_RC_CBR_NON_LOW_DELAY      = 0x2,
+  IVE_RC_CBR_LOW_DELAY          = 0x3,
+  IVE_RC_TWOPASS                = 0x4,
+  IVE_RC_RATECONTROLPRESET_DEFAULT = IVE_RC_STORAGE /* default is an alias of STORAGE */
+}IVE_RC_MODE_T;
+
+/** Encoder modes; note that PICTURE is 0 and HEADER is 1 */
+typedef enum
+{
+    IVE_ENC_MODE_NA                          = 0x7FFFFFFF,
+    IVE_ENC_MODE_HEADER                      = 0x1,
+    IVE_ENC_MODE_PICTURE                     = 0x0,
+    IVE_ENC_MODE_DEFAULT                     = IVE_ENC_MODE_PICTURE, /* alias of PICTURE */
+}IVE_ENC_MODE_T;
+
+/** Speed configuration presets (enum tag and typedef share one name) */
+typedef enum IVE_SPEED_CONFIG
+{
+  IVE_QUALITY_DUMMY                         = 0x7FFFFFFF, /* largest positive 32-bit value */
+  IVE_CONFIG                                = 0,
+  IVE_SLOWEST                               = 1,
+  IVE_NORMAL                                = 2,
+  IVE_FAST                                  = 3,
+  IVE_HIGH_SPEED                            = 4,
+  IVE_FASTEST                               = 5,
+}IVE_SPEED_CONFIG;
+
+/** Encoder API command types, numbered upward from IV_CMD_EXTENSIONS + 1 */
+typedef enum
+{
+    IVE_CMD_VIDEO_NA                          = 0x7FFFFFFF,
+    IVE_CMD_VIDEO_CTL                         = IV_CMD_EXTENSIONS + 1,
+    IVE_CMD_VIDEO_ENCODE,       /* each following value is previous + 1 */
+    IVE_CMD_QUEUE_INPUT,
+    IVE_CMD_DEQUEUE_INPUT,
+    IVE_CMD_QUEUE_OUTPUT,
+    IVE_CMD_DEQUEUE_OUTPUT,
+    IVE_CMD_GET_RECON,
+}IVE_API_COMMAND_TYPE_T;
+
+/** Video control API sub-command types (values are explicit, not contiguous) */
+typedef enum
+{
+    IVE_CMD_CT_NA                           = 0x7FFFFFFF, /* NOTE(review): spelled "CT", siblings use "CTL" */
+    IVE_CMD_CTL_SETDEFAULT                  = 0x0,
+    IVE_CMD_CTL_SET_DIMENSIONS              = 0x1,
+    IVE_CMD_CTL_SET_FRAMERATE               = 0x2,
+    IVE_CMD_CTL_SET_BITRATE                 = 0x3,
+    IVE_CMD_CTL_SET_FRAMETYPE               = 0x4,
+    IVE_CMD_CTL_SET_QP                      = 0x5,
+    IVE_CMD_CTL_SET_ENC_MODE                = 0x6,
+    IVE_CMD_CTL_SET_VBV_PARAMS              = 0x7,
+    IVE_CMD_CTL_SET_AIR_PARAMS              = 0x8,
+    IVE_CMD_CTL_SET_ME_PARAMS               = 0X9,
+    IVE_CMD_CTL_SET_GOP_PARAMS              = 0XA,
+    IVE_CMD_CTL_SET_PROFILE_PARAMS          = 0XB,
+    IVE_CMD_CTL_SET_DEBLOCK_PARAMS          = 0XC,
+    IVE_CMD_CTL_SET_IPE_PARAMS              = 0XD,
+    IVE_CMD_CTL_SET_NUM_CORES               = 0x30,
+    IVE_CMD_CTL_RESET                       = 0xA0,
+    IVE_CMD_CTL_FLUSH                       = 0xB0,
+    IVE_CMD_CTL_GETBUFINFO                  = 0xC0,
+    IVE_CMD_CTL_GETVERSION                  = 0xC1,
+    IVE_CMD_CTL_CODEC_SUBCMD_START          = 0x100,
+}IVE_CONTROL_API_COMMAND_TYPE_T;
+
+/* IVE_ERROR_BITS_T: A UWORD32 container is used for reporting the error     */
+/* code to the application. The first 8 bits starting from the LSB are       */
+/* reserved for the codec to report internal error details. The remaining    */
+/* bits are generic for all video encoders; the enumerators below give the   */
+/* bit position (not a mask) of each such bit. Unused bit fields are         */
+/* reserved for future extensions and are zero in the current implementation */
+typedef enum {
+
+    /* Bit 8 - Unsupported input parameter or configuration.                 */
+    IVE_UNSUPPORTEDPARAM                        = 0x8,
+
+    /* Bit 9 - Fatal error (stop the codec). If there is an                 */
+    /* error and this bit is not set, the error is a recoverable one.       */
+    IVE_FATALERROR                              = 0x9,
+
+    IVE_ERROR_BITS_T_DUMMY_ELEMENT              = 0x7FFFFFFF
+}IVE_ERROR_BITS_T;
+
+/* IVE_ERROR_CODES_T: Error codes for the error scenarios that can be       */
+/* encountered while encoding. IP_/OP_ name input/output API structures.    */
+typedef enum
+{
+
+    IVE_ERR_NA                                                  = 0x7FFFFFFF,
+    IVE_ERR_NONE                                                = 0x00,
+    IVE_ERR_INVALID_API_CMD                                     = 0x01,
+    IVE_ERR_INVALID_API_SUB_CMD                                 = 0x02,
+    IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT            = 0x03,
+    IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT            = 0x04,
+    IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT           = 0x05,
+    IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT           = 0x06,
+    IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT                   = 0x07,
+    IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT                   = 0x08,
+    IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT       = 0x09,
+    IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT       = 0x0A,
+    IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT                 = 0x0B,
+    IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT                 = 0x0C,
+    IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT             = 0x0D,
+    IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT             = 0x0E,
+    IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT         = 0x0F,
+    IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT         = 0x10,
+    IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT         = 0x11,
+    IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT         = 0x12,
+    IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT              = 0x13,
+    IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT              = 0x14,
+    IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT              = 0x15,
+    IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT              = 0x16,
+    IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT           = 0x17,
+    IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT           = 0x18,
+    IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT             = 0x19,
+    IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT             = 0x1A,
+    IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT       = 0x1B,
+    IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT       = 0x1C,
+    IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT         = 0x1D,
+    IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT         = 0x1E,
+    IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT       = 0x1F,
+    IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT       = 0x20,
+    IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT        = 0x21,
+    IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT        = 0x22,
+    IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x23,
+    IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x24,
+    IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x25,
+    IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x26,
+    IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT     = 0x27,
+    IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT     = 0x28,
+    IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT        = 0x29,
+    IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT        = 0x2A,
+    IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL                      = 0x2B,
+    IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT                          = 0x2C,
+    IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT                       = 0x2D,
+    IVE_ERR_MEM_REC_BASE_POINTER_NULL                           = 0x2E,
+    IVE_ERR_MEM_REC_OVERLAP_ERR                                 = 0x2F,
+    IVE_ERR_MEM_REC_INSUFFICIENT_SIZE                           = 0x30,
+    IVE_ERR_MEM_REC_ALIGNMENT_ERR                               = 0x31,
+    IVE_ERR_MEM_REC_INCORRECT_TYPE                              = 0x32,
+    IVE_ERR_HANDLE_NULL                                         = 0x33,
+    IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT                        = 0x34,
+    IVE_ERR_API_FUNCTION_PTR_NULL                               = 0x35,
+    IVE_ERR_INVALID_CODEC_HANDLE                                = 0x36,
+    IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL                      = 0x37,
+    IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x38,
+    IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x39,
+    IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT         = 0x3A,
+    IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT         = 0x3B,
+    IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x3C,
+    IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT       = 0x3D,
+    IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT         = 0x3E,
+    IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT         = 0x3F,
+
+}IVE_ERROR_CODES_T;
+
+
+/*****************************************************************************/
+/*   Initialize encoder                                                      */
+/*****************************************************************************/
+
+/** Input structure for initializing the encoder                            */
+typedef struct
+{
+    /** Size of the structure                                               */
+    UWORD32                                 u4_size;
+
+    /** Command type                                                        */
+    IV_API_COMMAND_TYPE_T                   e_cmd;
+
+    /** Number of memory records                                            */
+    UWORD32                                 u4_num_mem_rec;
+
+    /** Pointer to an array of memory record structures that the codec
+    fills with the details of its memory resource requirements              */
+    iv_mem_rec_t                            *ps_mem_rec;
+
+    /** Maximum width for which the codec should request memory             */
+    UWORD32                                 u4_max_wd;
+
+    /** Maximum height for which the codec should request memory            */
+    UWORD32                                 u4_max_ht;
+
+    /** Maximum number of reference frames                                  */
+    UWORD32                                 u4_max_ref_cnt;
+
+    /** Maximum number of reorder frames                                    */
+    UWORD32                                 u4_max_reorder_cnt;
+
+    /** Maximum level supported                                             */
+    UWORD32                                 u4_max_level;
+
+    /** Input color format                                                  */
+    IV_COLOR_FORMAT_T                       e_inp_color_fmt;
+
+    /** Flag to enable/disable recon - only for debugging/testing           */
+    UWORD32                                 u4_enable_recon;
+
+    /** Recon color format                                                  */
+    IV_COLOR_FORMAT_T                       e_recon_color_fmt;
+
+    /** Rate control mode                                                   */
+    IVE_RC_MODE_T                           e_rc_mode;
+
+    /** Maximum frame rate to be supported                                  */
+    UWORD32                                 u4_max_framerate;
+
+    /** Maximum bitrate to be supported                                     */
+    UWORD32                                 u4_max_bitrate;
+
+    /** Maximum number of consecutive B frames                              */
+    UWORD32                                 u4_max_num_bframes;
+
+    /** Content type: interlaced or progressive                             */
+    IV_CONTENT_TYPE_T                       e_content_type;
+
+    /** Maximum search range to be used in the X direction                  */
+    UWORD32                                 u4_max_srch_rng_x;
+
+    /** Maximum search range to be used in the Y direction                  */
+    UWORD32                                 u4_max_srch_rng_y;
+
+    /** Slice mode                                                          */
+    IVE_SLICE_MODE_T                        e_slice_mode;
+
+    /** Slice parameter (presumably interpreted per e_slice_mode; confirm)  */
+    UWORD32                                 u4_slice_param;
+
+    /** Processor architecture                                          */
+    IV_ARCH_T                                   e_arch;
+
+    /** SOC details                                                     */
+    IV_SOC_T                                    e_soc;
+
+
+}ive_init_ip_t;
+
+/** Output structure for initializing the encoder                       */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+}ive_init_op_t;
+
+
+/*****************************************************************************/
+/*   Video Encode - Deprecated                                               */
+/*****************************************************************************/
+/** Input structure for the (deprecated) video encode call              */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+    /** Command type (presumably IVE_CMD_VIDEO_ENCODE; confirm)         */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Descriptor for input raw buffer                                 */
+    iv_raw_buf_t                            s_inp_buf;
+
+    /** NOTE(review): original comment duplicated pv_pic_info's; purpose unclear */
+    void                                    *pv_bufs;
+
+    /** Flag to indicate if mb info is sent along with input buffer     */
+    UWORD32                                 u4_mb_info_type;
+
+    /** Buffer containing mb info if mb_info_type is non-zero           */
+    void                                    *pv_mb_info;
+
+    /** Flag to indicate if pic info is sent along with input buffer    */
+    UWORD32                                 u4_pic_info_type;
+
+    /** Buffer containing pic info (presumably if pic_info_type != 0)   */
+    void                                    *pv_pic_info;
+
+    /** Lower 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if this is the last input in the stream        */
+    UWORD32                                 u4_is_last;
+
+    /** Descriptor for output bit-stream buffer                         */
+    iv_bits_buf_t                           s_out_buf;
+
+    /** Descriptor for recon buffer                                     */
+    iv_raw_buf_t                            s_recon_buf;
+
+}ive_video_encode_ip_t;
+
+/** Output structure for the (deprecated) video encode call             */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+
+    /** Output present                                                  */
+    WORD32                                  output_present;
+
+    /** Dump recon                                                      */
+    WORD32                                  dump_recon;
+
+    /** Encoded frame type                                              */
+    UWORD32                                 u4_encoded_frame_type;
+
+    /** Descriptor for input raw buffer freed from codec                */
+    iv_raw_buf_t                            s_inp_buf;
+
+    /** Descriptor for output bit-stream buffer                         */
+    iv_bits_buf_t                           s_out_buf;
+
+    /** Descriptor for recon buffer                                     */
+    iv_raw_buf_t                            s_recon_buf;
+
+}ive_video_encode_op_t;
+
+/*****************************************************************************/
+/*   Queue Input raw buffer - Send the YUV buffer to be encoded              */
+/*****************************************************************************/
+/** Input structure : Queue input buffer to the encoder                 */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Command : IVE_CMD_QUEUE_INPUT                                   */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Descriptor for input raw buffer                                 */
+    iv_raw_buf_t                            s_inp_buf;
+
+    /** Flag to indicate if mb info is sent along with input buffer     */
+    UWORD32                                 u4_mb_info_type;
+
+    /** Size of the mb info structure                                   */
+    UWORD32                                 u4_mb_info_size;
+
+    /** Buffer containing mb info if mb_info_type is non-zero           */
+    void                                    *pv_mb_info;
+
+    /** Flag to indicate if pic info is sent along with input buffer    */
+    UWORD32                                 u4_pic_info_type;
+
+    /** Buffer containing pic info (presumably if pic_info_type != 0)   */
+    void                                    *pv_pic_info;
+
+    /** Lower 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_high;
+
+
+    /** Flag to enable/disable blocking the current API call            */
+    UWORD32                                 u4_is_blocking;
+
+    /** Flag to indicate if this is the last input in the stream        */
+    UWORD32                                 u4_is_last;
+
+}ive_queue_inp_ip_t;
+
+/** Output structure : Queue input buffer to the encoder                */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+}ive_queue_inp_op_t;
+
+/*****************************************************************************/
+/*   Dequeue Input raw buffer - Get free YUV buffer from the encoder         */
+/*****************************************************************************/
+/** Input structure : Dequeue input buffer from the encoder             */
+
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Command : IVE_CMD_DEQUEUE_INPUT                                 */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Flag to enable/disable blocking of the current API call         */
+    UWORD32                                 u4_is_blocking;
+
+}ive_dequeue_inp_ip_t;
+
+/** Output structure : Dequeue input buffer from the encoder            */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+
+    /** Buffer descriptor of the buffer returned from encoder           */
+    iv_raw_buf_t                            s_inp_buf;
+
+    /** Flag to indicate if mb info is sent along with input buffer     */
+    UWORD32                                 u4_mb_info_type;
+
+    /** Size of the mb info structure                                   */
+    UWORD32                                 u4_mb_info_size;
+
+    /** Buffer containing mb info if mb_info_type is non-zero           */
+    void                                    *pv_mb_info;
+
+    /** Flag to indicate if pic info is sent along with input buffer    */
+    UWORD32                                 u4_pic_info_type;
+
+    /** Buffer containing pic info (presumably if pic_info_type != 0)   */
+    void                                    *pv_pic_info;
+
+    /** Lower 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of input time stamp                               */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if this is the last input in the stream        */
+    UWORD32                                 u4_is_last;
+
+
+}ive_dequeue_inp_op_t;
+
+/*****************************************************************************/
+/*   Queue Output bitstream buffer - Send the bitstream buffer to be filled  */
+/*****************************************************************************/
+/** Input structure : Queue output buffer to the encoder                */
+
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Command : IVE_CMD_QUEUE_OUTPUT                                  */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Descriptor for output bit-stream buffer                         */
+    iv_bits_buf_t                           s_out_buf;
+
+    /** Flag to enable/disable blocking of the current API call         */
+    UWORD32                                 u4_is_blocking;
+
+    /** Flag to indicate if this is the last output in the stream       */
+    UWORD32                                 u4_is_last;
+
+}ive_queue_out_ip_t;
+
+/** Output structure : Queue output buffer to the encoder               */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+
+}ive_queue_out_op_t;
+
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the bitstream buffer filled         */
+/*****************************************************************************/
+/** Input structure : Dequeue output buffer from the encoder            */
+
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Command : IVE_CMD_DEQUEUE_OUTPUT                                */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Flag to enable/disable blocking of the current API call         */
+    UWORD32                                 u4_is_blocking;
+}ive_dequeue_out_ip_t;
+
+/** Output structure : Dequeue output buffer from the encoder           */
+typedef struct
+{
+    /** Size of the structure                                           */
+    UWORD32                                 u4_size;
+
+    /** Returned error code                                             */
+    UWORD32                                 u4_error_code;
+
+    /** Descriptor for output bit-stream buffer                         */
+    iv_bits_buf_t                           s_out_buf;
+
+    /** Lower 32 bits of the timestamp corresponding to this buffer     */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp corresponding to this buffer     */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if this is the last output in the stream       */
+    UWORD32                                 u4_is_last;
+
+}ive_dequeue_out_op_t;
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder                  */
+/*****************************************************************************/
+/** Input structure : Get recon data from the encoder                   */
+
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Command : IVE_CMD_GET_RECON                                     */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Flag to enable/disable blocking the current API call            */
+    UWORD32                                 u4_is_blocking;
+
+    /** Descriptor for recon buffer                                     */
+    iv_raw_buf_t                            s_recon_buf;
+
+    /** Flag to indicate if this is the last recon in the stream       */
+    UWORD32                                 u4_is_last;
+
+}ive_get_recon_ip_t;
+
+/** Output structure : Get recon data from the encoder                  */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                 u4_error_code;
+
+    /** Lower 32 bits of the timestamp corresponding to this buffer     */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp corresponding to this buffer     */
+    UWORD32                                 u4_timestamp_high;
+
+    /** Flag to indicate if this is the last recon in the stream       */
+    UWORD32                                 u4_is_last;
+
+}ive_get_recon_op_t;
+
+/*****************************************************************************/
+/*   Video control  Flush                                                    */
+/*****************************************************************************/
+
+/** Input structure : Flush all the buffers from the encoder            */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_FLUSH                            */
+    IVE_CONTROL_API_COMMAND_TYPE_T          e_sub_cmd;
+}ive_ctl_flush_ip_t;
+
+/** Output structure : Flush all the buffers from the encoder           */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                 u4_error_code;
+}ive_ctl_flush_op_t;
+
+/*****************************************************************************/
+/*   Video control reset                                                     */
+/*****************************************************************************/
+/** Input structure : Reset the encoder                                 */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                  e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_RESET                            */
+    IVE_CONTROL_API_COMMAND_TYPE_T          e_sub_cmd;
+}ive_ctl_reset_ip_t;
+
+/** Output structure : Reset the encoder                                */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                 u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                 u4_error_code;
+}ive_ctl_reset_op_t;
+
+/*****************************************************************************/
+/*   Video control:Get Buf Info                                              */
+/*****************************************************************************/
+
+/** Input structure : Get encoder buffer requirements                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_GETBUFINFO                       */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** maximum width for which codec should request memory requirements    */
+    UWORD32                                     u4_max_wd;
+
+    /** maximum height for which codec should request memory requirements   */
+    UWORD32                                     u4_max_ht;
+
+    /** Input color format                                                  */
+    IV_COLOR_FORMAT_T                           e_inp_color_fmt;
+
+}ive_ctl_getbufinfo_ip_t;
+
+/** Output structure : Get encoder buffer requirements                  */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32 u4_size;
+
+    /** Return error code                                               */
+    UWORD32 u4_error_code;
+
+    /** Minimum number of input buffers required for codec              */
+    UWORD32 u4_min_inp_bufs;
+
+    /** Minimum number of output buffers required for codec             */
+    UWORD32 u4_min_out_bufs;
+
+    /** Number of components in input buffers required for codec        */
+    UWORD32 u4_inp_comp_cnt;
+
+    /** Number of components in output buffers required for codec       */
+    UWORD32 u4_out_comp_cnt;
+
+    /** Minimum sizes of each component in input buffer required        */
+    UWORD32 au4_min_in_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+    /** Minimum sizes of each component in output buffer required       */
+    UWORD32 au4_min_out_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+}ive_ctl_getbufinfo_op_t;
+
+
+
+
+/*****************************************************************************/
+/*   Video control:Get Version Info                                          */
+/*****************************************************************************/
+
+/** Input structure : Get encoder version information                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_GETVERSION                       */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Buffer where version info will be returned                      */
+    UWORD8                                      *pu1_version;
+
+    /** Size of the buffer allocated for version info                   */
+    UWORD32                                     u4_version_bufsize;
+}ive_ctl_getversioninfo_ip_t;
+
+/** Output structure : Get encoder version information                  */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_getversioninfo_op_t;
+
+
+/*****************************************************************************/
+/*   Video control:set  default params                                       */
+/*****************************************************************************/
+/** Input structure : Set default encoder parameters                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SETDEFAULT                       */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_setdefault_ip_t;
+
+/** Output structure : Set default encoder parameters                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Frame dimensions                                     */
+/*****************************************************************************/
+
+/** Input structure : Set frame dimensions                              */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_DIMENSIONS                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Input width                                                     */
+    UWORD32                                     u4_wd;
+
+    /** Input height                                                    */
+    UWORD32                                     u4_ht;
+
+    /** Input stride                                                    */
+    UWORD32                                     u4_strd;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_dimensions_ip_t;
+
+/** Output structure : Set frame dimensions                             */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_dimensions_op_t;
+
+
+/*****************************************************************************/
+/*   Video control  Set Frame rates                                          */
+/*****************************************************************************/
+
+/** Input structure : Set frame rate                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_FRAMERATE                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Source frame rate                                               */
+    UWORD32                                     u4_src_frame_rate;
+
+    /** Target frame rate                                               */
+    UWORD32                                     u4_tgt_frame_rate;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_frame_rate_ip_t;
+
+/** Output structure : Set frame rate                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_frame_rate_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Bitrate                                              */
+/*****************************************************************************/
+
+/** Input structure : Set bitrate                                       */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_BITRATE                      */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Target bitrate in kilobits per second                           */
+    UWORD32                                     u4_target_bitrate;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_bitrate_ip_t;
+
+/** Output structure : Set bitrate                                      */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_bitrate_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Frame type                                           */
+/*****************************************************************************/
+
+/** Input structure : Set frame type                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_FRAMETYPE                    */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Force current frame type                                        */
+    IV_PICTURE_CODING_TYPE_T                    e_frame_type;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_frame_type_ip_t;
+
+/** Output structure : Set frame type                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Encode mode                                          */
+/*****************************************************************************/
+
+/** Input structure : Set encode mode                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_ENC_MODE                    */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Encoder mode                                                    */
+    IVE_ENC_MODE_T                              e_enc_mode;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_enc_mode_ip_t;
+
+/** Output structure : Set encode mode                                  */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+
+}ive_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set QP                                                   */
+/*****************************************************************************/
+
+/** Input structure : Set QP                                            */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_QP                           */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Initial QP for I pictures                                       */
+    UWORD32                                     u4_i_qp;
+
+    /** Initial QP for P pictures                                       */
+    UWORD32                                     u4_p_qp;
+
+    /** Initial QP for B pictures                                       */
+    UWORD32                                     u4_b_qp;
+
+    /** Minimum QP for I pictures                                       */
+    UWORD32                                     u4_i_qp_min;
+
+    /** Maximum QP for I pictures                                       */
+    UWORD32                                     u4_i_qp_max;
+
+    /** Minimum QP for P pictures                                       */
+    UWORD32                                     u4_p_qp_min;
+
+    /** Maximum QP for P pictures                                       */
+    UWORD32                                     u4_p_qp_max;
+
+    /** Minimum QP for B pictures                                       */
+    UWORD32                                     u4_b_qp_min;
+
+    /** Maximum QP for B pictures                                       */
+    UWORD32                                     u4_b_qp_max;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+
+}ive_ctl_set_qp_ip_t;
+
+/** Output structure : Set QP                                           */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set AIR params                                           */
+/*****************************************************************************/
+
+/** Input structure : Set AIR params                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_AIR_PARAMS                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Adaptive intra refresh mode                                     */
+    IVE_AIR_MODE_T                              e_air_mode;
+
+    /** Adaptive intra refresh period in frames                         */
+    UWORD32                                     u4_air_refresh_period;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+
+}ive_ctl_set_air_params_ip_t;
+
+/** Output structure : Set AIR params                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set VBV params                                           */
+/*****************************************************************************/
+
+/** Input structure : Set VBV params                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_VBV_PARAMS                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** VBV buffer delay                                                */
+    UWORD32                                     u4_vbv_buffer_delay;
+
+    /** VBV buffer size                                                 */
+    UWORD32                                     u4_vbv_buf_size;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+
+}ive_ctl_set_vbv_params_ip_t;
+
+/** Output structure : Set VBV params                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_vbv_params_op_t;
+
+
+/*****************************************************************************/
+/*   Video control  Set Processor Details                                    */
+/*****************************************************************************/
+
+/** Input structure : Set processor details                             */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_NUM_CORES                    */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Total number of cores to be used                                */
+    UWORD32                                     u4_num_cores;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_num_cores_ip_t;
+
+/** Output structure : Set processor details                            */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Intra Prediction estimation params                   */
+/*****************************************************************************/
+
+/** Input structure : Set IPE params                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_IPE_PARAMS                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Flag to enable/disable intra 4x4 analysis                       */
+    UWORD32                                     u4_enable_intra_4x4;
+
+    /** Flag to enable/disable pre-enc stage of Intra Pred estimation   */
+    UWORD32                                     u4_pre_enc_ipe;
+
+    /** Speed preset, 0 (slowest)..100 (fastest); TODO confirm vs type  */
+    IVE_SPEED_CONFIG                            u4_enc_speed_preset;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_high;
+
+}ive_ctl_set_ipe_params_ip_t;
+
+/** Output structure : Set IPE Params                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Motion estimation params                             */
+/*****************************************************************************/
+
+/** Input structure : Set ME Params                                     */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_ME_PARAMS                    */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Flag to enable/disable pre-enc stage of Motion estimation       */
+    UWORD32                                     u4_pre_enc_me;
+
+    /** Speed preset - Value between 0 (slowest) and 100 (fastest)      */
+    UWORD32                                     u4_me_speed_preset;
+
+    /** Flag to enable/disable half pel motion estimation               */
+    UWORD32                                     u4_enable_hpel;
+
+    /** Flag to enable/disable quarter pel motion estimation            */
+    UWORD32                                     u4_enable_qpel;
+
+    /** Flag to enable/disable fast SAD approximation                   */
+    UWORD32                                     u4_enable_fast_sad;
+
+    /** Flag to enable/disable alternate reference frames               */
+    UWORD32                                     u4_enable_alt_ref;
+
+    /** Maximum search range in X direction for farthest reference      */
+    UWORD32                                     u4_srch_rng_x;
+
+    /** Maximum search range in Y direction for farthest reference      */
+    UWORD32                                     u4_srch_rng_y;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                     u4_timestamp_high;
+
+}ive_ctl_set_me_params_ip_t;
+
+/** Output structure : Set ME Params                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Return error code                                               */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set GOP params                                           */
+/*****************************************************************************/
+
+/** Input structure : Set GOP Params                                    */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_GOP_PARAMS                   */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** I frame interval                                                */
+    UWORD32                                     u4_i_frm_interval;
+
+    /** IDR frame interval                                              */
+    UWORD32                                     u4_idr_frm_interval;
+
+    /** consecutive B frames                                            */
+    UWORD32                                     u4_num_b_frames;
+
+    /** Lower 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the timestamp of the input buffer
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_gop_params_ip_t;
+
+/** Output structure : Set GOP params                                   */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Error code reported back for the Set GOP Params command         */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Deblock params                                       */
+/*****************************************************************************/
+
+/** Input structure : Set Deblock Params                                */
+typedef struct
+{
+    /** size of the structure                                           */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_DEBLOCK_PARAMS               */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+    UWORD32                                     u4_disable_deblock_level;
+
+    /** Lower 32bits of time stamp corresponding to input buffer,
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32bits of time stamp corresponding to input buffer,
+     * from which this command takes effect                             */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_deblock_params_ip_t;
+
+/** Output structure : Set Deblock Params                               */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Error code reported back for the Set Deblock Params command     */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Profile params                                       */
+/*****************************************************************************/
+
+/** Input structure : Set Profile Params                                */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Command type : IVE_CMD_VIDEO_CTL                                */
+    IVE_API_COMMAND_TYPE_T                      e_cmd;
+
+    /** Sub command type : IVE_CMD_CTL_SET_PROFILE_PARAMS               */
+    IVE_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /** Profile                                                         */
+    IV_PROFILE_T                               e_profile;
+
+    /** Lower 32 bits of the time stamp of the input buffer from
+     * which this command takes effect                                  */
+    UWORD32                                 u4_timestamp_low;
+
+    /** Upper 32 bits of the time stamp of the input buffer from
+     * which this command takes effect                                  */
+    UWORD32                                 u4_timestamp_high;
+
+}ive_ctl_set_profile_params_ip_t;
+
+/** Output structure : Set Profile Params                               */
+typedef struct
+{
+    /** Size of this structure                                          */
+    UWORD32                                     u4_size;
+
+    /** Error code reported back for the Set Profile Params command     */
+    UWORD32                                     u4_error_code;
+}ive_ctl_set_profile_params_op_t;
+
+
+#endif /* _IVE2_H_ */
+
diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c
new file mode 100755
index 0000000..58ec4d0
--- /dev/null
+++ b/encoder/mips/ih264e_function_selector.c
@@ -0,0 +1,110 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in h264
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the function pointers of the codec context
+*
+*
+* @par Description: This variant installs only the generic function pointers,
+* by forwarding the codec context to ih264e_init_function_ptr_generic()
+*
+* @param[in] pv_codec
+*  Codec context pointer (codec_t *), passed as void *
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    /* Only the generic set is selected by this file. */
+    ih264e_init_function_ptr_generic((codec_t *)pv_codec);
+}
+
+IV_ARCH_T ih264e_default_arch(void)
+{
+    return ARCH_NA;    /* fixed value; nothing is detected at run time */
+}
+
diff --git a/encoder/mips/ih264e_platform_macros.h b/encoder/mips/ih264e_platform_macros.h
new file mode 100755
index 0000000..ed1edd4
--- /dev/null
+++ b/encoder/mips/ih264e_platform_macros.h
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_platform_macros.h
+ *
+ * @brief
+ *  Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+#define DATA_SYNC()
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: DATA_SYNC() expands to nothing in this header. On arm platforms,
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+*       1. All explicit memory accesses before this instruction complete.
+*       2. All Cache, Branch predictor and TLB maintenance operations before
+*       this instruction complete.
+*
+* @param[in] void
+*
+* @returns  void
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/mips/ime_platform_macros.h b/encoder/mips/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/mips/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_platform_macros.h
+*
+* @brief
+*  Platform specific Macro definitions used in the codec
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+
+/* sad += sum of ABS(src[i] - est[i]) for i = 0..3; src and est are each expanded 4 times */
+#define USADA8(src,est,sad) \
+                ((sad) += ABS((src)[0] - (est)[0]) + \
+                          ABS((src)[1] - (est)[1]) + \
+                          ABS((src)[2] - (est)[2]) + \
+                          ABS((src)[3] - (est)[3]))
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c
new file mode 100755
index 0000000..429cdab
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector.c
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in h264
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the function pointers of the codec context
+*
+* @par Description: Installs the generic function pointers first and then,
+* depending on ps_codec->s_cfg.e_arch, lets the SSSE3 and SSE4.2 initialisers
+* overwrite the entries they assign
+*
+* @param[in] pv_codec
+*  Codec context pointer (codec_t *), passed as void *
+*
+* @returns  none
+*
+* @remarks Any e_arch other than ARCH_X86_GENERIC / ARCH_X86_SSSE3 gets SSSE3 + SSE4.2
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+    ih264e_init_function_ptr_generic(ps_codec);    /* baseline for every arch */
+    switch(ps_codec->s_cfg.e_arch)
+    {
+        case ARCH_X86_GENERIC:
+            /* nothing more to do: the generic set was installed above */
+            break;
+        case ARCH_X86_SSSE3:
+            ih264e_init_function_ptr_ssse3(ps_codec);
+            break;
+        case ARCH_X86_SSE42:
+        default:
+            ih264e_init_function_ptr_ssse3(ps_codec);
+            ih264e_init_function_ptr_sse42(ps_codec);
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Return the default architecture for this x86 build of the encoder
+*
+* @par Description: This routine always returns ARCH_X86_SSE42; it performs
+* no run-time detection of the capabilities of the CPU
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  ARCH_X86_SSE42
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+    return ARCH_X86_SSE42;
+}
+
+
diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c
new file mode 100755
index 0000000..6fa6308
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_sse42.c
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector_sse42.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr_sse42
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSE4.2 function pointers into the codec context
+*
+* @par Description: Overwrites the forward transform/quantisation, inverse
+* transform and SAD function pointers of the codec context with their _sse42
+* variants, and does the same for the distortion (SAD/SATQD) function
+* pointers held in the ME context of every process context.
+* Pointers that are not assigned here are left untouched
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks This routine only assigns pointers; it does not print anything
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec)
+{
+    WORD32 i;
+    me_ctxt_t *ps_me_ctxt;
+
+    /* Forward transform and quantisation */
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_sse42;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_sse42;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_sse42;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_sse42;
+
+    /* Inverse quantisation and inverse transform */
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_sse42;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;
+
+    /* SAD functions held directly in the codec context */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+
+    /* Distortion functions held in the ME context of each of the
+     * MAX_PROCESS_CTXT process contexts */
+    for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_me_ctxt = &ps_codec->as_process[i].s_me_ctxt;
+
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_sse42;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_sse42;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_sse42;
+    }
+}
diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c
new file mode 100755
index 0000000..7401e53
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_ssse3.c
@@ -0,0 +1,190 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_function_selector_ssse3.c
+*
+* @brief
+*  Contains functions to initialize function pointers of codec context
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ih264e_init_function_ptr_ssse3
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSSE3 function pointers into the codec context
+*
+* @par Description: Overwrites the intra prediction, inverse transform,
+* deblocking, padding, inter prediction, memory, intra mode evaluation and
+* half pel function pointers of the codec context with their _ssse3 variants.
+* Pointers that are not assigned here are left untouched
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks This routine only assigns pointers; it does not print anything
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec)
+{
+    /* Intra prediction leaf level functions, luma 16x16
+     * (all four table entries are assigned) */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3;
+
+    /* Intra prediction leaf level functions, luma 4x4
+     * (entries 0..8 are assigned) */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
+
+    /* Intra prediction leaf level functions, luma 8x8. Entry 1 (presumably the
+     * horizontal mode, as in the 4x4 table) is not assigned here */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
+
+    /* Intra prediction leaf level functions, chroma 8x8. Entry 0 (presumably
+     * the DC mode - TODO confirm) is not assigned here */
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
+
+    /* Inverse quantisation and inverse transform */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_ssse3;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+
+    /* Luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3;
+
+    /* Chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3;
+
+    /* Padding functions */
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3;
+
+    /* Inter prediction leaf level functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_ssse3;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_ssse3;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_ssse3;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3;
+
+    /* Memory handling operations */
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_ssse3;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_ssse3;
+
+    /* Intra mode evaluation - encoder level functions */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_ssse3;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_ssse3;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_ssse3;
+
+    /* Half pel generation functions - encoder level */
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_ssse3;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_ssse3;
+}
diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c
new file mode 100755
index 0000000..42580fa
--- /dev/null
+++ b/encoder/x86/ih264e_half_pel_ssse3.c
@@ -0,0 +1,487 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_half_pel_ssse3.c
+ *
+ * @brief
+ *  Contains the x86 SSSE3 intrinsic function definitions for the 6-tap
+ *  horizontal filter and cascaded 2D filter used in motion estimation in H264 encoder.
+ *
+ * @author
+ *  Ittiam
+ *
+ * @par List of Functions:
+ *  ih264e_sixtapfilter_horz_ssse3
+ *  ih264e_sixtap_filter_2dvh_vert_ssse3
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+/*
+*******************************************************************************
+*
+* @brief
+*  Inter prediction luma filter for horizontal input (filter run for
+*  width = 17 and height = 16)
+*
+* @par Description:
+*  Applies a 6-tap horizontal filter. The output is clipped to 8 bits. See
+*  sec. 8.4.2.2.1, titled "Luma sample interpolation process".
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @returns
+*  None
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz_ssse3(UWORD8 *pu1_src,
+                                    UWORD8 *pu1_dst,
+                                    WORD32 src_strd,
+                                    WORD32 dst_strd)
+{
+    WORD32 ht;
+    WORD32 tmp;                 // scalar accumulator for the 17th output column
+
+    __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+    __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+    __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+    __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+    __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+    __m128i const_val16_8x16b;
+
+    ht = 16;      // 16 rows; each row yields 16 SIMD outputs plus 1 scalar output
+    pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+    coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+    coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+    coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+                                                 //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+    const_val16_8x16b = _mm_set1_epi16(16);      //rounding offset applied before the >> 5
+
+    //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+    //Row0 :                         b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+    //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+    do
+    {
+        src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);                     //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+        src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));               //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1);                      //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1);                      //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+        res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b);    //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+                                                                                 //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+        res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b);    //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+                                                                                 //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+        src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+        src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a3 a4 a5 a6 a7 a8 a9....a15 0  0  0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b3 b4 b5 b6 b7 b8 b9....b15 0  0  0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+        res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b);    //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+                                                                                 //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+        res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b);    //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+                                                                                 //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+        src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2);                          //a4 a5 a6 a7 a8 a9....a15 0  0  0  0
+        src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2);                          //b4 b5 b6 b7 b8 b9....b15 0  0  0  0
+
+        src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2);                  //a5 a6 a7 a8 a9....a15 0  0  0  0  0
+        src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2);                  //b5 b6 b7 b8 b9....b15 0  0  0  0  0
+
+        src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b);     //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+        src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b);     //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+        res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b);    //a4*c4+a5*c5 a5*c4+a6*c5  a6*c4+a7*c5   a7*c4+a8*c5
+                                                                                 //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+        res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b);    //b4*c4+b5*c5 b5*c4+b6*c5  b6*c4+b7*c5   b7*c4+b8*c5
+                                                                                 //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+        res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+        res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+        res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
+        res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
+        res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+        res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+        // 17th output column is computed in scalar code from taps pu1_src[16..21]
+        tmp = ((pu1_src[18] + pu1_src[19]) << 2) - pu1_src[17] - pu1_src[20];
+        tmp = pu1_src[16] + pu1_src[21] + (tmp * 5);  // tmp may be negative: multiply, << on a negative int is undefined
+
+        res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);                    //shifting right by 5 bits.
+        res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
+        tmp = (tmp + 16) >> 5;
+
+        src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);       //pack to 8 bit with unsigned saturation
+        pu1_dst[16] = CLIP_U8(tmp);
+
+        _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b);                      //output columns 0..15
+
+        ht--;
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+    while(ht > 0);
+}
+
+/*
+*******************************************************************************
+*
+* @brief
+*   This function implements a two stage cascaded six tap filter. It
+*    applies the six tap filter in the vertical direction on the
+*    predictor values, followed by applying the same filter in the
+*    horizontal direction on the output of the first stage. The six tap
+*    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+*    interpolation process" (Filter run for width = 17 and height =17)
+*
+* @par Description:
+*    The function interpolates the predictors first in the vertical direction
+*    and then in the horizontal direction to output the (1/2,1/2) samples. The
+*    output of the first stage of the filter is stored in the buffer pointed
+*    to by pi4_pred1 (accessed here as 16 bit values) in 16 bit precision.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+*  UWORD8 pointer to the destination(Vertical filtered output)
+*
+* @param[out] pu1_dst2
+*  UWORD8 pointer to the destination(out put after applying horizontal filter
+*  to the intermediate vertical output)
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride, applied to both pu1_dst1 and pu1_dst2
+*
+* @param[in] pi4_pred1
+*  Pointer to the intermediate buffer that holds the 16 bit vertical filter output
+*
+* @param[in] pred1_strd
+*  integer stride of pi4_pred1 (doubled internally for the 16 bit accesses)
+*
+* @returns
+*  None
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert_ssse3(UWORD8 *pu1_src,
+                                          UWORD8 *pu1_dst1,
+                                          UWORD8 *pu1_dst2,
+                                          WORD32 src_strd,
+                                          WORD32 dst_strd,
+                                          WORD32 *pi4_pred1,
+                                          WORD32 pred1_strd)
+{
+    WORD32 ht;
+    WORD16 *pi2_pred1;
+
+    ht = 17;                          // 17 rows of intermediate (vertical) output
+    pi2_pred1 = (WORD16 *)pi4_pred1;  // the intermediate buffer is accessed as 16 bit values
+    pred1_strd = pred1_strd << 1;     // presumably converts a WORD32 stride to WORD16 units - confirm with caller
+
+    // Vertical 6-tap filter: writes 22 16-bit values per row into pi2_pred1
+    {
+        __m128i src1_r0_16x8b, src1_r1_16x8b, src1_r2_16x8b;
+        __m128i src1_r3_16x8b, src1_r4_16x8b, src1_r5_16x8b;
+        __m128i src2_r0_16x8b, src2_r1_16x8b, src2_r2_16x8b;
+        __m128i src2_r3_16x8b, src2_r4_16x8b, src2_r5_16x8b;
+
+        __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+        __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+        __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+        coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+        coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+        coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+                                                     //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+
+        pu1_src -= 2;
+        pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
+
+        // Loading first five rows to start first row processing.
+        // 22 values loaded in each row (16 bytes at +0 and 8 bytes at +14, overlapping by 2).
+        src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        src2_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+        pu1_src += src_strd;
+
+        do
+        {
+            src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+            src2_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+
+            src_r0r1_16x8b = _mm_unpacklo_epi8(src1_r0_16x8b, src1_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpacklo_epi8(src1_r2_16x8b, src1_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpacklo_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)pi2_pred1, res_t1_8x16b);        // intermediate columns 0..7
+
+            src_r0r1_16x8b = _mm_unpackhi_epi8(src1_r0_16x8b, src1_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpackhi_epi8(src1_r2_16x8b, src1_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpackhi_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)(pi2_pred1 + 8), res_t1_8x16b);  // intermediate columns 8..15
+
+            src_r0r1_16x8b = _mm_unpacklo_epi8(src2_r0_16x8b, src2_r1_16x8b);
+            src_r2r3_16x8b = _mm_unpacklo_epi8(src2_r2_16x8b, src2_r3_16x8b);
+            src_r4r5_16x8b = _mm_unpacklo_epi8(src2_r4_16x8b, src2_r5_16x8b);
+
+            res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+            res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+            res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+            res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+            res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+            _mm_storeu_si128((__m128i *)(pi2_pred1 + 14), res_t1_8x16b); // columns 14..21 (14, 15 rewritten with the same values)
+
+            src1_r0_16x8b = src1_r1_16x8b;
+            src1_r1_16x8b = src1_r2_16x8b;
+            src1_r2_16x8b = src1_r3_16x8b;
+            src1_r3_16x8b = src1_r4_16x8b;
+            src1_r4_16x8b = src1_r5_16x8b;
+
+            src2_r0_16x8b = src2_r1_16x8b;
+            src2_r1_16x8b = src2_r2_16x8b;
+            src2_r2_16x8b = src2_r3_16x8b;
+            src2_r3_16x8b = src2_r4_16x8b;
+            src2_r4_16x8b = src2_r5_16x8b;
+
+            ht--;
+            pu1_src += src_strd;
+            pi2_pred1 += pred1_strd;
+        }
+        while(ht > 0);
+    }
+
+    ht = 17;
+    pi2_pred1 = (WORD16 *)pi4_pred1;  // rewind to the first intermediate row
+
+    // Horizontal 6-tap filter over the 16-bit intermediate rows
+    {
+        WORD32 temp;
+
+        __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+        __m128i src_r4_8x16b, src_r5_8x16b;
+        __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+        __m128i res_vert1_8x16b, res_vert2_8x16b, res_16x8b;
+
+        __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+        __m128i res_c0_8x16b, res_c1_8x16b;
+
+        __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+        __m128i const_val512_4x32b, const_val16_8x16b;
+
+        coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); //c0 c1 c0 c1 c0 c1 c0 c1
+        coeff2_3_8x16b = _mm_set1_epi32(0x00140014); //c2 c3 c2 c3 c2 c3 c2 c3
+        coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); //c4 c5 c4 c5 c4 c5 c4 c5
+                                                     //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+        const_val512_4x32b = _mm_set1_epi32(512);    //rounding offset for the >> 10 of the 2D output
+        const_val16_8x16b = _mm_set1_epi16(16);      //rounding offset for the >> 5 of the vertical output
+
+        do
+        {
+            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1));
+            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 1));
+            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 2));
+            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 3));
+            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 4));
+            src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 5));
+
+            res_vert1_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b); //vertical-only output uses intermediate columns 2..9
+            res_vert1_8x16b = _mm_srai_epi16(res_vert1_8x16b, 5); //shifting right by 5 bits.
+
+            src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);      //2D output columns 0..7 as 16 bit
+
+            src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8));
+            src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 1));
+            src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 2));
+            src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 3));
+            src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 4));
+            src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 5));
+
+            res_vert2_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b); //vertical-only output uses intermediate columns 10..17
+            res_vert2_8x16b = _mm_srai_epi16(res_vert2_8x16b, 5); //shifting right by 5 bits.
+
+            src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+            src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+            src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+            res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+            res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+            res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+            res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+            res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+            res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+            res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);      //2D output columns 8..15 as 16 bit
+
+            res_16x8b = _mm_packus_epi16(res_vert1_8x16b, res_vert2_8x16b);
+            _mm_storeu_si128((__m128i *)pu1_dst1, res_16x8b);
+            pu1_dst1[16] = CLIP_U8((pi2_pred1[18] + 16) >> 5);               //17th column of the vertical-only output
+
+            res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);
+            _mm_storeu_si128((__m128i *)pu1_dst2, res_16x8b);
+            temp = ((pi2_pred1[18] + pi2_pred1[19]) * 4) - pi2_pred1[17] - pi2_pred1[20]; // intermediates can be negative: multiply, << on a negative int is undefined
+            temp = pi2_pred1[16] + pi2_pred1[21] + (temp * 5);
+            pu1_dst2[16] = CLIP_U8((temp + 512) >> 10);                      //17th column of the 2D output
+
+            ht--;
+            pi2_pred1 += pred1_strd;
+            pu1_dst1 += dst_strd;
+            pu1_dst2 += dst_strd;
+        }
+        while(ht > 0);
+    }
+}
diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
new file mode 100755
index 0000000..657921f
--- /dev/null
+++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
@@ -0,0 +1,1259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ih264e_intra_modes_eval_ssse3.c
+*
+* @brief
+*   This file contains definitions of routines that perform rate distortion
+*  analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  ih264e_evaluate_intra16x16_modes_ssse3
+*  ih264e_evaluate_intra_4x4_modes_ssse3
+*  ih264e_evaluate_intra_chroma_modes_ssse3
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <immintrin.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief
+*  evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+*  prediction.
+*
+* @par Description
+*  This function evaluates first three 16x16 modes and compute corresponding
+*  SAD and returns the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] n_avblty
+*  availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+*  pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+*  says what all modes are valid
+*
+* @return
+*  None
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_ssse3(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_ngbr_pels_i16,
+                                            UWORD8 *pu1_dst,
+                                            UWORD32 src_strd,
+                                            UWORD32 dst_strd,
+                                            WORD32 n_avblty,
+                                            UWORD32 *u4_intra_mode,
+                                            WORD32 *pu4_sadmin,
+                                            UWORD32 u4_valid_intra_modes)
+{
+    UWORD8 *pu1_src_temp;
+
+    WORD32 left, top, horz_flag, vert_flag, dc_flag;
+    WORD32 sad_vert, sad_horz, sad_dc, min_sad;
+
+    WORD32 cnt, dcval;
+    WORD32 src_strd2, src_strd3, src_strd4;
+    WORD32 dst_strd2, dst_strd3, dst_strd4;
+
+    __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b;
+    __m128i val1_16x8b, val2_16x8b, val3_16x8b, val4_16x8b;
+    __m128i sad1_8x16b, sad2_8x16b, sad3_8x16b, sad4_8x16b;
+
+    __m128i sad_8x16b, val_16x8b, zero_vector;
+
+    sad_vert = INT_MAX;
+    sad_horz = INT_MAX;
+    sad_dc = INT_MAX;
+
+    src_strd2 = src_strd << 1;
+    src_strd4 = src_strd << 2;
+    src_strd3 = src_strd + src_strd2;
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd4 = dst_strd << 2;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    left = (n_avblty & LEFT_MB_AVAILABLE_MASK);
+    top = (n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    zero_vector = _mm_setzero_si128();
+
+    horz_flag = left && ((u4_valid_intra_modes & 02) != 0);
+    vert_flag = top && ((u4_valid_intra_modes & 01) != 0);
+    dc_flag = (u4_valid_intra_modes & 04) != 0;
+
+    if(horz_flag)
+    {
+        pu1_src_temp = pu1_src;
+
+        val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[15]);
+        val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[14]);
+        val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[13]);
+        val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[12]);
+
+        src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+        src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+        src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+        src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+        sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+        sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b);
+        sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b);
+        sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b);
+
+        sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+        sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+        cnt = 11;
+        sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+        do
+        {
+            pu1_src_temp += src_strd4;
+
+            val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]);
+            val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]);
+            val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]);
+            val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]);
+
+            src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+            src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+            src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+            src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+            sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+            sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b);
+            sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b);
+            sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b);
+
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+            sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+            cnt -= 4;
+            sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+        }
+        while(cnt >= 0);
+
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+        sad_horz = _mm_extract_epi16(sad_8x16b, 0);
+    }
+
+    if(vert_flag)
+    {
+        pu1_src_temp = pu1_src;
+
+        val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17));
+
+        src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+        src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+        src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+        src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+        sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+        sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+        sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+        sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+        sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+        sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+        cnt = 11;
+        sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+        do
+        {
+            pu1_src_temp += src_strd4;
+
+            src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+            src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+            src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+            src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+            sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+            sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+            sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+            sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+            sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+            cnt -= 4;
+            sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+        }
+        while(cnt >= 0);
+
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+        sad_vert = _mm_extract_epi16(sad_8x16b, 0);
+    }
+
+    dcval = 0;
+
+    if(left)
+    {
+        val_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels_i16);
+        dcval += 8;
+
+        sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+        dcval += _mm_extract_epi16(sad1_8x16b, 0);
+        dcval += _mm_extract_epi16(sad1_8x16b, 4);
+    }
+    if(top)
+    {
+        val_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17));
+        dcval += 8;
+
+        sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+        dcval += _mm_extract_epi16(sad1_8x16b, 0);
+        dcval += _mm_extract_epi16(sad1_8x16b, 4);
+    }
+    dcval = dcval >> (3 + left + top);
+    dcval += ((left == 0) & (top == 0)) << 7;
+
+    if(dc_flag)
+    {
+        pu1_src_temp = pu1_src;
+        val1_16x8b = _mm_set1_epi8(dcval);
+
+        src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+        src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+        src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+        src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+        sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+        sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+        sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+        sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+        sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+        sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+        cnt = 12;
+        sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+        do
+        {
+            pu1_src_temp += src_strd4;
+
+            src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+            src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+            src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+            src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+            sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+            sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+            sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+            sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+            sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+            sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+            cnt -= 4;
+            sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+        }
+        while(cnt > 0);
+
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+        sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+        sad_dc = _mm_extract_epi16(sad_8x16b, 0);
+    }
+
+    // Doing prediction for minimum SAD
+    min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+    if(min_sad < *pu4_sadmin)
+    {
+        *pu4_sadmin = min_sad;
+        if(min_sad == sad_vert)
+        {
+            *u4_intra_mode = VERT_I16x16;
+            val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17));
+            cnt = 15;
+            do
+            {
+                _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val1_16x8b);
+
+                cnt -= 4;
+                pu1_dst += dst_strd4;
+            }
+            while(cnt > 0);
+        }
+        else if(min_sad == sad_horz)
+        {
+            *u4_intra_mode = HORZ_I16x16;
+            cnt = 15;
+            do
+            {
+                val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]);
+                val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]);
+                val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]);
+                val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]);
+
+                _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val2_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val3_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val4_16x8b);
+
+                cnt -= 4;
+                pu1_dst += dst_strd4;
+            }
+            while(cnt >= 0);
+        }
+        else
+        {
+            *u4_intra_mode = DC_I16x16;
+            val1_16x8b = _mm_set1_epi8(dcval);
+            cnt = 15;
+            do
+            {
+                _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b);
+                _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val1_16x8b);
+
+                cnt -= 4;
+                pu1_dst += dst_strd4;
+            }
+            while(cnt > 0);
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate the enabled intra 4x4 modes, pick the cheapest one and write its
+*  prediction.
+*
+* @par Description
+*  For every mode enabled in u4_valid_intra_modes the 4x4 prediction is built
+*  from the neighbouring pels and its SAD against the source block is
+*  measured. The cost of a mode is SAD + u4_lambda when the mode equals
+*  u4_predictd_mode and SAD + 4 * u4_lambda otherwise. The lowest cost, the
+*  mode that produced it and the block predicted with that mode are returned.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels; loaded as one 16 byte vector and used
+*  in the order l3 l2 l1 l0 tl t0 t1 ... t7
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; four rows of four bytes are written
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels (the left and top bits select the
+*  pels that feed the DC predictor)
+*
+* @param[out] u4_intra_mode
+*  Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+*  Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+*  Bit mask of the modes to evaluate: bit 0 VERT, bit 1 HORZ, bit 2 DC,
+*  bit 3 DIAG_DL, bit 4 DIAG_DR, bit 5 VERT_R, bit 6 HORZ_D, bit 7 VERT_L,
+*  bit 8 HORZ_U
+*
+* @param[in] u4_lambda
+*  Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+*  Predicted mode for cost computation
+*
+* @return
+*  none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes_ssse3(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_ngbr_pels,
+                                           UWORD8 *pu1_dst,
+                                           UWORD32 src_strd,
+                                           UWORD32 dst_strd,
+                                           WORD32 u4_n_avblty,
+                                           UWORD32 *u4_intra_mode,
+                                           WORD32 *pu4_sadmin,
+                                           UWORD32 u4_valid_intra_modes,
+                                           UWORD32 u4_lambda,
+                                           UWORD32 u4_predictd_mode)
+{
+    /* Mode reported for each slot of the cost table. The first slot whose   */
+    /* cost equals the minimum is selected, so this order is the tie-break.  */
+    static const UWORD32 au4_slot_mode[9] =
+    {
+        VERT_I4x4,    HORZ_I4x4,    DC_I4x4,
+        DIAG_DL_I4x4, DIAG_DR_I4x4, VERT_R_I4x4,
+        HORZ_D_I4x4,  VERT_L_I4x4,  HORZ_U_I4x4
+    };
+
+    WORD32 cost[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+                              INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+    WORD32 left, top;
+    WORD32 mode_sad, min_cost;
+    WORD32 slot, best_slot;
+    WORD32 lambda4 = u4_lambda << 2;
+    WORD32 dst_strd2, dst_strd3;
+
+    __m128i ngbr_16x8b, src_16x8b, sad_8x16b, zero_vector;
+    __m128i slot_pred_16x8b[9];  /* prediction of each evaluated mode, by slot */
+    __m128i best_pred_16x8b, store_mask_16x8b;
+
+    top  = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    zero_vector = _mm_setzero_si128();
+    ngbr_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels);
+
+    /* Gather the four source rows (4 bytes each) into one 16 byte vector */
+    {
+        UWORD8 *pu1_src_lower = pu1_src + (src_strd << 1);
+        __m128i row0_16x8b, row1_16x8b, row2_16x8b, row3_16x8b;
+        __m128i upper_16x8b, lower_16x8b;
+
+        row0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+        row1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+        row2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src_lower);
+        row3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src_lower + src_strd));
+
+        upper_16x8b = _mm_unpacklo_epi32(row0_16x8b, row1_16x8b);
+        lower_16x8b = _mm_unpacklo_epi32(row2_16x8b, row3_16x8b);
+        src_16x8b = _mm_unpacklo_epi64(upper_16x8b, lower_16x8b);
+    }
+
+    /* VERT: t0..t3 (bytes 5..8 of the neighbours) repeated on every row */
+    if(u4_valid_intra_modes & (1 << 0))
+    {
+        slot_pred_16x8b[0] = _mm_shuffle_epi32(_mm_srli_si128(ngbr_16x8b, 5), 0);
+
+        sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[0]);
+        mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        cost[VERT_I4x4] = mode_sad + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda : lambda4);
+    }
+
+    /* HORZ: row r is filled with l(r); l0 sits in byte 3, l3 in byte 0 */
+    if(u4_valid_intra_modes & (1 << 1))
+    {
+        const __m128i horz_shuf_16x8b = _mm_setr_epi8(3, 3, 3, 3,
+                                                      2, 2, 2, 2,
+                                                      1, 1, 1, 1,
+                                                      0, 0, 0, 0);
+
+        slot_pred_16x8b[1] = _mm_shuffle_epi8(ngbr_16x8b, horz_shuf_16x8b);
+
+        sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[1]);
+        mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        cost[HORZ_I4x4] = mode_sad + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda : lambda4);
+    }
+
+    /* DC: rounded mean of the available edge pels, 128 when none is available */
+    if(u4_valid_intra_modes & (1 << 2))
+    {
+        if((left + top) != 0)
+        {
+            WORD32 num_edges = 0, dcval;
+            __m128i edge_pels_16x8b, edge_sum_8x16b;
+
+            /* Each available edge is shifted in from the top, so the pels */
+            /* to be summed always end up in the upper 8 bytes.            */
+            edge_pels_16x8b = _mm_setzero_si128();
+
+            if(top)
+            {
+                edge_pels_16x8b = _mm_alignr_epi8(_mm_srli_si128(ngbr_16x8b, 5),
+                                                  edge_pels_16x8b, 4);
+                num_edges++;
+            }
+            if(left)
+            {
+                edge_pels_16x8b = _mm_alignr_epi8(ngbr_16x8b, edge_pels_16x8b, 4);
+                num_edges++;
+            }
+
+            edge_sum_8x16b = _mm_sad_epu8(edge_pels_16x8b, zero_vector);
+
+            /* one edge : (sum + 2) >> 2, two edges : (sum + 4) >> 3 */
+            dcval = (num_edges << 1) + _mm_extract_epi16(edge_sum_8x16b, 4);
+            dcval = dcval >> (num_edges + 1);
+
+            slot_pred_16x8b[2] = _mm_set1_epi8(dcval);
+        }
+        else
+        {
+            slot_pred_16x8b[2] = _mm_set1_epi8(128);
+        }
+
+        sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[2]);
+        mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        cost[DC_I4x4] = mode_sad + ((u4_predictd_mode == DC_I4x4) ? u4_lambda : lambda4);
+    }
+
+    min_cost = MIN3(cost[0], cost[1], cost[2]);
+
+    /* Directional modes: evaluated only when some bit above DC is set */
+    if(u4_valid_intra_modes >= 8)
+    {
+        __m128i w11_16x8b;      /* FILT11  : (a + b + 1) >> 1 of adjacent neighbours */
+        __m128i w121_16x8b;     /* FILT121 : (a + 2b + c + 2) >> 2 of neighbours     */
+        __m128i w121_w11_16x8b; /* low 8 bytes of w121 followed by low 8 of w11     */
+        __m128i taps_16x8b, shuffle_16x8b;
+
+        /* Build both filtered neighbour vectors once; every mode below only */
+        /* picks bytes out of them.                                          */
+        {
+            const __m128i round_8x16b = _mm_set1_epi16(2);
+            __m128i lo_8x16b, lo_prev_8x16b, lo_pair_8x16b, lo_pair_prev_8x16b;
+            __m128i hi_8x16b, hi_next_8x16b, hi_pair_8x16b, hi_pair_next_8x16b;
+            __m128i lo_filt_8x16b, hi_filt_8x16b;
+
+            /* Lower half, walking towards l3; l3 is repeated at the border */
+            lo_8x16b = _mm_unpacklo_epi8(ngbr_16x8b, zero_vector);              //l3 l2 l1 l0 tl t0 t1 t2
+            lo_prev_8x16b = _mm_shufflelo_epi16(_mm_slli_si128(lo_8x16b, 2),
+                                                0xe5);                          //l3 l3 l2 l1 l0 tl t0 t1
+            lo_pair_8x16b = _mm_add_epi16(lo_8x16b, lo_prev_8x16b);             //l3+l3 l3+l2 l2+l1 ... t1+t2
+            lo_pair_prev_8x16b = _mm_shufflelo_epi16(_mm_slli_si128(lo_pair_8x16b, 2),
+                                                     0xe5);                     //l3+l3 l3+l3 l3+l2 ... t0+t1
+            lo_filt_8x16b = _mm_add_epi16(lo_pair_8x16b, lo_pair_prev_8x16b);   //4*l3 3*l3+l2 l3+2*l2+l1 ... t0+2*t1+t2
+            lo_filt_8x16b = _mm_add_epi16(lo_filt_8x16b, round_8x16b);
+            lo_filt_8x16b = _mm_srli_epi16(lo_filt_8x16b, 2);
+
+            /* Upper half, walking towards t7; t7 is repeated at the border. */
+            /* The top lane after the unpack is the byte that follows t7 in  */
+            /* the loaded vector; only output bytes 14 and 15 depend on it   */
+            /* and no shuffle below selects them.                            */
+            hi_8x16b = _mm_unpacklo_epi8(_mm_srli_si128(ngbr_16x8b, 6),
+                                         zero_vector);                          //t1 t2 t3 t4 t5 t6 t7 x
+            hi_next_8x16b = _mm_shufflehi_epi16(_mm_srli_si128(hi_8x16b, 2),
+                                                0xd4);                          //t2 t3 t4 t5 t6 t7 t7 0
+            hi_pair_8x16b = _mm_add_epi16(hi_8x16b, hi_next_8x16b);             //t1+t2 t2+t3 ... t6+t7 t7+t7 x
+            hi_pair_next_8x16b = _mm_srli_si128(hi_pair_8x16b, 2);
+            hi_filt_8x16b = _mm_add_epi16(hi_pair_8x16b, hi_pair_next_8x16b);   //t1+2*t2+t3 ... t6+3*t7 ...
+            hi_filt_8x16b = _mm_add_epi16(hi_filt_8x16b, round_8x16b);
+            hi_filt_8x16b = _mm_srli_epi16(hi_filt_8x16b, 2);
+
+            w121_16x8b = _mm_packus_epi16(lo_filt_8x16b, hi_filt_8x16b);
+            w11_16x8b = _mm_avg_epu8(ngbr_16x8b, _mm_srli_si128(ngbr_16x8b, 1));
+        }
+
+        w121_w11_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b);
+
+        if(u4_valid_intra_modes & (1 << 3)) /* DIAG_DL */
+        {
+            shuffle_16x8b = _mm_setr_epi8( 7,  8,  9, 10,
+                                           8,  9, 10, 11,
+                                           9, 10, 11, 12,
+                                          10, 11, 12, 13);
+            slot_pred_16x8b[3] = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[3]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[DIAG_DL_I4x4] = mode_sad + ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda : lambda4);
+        }
+
+        if(u4_valid_intra_modes & (1 << 4)) /* DIAG_DR */
+        {
+            shuffle_16x8b = _mm_setr_epi8(5, 6, 7, 8,
+                                          4, 5, 6, 7,
+                                          3, 4, 5, 6,
+                                          2, 3, 4, 5);
+            slot_pred_16x8b[4] = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[4]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[DIAG_DR_I4x4] = mode_sad + ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda : lambda4);
+        }
+
+        if(u4_valid_intra_modes & (1 << 5)) /* VERT_R */
+        {
+            /* bytes 0..7 : w121[1..8], bytes 8..15 : w11[0..7] */
+            taps_16x8b = _mm_unpacklo_epi64(_mm_srli_si128(w121_16x8b, 1), w11_16x8b);
+            shuffle_16x8b = _mm_setr_epi8(12, 13, 14, 15,
+                                           4,  5,  6,  7,
+                                           3, 12, 13, 14,
+                                           2,  4,  5,  6);
+            slot_pred_16x8b[5] = _mm_shuffle_epi8(taps_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[5]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[VERT_R_I4x4] = mode_sad + ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda : lambda4);
+        }
+
+        if(u4_valid_intra_modes & (1 << 6)) /* HORZ_D */
+        {
+            shuffle_16x8b = _mm_setr_epi8(11, 5,  6, 7,
+                                          10, 4, 11, 5,
+                                           9, 3, 10, 4,
+                                           8, 2,  9, 3);
+            slot_pred_16x8b[6] = _mm_shuffle_epi8(w121_w11_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[6]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[HORZ_D_I4x4] = mode_sad + ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda : lambda4);
+        }
+
+        if(u4_valid_intra_modes & (1 << 7)) /* VERT_L */
+        {
+            /* bytes 0..7 : w121[5..12], bytes 8..15 : w11[5..12] */
+            taps_16x8b = _mm_unpacklo_epi64(_mm_srli_si128(w121_16x8b, 5),
+                                            _mm_srli_si128(w11_16x8b, 5));
+            shuffle_16x8b = _mm_setr_epi8(8,  9, 10, 11,
+                                          2,  3,  4,  5,
+                                          9, 10, 11, 12,
+                                          3,  4,  5,  6);
+            slot_pred_16x8b[7] = _mm_shuffle_epi8(taps_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[7]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[VERT_L_I4x4] = mode_sad + ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda : lambda4);
+        }
+
+        if(u4_valid_intra_modes & (1 << 8)) /* HORZ_U */
+        {
+            shuffle_16x8b = _mm_setr_epi8(10, 3, 9, 2,
+                                           9, 2, 8, 1,
+                                           8, 1, 0, 0,
+                                           0, 0, 0, 0);
+            slot_pred_16x8b[8] = _mm_shuffle_epi8(w121_w11_16x8b, shuffle_16x8b);
+
+            sad_8x16b = _mm_sad_epu8(src_16x8b, slot_pred_16x8b[8]);
+            mode_sad = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+            cost[HORZ_U_I4x4] = mode_sad + ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda : lambda4);
+        }
+
+        /* Fold the six directional costs into the minimum of the first three */
+        {
+            WORD32 min_cost_345 = MIN3(cost[3], cost[4], cost[5]);
+            WORD32 min_cost_678 = MIN3(cost[6], cost[7], cost[8]);
+
+            min_cost = MIN3(min_cost, min_cost_345, min_cost_678);
+        }
+    }
+
+    *pu4_sadmin = min_cost;
+
+    /* First slot that reaches the minimum supplies the mode and the block. */
+    /* Slot 0 stays selected if no slot matches.                            */
+    best_slot = 0;
+    for(slot = 0; slot < 9; slot++)
+    {
+        if(min_cost == cost[slot])
+        {
+            *u4_intra_mode = au4_slot_mode[slot];
+            best_slot = slot;
+            break;
+        }
+    }
+    best_pred_16x8b = slot_pred_16x8b[best_slot];
+
+    /* Only the low four bytes are written per row; the next row is brought */
+    /* down by shifting the prediction right by four bytes.                 */
+    store_mask_16x8b = _mm_srli_si128(_mm_set1_epi8(0xff), 12);
+
+    _mm_maskmoveu_si128(best_pred_16x8b, store_mask_16x8b, (char*)pu1_dst);
+
+    best_pred_16x8b = _mm_srli_si128(best_pred_16x8b, 4);
+    _mm_maskmoveu_si128(best_pred_16x8b, store_mask_16x8b, (char*)(pu1_dst + dst_strd));
+
+    best_pred_16x8b = _mm_srli_si128(best_pred_16x8b, 4);
+    _mm_maskmoveu_si128(best_pred_16x8b, store_mask_16x8b, (char*)(pu1_dst + dst_strd2));
+
+    best_pred_16x8b = _mm_srli_si128(best_pred_16x8b, 4);
+    _mm_maskmoveu_si128(best_pred_16x8b, store_mask_16x8b, (char*)(pu1_dst + dst_strd3));
+
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+*  This function evaluates the first three intra chroma modes, computes the
+*  corresponding SADs and returns the buffer predicted with the best mode.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+*  UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] u4_n_avblty
+*  availability of neighbouring pixels
+*
+* @param[in] u4_intra_mode
+*  pointer to the variable in which best mode is returned
+*
+* @param[in] pu4_sadmin
+*  pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+*  says what all modes are valid
+*
+* @return
+*  none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra_chroma_modes_ssse3(UWORD8 *pu1_src,
+                                              UWORD8 *pu1_ngbr_pels,
+                                              UWORD8 *pu1_dst,
+                                              UWORD32 src_strd,
+                                              UWORD32 dst_strd,
+                                              WORD32 u4_n_avblty,
+                                              UWORD32 *u4_intra_mode,
+                                              WORD32 *pu4_sadmin,
+                                              UWORD32 u4_valid_intra_modes)
+{
+    WORD32 left, top;
+    WORD32 sad_vert = INT_MAX, sad_horz = INT_MAX, sad_dc = INT_MAX, min_sad;
+
+    __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b;
+    __m128i src5_16x8b, src6_16x8b, src7_16x8b, src8_16x8b;
+
+    __m128i top_16x8b, left_16x8b;
+    __m128i pred1_16x8b, pred2_16x8b;
+    __m128i tmp1_8x16b, tmp2_8x16b, sad_8x16b;
+
+    left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+    top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+    //Loading source
+    {
+        src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src6_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src7_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+        pu1_src += src_strd;
+        src8_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+    }
+
+    if(left)
+    {
+        left_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels);
+
+        if(u4_valid_intra_modes & 02) //If HORZ mode is valid
+        {
+            __m128i left_tmp_16x8b, left_sh_16x8b;
+            __m128i const_14_15_16x8b;
+
+            const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+            left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+
+            pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);    //row 1
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred2_16x8b);
+
+            left_tmp_16x8b = _mm_slli_si128(left_16x8b, 4);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 3
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);  //row 4
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred2_16x8b);
+
+            left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 5
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);  //row 6
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+
+            left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 7
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b);  //row 8
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_horz = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+    }
+
+    if(top)
+    {
+        UWORD8 *pu1_top;
+
+        pu1_top = pu1_ngbr_pels + 2 * BLK8x8SIZE + 2;
+        top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+
+        if(u4_valid_intra_modes & 04) //If VERT mode is valid
+        {
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, top_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, top_16x8b);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, top_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, top_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, top_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, top_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, top_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, top_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_vert = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+    }
+
+    if(u4_valid_intra_modes & 01) //If DC mode is valid
+    {
+        if(left && top)
+        {
+            WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+            WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+            WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+            __m128i val_sh_16x8b;
+            __m128i intrlv_mask_8x16b, zero_vector;
+
+            intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+            zero_vector = _mm_setzero_si128();
+
+            val_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+            tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+            tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+            tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+            tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+            left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+            left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+            left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+            left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+            val_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+            tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+            tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+            tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+            tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+            top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+            top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+            top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+            top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+            // First four rows
+            dc_1u = (left_up_u + top_left_u + 4) >> 3;
+            dc_1v = (left_up_v + top_left_v + 4) >> 3;
+            dc_2u = (top_right_u + 2) >> 2;
+            dc_2v = (top_right_v + 2) >> 2;
+
+            pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+                                        dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            // Second four rows
+            dc_1u = (left_down_u + 2) >> 2;
+            dc_1v = (left_down_v + 2) >> 2;
+            dc_2u = (left_down_u + top_right_u + 4) >> 3;
+            dc_2v = (left_down_v + top_right_v + 4) >> 3;
+
+            pred2_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+                                        dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+        else if(left)
+        {
+            WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+            WORD32 dc_u, dc_v;
+
+            __m128i left_sh_16x8b;
+            __m128i intrlv_mask_8x16b, zero_vector;
+
+            intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+            zero_vector = _mm_setzero_si128();
+
+            left_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+            tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+            tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_sh_16x8b);
+            tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+            tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+            left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+            left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+            left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+            left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+            // First four rows
+            dc_u = (left_up_u + 2) >> 2;
+            dc_v = (left_up_v + 2) >> 2;
+
+            pred1_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            // Second four rows
+            dc_u = (left_down_u + 2) >> 2;
+            dc_v = (left_down_v + 2) >> 2;
+
+            pred2_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+        else if(top)
+        {
+            WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+            WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+            __m128i top_sh_16x8b;
+            __m128i intrlv_mask_8x16b, zero_vector;
+
+            intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+            zero_vector = _mm_setzero_si128();
+
+            top_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+            tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+            tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_sh_16x8b);
+            tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+            tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+            top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+            top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+            top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+            top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+            dc_1u = (top_left_u + 2) >> 2;
+            dc_1v = (top_left_v + 2) >> 2;
+            dc_2u = (top_right_u + 2) >> 2;
+            dc_2v = (top_right_v + 2) >> 2;
+
+            pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+                                       dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+        else
+        {
+            pred1_16x8b = _mm_set1_epi8(128);
+
+            tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+            tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+            sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+            sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+        }
+    }
+
+    min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+
+    /* Finding minimum SAD and doing corresponding prediction*/
+    if(min_sad < *pu4_sadmin)
+    {
+        *pu4_sadmin = min_sad;
+
+        if(min_sad == sad_dc)
+        {
+            *u4_intra_mode = DC_CH_I8x8;
+
+            if(!left)
+                pred2_16x8b = pred1_16x8b;
+
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+        }
+        else if(min_sad == sad_horz)
+        {
+            __m128i left_sh_16x8b, const_14_15_16x8b;
+
+            *u4_intra_mode = HORZ_CH_I8x8;
+
+            const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+
+            left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+            pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);    //row 1
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+            left_16x8b = _mm_slli_si128(left_16x8b, 4);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);    //row 3
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4
+
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+            left_16x8b = _mm_slli_si128(left_16x8b, 4);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);    //row 5
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6
+
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+            left_16x8b = _mm_slli_si128(left_16x8b, 4);
+            left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+            pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b);    //row 7
+            pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8
+
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+        }
+        else
+        {
+            *u4_intra_mode = VERT_CH_I8x8;
+
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+            pu1_dst += dst_strd;
+            _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+        }
+    }
+}
diff --git a/encoder/x86/ih264e_platform_macros.h b/encoder/x86/ih264e_platform_macros.h
new file mode 100755
index 0000000..b4dfadd
--- /dev/null
+++ b/encoder/x86/ih264e_platform_macros.h
@@ -0,0 +1,154 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ *  ih264e_platform_macros.h
+ *
+ * @brief
+ *  Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ *  ittiam
+ *
+ * @remarks
+ *  none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations                                              */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec);
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+*  Codec context pointer
+*
+* @returns  none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns  IV_ARCH_T
+*  architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: These functions do nothing on x86 side. But on arm platforms,
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+*       1. All explicit memory accesses before this instruction complete.
+*       2. All Cache, Branch predictor and TLB maintenance operations before
+*       this instruction complete.
+*
+* @param[in] void
+*
+* @returns  void
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c
new file mode 100755
index 0000000..0876788
--- /dev/null
+++ b/encoder/x86/ime_distortion_metrics_sse42.c
@@ -0,0 +1,1940 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics_sse42.c
+*
+* @brief
+*  This file contains definitions of routines that compute distortion
+*  between two macro/sub blocks of identical dimensions
+*
+* @author
+*  Ittiam
+*
+* @par List of Functions:
+*  - ime_compute_sad_16x16_sse42()
+*  - ime_compute_sad_16x16_fast_sse42()
+*  - ime_compute_sad_16x16_ea8_sse42()
+*  - ime_compute_sad_16x8_sse42()
+*  - ime_calculate_sad4_prog_sse42()
+*  - ime_sub_pel_compute_sad_16x16_sse42()
+*  - ime_compute_satqd_16x16_lumainter_sse42()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+#include <immintrin.h>
+
+/*****************************************************************************/
+/* Function Definitions                                                      */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par   Description
+*   This function computes the SAD between two 16x16 blocks. All sixteen rows
+*   are always evaluated; this variant has no early exit, so i4_max_sad is
+*   never read. It is kept so that the signature stays the same as that of the
+*   other SAD routines in this file.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion (ignored by this routine)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*  Every row is read with an unaligned 16 byte load.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+                           UWORD8 *pu1_est,
+                           WORD32 src_strd,
+                           WORD32 est_strd,
+                           WORD32 i4_max_sad,
+                           WORD32 *pi4_mb_distortion)
+{
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i est_r0, est_r1, est_r2, est_r3;
+    __m128i res_r0, res_r1, res_r2, res_r3;
+    __m128i sad_val;
+    WORD32 i;
+
+    /* The complete block is always processed, so the limit is not consulted */
+    (void)i4_max_sad;
+
+    sad_val = _mm_setzero_si128();
+
+    /* Four passes of four rows each cover rows 0-15 */
+    for(i = 0; i < 4; i++)
+    {
+        /* Row pointers are derived from the base so nothing is advanced */
+        /* beyond the last row that is actually read                     */
+        UWORD8 *pu1_src_row = pu1_src + 4 * i * src_strd;
+        UWORD8 *pu1_est_row = pu1_est + 4 * i * est_strd;
+
+        src_r0 = _mm_loadu_si128((__m128i *) (pu1_src_row));
+        src_r1 = _mm_loadu_si128((__m128i *) (pu1_src_row + src_strd));
+        src_r2 = _mm_loadu_si128((__m128i *) (pu1_src_row + 2 * src_strd));
+        src_r3 = _mm_loadu_si128((__m128i *) (pu1_src_row + 3 * src_strd));
+
+        est_r0 = _mm_loadu_si128((__m128i *) (pu1_est_row));
+        est_r1 = _mm_loadu_si128((__m128i *) (pu1_est_row + est_strd));
+        est_r2 = _mm_loadu_si128((__m128i *) (pu1_est_row + 2 * est_strd));
+        est_r3 = _mm_loadu_si128((__m128i *) (pu1_est_row + 3 * est_strd));
+
+        res_r0 = _mm_sad_epu8(src_r0, est_r0);
+        res_r1 = _mm_sad_epu8(src_r1, est_r1);
+        res_r2 = _mm_sad_epu8(src_r2, est_r2);
+        res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+        sad_val = _mm_add_epi64(sad_val, res_r0);
+        sad_val = _mm_add_epi64(sad_val, res_r1);
+        sad_val = _mm_add_epi64(sad_val, res_r2);
+        sad_val = _mm_add_epi64(sad_val, res_r3);
+    }
+
+    /* _mm_sad_epu8 yields one partial sum per 64 bit half; add the two */
+    *pi4_mb_distortion = _mm_extract_epi32(sad_val, 0)
+                    + _mm_extract_epi32(sad_val, 2);
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+*  @brief computes distortion (SAD) between 2 16x8  blocks
+*
+*
+*  @par   Description
+*   This function computes the SAD between two 16x8 blocks. All eight rows
+*   are always evaluated; this variant has no early exit, so i4_max_sad is
+*   never read. It is kept so that the signature stays the same as that of the
+*   other SAD routines in this file.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the block the source is compared against
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion (ignored by this routine)
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*  Every row is read with an unaligned 16 byte load.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src,
+                    UWORD8 *pu1_est,
+                    WORD32 src_strd,
+                    WORD32 est_strd,
+                    WORD32 i4_max_sad,
+                    WORD32 *pi4_mb_distortion)
+{
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i est_r0, est_r1, est_r2, est_r3;
+    __m128i res_r0, res_r1, res_r2, res_r3;
+    __m128i sad_val;
+    WORD32 i;
+
+    /* The complete block is always processed, so the limit is not consulted */
+    (void)i4_max_sad;
+
+    sad_val = _mm_setzero_si128();
+
+    /* Two passes of four rows each cover rows 0-7 */
+    for(i = 0; i < 2; i++)
+    {
+        /* Row pointers are derived from the base so nothing is advanced */
+        /* beyond the last row that is actually read                     */
+        UWORD8 *pu1_src_row = pu1_src + 4 * i * src_strd;
+        UWORD8 *pu1_est_row = pu1_est + 4 * i * est_strd;
+
+        src_r0 = _mm_loadu_si128((__m128i *) (pu1_src_row));
+        src_r1 = _mm_loadu_si128((__m128i *) (pu1_src_row + src_strd));
+        src_r2 = _mm_loadu_si128((__m128i *) (pu1_src_row + 2 * src_strd));
+        src_r3 = _mm_loadu_si128((__m128i *) (pu1_src_row + 3 * src_strd));
+
+        est_r0 = _mm_loadu_si128((__m128i *) (pu1_est_row));
+        est_r1 = _mm_loadu_si128((__m128i *) (pu1_est_row + est_strd));
+        est_r2 = _mm_loadu_si128((__m128i *) (pu1_est_row + 2 * est_strd));
+        est_r3 = _mm_loadu_si128((__m128i *) (pu1_est_row + 3 * est_strd));
+
+        res_r0 = _mm_sad_epu8(src_r0, est_r0);
+        res_r1 = _mm_sad_epu8(src_r1, est_r1);
+        res_r2 = _mm_sad_epu8(src_r2, est_r2);
+        res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+        sad_val = _mm_add_epi64(sad_val, res_r0);
+        sad_val = _mm_add_epi64(sad_val, res_r1);
+        sad_val = _mm_add_epi64(sad_val, res_r2);
+        sad_val = _mm_add_epi64(sad_val, res_r3);
+    }
+
+    /* _mm_sad_epu8 yields one partial sum per 64 bit half; add the two */
+    *pi4_mb_distortion = _mm_extract_epi32(sad_val, 0)
+                    + _mm_extract_epi32(sad_val, 2);
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par   Description
+*   This function computes SAD between 2 16x16 blocks. The eight even rows
+*   are evaluated first and the function exits early if their SAD already
+*   exceeds i4_max_sad. To compute the distortion of the entire block set
+*   i4_max_sad to USHRT_MAX.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimate
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8_sse42(UWORD8 *pu1_src,
+                               UWORD8 *pu1_est,
+                               WORD32 src_strd,
+                               WORD32 est_strd,
+                               WORD32 i4_max_sad,
+                               WORD32 *pi4_mb_distortion)
+{
+    /* Running SAD; each 64 bit half holds one partial sum */
+    __m128i acc = _mm_setzero_si128();
+    __m128i src_row, est_row;
+    WORD32 even_rows_sad;
+    WORD32 row;
+
+    /* First pass: the eight even rows 0, 2, ..., 14 */
+    for(row = 0; row < 16; row += 2)
+    {
+        src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    even_rows_sad = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+
+    /* Early exit: the even rows alone already exceed the allowed maximum, */
+    /* so the partial SAD is reported and the odd rows are not read        */
+    if(even_rows_sad > i4_max_sad)
+    {
+        *pi4_mb_distortion = even_rows_sad;
+        return;
+    }
+
+    /* Second pass: the eight odd rows 1, 3, ..., 15 are added on top of */
+    /* the even-row total already held in the accumulator                */
+    for(row = 1; row < 16; row += 2)
+    {
+        src_row = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        est_row = _mm_loadu_si128((__m128i *) (pu1_est + row * est_strd));
+        acc = _mm_add_epi64(acc, _mm_sad_epu8(src_row, est_row));
+    }
+
+    *pi4_mb_distortion = _mm_extract_epi32(acc, 0) + _mm_extract_epi32(acc, 2);
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par   Description
+*   This function computes SAD between 2 16x16 blocks by processing alternate
+*   rows (fast mode). For fast mode it is assumed that the SAD of the whole
+*   block is approximately twice the SAD obtained by processing alternate rows.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimate
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] i4_max_sad
+*  integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i est_r0, est_r1, est_r2, est_r3;
+    __m128i res_r0, res_r1, res_r2, res_r3;
+    __m128i sad_val;
+    WORD32 val1, val2;
+    WORD32 i4_sad;
+    UWORD8 *pu1_src_temp = pu1_src + src_strd;
+    UWORD8 *pu1_est_temp = pu1_est + est_strd;
+
+    // Row 0,2,4,6 sad calculation
+    src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+    src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd));
+    src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 * src_strd));
+    src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd));
+
+    est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+    est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd));
+    est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd));
+    est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd));
+
+    res_r0 = _mm_sad_epu8(src_r0, est_r0);
+    res_r1 = _mm_sad_epu8(src_r1, est_r1);
+    res_r2 = _mm_sad_epu8(src_r2, est_r2);
+    res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+    sad_val = _mm_add_epi64(res_r0, res_r1);
+    sad_val = _mm_add_epi64(sad_val, res_r2);
+    sad_val = _mm_add_epi64(sad_val, res_r3);
+
+    // Row 8,10,12,14 sad calculation
+    pu1_src += 8 * src_strd;
+    pu1_est += 8 * est_strd;
+
+    src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+    src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd));
+    src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 * src_strd));
+    src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd));
+
+    est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+    est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd));
+    est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd));
+    est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd));
+
+    res_r0 = _mm_sad_epu8(src_r0, est_r0);
+    res_r1 = _mm_sad_epu8(src_r1, est_r1);
+    res_r2 = _mm_sad_epu8(src_r2, est_r2);
+    res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+    sad_val = _mm_add_epi64(sad_val, res_r0);
+    sad_val = _mm_add_epi64(sad_val, res_r1);
+    sad_val = _mm_add_epi64(sad_val, res_r2);
+    sad_val = _mm_add_epi64(sad_val, res_r3);
+
+    pu1_src = pu1_src_temp;
+    pu1_est = pu1_est_temp;
+
+    val1 = _mm_extract_epi32(sad_val, 0);
+    val2 = _mm_extract_epi32(sad_val, 2);
+
+    i4_sad = val1 + val2;
+    *pi4_mb_distortion = (i4_sad<<1);
+    return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad at the four unit-distance diamond points
+*
+* @par Description: This function computes the 16x16 SAD between the source
+* block and the reference block taken at each vertex of a diamond grid centered
+* at the reference pointer and at unit distance from it (one pixel to the left,
+* right, top and bottom).
+*
+* @param[in] pu1_ref
+*  UWORD8 pointer to the reference (center of the diamond)
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+*  integer reference stride
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[out] pi4_sad
+*  pointer to integer array of the 4 evaluated sads
+*  pi4_sad[0] - left   (pu1_ref - 1)
+*  pi4_sad[1] - right  (pu1_ref + 1)
+*  pi4_sad[2] - top    (pu1_ref - ref_strd)
+*  pi4_sad[3] - bottom (pu1_ref + ref_strd)
+*
+* @returns  none; the sad at every evaluated vertex is written to pi4_sad
+*
+* @remarks  All rows are read with unaligned 16-byte loads
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog_sse42(UWORD8 *pu1_ref,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* One accumulator per diamond vertex; each holds two 64-bit partial sums */
+    __m128i acc_left  = _mm_setzero_si128();
+    __m128i acc_right = _mm_setzero_si128();
+    __m128i acc_top   = _mm_setzero_si128();
+    __m128i acc_bot   = _mm_setzero_si128();
+    WORD32 row;
+
+    for(row = 0; row < 16; row++)
+    {
+        /* Start of the current row in the center (undisplaced) reference block */
+        UWORD8 *pu1_ref_row = pu1_ref + row * ref_strd;
+
+        __m128i src_row   = _mm_loadu_si128((__m128i *) (pu1_src + row * src_strd));
+        __m128i nbr_left  = _mm_loadu_si128((__m128i *) (pu1_ref_row - 1));
+        __m128i nbr_right = _mm_loadu_si128((__m128i *) (pu1_ref_row + 1));
+        __m128i nbr_top   = _mm_loadu_si128((__m128i *) (pu1_ref_row - ref_strd));
+        __m128i nbr_bot   = _mm_loadu_si128((__m128i *) (pu1_ref_row + ref_strd));
+
+        acc_left  = _mm_add_epi64(acc_left,  _mm_sad_epu8(src_row, nbr_left));
+        acc_right = _mm_add_epi64(acc_right, _mm_sad_epu8(src_row, nbr_right));
+        acc_top   = _mm_add_epi64(acc_top,   _mm_sad_epu8(src_row, nbr_top));
+        acc_bot   = _mm_add_epi64(acc_bot,   _mm_sad_epu8(src_row, nbr_bot));
+    }
+
+    /* Reduce each accumulator: low 32 bits of lane 0 plus low 32 bits of lane 1 */
+    pi4_sad[0] = _mm_extract_epi32(acc_left, 0) + _mm_extract_epi32(acc_left, 2);
+    pi4_sad[1] = _mm_extract_epi32(acc_right, 0) + _mm_extract_epi32(acc_right, 2);
+    pi4_sad[2] = _mm_extract_epi32(acc_top, 0) + _mm_extract_epi32(acc_top, 2);
+    pi4_sad[3] = _mm_extract_epi32(acc_bot, 0) + _mm_extract_epi32(acc_bot, 2);
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) at all subpel points about the src location
+*
+* @par Description
+*   This function computes the SAD at all points at a subpel distance from the
+*   current source location.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_ref_half_x
+*  UWORD8 pointer to half pel buffer
+*
+* @param[in] pu1_ref_half_y
+*  UWORD8 pointer to half pel buffer
+*
+* @param[in] pu1_ref_half_xy
+*  UWORD8 pointer to half pel buffer
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ref_strd
+*  integer ref stride
+*
+* @param[out] pi4_sad
+*  integer evaluated sad
+*  pi4_sad[0] - half x
+*  pi4_sad[1] - half x - 1
+*  pi4_sad[2] - half y
+*  pi4_sad[3] - half y - 1
+*  pi4_sad[4] - half xy
+*  pi4_sad[5] - half xy - 1
+*  pi4_sad[6] - half xy - strd
+*  pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+                                   UWORD8 *pu1_ref_half_x,
+                                   UWORD8 *pu1_ref_half_y,
+                                   UWORD8 *pu1_ref_half_xy,
+                                   WORD32 src_strd,
+                                   WORD32 ref_strd,
+                                   WORD32 *pi4_sad)
+{
+    UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
+    UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
+    UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
+    UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
+    UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
+    WORD32 val1, val2;
+
+    __m128i src, ref_half_x, ref_half_y, ref_half_xy;
+    __m128i ref_half_x_left, ref_half_y_top, ref_half_xy_left, ref_half_xy_top, ref_half_xy_top_left;
+    __m128i res_r0, res_r1, res_r2, res_r3, res_r4, res_r5, res_r6, res_r7;
+    __m128i sad_r0, sad_r1, sad_r2, sad_r3, sad_r4, sad_r5, sad_r6, sad_r7;
+    // Row 0 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    sad_r0 = _mm_sad_epu8(src, ref_half_x);
+    sad_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    sad_r2 = _mm_sad_epu8(src, ref_half_y);
+    sad_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    sad_r4 = _mm_sad_epu8(src, ref_half_xy);
+    sad_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    sad_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    sad_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 1 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 2 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 3 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 4 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+
+    // Row 5 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 6 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 7 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 8 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 9 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 10 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 11 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 12 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 13 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 14 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    pu1_src += src_strd;
+    pu1_ref_half_x += ref_strd;
+    pu1_ref_half_x_left += ref_strd;
+    pu1_ref_half_y += ref_strd;
+    pu1_ref_half_y_top += ref_strd;
+    pu1_ref_half_xy += ref_strd;
+    pu1_ref_half_xy_left += ref_strd;
+    pu1_ref_half_xy_top += ref_strd;
+    pu1_ref_half_xy_top_left += ref_strd;
+
+    // Row 15 sad calculation
+    src = _mm_loadu_si128((__m128i *) (pu1_src));
+    ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+    ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+    ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+    ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+    ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+    ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+    ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+    ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+    res_r0 = _mm_sad_epu8(src, ref_half_x);
+    res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+    res_r2 = _mm_sad_epu8(src, ref_half_y);
+    res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+    res_r4 = _mm_sad_epu8(src, ref_half_xy);
+    res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+    res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+    res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+    sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+    sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+    sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+    sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+    sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+    sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+    sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+    sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+    val1 = _mm_extract_epi32(sad_r0, 0);
+    val2 = _mm_extract_epi32(sad_r0, 2);
+    pi4_sad[0] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r1, 0);
+    val2 = _mm_extract_epi32(sad_r1, 2);
+    pi4_sad[1] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r2, 0);
+    val2 = _mm_extract_epi32(sad_r2, 2);
+    pi4_sad[2] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r3, 0);
+    val2 = _mm_extract_epi32(sad_r3, 2);
+    pi4_sad[3] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r4, 0);
+    val2 = _mm_extract_epi32(sad_r4, 2);
+    pi4_sad[4] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r5, 0);
+    val2 = _mm_extract_epi32(sad_r5, 2);
+    pi4_sad[5] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r6, 0);
+    val2 = _mm_extract_epi32(sad_r6, 2);
+    pi4_sad[6] = (val1 + val2);
+
+    val1 = _mm_extract_epi32(sad_r7, 0);
+    val2 = _mm_extract_epi32(sad_r7, 2);
+    pi4_sad[7] = (val1 + val2);
+
+    return;
+}
+/*
+******************************************************************************
+*
+* @brief Loads 8 source and 8 estimate pixels from one row and returns their
+*        absolute differences widened to eight 16-bit lanes.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to 8 source pixels
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to 8 estimated pixels
+*
+* @returns |src[i] - est[i]| for i = 0..7, one value per 16-bit lane
+*
+******************************************************************************
+*/
+static __m128i ime_abs_diff_row8_sse42(const UWORD8 *pu1_src,
+                                       const UWORD8 *pu1_est)
+{
+    __m128i src = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *) pu1_src));
+    __m128i est = _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *) pu1_est));
+
+    return _mm_abs_epi16(_mm_sub_epi16(src, est));
+}
+
+/*
+******************************************************************************
+*
+* @brief Processes one 8x4 strip, i.e. two horizontally adjacent 4x4 blocks
+*        (block 0: columns 0..3, block 1: columns 4..7). Returns the SAD of
+*        the strip and, as long as *pu4_non_zero is still 0, runs the
+*        threshold test on both blocks.
+*
+*        For each 4x4 block of absolute differences d(row, col):
+*          s1 = sum over rows {0,3}, columns {0,3}
+*          s4 = sum over rows {0,3}, columns {1,2}
+*          s2 = sum over rows {1,2}, columns {0,3}
+*          s3 = sum over rows {1,2}, columns {1,2}
+*          sad = s1 + s2 + s3 + s4
+*        The eight values compared against threshold lanes 0..7 are
+*          (sad << 1) - { s1+s4, s2+s3, s1-2*s3, s4-2*s2,
+*                         s1+s2, s4+s3, s2-2*s4, s3-2*s1 }
+*        A block passes only if every threshold lane is strictly greater than
+*        its value and sad_thrsh is strictly greater than the block SAD.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the top-left source pixel of the strip
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the top-left estimated pixel of the strip
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] threshold
+*  Eight 16-bit thresholds, one per lane. The comparison is a signed 16-bit
+*  compare, so thresholds are assumed to fit in 15 bits.
+*
+* @param[in] sad_thrsh
+*  Threshold applied to the plain SAD of each 4x4 block
+*
+* @param[in,out] pu4_non_zero
+*  Left untouched if both blocks pass; set to 1 if either block fails.
+*  Once it is non-zero the threshold test is skipped.
+*
+* @returns SAD of the 8x4 strip (block 0 + block 1)
+*
+******************************************************************************
+*/
+static WORD32 ime_satqd_8x4_sse42(const UWORD8 *pu1_src,
+                                  const UWORD8 *pu1_est,
+                                  WORD32 src_strd,
+                                  WORD32 est_strd,
+                                  __m128i threshold,
+                                  WORD32 sad_thrsh,
+                                  UWORD32 *pu4_non_zero)
+{
+    const __m128i zero = _mm_setzero_si128();
+    __m128i diff_r0, diff_r1, diff_r2, diff_r3;
+    __m128i outer, inner, total;
+    WORD32 sad_1, sad_2;
+
+    diff_r0 = ime_abs_diff_row8_sse42(pu1_src, pu1_est);
+    diff_r1 = ime_abs_diff_row8_sse42(pu1_src + src_strd, pu1_est + est_strd);
+    diff_r2 = ime_abs_diff_row8_sse42(pu1_src + 2 * src_strd, pu1_est + 2 * est_strd);
+    diff_r3 = ime_abs_diff_row8_sse42(pu1_src + 3 * src_strd, pu1_est + 3 * est_strd);
+
+    // Column sums; each lane is tagged with the partial sum it feeds
+    outer = _mm_add_epi16(diff_r0, diff_r3);    //s1 s4 s4 s1 a1 a4 a4 a1
+    inner = _mm_add_epi16(diff_r1, diff_r2);    //s2 s3 s3 s2 a2 a3 a3 a2
+
+    // SAD calculation: two horizontal adds collapse each 4-lane half
+    total = _mm_add_epi16(outer, inner);
+    total = _mm_hadd_epi16(total, zero);
+    total = _mm_hadd_epi16(total, zero);        //sad1, sad2 - 16bit values
+
+    sad_1 = _mm_extract_epi16(total, 0);
+    sad_2 = _mm_extract_epi16(total, 1);
+
+    if (*pu4_non_zero == 0)
+    {
+        __m128i s14, s23, dbl14, dbl23;
+        __m128i diff_a, diff_b, sum_a, sum_b;
+        __m128i mix_lo, mix_hi, ls_b1, ls_b2, pass;
+
+        // Bring lanes that feed the same partial sum next to each other
+        outer = _mm_shufflelo_epi16(outer, 0x9c);   //s1 s1 s4 s4 a1 a4 a4 a1
+        outer = _mm_shufflehi_epi16(outer, 0x9c);   //s1 s1 s4 s4 a1 a1 a4 a4
+        inner = _mm_shufflelo_epi16(inner, 0x9c);   //s2 s2 s3 s3 a2 a3 a3 a2
+        inner = _mm_shufflehi_epi16(inner, 0x9c);   //s2 s2 s3 s3 a2 a2 a3 a3
+
+        s14 = _mm_hadd_epi16(outer, zero);          //s1 s4 a1 a4 0 0 0 0
+        s23 = _mm_hadd_epi16(inner, zero);          //s2 s3 a2 a3 0 0 0 0
+
+        // Doubled values with each pair swapped
+        dbl14 = _mm_shufflelo_epi16(_mm_slli_epi16(s14, 1), 0xb1);  //s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0
+        dbl23 = _mm_shufflelo_epi16(_mm_slli_epi16(s23, 1), 0xb1);  //s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0
+
+        diff_a = _mm_sub_epi16(s14, dbl23);         //(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0
+        diff_b = _mm_sub_epi16(s23, dbl14);         //(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0
+
+        sum_b = _mm_add_epi16(s14, s23);            //s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0
+        sum_a = _mm_unpacklo_epi16(_mm_hadd_epi16(s14, zero),
+                                   _mm_hadd_epi16(s23, zero));     //s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0
+
+        mix_lo = _mm_unpacklo_epi32(sum_a, diff_a); //s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1)
+        mix_hi = _mm_unpacklo_epi32(sum_b, diff_b); //s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+        // Low 64 bits hold block 0 terms, high 64 bits hold block 1 terms
+        ls_b1 = _mm_sub_epi16(_mm_set1_epi16((WORD16) (sad_1 << 1)),
+                              _mm_unpacklo_epi64(mix_lo, mix_hi));  //lsi values Block0
+        ls_b2 = _mm_sub_epi16(_mm_set1_epi16((WORD16) (sad_2 << 1)),
+                              _mm_unpackhi_epi64(mix_lo, mix_hi));  //lsi values Block1
+
+        // A lane is 0xffff only where threshold[i] > ls[i] holds for both blocks
+        pass = _mm_and_si128(_mm_cmpgt_epi16(threshold, ls_b1),
+                             _mm_cmpgt_epi16(threshold, ls_b2));
+
+        if (!_mm_test_all_ones(pass) || sad_thrsh <= sad_1 || sad_thrsh <= sad_2)
+            *pu4_non_zero = 1;
+    }
+
+    return sad_1 + sad_2;
+}
+
+/*
+******************************************************************************
+*
+* @brief This function computes SAD between two 16x16 blocks.
+*        It also runs a per-4x4-block threshold test on SAD-derived terms;
+*        per the original description of this routine, that test decides
+*        whether the block will be zero after H264 transform and quant.
+*        The function name indicates it is meant for luma inter blocks.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+*  UWORD8 pointer to the estimated (predicted) block
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] est_strd
+*  integer estimate stride
+*
+* @param[in] pu2_thrsh
+*  Thresholds; nine entries are read. Entries 0..7 are compared lane-wise
+*  against the SAD-derived terms of every 4x4 block, entry 8 is compared
+*  against the SAD of every 4x4 block.
+*
+* @param[out] pi4_mb_distortion
+*  integer evaluated sad of the whole 16x16 block
+*
+* @param[out] pu4_is_zero
+*  Written with 0 when every 4x4 block passes the threshold test and with 1
+*  as soon as any block fails it. NOTE: despite the name, 1 therefore means
+*  the block is NOT known to be zero.
+*
+* @remarks
+*  The 16x16 block is walked as four bands of 4 rows; each band is handled
+*  as two 8x4 strips.
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter_sse42(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_est,
+                                         WORD32 src_strd,
+                                         WORD32 est_strd,
+                                         UWORD16 *pu2_thrsh,
+                                         WORD32 *pi4_mb_distortion,
+                                         UWORD32 *pu4_is_zero)
+{
+    const __m128i threshold = _mm_loadu_si128((const __m128i *) pu2_thrsh);
+    const WORD32 sad_thrsh = pu2_thrsh[8];
+    WORD32 mb_distortion = 0;
+    UWORD32 flag = 0;
+    WORD32 i;
+
+    for (i = 0; i < 4; i++)
+    {
+        // Columns 0..7 of the current band
+        mb_distortion += ime_satqd_8x4_sse42(pu1_src, pu1_est, src_strd,
+                                             est_strd, threshold, sad_thrsh,
+                                             &flag);
+
+        // Columns 8..15 of the current band
+        mb_distortion += ime_satqd_8x4_sse42(pu1_src + 8, pu1_est + 8, src_strd,
+                                             est_strd, threshold, sad_thrsh,
+                                             &flag);
+
+        pu1_src += 4 * src_strd;
+        pu1_est += 4 * est_strd;
+    }
+
+    *pi4_mb_distortion = mb_distortion;
+    *pu4_is_zero = flag;
+}
diff --git a/encoder/x86/ime_platform_macros.h b/encoder/x86/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/x86/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ime_platform_macros.h
+*
+* @brief
+*  Platform specific Macro definitions used in the codec
+*
+* @author
+*  Ittiam
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions                                                */
+/*****************************************************************************/
+
+/*
+ * USADA8(src, est, sad): adds to sad the sum of absolute differences between
+ * the first four elements of src and est, i.e. |src[i] - est[i]| for
+ * i = 0..3. Every argument and the whole expansion are parenthesized so that
+ * expressions such as (p + 4) can be passed safely and the macro can be used
+ * both as a statement and inside a larger expression.
+ * src and est are expanded several times; do not pass arguments with side
+ * effects.
+ */
+#define USADA8(src,est,sad) \
+                ((sad) +=  ABS((src)[0]-(est)[0]) + \
+                ABS((src)[1]-(est)[1]) + \
+                ABS((src)[2]-(est)[2]) + \
+                ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
author	Hamsalekha S <hamsalekha.s@ittiam.com>	2015-03-13 21:24:58 +0530
committer	Hamsalekha S <hamsalekha.s@ittiam.com>	2015-04-02 15:59:02 +0530
commit	8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
tree	cc806c96794356996b13ba9970941d0aed74a97e /encoder
parent	3956d913d37327dcb340f836e604b04bd478b158 (diff)
download	android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2 android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip