@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

.data
.p2align 2

scratch_intrapred_luma_4x4_prediction:
    .long ver, hor, d_c, dia_dl
    .long dia_dr, ver_r, hor_d, ver_l
    .long hor_u

.text
.p2align 2
scratch_intrapred_luma_4x4_prediction_addr1:
    .long scratch_intrapred_luma_4x4_prediction - scrintra_4x4 - 8

@/**
@******************************************************************************
@*
@* @brief Evaluates the best intra 4x4 mode and does the prediction.
@*
@* @par Description
@*   This function evaluates the 4x4 intra prediction modes, computes the
@*   corresponding SAD based cost and returns the buffer predicted with the
@*   best mode.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] pu1_ngbr_pels
@*  UWORD8 pointer to neighbouring pels
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] u4_n_avblty
@*  availability of neighbouring pixels
@*
@* @param[in] u4_intra_mode
@*  Pointer to the variable in which the best mode is returned
@*
@* @param[in] pu4_sadmin
@*  Pointer to the variable in which the minimum cost is returned
@*
@* @param[in] u4_valid_intra_modes
@*  Bitmask indicating which intra modes are valid
@*
@* @param[in] u4_lambda
@*  Lambda value for computing cost from SAD
@*
@* @param[in] u4_predictd_mode
@*  Predicted mode for cost computation
@*
@* @return none
@*
@******************************************************************************
@*/
@void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
@                                     UWORD8 *pu1_ngbr_pels,
@                                     UWORD8 *pu1_dst,
@                                     UWORD32 src_strd,
@                                     UWORD32 dst_strd,
@                                     WORD32 u4_n_avblty,
@                                     UWORD32 *u4_intra_mode,
@                                     WORD32 *pu4_sadmin,
@                                     UWORD32 u4_valid_intra_modes,
@                                     UWORD32 u4_lambda,
@                                     UWORD32 u4_predictd_mode)
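@
@ A minimal caller-side sketch in C, assuming the encoder has already gathered
@ the neighbour samples into pu1_ngbr_pels; the local names best_mode, min_cost
@ and valid_modes below are illustrative and not part of this file:
@
@     UWORD32 best_mode;
@     WORD32  min_cost;
@     UWORD32 valid_modes = 0x1ff;   /* bit i set => 4x4 mode i may be tried */
@
@     ih264e_evaluate_intra_4x4_modes(pu1_src, pu1_ngbr_pels, pu1_dst,
@                                     src_strd, dst_strd, u4_n_avblty,
@                                     &best_mode, &min_cost, valid_modes,
@                                     u4_lambda, u4_predictd_mode);
@
@ On return, pu1_dst holds the 4x4 prediction for best_mode and min_cost holds
@ the SAD plus the lambda weighted mode cost for that mode.
@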
.global ih264e_evaluate_intra_4x4_modes_a9q

ih264e_evaluate_intra_4x4_modes_a9q:

@r0 = pu1_src
@r1 = pu1_ngbr_pels
@r2 = pu1_dst
@r3 = src_strd
@r4 = dst_strd
@r5 = u4_n_avblty
@r6 = u4_intra_mode
@r7 = pu4_sadmin
@r8 = u4_valid_intra_modes
@r0 = u4_lambda (reused after the stack loads below)
@r1 = u4_predictd_mode (reused after the stack loads below)

    stmfd sp!, {r4-r12, r14}          @store register values to stack
@--------------------
    ldr r5, [sp, #44]                 @r5 = u4_n_avblty
@----------------------
    vpush {d8-d15}

@Loading neighbours
    vld1.32 {q0}, [r1]
    add     r4, r1, #12
    vld1.8  d1[5], [r4]
    vld1.8  d1[7], [r1]
@--------------------------------
    ldr r8, [sp, #120]                @u4_valid_intra_modes
@----------------------------------------------

@ LOADING pu1_src
    vld1.32 {d20[0]}, [r0], r3
    vext.8  q1, q0, q0, #1
    vld1.32 {d20[1]}, [r0], r3
    mov     r11, #1
    vld1.32 {d21[0]}, [r0], r3
    lsl     r11, r11, #30             @r11 = running minimum cost, initialised to 2^30
    vld1.32 {d21[1]}, [r0], r3
@--------------------------------
    ldr r0, [sp, #124]                @r0 = u4_lambda
    ldr r1, [sp, #128]                @r1 = u4_predictd_mode
@------

vert:
    ands    r10, r8, #01              @VERT sad ??
    beq     horz
    vdup.32 q2, d2[1]
    vabdl.u8 q14, d4, d20
    vabal.u8 q14, d4, d21
    vadd.i16 d28, d29, d28
    subs    r6, r1, #0                @is the predicted mode VERT (mode 0)?
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2                @mode cost = 4*lambda if not the predicted mode
    vpaddl.u32 d28, d28               @/
    moveq   r6, r0                    @mode cost = lambda if it is the predicted mode
    vmov.u32 r9, d28[0]               @r9 = SAD for VERT
    add     r9, r6, r9                @total cost = SAD + mode cost
    subs    r6, r11, r9
    movgt   r11, r9                   @update running minimum cost
    movgt   r12, #0                   @and best mode so far (VERT = 0)

@The same SAD + mode-cost update pattern repeats for every candidate mode below.
horz:
    ands    r10, r8, #02              @HORZ sad ??
    beq     dc
    vdup.32 q3, d0[0]
    vmov.32 q4, q3
    vtrn.8  q3, q4
    vtrn.16 d7, d6
    vtrn.16 d9, d8
    vtrn.32 d9, d7
    vtrn.32 d8, d6
    vabdl.u8 q14, d6, d20
    subs    r6, r1, #1
    vabal.u8 q14, d7, d21
    vadd.i16 d28, d29, d28
    lslne   r6, r0, #2
    vpaddl.u16 d28, d28               @
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #1

dc:
    ands    r10, r8, #04              @DC sad ??
    beq     diags
    vext.8  q4, q0, q0, #5
    vaddl.u8 q4, d0, d8
    vpaddl.u16 d8, d8                 @
    vpaddl.u32 d8, d8                 @/
    vmov.u32 r4, d8[0]                @
    mov     r14, #1
    ands    r10, r5, #1
    addne   r4, r4, #2
    addne   r14, r14, #1
    ands    r10, r5, #4
    addne   r4, r4, #2
    addne   r14, r14, #1
    ands    r10, r5, #5
    moveq   r4, #128
    moveq   r14, #0
    subs    r6, r1, #2
    lsr     r4, r4, r14
    vdup.8  q4, r4
    lslne   r6, r0, #2
    vabdl.u8 q14, d8, d20
    vabal.u8 q14, d9, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #2

diags:
    ands    r10, r8, #504             @/* if modes other than VERT, HORZ and DC are valid ????*/
    beq     pred

@/* Performing FILT11 and FILT121 operation for all neighbour values*/
    vext.8  q5, q0, q0, #2
    vaddl.u8 q6, d0, d2
    vaddl.u8 q7, d1, d3
    vaddl.u8 q8, d10, d2
    vaddl.u8 q9, d11, d3
    vadd.u16 q12, q10, q11
    vqrshrun.s16 d10, q6, #1
    vqrshrun.s16 d11, q7, #1
    vadd.u16 q11, q6, q8
    vadd.u16 q12, q7, q9
    vqrshrun.s16 d12, q11, #2
    vqrshrun.s16 d13, q12, #2
    mov     r14, #0
    vdup.32 q13, r14
    mov     r14, #-1
    vmov.i32 d26[0], r14

diag_dl:
    ands    r10, r8, #0x08            @DIAG_DL sad ??
    beq     diag_dr
    vext.8  q15, q6, q6, #5
    vbit.32 d14, d30, d26
    vext.8  q15, q6, q6, #15
    vbit.32 d15, d31, d26
    vext.8  q15, q6, q6, #2
    vext.32 q14, q13, q13, #3
    vbit.32 d14, d30, d28
    vext.8  q15, q6, q6, #4
    vbit.32 d15, d30, d28
    vabdl.u8 q14, d14, d20
    subs    r6, r1, #3
    vabal.u8 q14, d15, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #3

diag_dr:
    ands    r10, r8, #16              @DIAG_DR sad ??
    beq     vert_r
    vext.8  q15, q6, q6, #3
    vbit.32 d16, d30, d26
    vext.8  q15, q6, q6, #1
    vbit.32 d17, d30, d26
    vext.8  q15, q6, q6, #4
    vext.32 q14, q13, q13, #3
    vbit.32 d17, d31, d28
    vext.8  q15, q6, q6, #6
    vbit.32 d16, d31, d28
    vabdl.u8 q14, d16, d20
    subs    r6, r1, #4
    vabal.u8 q14, d17, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #4

vert_r:
    ands    r10, r8, #32              @VERT_R sad ??
    beq     horz_d
    vext.8  q15, q5, q5, #4
    vbit.32 d18, d30, d26
    vext.8  q15, q5, q5, #3
    vbit.32 d19, d30, d26
    vext.32 q14, q13, q13, #3
    vext.8  q15, q6, q6, #15
    vbit.32 d18, d30, d28
    vext.8  q15, q6, q6, #14
    vbit.32 d19, d30, d28
    mov     r14, #0
    vdup.32 q14, r14
    mov     r14, #0xff
    vmov.i8 d28[0], r14
    vext.8  q15, q6, q6, #2
    vbit.32 d19, d30, d28
    vext.32 q14, q14, q14, #3
    subs    r6, r1, #5
    vext.8  q15, q6, q6, #13
    vbit.32 d19, d30, d28
    lslne   r6, r0, #2
    vabdl.u8 q14, d18, d20
    vabal.u8 q14, d19, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #5

horz_d:
    vmov.8  q1, q5
    vmov.8  q15, q6
    vzip.8  q1, q15
    ands    r10, r8, #64              @HORZ_D sad ??
    beq     vert_l
    vext.8  q15, q6, q6, #2
    vbit.32 d8, d30, d26
    mov     r14, #0
    vdup.32 q14, r14
    mov     r14, #0xff
    vmov.i8 d28[0], r14
    vext.8  q15, q5, q5, #3
    vbit.32 d8, d30, d28
    vext.8  q15, q1, q1, #2
    vbit.32 d9, d30, d26
    vext.32 q14, q13, q13, #3
    vbit.32 d8, d2, d28
    subs    r6, r1, #6
    vext.8  q15, q1, q1, #12
    vbit.32 d9, d30, d28
    vabdl.u8 q14, d8, d20
    vabal.u8 q14, d9, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #6

vert_l:
    ands    r10, r8, #128             @VERT_L sad ??
    beq     horz_u
    vext.8  q15, q5, q5, #5
    vbit.32 d24, d30, d26
    vext.8  q15, q15, q15, #1
    vbit.32 d25, d30, d26
    vext.8  q15, q6, q6, #1
    vext.32 q14, q13, q13, #3
    vbit.32 d24, d30, d28
    vext.8  q15, q15, q15, #1
    subs    r6, r1, #7
    vbit.32 d25, d30, d28
    vabdl.u8 q14, d24, d20
    vabal.u8 q14, d25, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #7

horz_u:
    ands    r10, r8, #256             @HORZ_U sad ??
    beq     pred
    vrev64.8 q5, q1
    vdup.8  q1, d0[0]
    vext.8  q6, q6, #7
    mov     r14, #0
    vdup.32 q14, r14
    mov     r14, #0xff
    vmov.i8 d28[0], r14
    vbit.32 d11, d13, d28
    movw    r14, #0xffff
    vmov.i16 d28[0], r14
    vext.8  q6, q5, q5, #7
    subs    r6, r1, #8
    vbit.32 d3, d12, d28
    vext.8  q6, q5, q5, #3
    vbit.32 d2, d12, d26
    vext.32 q14, q13, q13, #3
    vext.8  q6, q5, q5, #1
    vbit.32 d2, d12, d28
    vabdl.u8 q14, d2, d20
    vabal.u8 q14, d3, d21
    vadd.i16 d28, d29, d28
    vpaddl.u16 d28, d28               @
    lslne   r6, r0, #2
    vpaddl.u32 d28, d28               @/
    vmov.u32 r9, d28[0]               @
    moveq   r6, r0                    @
    add     r9, r6, r9
    subs    r6, r11, r9
    movgt   r11, r9
    movgt   r12, #8

pred:
@/* DOING FINAL PREDICTION */
@---------------------------
    ldr r7, [sp, #116]                @r7 = pu4_sadmin
    ldr r6, [sp, #112]                @r6 = u4_intra_mode
@--------------------------
    str r11, [r7]                     @/* STORING MIN SAD */
    str r12, [r6]                     @/* FINAL MODE */

    ldr r3, scratch_intrapred_luma_4x4_prediction_addr1
scrintra_4x4:
    add r3, r3, pc
    lsl r12, r12, #2
    add r3, r3, r12
    ldr r5, [r3]
    and r5, r5, #0xfffffffe
    bx  r5

ver:
    vext.8  q0, q0, q0, #1
    vdup.32 q15, d0[1]
    b store

hor:
    vmov.32 q15, q3
    b store

d_c:
    vdup.8  q15, r4
    b store

dia_dl:
    vmov.32 q15, q7
    b store

dia_dr:
    vmov.32 q15, q8
    b store

ver_r:
    vmov.32 q15, q9
    b store

hor_d:
    vmov.32 q15, q4
    b store

ver_l:
    vmov.32 q15, q12
    b store

hor_u:
    vmov.32 q15, q1

store:
@/* storing to pu1_dst */
    ldr     r4, [sp, #104]            @r4 = dst_strd
    vst1.32 {d30[0]}, [r2], r4
    vst1.32 {d30[1]}, [r2], r4
    vst1.32 {d31[0]}, [r2], r4
    vst1.32 {d31[1]}, [r2], r4

end_func:
    vpop  {d8-d15}
    ldmfd sp!, {r4-r12, pc}           @Restoring registers from stack
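
@ For reference, a plain C sketch of the per-mode selection rule the NEON code
@ above implements: each enabled mode is costed as SAD plus a lambda weighted
@ mode-signalling term (lambda if the mode equals the predicted mode, 4*lambda
@ otherwise), and the cheapest mode wins. compute_sad_4x4() is a hypothetical
@ helper standing in for the per-mode vabdl/vabal accumulation done above.
@
@     WORD32  min_cost  = 1 << 30;
@     UWORD32 best_mode = 0;
@     for (UWORD32 mode = 0; mode < 9; mode++)
@     {
@         if (!(u4_valid_intra_modes & (1 << mode)))
@             continue;
@         WORD32 cost = compute_sad_4x4(pu1_src, src_strd, mode)
@                     + ((mode == u4_predictd_mode) ? u4_lambda : 4 * u4_lambda);
@         if (cost < min_cost)
@         {
@             min_cost  = cost;
@             best_mode = mode;
@         }
@     }
@     *pu4_sadmin    = min_cost;
@     *u4_intra_mode = best_mode;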