diff options
Diffstat (limited to 'common/arm/ih264_intra_pred_chroma_a9q.s')
-rwxr-xr-x | common/arm/ih264_intra_pred_chroma_a9q.s | 551 |
1 files changed, 551 insertions, 0 deletions
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s new file mode 100755 index 0000000..d03fc55 --- /dev/null +++ b/common/arm/ih264_intra_pred_chroma_a9q.s @@ -0,0 +1,551 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ +@/** +@****************************************************************************** +@* @file +@* ih264_intra_pred_chroma_a9q.s +@* +@* @brief +@* Contains function definitions for intra chroma prediction . 
@*
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*/

@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c */
@

@/**
@/**
@/**
@
.text
.p2align 2

@ The two coefficient tables are defined in another object file and are kept
@ hidden (not exported from the final shared object).
    .extern ih264_gai1_intrapred_chroma_plane_coeffs1
.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
    .extern ih264_gai1_intrapred_chroma_plane_coeffs2
.hidden ih264_gai1_intrapred_chroma_plane_coeffs2

@ Position-independent literals used by the plane-mode routine in this file.
@ Each word holds (table address - label - 8).  The routine loads the word and
@ adds pc at the instruction placed on the matching label (scrlblc1/scrlblc2);
@ in ARM state pc reads as that instruction's address + 8, which is what the
@ "- 8" compensates for, so the sum is the run-time address of the table.
scratch_chroma_intrapred_addr1:
    .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8

scratch_intrapred_chroma_plane_addr1:
    .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
@/**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_dc
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:DC, described in sec 8.3.4.1
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************/
@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 =>
@         *pu1_dst
@ r2 => src_strd                  (never read by this routine)
@ r3 => dst_strd                  (byte step between the eight stored rows)
@ r4 => ui_neighboravailability   (fifth argument, fetched from the caller's stack)
@
@ Availability bits tested below: 0x01 = left, 0x04 = top.
@ Source bytes read: src[0..15] when left is used, src[18..33] when top is used.
@ On every path q4 ends up holding the 16-byte pattern written to rows 0-3 and
@ q5 the pattern written to rows 4-7.


    .global ih264_intra_pred_chroma_8x8_mode_dc_a9q

ih264_intra_pred_chroma_8x8_mode_dc_a9q:

    push          {r4, lr}
    ldr           r4, [sp, #8]          @ first stack argument lies above the two words just pushed
    vpush         {d8-d15}              @ q4-q7 are written below

    tst           r4, #0x01             @ left neighbours missing -> look at top only
    beq           .Lchroma_dc_no_left
    tst           r4, #0x04             @ left present, top missing -> left only
    beq           .Lchroma_dc_left_only

    @ ---- left and top both available ----
    vld1.u8       {q0}, [r0]            @ d0 = src[0..7],   d1 = src[8..15]
    add           r0, r0, #18
    vld1.u8       {q1}, [r0]            @ d2 = src[18..25], d3 = src[26..33]
    vaddl.u8      q2, d1, d2            @ 16-bit sums feeding the first half of rows 0-3
    vaddl.u8      q3, d0, d3            @ 16-bit sums feeding the second half of rows 4-7
    vmovl.u8      q1, d3                @ widened src[26..33]: second half of rows 0-3
    vmovl.u8      q0, d0                @ widened src[0..7]:   first half of rows 4-7

    vadd.u16      d12, d4, d5           @ fold each 8-lane vector down to 4 lanes
    vadd.u16      d13, d2, d3
    vadd.u16      d15, d6, d7
    vadd.u16      d14, d0, d1

    @ The .u32 pairwise add treats every (U,V) 16-bit pair as one 32-bit lane,
    @ so the U and V totals are accumulated side by side.
    vpadd.u32     d12, d12, d15
    vpadd.u32     d14, d13, d14
    vqrshrun.s16  d12, q6, #3           @ rounded /8 : sums built from 8 samples per plane
    vqrshrun.s16  d14, q7, #2           @ rounded /4 : sums built from 4 samples per plane
    vdup.u16      d8, d12[0]            @ rows 0-3, bytes 0..7
    vdup.u16      d9, d14[0]            @ rows 0-3, bytes 8..15
    vdup.u16      d10, d14[1]           @ rows 4-7, bytes 0..7
    vdup.u16      d11, d12[1]           @ rows 4-7, bytes 8..15
    b             .Lchroma_dc_store

.Lchroma_dc_no_left:
    tst           r4, #0x04             @ top missing as well -> constant fill
    beq           .Lchroma_dc_none

    @ ---- only top available ----
    add           r0, r0, #18
    vld1.u8       {q0}, [r0]            @ src[18..33]
    vmovl.u8      q1, d0
    vmovl.u8      q2, d1
    vadd.u16      d0, d2, d3
    vadd.u16      d1, d4, d5
    vpaddl.u32    q0, q0                @ finish the per-half (U,V) totals
    vqrshrun.s16  d0, q0, #2            @ rounded /4
    vdup.u16      d8, d0[0]             @ value derived from src[18..25] -> bytes 0..7
    vdup.u16      d9, d0[2]             @ value derived from src[26..33] -> bytes 8..15
    vmov          q5, q4                @ rows 4-7 repeat rows 0-3
    b             .Lchroma_dc_store

.Lchroma_dc_left_only:
    @ ---- only left available ----
    vld1.u8       {q0}, [r0]            @ src[0..15]
    vmovl.u8      q1, d0
    vmovl.u8      q2, d1
    vadd.u16      d0, d2, d3
    vadd.u16      d1, d4, d5
    vpaddl.u32    q0, q0
    vqrshrun.s16  d0, q0, #2            @ rounded /4
    vdup.u16      q5, d0[0]             @ value derived from src[0..7]  -> rows 4-7
    vdup.u16      q4, d0[2]             @ value derived from src[8..15] -> rows 0-3
    b             .Lchroma_dc_store

.Lchroma_dc_none:
    @ ---- nothing available: fill with 128 ----
    vmov.u8       q4, #128
    vmov.u8       q5, #128

.Lchroma_dc_store:
    @ eight rows of 16 bytes, r1 advancing by dst_strd after each store
    vst1.8        {q4}, [r1], r3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q5}, [r1], r3
    vst1.8        {q5}, [r1], r3
    vst1.8        {q5}, [r1], r3
    vst1.8        {q5}, [r1], r3

    vpop          {d8-d15}
    pop           {r4, pc}              @ restore r4 and return



@/******************************************************************************


@/**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_horz
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:Horizontal
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:Horizontal, described in sec 8.3.4.2
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (Not used in this function)
@*
@* @returns
@*
@* @remarks
@*  Leaf routine: it makes no calls and writes only r1, r2, q0-q2 and the
@*  flags, so nothing is saved on the stack.
@*
@*******************************************************************************
@*/
@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src   (16 bytes are read from it)
@ r1 => *pu1_dst   (advanced by dst_strd after every row)
@ r2 => src_strd on entry; ignored and reused as the loop counter
@ r3 => dst_strd
@ ui_neighboravailability (on the stack) is never read


    .global ih264_intra_pred_chroma_8x8_mode_horz_a9q

ih264_intra_pred_chroma_8x8_mode_horz_a9q:

    vld1.u8       {q0}, [r0]            @ q0 = src[0..15], eight 16-bit (U,V) pairs
    mov           r2, #6                @ the loop emits two rows per pass -> 3 passes

    vdup.u16      q1, d1[3]             @ row 0 pattern: pair src[14..15] repeated 8 times
    vdup.u16      q2, d1[2]             @ row 1 pattern: pair src[12..13]
    vst1.8        {q1}, [r1], r3        @ row 0

loop_8x8_horz:
    vext.8        q0, q0, q0, #12       @ rotate so the next two pairs land in d1[3] and d1[2]
    vst1.8        {q2}, [r1], r3        @ odd row prepared on the previous pass
    vdup.u16      q1, d1[3]
    subs          r2, r2, #2
    vdup.u16      q2, d1[2]
    vst1.8        {q1}, [r1], r3        @ even row
    bne           loop_8x8_horz

    vst1.8        {q2}, [r1], r3        @ row 7 (pattern already in q2, no further rotate needed)

    bx            lr




@/**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_vert
@*
@* @brief
@* Perform
@*  Intra prediction for chroma_8x8 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:vertical, described in sec 8.3.4.3
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (Not used in this function)
@*
@* @returns
@*
@* @remarks
@*  Leaf routine: it makes no calls and writes only r0, r1 and q0, so no
@*  registers are saved on the stack.
@*
@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src   (advanced by 18 before the single 16-byte load)
@ r1 => *pu1_dst   (advanced by dst_strd after every row)
@ r2 => src_strd   (never read)
@ r3 => dst_strd
@ ui_neighboravailability (on the stack) is never read


    .global ih264_intra_pred_chroma_8x8_mode_vert_a9q

ih264_intra_pred_chroma_8x8_mode_vert_a9q:

    add           r0, r0, #18           @ the row to replicate starts at src[18]
    vld1.8        {q0}, [r0]            @ q0 = src[18..33]

    @ the same 16 bytes are written to all eight destination rows
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3

    bx            lr




@/******************************************************************************


@/**
@*******************************************************************************
@*
@*ih264_intra_pred_chroma_8x8_mode_plane
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:PLANE
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V
@*  samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************/
@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@ r0 => *pu1_src
@ r1 => *pu1_dst   (advanced by dst_strd after every stored row)
@ r2 => src_strd   (never read by this routine)
@ r3 => dst_strd
@ r4 => ui_neighboravailability (listed by the original author; this routine
@       never loads it from the stack)
@
@ Source bytes read: src[0..7], src[10..17], src[16..23], src[26..33], and the
@ single bytes src[0], src[1], src[32], src[33].
@ Output: eight rows of 16 bytes.

    .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
ih264_intra_pred_chroma_8x8_mode_plane_a9q:

    stmfd         sp!, {r4-r10, r12, lr}
    vpush         {d8-d15}              @ q4-q7 are written below


    @ ---- gather the neighbour samples and form the weighted differences ----
    vld1.32       d0, [r0]              @ d0 = src[0..7]
    add           r10, r0, #10
    vld1.32       d1, [r10]             @ d1 = src[10..17]
    add           r10, r10, #6          @ r10 = src + 16
    vrev64.16     d5, d0                @ d5 = d0 with its four 16-bit (U,V) pairs in reverse order
    vld1.32       d2, [r10]!            @ d2 = src[16..23]; writeback leaves r10 = src + 24
    add           r10, r10, #2          @ r10 = src + 26
    vrev64.16     d7, d2                @ d7 = d2 with its four 16-bit pairs in reverse order
    vld1.32       d3, [r10]             @ d3 = src[26..33]
    sub           r5, r3, #8            @ NOTE(review): r5 is not read anywhere later in this routine
    ldr           r12, scratch_chroma_intrapred_addr1
scrlblc1:
    add           r12, r12, pc          @ literal + pc -> address of ih264_gai1_intrapred_chroma_plane_coeffs1
    vsubl.u8      q5, d5, d1            @ 16-bit differences: reversed src[0..7] - src[10..17]
    vld1.64       {q4}, [r12]           @ q4 = 16 bytes of multiplication factors from the coeffs1 table
                                        @ (the original comment describes them as the factors 1 to 8)
    vsubl.u8      q6, d3, d7            @ 16-bit differences: src[26..33] - reversed src[16..23]
    vmul.s16      q7, q5, q4            @ weight both difference vectors
    vmul.s16      q8, q6, q4
    vuzp.16       q7, q8                @ de-interleave: q7 = even 16-bit lanes, q8 = odd lanes
                                        @ (the first / second sample of every interleaved pair)

    @ Two rounds of pairwise adds leave, in every lane of d14..d17, the sum of
    @ that register's four 16-bit lanes.
    vpadd.s16     d14, d14
    vpadd.s16     d15, d15
    vpadd.s16     d16, d16
    vpadd.s16     d17, d17
    vpadd.s16     d14, d14
    vpadd.s16     d15, d15
    vpadd.s16     d16, d16
    vpadd.s16     d17, d17

    mov           r6, #34
    vdup.16       q9, r6

    @ gradient = (34 * sum + 32) >> 6, computed in 32 bits and narrowed to 16
    vmull.s16     q11, d14, d18
    vmull.s16     q12, d15, d18
    vmull.s16     q13, d16, d18
    vmull.s16     q14, d17, d18

    vrshrn.s32    d10, q11, #6          @ from the src[0..7]/src[10..17] differences, even lanes
    vrshrn.s32    d12, q12, #6          @ from the src[16..23]/src[26..33] differences, even lanes
    vrshrn.s32    d13, q13, #6          @ from the src[0..7]/src[10..17] differences, odd lanes
    vrshrn.s32    d14, q14, #6          @ from the src[16..23]/src[26..33] differences, odd lanes


    @ ---- constant term: (src[0] + src[32]) << 4 and (src[1] + src[33]) << 4 ----
    ldrb          r6, [r0], #1          @ r6 = src[0]
    add           r10, r0, #31          @ r0 was post-incremented, so r10 = src + 32
    ldrb          r8, [r0], #1          @ r8 = src[1]
    ldrb          r7, [r10], #1         @ r7 = src[32]
    ldrb          r9, [r10], #1         @ r9 = src[33]

    add           r6, r6, r7
    add           r8, r8, r9
    lsl           r6, r6, #4
    lsl           r8, r8, #4

    @ Broadcast each scalar, then zip the even-lane and odd-lane variants so
    @ the vectors follow the interleaved sample order of the output row.
    vdup.16       q0, r6
    vdup.16       q1, r8
    vdup.16       q2, d12[0]
    vdup.16       q3, d10[0]

    vdup.16       q12, d14[0]
    vdup.16       q13, d13[0]
    vzip.16       q2, q12               @ q2 = gradient that is scaled per column below
    vzip.16       q3, q13               @ q3 = gradient that is scaled per row below
    vzip.16       q0, q1                @ q0 = constant term

    ldr           r12, scratch_intrapred_chroma_plane_addr1
scrlblc2:
    add           r12, r12, pc          @ literal + pc -> address of ih264_gai1_intrapred_chroma_plane_coeffs2
    vld1.64       {q4}, [r12]           @ q4 = eight 16-bit coefficients from the coeffs2 table
    vmov.16       q5, q4
    vmov          q11, q4               @ q11 keeps the un-zipped copy; one lane is used per output row
    vzip.16       q4, q5                @ every coefficient duplicated: q4 = c0,c0..c3,c3  q5 = c4,c4..c7,c7

    @ Row-independent part for bytes 0..7 (q6) and bytes 8..15 (q8) of a row.
    vmul.s16      q6, q2, q4
    vmul.s16      q8, q2, q5
    vadd.s16      q6, q0, q6
    vadd.s16      q8, q0, q8


    @ ---- eight output rows ----
    @ For row k: t = q3 * (lane k of q11); the row is the unsigned-saturated
    @ rounded value of (q6 + t) >> 5 followed by that of (q8 + t) >> 5.
    @ Even rows use q10/q2/q9/q12/q0, odd rows use q15/q7/q4/q1/q13, and the
    @ work of neighbouring rows is interleaved with the stores.
    vdup.16       q10, d22[0]           @ row 0 coefficient
    vmul.s16      q2, q3, q10
    vdup.16       q15, d22[1]           @ row 1 coefficient
    vmul.s16      q9, q3, q10
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vadd.s16      q1, q6, q7
    vqrshrun.s16  d28, q12, #5
    vadd.s16      q13, q8, q4
    vqrshrun.s16  d29, q0, #5
    vdup.16       q10, d22[2]           @ row 2 coefficient
    vst1.8        {q14}, [r1], r3       @ row 0
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 1
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d22[3]           @ row 3 coefficient
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 2
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vdup.16       q10, d23[0]           @ row 4 coefficient
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 3
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d23[1]           @ row 5 coefficient
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 4
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vdup.16       q10, d23[2]           @ row 6 coefficient
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 5
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d23[3]           @ row 7 coefficient
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 6
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vst1.8        {q14}, [r1], r3       @ row 7



end_func_plane:                         @ NOTE(review): no branch in this file targets this label


    vpop          {d8-d15}
    ldmfd         sp!, {r4-r10, r12, pc}