summaryrefslogtreecommitdiffstats
path: root/common/arm/ih264_intra_pred_chroma_a9q.s
diff options
context:
space:
mode:
Diffstat (limited to 'common/arm/ih264_intra_pred_chroma_a9q.s')
-rwxr-xr-xcommon/arm/ih264_intra_pred_chroma_a9q.s551
1 files changed, 551 insertions, 0 deletions
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
new file mode 100755
index 0000000..d03fc55
--- /dev/null
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -0,0 +1,551 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_chroma_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra chroma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c */
+@
+
+@/**
+@/**
+@/**
+@
+.text
+.p2align 2 @align what follows to 2^2 = 4 bytes
+
+ .extern ih264_gai1_intrapred_chroma_plane_coeffs1 @table defined outside this file, loaded by the plane routine via scrlblc1
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
+ .extern ih264_gai1_intrapred_chroma_plane_coeffs2 @table defined outside this file, loaded by the plane routine via scrlblc2
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs2
+scratch_chroma_intrapred_addr1: @literal: distance from (scrlblc1 + 8) to the coeffs1 table
+ .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8
+@ The "- 8" matches the value pc yields in the "add r12, r12, pc" placed at each scrlblcN label (ARM state: instruction address + 8).
+scratch_intrapred_chroma_plane_addr1: @literal: distance from (scrlblc2 + 8) to the coeffs2 table
+ .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_dc_a9q
+@ Writes 8 rows of 16 bytes at pu1_dst: q4 (d8:d9) fills rows 0-3 and q5 (d10:d11) fills rows 4-7.
+ih264_intra_pred_chroma_8x8_mode_dc_a9q:
+@ Availability bits tested below: 0x01 = left neighbours, 0x04 = top neighbours.
+ stmfd sp!, {r4, r14} @save r4 and the return address (8 bytes pushed)
+ ldr r4, [sp, #8] @r4 => ui_neighboravailability (first stack word above the 8 bytes just pushed)
+ vpush {d8-d15} @d8-d15 (q4-q7) are overwritten below, so preserve them for the caller
+
+ ands r2, r4, #0x01 @left bit clear => go check whether only top is available (r2/src_strd is reused as scratch)
+ beq top_available
+ ands r2, r4, #0x04 @left is set here; top bit clear => only left is available
+ beq left_available
+
+ vld1.u8 {q0}, [r0] @BOTH LEFT AND TOP AVAILABLE: q0 = 16 left bytes at pu1_src
+ add r0, r0, #18 @skip the 16 left bytes plus 2 more (presumably the top-left UV pair - TODO confirm)
+ vld1.u8 {q1}, [r0] @q1 = 16 top bytes at pu1_src + 18
+ vaddl.u8 q2, d1, d2 @q2 = last 8 left bytes + first 8 top bytes, widened to u16
+ vaddl.u8 q3, d0, d3 @q3 = first 8 left bytes + last 8 top bytes, widened to u16
+ vmovl.u8 q1, d3 @q1 = last 8 top bytes widened to u16
+ vmovl.u8 q0, d0 @q0 = first 8 left bytes widened to u16
+@ Fold every 8-lane vector to 4 lanes; lanes keep alternating between the two interleaved chroma components.
+ vadd.u16 d12, d4, d5 @d12 = folded q2
+ vadd.u16 d13, d2, d3 @d13 = folded q1
+ vadd.u16 d15, d6, d7 @d15 = folded q3
+ vadd.u16 d14, d0, d1 @d14 = folded q0
+@ Each 32-bit lane holds one pair of 16-bit component sums, so a 32-bit pairwise add sums both components at once.
+ vpadd.u32 d12, d12, d15 @d12 = {total of q2, total of q3}
+ vpadd.u32 d14, d13, d14 @d14 = {total of q1, total of q0}
+ vqrshrun.s16 d12, q6, #3 @rounded /8 and narrowed to u8; only result bytes 0-3 (from d12) are used
+ vqrshrun.s16 d14, q7, #2 @rounded /4 and narrowed to u8; only result bytes 0-3 (from d14) are used
+ vdup.u16 d8, d12[0] @rows 0-3, bytes 0-7: DC from q2 (last 8 left + first 8 top)
+ vdup.u16 d9, d14[0] @rows 0-3, bytes 8-15: DC from q1 (last 8 top)
+ vdup.u16 d10, d14[1] @rows 4-7, bytes 0-7: DC from q0 (first 8 left)
+ vdup.u16 d11, d12[1] @rows 4-7, bytes 8-15: DC from q3 (first 8 left + last 8 top)
+ b str_pred
+
+top_available: @ONLY TOP AVAILABLE
+ ands r2, r4, #0x04 @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+ beq none_available
+
+ add r0, r0, #18 @r0 = pu1_src + 18, start of the 16 top bytes
+ vld1.u8 {q0}, [r0]
+ vmovl.u8 q1, d0 @q1 = first 8 bytes widened to u16
+ vmovl.u8 q2, d1 @q2 = last 8 bytes widened to u16
+ vadd.u16 d0, d2, d3 @fold q1 to 4 lanes
+ vadd.u16 d1, d4, d5 @fold q2 to 4 lanes
+ vpaddl.u32 q0, q0 @add the two 32-bit lanes of each d register; component sums sit in bits 0-15 and 16-31
+ vqrshrun.s16 d0, q0, #2 @rounded /4, narrowed: bytes 0-1 = DC of first half, bytes 4-5 = DC of second half
+ vdup.u16 d8, d0[0] @bytes 0-7 of each row
+ vdup.u16 d9, d0[2] @bytes 8-15 of each row
+ vmov q5, q4 @rows 4-7 are identical to rows 0-3
+ b str_pred
+
+left_available: @ONLY LEFT AVAILABLE
+ vld1.u8 {q0}, [r0] @q0 = 16 left bytes at pu1_src
+ vmovl.u8 q1, d0 @q1 = first 8 bytes widened to u16
+ vmovl.u8 q2, d1 @q2 = last 8 bytes widened to u16
+ vadd.u16 d0, d2, d3 @fold q1 to 4 lanes
+ vadd.u16 d1, d4, d5 @fold q2 to 4 lanes
+ vpaddl.u32 q0, q0 @add the two 32-bit lanes of each d register
+ vqrshrun.s16 d0, q0, #2 @rounded /4, narrowed: bytes 0-1 = DC of first half, bytes 4-5 = DC of second half
+ vdup.u16 q5, d0[0] @rows 4-7 take the DC of the first 8 left bytes
+ vdup.u16 q4, d0[2] @rows 0-3 take the DC of the last 8 left bytes
+ b str_pred
+
+none_available: @NONE AVAILABLE
+ vmov.u8 q4, #128 @every output byte is 128
+ vmov.u8 q5, #128
+
+str_pred:
+ vst1.8 {q4}, [r1], r3 @rows 0-3; r1 advances by dst_strd after each store
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q5}, [r1], r3 @rows 4-7
+ vst1.8 {q5}, [r1], r3
+ vst1.8 {q5}, [r1], r3
+ vst1.8 {q5}, [r1], r3
+
+ vpop {d8-d15} @restore the NEON registers saved on entry
+ ldmfd sp!, {r4, pc} @restore r4 and return
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read; r2 is reused as the loop counter)
+@ r3 => dst_strd
+@ stack => ui_neighboravailability (never loaded by this function)
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_horz_a9q
+@ Row k (k = 0..7) of the 8 x 16-byte output is the 2-byte pair at pu1_src[2*(7-k)] repeated 8 times.
+ih264_intra_pred_chroma_8x8_mode_horz_a9q:
+@ Leaf routine: only r0-r3 and q0-q2 are written, so nothing is pushed and
+@ the return is a plain "bx lr". src_strd and ui_neighboravailability are not read.
+
+ vld1.u8 {q0}, [r0] @q0 = 16 bytes at pu1_src (8 two-byte pairs)
+ mov r2, #6 @r2 = rows stored inside the loop (2 per pass, 3 passes)
+
+ vdup.u16 q1, d1[3] @q1 = pair 7 replicated across the row
+ vdup.u16 q2, d1[2] @q2 = pair 6 replicated across the row
+ vst1.8 {q1}, [r1], r3 @row 0; r1 advances by dst_strd
+
+loop_8x8_horz:
+ vext.8 q0, q0, q0, #12 @rotate q0 by 4 bytes so the next two lower pairs land in d1[3] and d1[2]
+ vst1.8 {q2}, [r1], r3 @store the row prepared before the rotate
+ vdup.u16 q1, d1[3]
+ subs r2, #2 @two rows are stored per pass
+ vdup.u16 q2, d1[2]
+ vst1.8 {q1}, [r1], r3
+ bne loop_8x8_horz
+
+@ q0 is not needed past this point, so no further rotate is performed.
+ vst1.8 {q2}, [r1], r3 @row 7 (pair 0)
+
+ bx lr @return; lr was never modified
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:vertical
+@*
+@* @par Description:
+@*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd (not read by this function)
+@ r3 => dst_strd
+@ stack => ui_neighboravailability (never loaded by this function)
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_vert_a9q
+@ Copies the 16 bytes at pu1_src + 18 into each of the 8 destination rows.
+ih264_intra_pred_chroma_8x8_mode_vert_a9q:
+@ Leaf routine: only r0, r1 and q0 are written, so no registers are saved and
+@ the stack is left untouched. src_strd and ui_neighboravailability are not read.
+
+ add r0, r0, #18 @r0 = pu1_src + 18, the same offset the DC routine uses for the top row
+ vld1.8 {q0}, [r0] @q0 = the 16 bytes to replicate
+
+ vst1.8 {q0}, [r1], r3 @rows 0-7; r1 advances by dst_strd after each store
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+
+ bx lr @return; lr was never modified
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_plane
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:PLANE
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
+ih264_intra_pred_chroma_8x8_mode_plane_a9q:
+@ Reads pu1_src[0..33] and two external coefficient tables, writes 8 rows of 16 bytes at pu1_dst. src_strd and the stack argument are not read.
+ stmfd sp!, {r4-r10, r12, lr} @save core registers and the return address (r5-r10 and r12 are written below)
+ vpush {d8-d15} @d8-d15 (q4-q7) are overwritten below, so preserve them for the caller
+
+@ Gather the four 8-byte neighbour groups used for the two gradient sums.
+ vld1.32 d0, [r0] @d0 = src[0..7]
+ add r10, r0, #10
+ vld1.32 d1, [r10] @d1 = src[10..17]
+ add r10, r10, #6 @r10 = src + 16
+ vrev64.16 d5, d0 @d5 = d0 with its four 16-bit pairs in reverse order
+ vld1.32 d2, [r10]! @d2 = src[16..23]; writeback leaves r10 = src + 24
+ add r10, r10, #2 @r10 = src + 26
+ vrev64.16 d7, d2 @d7 = d2 with its four 16-bit pairs in reverse order
+ vld1.32 d3, [r10] @d3 = src[26..33]
+ sub r5, r3, #8 @r5 = dst_strd - 8 (r5 is not read again in this function)
+ ldr r12, scratch_chroma_intrapred_addr1 @r12 = offset literal: coeffs1 - (scrlblc1 + 8)
+scrlblc1:
+ add r12, r12, pc @pc reads as scrlblc1 + 8 in ARM state, so r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs1
+ vsubl.u8 q5, d5, d1 @q5 = reversed src[0..7] - src[10..17], widened to 16 bit
+ vld1.64 {q4}, [r12] @q4 = 16 bytes of the coeffs1 table, used as 8 s16 weights (values not visible here; presumably 1,1,2,2,3,3,4,4 - TODO confirm)
+ vsubl.u8 q6, d3, d7 @q6 = src[26..33] - reversed src[16..23], widened to 16 bit
+ vmul.s16 q7, q5, q4 @weight the q5 differences
+ vmul.s16 q8, q6, q4 @weight the q6 differences
+ vuzp.16 q7, q8 @de-interleave: d14/d15 = even lanes of the q5/q6 products, d16/d17 = odd lanes of the q5/q6 products
+@ Two pairwise-add rounds leave the 4-lane total of each d register in all of its lanes.
+ vpadd.s16 d14, d14
+ vpadd.s16 d15, d15
+ vpadd.s16 d16, d16
+ vpadd.s16 d17, d17
+ vpadd.s16 d14, d14
+ vpadd.s16 d15, d15
+ vpadd.s16 d16, d16
+ vpadd.s16 d17, d17
+@ Scale each total: (34 * total + 32) >> 6.
+ mov r6, #34
+ vdup.16 q9, r6 @q9 = 34 in every 16-bit lane
+
+ vmull.s16 q11, d14, d18 @32-bit products of the four totals with 34
+ vmull.s16 q12, d15, d18
+ vmull.s16 q13, d16, d18
+ vmull.s16 q14, d17, d18
+
+ vrshrn.s32 d10, q11, #6 @d10 = scaled even-lane total of the q5 differences
+ vrshrn.s32 d12, q12, #6 @d12 = scaled even-lane total of the q6 differences
+ vrshrn.s32 d13, q13, #6 @d13 = scaled odd-lane total of the q5 differences
+ vrshrn.s32 d14, q14, #6 @d14 = scaled odd-lane total of the q6 differences
+
+@ Constant term per component: 16 * (src[0] + src[32]) and 16 * (src[1] + src[33]).
+ ldrb r6, [r0], #1 @r6 = src[0]; r0 = src + 1
+ add r10, r0, #31 @r10 = src + 32
+ ldrb r8, [r0], #1 @r8 = src[1]
+ ldrb r7, [r10], #1 @r7 = src[32]
+ ldrb r9, [r10], #1 @r9 = src[33]
+
+ add r6, r6, r7
+ add r8, r8, r9
+ lsl r6, r6, #4 @multiply by 16
+ lsl r8, r8, #4
+
+ vdup.16 q0, r6 @q0 = even-lane constant in every lane
+ vdup.16 q1, r8 @q1 = odd-lane constant in every lane
+ vdup.16 q2, d12[0] @q2 = even-lane slope from the q6 differences
+ vdup.16 q3, d10[0] @q3 = even-lane slope from the q5 differences
+
+ vdup.16 q12, d14[0] @q12 = odd-lane slope from the q6 differences
+ vdup.16 q13, d13[0] @q13 = odd-lane slope from the q5 differences
+ vzip.16 q2, q12 @q2 now alternates even-lane / odd-lane slope (multiplied by the per-column factors below)
+ vzip.16 q3, q13 @q3 now alternates even-lane / odd-lane slope (multiplied by the per-row factors below)
+ vzip.16 q0, q1 @q0 now alternates even-lane / odd-lane constant
+
+ ldr r12, scratch_intrapred_chroma_plane_addr1 @r12 = offset literal: coeffs2 - (scrlblc2 + 8)
+scrlblc2:
+ add r12, r12, pc @r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs2
+ vld1.64 {q4}, [r12] @q4 = 16 bytes of the coeffs2 table, used as 8 s16 position factors (values not visible here)
+ vmov.16 q5, q4 @q5 = copy of the factors
+ vmov q11, q4 @q11 = second copy, kept intact for the per-row lane broadcasts
+ vzip.16 q4, q5 @q4 = factors 0-3 each duplicated, q5 = factors 4-7 each duplicated (one per interleaved pair)
+
+ vmul.s16 q6, q2, q4 @column term for output pairs 0-3
+ vmul.s16 q8, q2, q5 @column term for output pairs 4-7
+ vadd.s16 q6, q0, q6 @q6 = constant + column term, pairs 0-3 (row-invariant)
+ vadd.s16 q8, q0, q8 @q8 = constant + column term, pairs 4-7 (row-invariant)
+
+@ Rows: for row k broadcast factor k from q11, add q3 * factor to q6/q8, then (x + 16) >> 5 with unsigned saturation to 8 bit.
+ vdup.16 q10, d22[0] @factor for row 0
+ vmul.s16 q2, q3, q10
+ vdup.16 q15, d22[1] @factor for row 1
+ vmul.s16 q9, q3, q10
+ vmul.s16 q7, q3, q15
+ vmul.s16 q4, q3, q15
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vadd.s16 q1, q6, q7
+ vqrshrun.s16 d28, q12, #5
+ vadd.s16 q13, q8, q4
+ vqrshrun.s16 d29, q0, #5
+ vdup.16 q10, d22[2] @factor for row 2
+ vst1.8 {q14}, [r1], r3 @store row 0; r1 advances by dst_strd
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @store row 1
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d22[3] @factor for row 3
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @store row 2
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vdup.16 q10, d23[0] @factor for row 4
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @store row 3
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d23[1] @factor for row 5
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @store row 4
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vdup.16 q10, d23[2] @factor for row 6
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @store row 5
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d23[3] @factor for row 7
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @store row 6
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vst1.8 {q14}, [r1], r3 @store row 7
+
+
+
+end_func_plane:
+
+
+ vpop {d8-d15} @restore the NEON registers saved on entry
+ ldmfd sp!, {r4-r10, r12, pc} @restore core registers and return
+
+
+
+