summaryrefslogtreecommitdiffstats
path: root/common/arm/ih264_intra_pred_luma_8x8_a9q.s
diff options
context:
space:
mode:
Diffstat (limited to 'common/arm/ih264_intra_pred_luma_8x8_a9q.s')
-rwxr-xr-xcommon/arm/ih264_intra_pred_luma_8x8_a9q.s1037
1 files changed, 1037 insertions, 0 deletions
diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
new file mode 100755
index 0000000..6da1c95
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
@@ -0,0 +1,1037 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_luma_8x8_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra 8x8 Luma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_a9q
+@* -ih264_intra_pred_luma_8x8_mode_dc_a9q
+@* -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
+@* -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+
+
+.text
+.p2align 2
+
+ .extern ih264_gai1_intrapred_luma_8x8_horz_u
+.hidden ih264_gai1_intrapred_luma_8x8_horz_u
+scratch_intrapred_addr_8x8:
+ .long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_ref_filtering
+@*
+@* @brief
+@* Reference sample filtering process for Intra_8x8 sample prediction
+@*
+@* @par Description:
+@* Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride [Not used]
+@*
+@* @param[in] dst_strd
+@* integer destination stride[Not used]
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels[Not used]
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+
+
+ .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
+
+ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15}
+
+
+ vld1.u8 {q0}, [r0]! @
+ vld1.u8 {q1}, [r0]
+ add r0, r0, #8 @
+ vext.8 q2, q0, q1, #1
+ vext.8 q3, q1, q1, #1
+ vext.8 q4, q2, q3, #1
+ vext.8 q5, q3, q3, #1
+ vld1.8 {d10[7]}, [r0] @ LOADING SRC[24] AGIN TO THE END FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2
+ vaddl.u8 q10, d0, d4
+ vaddl.u8 q7, d0, d0 @ SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2
+ vadd.u16 q7, q10, q7
+ vaddl.u8 q11, d1, d5
+ vqrshrun.s16 d14, q7, #2
+ vaddl.u8 q12, d4, d8
+ vaddl.u8 q13, d5, d9
+ vst1.8 {d14[0]}, [r1]!
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+ vaddl.u8 q9, d2, d6
+ vaddl.u8 q8, d6, d10
+ vqrshrun.s16 d4, q12, #2
+ vqrshrun.s16 d5, q13, #2
+ vadd.u16 q6, q8, q9
+ vst1.8 {q2}, [r1]!
+ vqrshrun.s16 d6, q6, #2
+ vst1.8 {d6}, [r1]
+
+
+end_func_ref_filt:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_a9q
+
+ih264_intra_pred_luma_8x8_mode_vert_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ add r0, r0, #9
+ vld1.8 d0, [r0]
+
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+ vst1.8 d0, [r1], r3
+
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_a9q
+
+ih264_intra_pred_luma_8x8_mode_horz_a9q:
+
+ stmfd sp!, {r14} @store register values to stack
+
+ vld1.u8 {d0}, [r0]
+ mov r2, #6
+
+ vdup.u8 d1, d0[7]
+ vdup.u8 d2, d0[6]
+ vst1.8 {d1}, [r1], r3
+
+loop_8x8_horz:
+ vext.8 d0, d0, d0, #6
+ vst1.8 {d2}, [r1], r3
+ vdup.u8 d1, d0[7]
+ subs r2, #2
+ vdup.u8 d2, d0[6]
+ vst1.8 {d1}, [r1], r3
+ bne loop_8x8_horz
+
+ vext.8 d0, d0, d0, #6
+ vst1.8 {d2}, [r1], r3
+
+ ldmfd sp!, {pc} @restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_dc_a9q
+
+ih264_intra_pred_luma_8x8_mode_dc_a9q:
+
+ stmfd sp!, {r4, r14} @store register values to stack
+ ldr r4, [sp, #8] @r4 => ui_neighboravailability
+
+ ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
+ beq top_available
+ ands r2, r4, #0x04 @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+ beq left_available
+
+ vld1.u8 {d0}, [r0] @BOTH LEFT AND TOP AVAILABLE
+ add r0, r0, #9
+ vld1.u8 {d1}, [r0]
+ vpaddl.u8 q0, q0
+ vadd.u16 d0, d0, d1
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vqrshrun.s16 d0, q0, #4
+ vdup.u8 d0, d0[0]
+ b str_pred
+
+top_available: @ONLY TOP AVAILABLE
+ ands r2, r4, #0x04 @CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
+ beq none_available
+
+ add r0, r0, #9
+ vld1.u8 {d0}, [r0]
+ vpaddl.u8 d0, d0
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vqrshrun.s16 d0, q0, #3
+ vdup.u8 d0, d0[0]
+ b str_pred
+
+left_available: @ONLY LEFT AVAILABLE
+ vld1.u8 {d0}, [r0]
+ vpaddl.u8 d0, d0
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vqrshrun.s16 d0, q0, #3
+ vdup.u8 d0, d0[0]
+ b str_pred
+
+none_available: @NONE AVAILABLE
+ vmov.u8 q0, #128
+
+str_pred:
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+ vst1.8 {d0}, [r1], r3
+
+ ldmfd sp!, {r4, pc} @Restoring registers from stack
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_diag_dl
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
+
+ih264_intra_pred_luma_8x8_mode_diag_dl_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ add r0, r0, #9
+ sub r5, r3, #4
+ add r6, r0, #15
+ vld1.8 {q0}, [r0]
+ vext.8 q2, q0, q0, #2
+ vext.8 q1, q0, q0, #1
+ vld1.8 {d5[6]}, [r6]
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @Adding for FILT121
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+
+ vqrshrun.s16 d4, q12, #2
+ vqrshrun.s16 d5, q13, #2
+ @Q2 has all FILT121 values
+ vst1.8 {d4}, [r1], r3
+ vext.8 q9, q2, q2, #1
+ vext.8 q8, q9, q9, #1
+ vst1.8 {d18}, [r1], r3
+ vext.8 q15, q8, q8, #1
+ vst1.8 {d16}, [r1], r3
+ vst1.8 {d30}, [r1], r3
+ vst1.32 {d4[1]}, [r1]!
+ vst1.32 {d5[0]}, [r1], r5
+ vst1.32 {d18[1]}, [r1]!
+ vst1.32 {d19[0]}, [r1], r5
+ vst1.32 {d16[1]}, [r1]!
+ vst1.32 {d17[0]}, [r1], r5
+ vst1.32 {d30[1]}, [r1]!
+ vst1.32 {d31[0]}, [r1], r5
+
+
+end_func_diag_dl:
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_diag_dr
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
+
+ih264_intra_pred_luma_8x8_mode_diag_dr_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+
+ vld1.u8 {q0}, [r0]
+ add r0, r0, #1
+ vld1.u8 {q1}, [r0]
+ vext.8 q2, q1, q1, #1
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @Adding for FILT121
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+ vqrshrun.s16 d4, q12, #2
+ vqrshrun.s16 d5, q13, #2
+ @Q2 has all FILT121 values
+ sub r5, r3, #4
+ vext.8 q9, q2, q2, #15
+ vst1.8 {d19}, [r1], r3
+ vext.8 q8, q9, q9, #15
+ vst1.8 {d17}, [r1], r3
+ vext.8 q15, q8, q8, #15
+ vst1.8 {d31}, [r1], r3
+ vst1.32 {d4[1]}, [r1]!
+ vst1.32 {d5[0]}, [r1], r5
+ vst1.32 {d18[1]}, [r1]!
+ vst1.32 {d19[0]}, [r1], r5
+ vst1.32 {d16[1]}, [r1]!
+ vst1.32 {d17[0]}, [r1], r5
+ vst1.32 {d30[1]}, [r1]!
+ vst1.32 {d31[0]}, [r1], r5
+ vst1.8 {d4}, [r1], r3
+
+end_func_diag_dr:
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert_r
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q
+
+ih264_intra_pred_luma_8x8_mode_vert_r_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ vld1.u8 {q0}, [r0]
+ add r0, r0, #1
+ vld1.u8 {q1}, [r0]
+ vext.8 q2, q1, q1, #1
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+
+ vqrshrun.s16 d4, q10, #1
+ vqrshrun.s16 d5, q11, #1
+ vqrshrun.s16 d6, q12, #2
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ sub r5, r3, #6
+ sub r6, r3, #4
+ vst1.8 {d5}, [r1], r3 @ row 0
+ vext.8 q9, q3, q3, #15
+ vmov.8 q11, q9
+ vext.8 q8, q2, q2, #1
+ vst1.8 {d19}, [r1], r3 @row 1
+
+ vmov.8 q15, q8
+ vext.8 q10, q2, q2, #15
+ vuzp.8 q8, q9
+ @row 2
+ vext.8 q14, q8, q8, #1
+ vst1.8 {d21}, [r1]
+ vst1.8 {d6[6]}, [r1], r3
+ @row 3
+
+ vst1.16 {d29[1]}, [r1]!
+ vst1.32 {d7[0]}, [r1]!
+ vst1.16 {d7[2]}, [r1], r5
+@row 4
+ vst1.16 {d19[1]}, [r1]!
+ vst1.32 {d5[0]}, [r1]!
+ vst1.16 {d5[2]}, [r1], r5
+
+@row 5
+ vext.8 q13, q9, q9, #1
+ vst1.16 {d17[1]}, [r1]!
+ vst1.32 {d23[0]}, [r1]!
+ vst1.16 {d23[2]}, [r1], r5
+
+
+@row 6
+ vst1.16 {d27[0]}, [r1]!
+ vst1.8 {d27[2]}, [r1]!
+ vst1.8 {d5[0]}, [r1]!
+ vst1.32 {d31[0]}, [r1], r6
+@row 7
+ vst1.32 {d29[0]}, [r1]!
+ vst1.32 {d7[0]}, [r1]!
+
+
+
+end_func_vert_r:
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz_d
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q
+
+ih264_intra_pred_luma_8x8_mode_horz_d_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15}
+
+ vld1.u8 {q0}, [r0]
+ add r0, r0, #1
+ vld1.u8 {q1}, [r0]
+ vext.8 q2, q1, q1, #1
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+
+ vqrshrun.s16 d4, q10, #1
+ vqrshrun.s16 d5, q11, #1
+ vqrshrun.s16 d6, q12, #2
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ vmov.8 q4, q2
+ vmov.8 q5, q3
+ sub r6, r3, #6
+ vtrn.8 q4, q5 @
+ vmov.8 q6, q4
+ vmov.8 q7, q5
+ sub r5, r3, #4
+ vtrn.16 q6, q7
+ vext.8 q8, q3, q3, #14
+ @ROW 0
+ vst1.8 {d17}, [r1]
+ vst1.16 {d10[3]}, [r1], r3
+
+ @ROW 1
+ vst1.32 {d14[1]}, [r1]!
+ vst1.32 {d7[0]}, [r1], r5
+ @ROW 2
+ vst1.16 {d10[2]}, [r1]!
+ vst1.32 {d14[1]}, [r1]!
+ vst1.16 {d7[0]}, [r1], r6
+ @ROW 3
+ vst1.32 {d12[1]}, [r1]!
+ vst1.32 {d14[1]}, [r1], r5
+ @ROW 4
+ vst1.16 {d14[1]}, [r1]!
+ vst1.32 {d12[1]}, [r1]!
+ vst1.16 {d14[2]}, [r1], r6
+ @ROW 5
+ vst1.32 {d14[0]}, [r1]!
+ vst1.32 {d12[1]}, [r1], r5
+ @ROW 6
+ vst1.16 {d10[0]}, [r1]!
+ vst1.16 {d8[1]}, [r1]!
+ vst1.16 {d14[1]}, [r1]!
+ vst1.16 {d12[2]}, [r1], r6
+ @ROW 7
+ vst1.32 {d12[0]}, [r1]!
+ vst1.32 {d14[0]}, [r1], r5
+
+end_func_horz_d:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert_l
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q
+
+ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
+
+ stmfd sp!, {r4-r12, r14} @Restoring registers from stack
+ vpush {d8-d15}
+ add r0, r0, #9
+ vld1.u8 {q0}, [r0]
+ add r0, r0, #1
+ vld1.u8 {q1}, [r0]
+ vext.8 q2, q1, q1, #1
+ vaddl.u8 q10, d0, d2
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+
+ vqrshrun.s16 d4, q10, #1
+ vqrshrun.s16 d5, q11, #1
+ vqrshrun.s16 d6, q12, #2
+ vext.8 q4, q2, q2, #1
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+
+ vext.8 q5, q3, q3, #1
+ @ROW 0,1
+ vst1.8 {d4}, [r1], r3
+ vst1.8 {d6}, [r1], r3
+
+ vext.8 q6, q4, q4, #1
+ vext.8 q7, q5, q5, #1
+ @ROW 2,3
+ vst1.8 {d8}, [r1], r3
+ vst1.8 {d10}, [r1], r3
+
+ vext.8 q8, q6, q6, #1
+ vext.8 q9, q7, q7, #1
+ @ROW 4,5
+ vst1.8 {d12}, [r1], r3
+ vst1.8 {d14}, [r1], r3
+ @ROW 6,7
+ vst1.8 {d16}, [r1], r3
+ vst1.8 {d18}, [r1], r3
+
+end_func_vert_l:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz_u
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q
+
+ih264_intra_pred_luma_8x8_mode_horz_u_a9q:
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15}
+
+ vld1.u8 {q0}, [r0]
+ vld1.u8 {d1[7]}, [r0]
+ vext.8 q1, q0, q0, #1
+ vext.8 q2, q1, q1, #1
+ @ LOADING V TABLE
+ ldr r12, scratch_intrapred_addr_8x8
+scrlb8x8l2:
+ add r12, r12, pc
+ vaddl.u8 q10, d0, d2
+ vaddl.u8 q11, d1, d3
+ vaddl.u8 q12, d2, d4
+ vaddl.u8 q13, d3, d5
+ vadd.u16 q12, q10, q12
+ vadd.u16 q13, q11, q13
+ vld1.u8 {q5}, [r12]
+ vqrshrun.s16 d4, q10, #1
+ vqrshrun.s16 d5, q11, #1
+ vqrshrun.s16 d6, q12, #2
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ vtbl.u8 d12, {q2, q3}, d10
+ vdup.u8 q7, d5[7] @
+ vtbl.u8 d13, {q2, q3}, d11
+ vext.8 q8, q6, q7, #2
+ vext.8 q9, q8, q7, #2
+ vst1.8 {d12}, [r1], r3
+ vext.8 q10, q9, q7, #2
+ vst1.8 {d16}, [r1], r3
+ vst1.8 {d18}, [r1], r3
+ vst1.8 {d20}, [r1], r3
+ vst1.8 {d13}, [r1], r3
+ vst1.8 {d17}, [r1], r3
+ vst1.8 {d19}, [r1], r3
+ vst1.8 {d21}, [r1], r3
+
+
+end_func_horz_u:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+