summaryrefslogtreecommitdiffstats
path: root/common/arm/ideint_cac_a9.s
diff options
context:
space:
mode:
Diffstat (limited to 'common/arm/ideint_cac_a9.s')
-rw-r--r--common/arm/ideint_cac_a9.s213
1 files changed, 213 insertions, 0 deletions
diff --git a/common/arm/ideint_cac_a9.s b/common/arm/ideint_cac_a9.s
new file mode 100644
index 0000000..964c5e6
--- /dev/null
+++ b/common/arm/ideint_cac_a9.s
@@ -0,0 +1,213 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@******************************************************************************
+@*
+@* @brief
+@* This file contains definitions of routines for combing artifact check (CAC)
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* - ideint_cac_8x8_a9()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+
+
+@******************************************************************************
+@*
+@* @brief Calculates Combing Artifact
+@*
+@* @par Description
+@* This function calculates the combing artifact check (CAC) for two given fields
+@*
+@* @param[in] pu1_top
+@* UWORD8 pointer to top field
+@*
+@* @param[in] pu1_bot
+@* UWORD8 pointer to bottom field
+@*
+@* @param[in] top_strd
+@* Top field stride
+@*
+@* @param[in] bot_strd
+@* Bottom field stride
+@*
+@* @returns
+@* 1 if the biased alt value is less than the adj value for either half of the 8x8 block, 0 otherwise
+@*
+@* @remarks
+@*
+@******************************************************************************
+
+    .global     ideint_cac_8x8_a9
+
+ideint_cac_8x8_a9:
+    @ In:  r0 = pu1_top, r1 = pu1_bot, r2 = top_strd, r3 = bot_strd
+    @ Out: r0 = 1 if biased alt < adj for either 4-column half, else 0
+    @ Leaf routine: only r0-r3 and caller-saved NEON registers (d0-d7,
+    @ d16-d31) are written, so no stack save/restore is required.
+
+    @ Load first row of top
+    vld1.u8     d28,    [r0],   r2
+
+    @ Load first row of bottom
+    vld1.u8     d29,    [r1],   r3
+
+    @ Load second row of top
+    vld1.u8     d30,    [r0],   r2
+
+    @ Load second row of bottom
+    vld1.u8     d31,    [r1],   r3
+
+    @ Calculate row based adj and alt values
+    @ Get row sums
+    vpaddl.u8   q0,     q14
+
+    vpaddl.u8   q1,     q15
+
+    vpaddl.u16  q0,     q0
+
+    vpaddl.u16  q1,     q1
+
+    @ q0 = {top0 left, top0 right, bot0 left, bot0 right} 4-pixel sums and
+    @ q1 holds the same for row 1. Each sum is at most 4 * 255, so q1 fits
+    @ in the upper halfword of every 32 bit lane of q0.
+    vshl.u32    q8,     q1,     #16
+    vorr.u32    q8,     q0,     q8
+    @ q8 now contains 8 sums
+
+    @ Load third row of top
+    vld1.u8     d24,    [r0],   r2
+
+    @ Load third row of bottom
+    vld1.u8     d25,    [r1],   r3
+
+    @ Load fourth row of top
+    vld1.u8     d26,    [r0],   r2
+
+    @ Load fourth row of bottom
+    vld1.u8     d27,    [r1],   r3
+
+    @ Get row sums
+    vpaddl.u8   q2,     q12
+
+    vpaddl.u8   q3,     q13
+
+    vpaddl.u16  q2,     q2
+
+    vpaddl.u16  q3,     q3
+    @ Both q2 and q3 have four 32 bit sums corresponding to last 4 rows
+    @ Pack q2 and q3 into a single register (sum does not exceed 16 bits)
+
+    vshl.u32    q9,     q3,     #16
+    vorr.u32    q9,     q2,     q9
+    @ q9 now contains 8 sums
+
+    @ Compute absolute diff between top and bottom row sums
+    vabd.u16    d16,    d16,    d17
+    vabd.u16    d17,    d18,    d19
+
+    @ RSUM_CSUM_THRESH
+    vmov.u16    q9,     #20
+
+    @ Eliminate values smaller than RSUM_CSUM_THRESH
+    vcge.u16    q10,    q8,     q9
+    vand.u16    q10,    q8,     q10
+    @ q10 now contains 8 absolute diff of sums above the threshold
+
+    @ Compute adj
+    vadd.u16    d20,    d20,    d21
+
+    @ d20 has four adj values for two sub-blocks
+
+    @ Compute alt
+    vabd.u32    q0,     q0,     q1
+    vabd.u32    q2,     q2,     q3
+
+    vadd.u32    q0,     q0,     q2
+    vadd.u32    d21,    d0,     d1
+    @ d21 has two values for two sub-blocks
+
+
+    @ Calculate column based adj and alt values
+
+    vrhadd.u8   q0,     q14,    q15
+    vrhadd.u8   q1,     q12,    q13
+    vrhadd.u8   q0,     q0,     q1
+
+    vabd.u8     d0,     d0,     d1
+
+    @ RSUM_CSUM_THRESH >> 2
+    @ Kept in d2 (q1 is dead here); d8-d15 are callee-saved under AAPCS
+    vmov.u8     d2,     #5
+
+    @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
+    vcge.u8     d1,     d0,     d2
+    vand.u8     d0,     d0,     d1
+    @ d0 now contains 8 absolute diff of sums above the threshold
+
+    vpaddl.u8   d0,     d0
+    vshl.u16    d0,     d0,     #2
+
+    @ Add row based adj
+    vadd.u16    d20,    d0,     d20
+
+    vpaddl.u16  d20,    d20
+    @ d20 now contains 2 adj values
+
+    @ Column based alt: rows 0 and 2 against rows 1 and 3 of both fields
+    vrhadd.u8   d0,     d28,    d29
+    vrhadd.u8   d2,     d24,    d25
+    vrhadd.u8   d0,     d0,     d2
+
+    vrhadd.u8   d1,     d30,    d31
+    vrhadd.u8   d3,     d26,    d27
+    vrhadd.u8   d1,     d1,     d3
+
+    vabd.u8     d0,     d0,     d1
+    vpaddl.u8   d0,     d0
+
+    vshl.u16    d0,     d0,     #2
+    vpaddl.u16  d0,     d0
+    vadd.u32    d21,    d0,     d21
+
+
+    @ d21 now contains 2 alt values
+
+    @ SAD_BIAS_MULT_SHIFT
+    vshr.u32    d0,     d21,    #3
+    vadd.u32    d21,    d21,    d0
+
+    @ SAD_BIAS_ADDITIVE >> 1
+    vmov.u32    d0,     #4
+    vadd.u32    d21,    d21,    d0
+
+    vclt.u32    d0,     d21,    d20         @ per-half mask: alt < adj
+    vpaddl.u32  d0,     d0                  @ fold both masks into one sum
+    @ Low word of the sum is non-zero when at least one mask was set
+    vmov.u32    r0,     d0[0]
+    cmp         r0,     #0
+    movne       r0,     #1
+    bx          lr