1 files changed, 213 insertions, 0 deletions
diff --git a/common/arm/ideint_cac_a9.s b/common/arm/ideint_cac_a9.s
new file mode 100644
index 0000000..964c5e6
--- /dev/null
+++ b/common/arm/ideint_cac_a9.s
@@ -0,0 +1,213 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@******************************************************************************
+@*
+@* @brief
+@*  This file contains definitions of routines for combing artifact check (CAC)
+@*
+@* @author
+@*  Ittiam
+@*
+@* @par List of Functions:
+@*  - ideint_cac_8x8_a9()
+@*
+@* @remarks
+@*  None
+@*
+@*******************************************************************************
+
+
+@******************************************************************************
+@*
+@*  @brief Calculates Combing Artifact
+@*
+@*  @par   Description
+@*   This function calculates the combing artifact check (CAC) for two given
+@*   fields. Four rows of eight pixels are read from each field and handled as
+@*   two 4 pixel wide sub-blocks. For each sub-block an "adj" value (thresholded
+@*   differences between top field and bottom field sums) and an "alt" value
+@*   (differences between rows 0/2 and rows 1/3 of the fields) are accumulated.
+@*
+@* @param[in] pu1_top
+@*  UWORD8 pointer to top field (r0)
+@*
+@* @param[in] pu1_bot
+@*  UWORD8 pointer to bottom field (r1)
+@*
+@* @param[in] top_strd
+@*  Top field stride (r2)
+@*
+@* @param[in] bot_strd
+@*  Bottom field stride (r3)
+@*
+@* @returns
+@*  r0 = 1 if (alt + (alt >> 3) + 4) < adj for either sub-block, else 0
+@*
+@* @remarks
+@*  No NEON register outside q0-q3 and q8-q15 is written. d8-d15 (q4-q7) are
+@*  callee-saved under the AAPCS and are deliberately not used.
+@*
+@******************************************************************************
+
+    .global ideint_cac_8x8_a9
+
+ideint_cac_8x8_a9:
+
+    @ Save core registers and the return address
+    stmfd       sp!,    {r4-r10, lr}
+
+    @ Load rows 0 and 1 of both fields
+    @ q14 = {d28: top row 0, d29: bottom row 0}
+    @ q15 = {d30: top row 1, d31: bottom row 1}
+    vld1.u8     d28,    [r0],   r2
+    vld1.u8     d29,    [r1],   r3
+    vld1.u8     d30,    [r0],   r2
+    vld1.u8     d31,    [r1],   r3
+
+    @ Calculate row based adj and alt values
+    @ Get row sums (each 8 pixel row is reduced to two sums of 4 pixels)
+    vpaddl.u8   q0,     q14
+    vpaddl.u8   q1,     q15
+    vpaddl.u16  q0,     q0
+    vpaddl.u16  q1,     q1
+
+    @ q0 (row 0) and q1 (row 1) each have four 32 bit sums: two for the top
+    @ field row followed by two for the bottom field row
+    @ Pack q0 and q1 into a single register (sum does not exceed 16bits)
+    vshl.u32    q8,     q1,     #16
+    vorr.u32    q8,     q0,     q8
+    @ q8 now contains 8 sums: d16 from the top field, d17 from the bottom field
+    @ (16 bit lanes alternate between row 0 and row 1 sums)
+
+    @ Load rows 2 and 3 of both fields
+    @ q12 = {d24: top row 2, d25: bottom row 2}
+    @ q13 = {d26: top row 3, d27: bottom row 3}
+    vld1.u8     d24,    [r0],   r2
+    vld1.u8     d25,    [r1],   r3
+    vld1.u8     d26,    [r0],   r2
+    vld1.u8     d27,    [r1],   r3
+
+    @ Get row sums
+    vpaddl.u8   q2,     q12
+    vpaddl.u8   q3,     q13
+    vpaddl.u16  q2,     q2
+    vpaddl.u16  q3,     q3
+
+    @ q2 (row 2) and q3 (row 3) have the corresponding four 32 bit sums
+    @ Pack q2 and q3 into a single register (sum does not exceed 16bits)
+    vshl.u32    q9,     q3,     #16
+    vorr.u32    q9,     q2,     q9
+    @ q9 now contains 8 sums: d18 from the top field, d19 from the bottom field
+    @ (16 bit lanes alternate between row 2 and row 3 sums)
+
+    @ Compute absolute diff between top and bottom row sums
+    vabd.u16    d16,    d16,    d17
+    vabd.u16    d17,    d18,    d19
+
+    @ RSUM_CSUM_THRESH
+    vmov.u16    q9,     #20
+
+    @ Eliminate values smaller than RSUM_CSUM_THRESH
+    vcge.u16    q10,    q8,     q9
+    vand.u16    q10,    q8,     q10
+    @ q10 now contains 8 absolute diff of sums above the threshold
+
+
+    @ Compute adj
+    vadd.u16    d20,    d20,    d21
+    @ d20 has four adj values for two sub-blocks
+
+    @ Compute alt: absolute diff between row 0/1 sums and between row 2/3 sums
+    vabd.u32    q0,     q0,     q1
+    vabd.u32    q2,     q2,     q3
+
+    vadd.u32    q0,     q0,     q2
+    vadd.u32    d21,    d0,     d1
+    @ d21 has two values for two sub-blocks
+
+
+    @ Calculate column based adj and alt values
+
+    @ Average the four rows of each field: d0 = top field, d1 = bottom field
+    vrhadd.u8   q0,     q14,    q15
+    vrhadd.u8   q1,     q12,    q13
+    vrhadd.u8   q0,     q0,     q1
+
+    vabd.u8     d0,     d0,     d1
+
+    @ RSUM_CSUM_THRESH >> 2
+    @ d2 is free here (q1 is dead after the averaging above); a caller-saved
+    @ register is used so that callee-saved d8-d15 are never clobbered
+    vmov.u8     d2,     #5
+
+    @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
+    vcge.u8     d1,     d0,     d2
+    vand.u8     d0,     d0,     d1
+    @ d0 now contains 8 absolute diff of column averages above the threshold
+
+    @ Sum adjacent column pairs and multiply by 4
+    vpaddl.u8   d0,     d0
+    vshl.u16    d0,     d0,     #2
+
+    @ Add row based adj
+    vadd.u16    d20,    d0,     d20
+
+    vpaddl.u16  d20,    d20
+    @ d20 now contains 2 adj values
+
+
+    @ Average rows 0 and 2 of both fields into d0
+    vrhadd.u8   d0,     d28,    d29
+    vrhadd.u8   d2,     d24,    d25
+    vrhadd.u8   d0,     d0,     d2
+
+    @ Average rows 1 and 3 of both fields into d1
+    vrhadd.u8   d1,     d30,    d31
+    vrhadd.u8   d3,     d26,    d27
+    vrhadd.u8   d1,     d1,     d3
+
+    @ Column based alt: absolute diff of the two averages, summed per sub-block
+    vabd.u8     d0,     d0,     d1
+    vpaddl.u8   d0,     d0
+
+    vshl.u16    d0,     d0,     #2
+    vpaddl.u16  d0,     d0
+    vadd.u32    d21,    d0,     d21
+    @ d21 now contains 2 alt values
+
+    @ SAD_BIAS_MULT_SHIFT: alt += alt >> 3
+    vshr.u32    d0,     d21,    #3
+    vadd.u32    d21,    d21,    d0
+
+    @ SAD_BIAS_ADDITIVE >> 1
+    vmov.u32    d0,     #4
+    vadd.u32    d21,    d21,    d0
+
+    @ Each 32 bit lane of d0 becomes all ones where biased alt < adj, else zero
+    vclt.u32    d0,     d21,    d20
+    vpaddl.u32  d0,     d0
+
+    @ Return 1 if either sub-block satisfied the comparison, else 0
+    vmov.u32    r0,     d0[0]
+    cmp         r0,     #0
+    movne       r0,     #1
+
+    ldmfd       sp!,    {r4-r10, pc}