diff options
author | Harish Mahendrakar <harish.mahendrakar@ittiam.com> | 2015-08-13 10:59:13 +0530 |
---|---|---|
committer | Marco Nelissen <marcone@google.com> | 2015-10-16 20:15:49 +0000 |
commit | 8520690e241ad92a07893e198b2df417fe045d79 (patch) | |
tree | 1fd034f495a58663a5423622c0112a0ec63239c7 | |
parent | 903fd2be3063609dd3cd7ad0691de46c1b52b7d1 (diff) | |
download | android_external_libmpeg2-8520690e241ad92a07893e198b2df417fe045d79.tar.gz android_external_libmpeg2-8520690e241ad92a07893e198b2df417fe045d79.tar.bz2 android_external_libmpeg2-8520690e241ad92a07893e198b2df417fe045d79.zip |
Added deinterlacer
Decoder output is deinterlaced if a picture is signalled as interlaced
Added SIMD optimizations for arm, armv8 and x86/x64
Bug: 20932810
Change-Id: I6079922f4fc8f1d3680e5169a4d8e70efe8ea471
58 files changed, 6457 insertions, 27 deletions
diff --git a/common/arm/icv_platform_macros.h b/common/arm/icv_platform_macros.h new file mode 100644 index 0000000..db092a3 --- /dev/null +++ b/common/arm/icv_platform_macros.h @@ -0,0 +1,115 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_platform_macros.h +* +* @brief +* This header files contains all the platform/toolchain specific macros +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_PLATFORM_MACROS_H__ +#define __ICV_PLATFORM_MACROS_H__ + +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return (__builtin_clz(u4_word)); + else + return 32; +} + +static __inline UWORD32 CLZNZ(UWORD32 u4_word) +{ + ASSERT(u4_word); + return (__builtin_clz(u4_word)); +} + +static __inline WORD32 CLIP_U8(WORD32 x) +{ + asm("usat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S8(WORD32 x) +{ + asm("ssat %0, #8, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U12(WORD32 x) +{ + asm("usat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_S12(WORD32 x) +{ + asm("ssat %0, #12, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline WORD32 CLIP_U16(WORD32 x) +{ + asm("usat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} +static __inline WORD32 CLIP_S16(WORD32 x) +{ + asm("ssat %0, #16, %1" : "=r"(x) : "r"(x)); + return x; +} + +static __inline UWORD32 ITT_BIG_ENDIAN(UWORD32 x) +{ + asm("rev %0, %1" : "=r"(x) : "r"(x)); + return x; +} + +#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");} + +#define PREFETCH(x) __builtin_prefetch(x); + +#define DATA_SYNC() __sync_synchronize() + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) (((shift) > 0) ? ( (val) >> (shift)) : ((val) << (-(shift)))) +#define SHL_NEG(val,shift) (((shift) > 0) ? 
( (val) >> (-(shift))) : ((val) << (shift))) + +#define INLINE inline + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + + +#endif /* __ICV_PLATFORM_MACROS_H__ */ diff --git a/common/arm/icv_sad_a9.s b/common/arm/icv_sad_a9.s new file mode 100644 index 0000000..097070e --- /dev/null +++ b/common/arm/icv_sad_a9.s @@ -0,0 +1,103 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ + + +@****************************************************************************** +@* +@* +@* @brief +@* This file contains definitions of routines for SAD caclulation +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* - icv_sad_8x4_a9() +@* +@* @remarks +@* None +@* +@******************************************************************************* + + +@****************************************************************************** +@* +@* @brief computes distortion (SAD) between 2 8x4 blocks +@* +@* +@* @par Description +@* This functions computes SAD between 2 8x4 blocks. 
+@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] pu1_ref +@* UWORD8 pointer to the reference buffer +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] ref_strd +@* integer reference stride +@* +@* @param[in] wd +@* Width (assumed to be 8) +@* +@* @param[in] ht +@* Height (assumed to be 4) +@* +@* @returns +@* SAD value in r0 +@* +@* @remarks +@* +@****************************************************************************** + + .global icv_sad_8x4_a9 + +icv_sad_8x4_a9: + + push {lr} + + vld1.8 d4, [r0], r2 + vld1.8 d5, [r1], r3 + + vld1.8 d6, [r0], r2 + vabdl.u8 q0, d5, d4 + + vld1.8 d7, [r1], r3 + vabal.u8 q0, d7, d6 + + vld1.8 d4, [r0], r2 + vld1.8 d5, [r1], r3 + + vld1.8 d6, [r0], r2 + vabal.u8 q0, d5, d4 + + vld1.8 d7, [r1], r3 + vabal.u8 q0, d7, d6 + + vadd.i16 d0, d1, d0 + vpaddl.u16 d0, d0 + vpaddl.u32 d0, d0 + + vmov.32 r0, d0[0] + + pop {pc} diff --git a/common/arm/icv_variance_a9.s b/common/arm/icv_variance_a9.s new file mode 100644 index 0000000..00795e7 --- /dev/null +++ b/common/arm/icv_variance_a9.s @@ -0,0 +1,120 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ + + +@****************************************************************************** +@* +@* +@* @brief +@* This file contains definitions of routines for variance caclulation +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* - icv_variance_8x4_a9() +@* +@* @remarks +@* None +@* +@******************************************************************************* + + +@****************************************************************************** +@* +@* @brief computes variance of a 8x4 block +@* +@* +@* @par Description +@* This functions computes variance of a 8x4 block +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[in] src_strd +@* integer source stride +@* +@* @param[in] wd +@* Width (assumed to be 8) +@* +@* @param[in] ht +@* Height (assumed to be 4) +@* +@* @returns +@* variance value in r0 +@* +@* @remarks +@* +@****************************************************************************** + + .global icv_variance_8x4_a9 + +icv_variance_8x4_a9: + + push {lr} + + @ Load 8x4 source + vld1.8 d0, [r0], r1 + vld1.8 d1, [r0], r1 + vld1.8 d2, [r0], r1 + vld1.8 d3, [r0], r1 + + @ Calculate Sum(values) + vaddl.u8 q2, d0, d1 + vaddl.u8 q3, d2, d3 + vadd.u16 q2, q2, q3 + + vadd.u16 d4, d4, d5 + vpadd.u16 d4, d4, d4 + vpadd.u16 d4, d4, d4 + + @ Calculate SumOfSquares + vmull.u8 q10, d0, d0 + vmull.u8 q11, d1, d1 + vmull.u8 q12, d2, d2 + vmull.u8 q13, d3, d3 + + vaddl.u16 q10, d20, d21 + vaddl.u16 q11, d22, d23 + vaddl.u16 q12, d24, d25 + vaddl.u16 q13, d26, d27 + + vadd.u32 q10, q10, q11 + vadd.u32 q11, q12, q13 + vadd.u32 q10, q10, q11 + vadd.u32 d20, d20, d21 + vpadd.u32 d20, d20, d20 + + @ Sum(values) + vmov.u16 r0, d4[0] + + @ SumOfSquares + vmov.u32 r1, d20[0] + + @ SquareOfSums + mul r3, r0, r0 + + @ SumOfSquares * 8 * 4 - SquareOfSums + rsb r0, r3, r1, LSL #5 + + @ Divide by 32 * 32 + + mov r0, r0, ASR #10 + pop {pc} diff --git a/common/arm/ideint_cac_a9.s b/common/arm/ideint_cac_a9.s new file mode 100644 
index 0000000..964c5e6 --- /dev/null +++ b/common/arm/ideint_cac_a9.s @@ -0,0 +1,213 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +@*/ + +@****************************************************************************** +@* +@* @brief +@* This file contains definitions of routines for spatial filter +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* - ideint_cac_8x8_a9() +@* +@* @remarks +@* None +@* +@******************************************************************************* + + +@****************************************************************************** +@* +@* @brief Calculates Combing Artifact +@* +@* @par Description +@* This functions calculates combing artifact check (CAC) for given two fields +@* +@* @param[in] pu1_top +@* UWORD8 pointer to top field +@* +@* @param[in] pu1_bot +@* UWORD8 pointer to bottom field +@* +@* @param[in] top_strd +@* Top field stride +@* +@* @param[in] bot_strd +@* Bottom field stride +@* +@* @returns +@* None +@* +@* @remarks +@* +@****************************************************************************** + + .global ideint_cac_8x8_a9 + +ideint_cac_8x8_a9: + + stmfd sp!, 
{r4-r10, lr} + + @ Load first row of top + vld1.u8 d28, [r0], r2 + + @ Load first row of bottom + vld1.u8 d29, [r1], r3 + + @ Load second row of top + vld1.u8 d30, [r0], r2 + + @ Load second row of bottom + vld1.u8 d31, [r1], r3 + + + @ Calculate row based adj and alt values + @ Get row sums + vpaddl.u8 q0, q14 + + vpaddl.u8 q1, q15 + + vpaddl.u16 q0, q0 + + vpaddl.u16 q1, q1 + + @ Both q0 and q1 have four 32 bit sums corresponding to first 4 rows + @ Pack q0 and q1 into a single register (sum does not exceed 16bits) + + vshl.u32 q8, q1, #16 + vorr.u32 q8, q0, q8 + @ q8 now contains 8 sums + + @ Load third row of top + vld1.u8 d24, [r0], r2 + + @ Load third row of bottom + vld1.u8 d25, [r1], r3 + + @ Load fourth row of top + vld1.u8 d26, [r0], r2 + + @ Load fourth row of bottom + vld1.u8 d27, [r1], r3 + + @ Get row sums + vpaddl.u8 q2, q12 + + vpaddl.u8 q3, q13 + + vpaddl.u16 q2, q2 + + vpaddl.u16 q3, q3 + @ Both q2 and q3 have four 32 bit sums corresponding to last 4 rows + @ Pack q2 and q3 into a single register (sum does not exceed 16bits) + + vshl.u32 q9, q3, #16 + vorr.u32 q9, q2, q9 + @ q9 now contains 8 sums + + @ Compute absolute diff between top and bottom row sums + vabd.u16 d16, d16, d17 + vabd.u16 d17, d18, d19 + + @ RSUM_CSUM_THRESH + vmov.u16 q9, #20 + + @ Eliminate values smaller than RSUM_CSUM_THRESH + vcge.u16 q10, q8, q9 + vand.u16 q10, q8, q10 + @ q10 now contains 8 absolute diff of sums above the threshold + + + @ Compute adj + vadd.u16 d20, d20, d21 + + @ d20 has four adj values for two sub-blocks + + @ Compute alt + vabd.u32 q0, q0, q1 + vabd.u32 q2, q2, q3 + + vadd.u32 q0, q0, q2 + vadd.u32 d21, d0, d1 + @ d21 has two values for two sub-blocks + + + @ Calculate column based adj and alt values + + vrhadd.u8 q0, q14, q15 + vrhadd.u8 q1, q12, q13 + vrhadd.u8 q0, q0, q1 + + vabd.u8 d0, d0, d1 + + @ RSUM_CSUM_THRESH >> 2 + vmov.u8 d9, #5 + + @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2 + vcge.u8 d1, d0, d9 + vand.u8 d0, d0, d1 + @ d0 now 
contains 8 absolute diff of sums above the threshold + + + vpaddl.u8 d0, d0 + vshl.u16 d0, d0, #2 + + @ Add row based adj + vadd.u16 d20, d0, d20 + + vpaddl.u16 d20, d20 + @ d20 now contains 2 adj values + + + vrhadd.u8 d0, d28, d29 + vrhadd.u8 d2, d24, d25 + vrhadd.u8 d0, d0, d2 + + vrhadd.u8 d1, d30, d31 + vrhadd.u8 d3, d26, d27 + vrhadd.u8 d1, d1, d3 + + vabd.u8 d0, d0, d1 + vpaddl.u8 d0, d0 + + vshl.u16 d0, d0, #2 + vpaddl.u16 d0, d0 + vadd.u32 d21, d0, d21 + + + @ d21 now contains 2 alt values + + @ SAD_BIAS_MULT_SHIFT + vshr.u32 d0, d21, #3 + vadd.u32 d21, d21, d0 + + @ SAD_BIAS_ADDITIVE >> 1 + vmov.u32 d0, #4 + vadd.u32 d21, d21, d0 + + vclt.u32 d0, d21, d20 + vpaddl.u32 d0, d0 + + vmov.u32 r0, d0[0] + cmp r0, #0 + movne r0, #1 + ldmfd sp!, {r4-r10, pc} diff --git a/common/arm/ideint_function_selector.c b/common/arm/ideint_function_selector.c new file mode 100644 index 0000000..920a8eb --- /dev/null +++ b/common/arm/ideint_function_selector.c @@ -0,0 +1,134 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ih264e_init_function_ptr +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + +/** +******************************************************************************* +* +* @brief +* Call corresponding function pointer initialization function +* +* @par Description +* Call corresponding function pointer initialization function +* +* @param[in] ps_ctxt +* Context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr(ctxt_t *ps_ctxt) +{ + ideint_init_function_ptr_generic(ps_ctxt); + + switch(ps_ctxt->s_params.e_arch) + { +#if defined(ARMV8) + default: + ideint_init_function_ptr_av8(ps_ctxt); + break; +#elif !defined(DISABLE_NEON) + case ICV_ARM_NONEON: + break; + case ICV_ARM_A5: + case ICV_ARM_A7: + case ICV_ARM_A9: + case ICV_ARM_A15: + case ICV_ARM_A9Q: + default: + ideint_init_function_ptr_a9(ps_ctxt); + break; +#else + default: + break; +#endif + } + +} + 
+/** +******************************************************************************* +* +* @brief Return the default architecture for the deinterlacer's execution environment +* +* @par Description: This routine returns the default architecture +* (ICV_ARM_A9Q) assumed for the environment in which the deinterlacer runs +* +* @param[in] void +* +* @returns ICV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +ICV_ARCH_T ideint_default_arch(void) +{ + return ICV_ARM_A9Q; +} + diff --git a/common/arm/ideint_function_selector_a9.c b/common/arm/ideint_function_selector_a9.c new file mode 100644 index 0000000..58939c9 --- /dev/null +++ b/common/arm/ideint_function_selector_a9.c @@ -0,0 +1,92 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ih264e_init_function_ptr +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" + + +/** +******************************************************************************* +* +* @brief +* Initialize function pointers based on the architecture +* +* @par Description +* Initialize function pointers based on the architecture +* +* @param[in] ps_ctxt +* Context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr_a9(ctxt_t *ps_ctxt) +{ + ps_ctxt->pf_sad_8x4 = icv_sad_8x4_a9; + ps_ctxt->pf_variance_8x4 = icv_variance_8x4_a9; + ps_ctxt->pf_spatial_filter = ideint_spatial_filter_a9; + ps_ctxt->pf_cac_8x8 = ideint_cac_8x8_a9; + return; +} + diff --git a/common/arm/ideint_function_selector_av8.c b/common/arm/ideint_function_selector_av8.c new file mode 100644 index 0000000..7e433c4 --- /dev/null +++ b/common/arm/ideint_function_selector_av8.c @@ -0,0 +1,92 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ih264e_init_function_ptr +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" + + +/** 
+******************************************************************************* +* +* @brief +* Initialize function pointers based on the architecture +* +* @par Description +* Initialize function pointers based on the architecture +* +* @param[in] ps_ctxt +* Context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr_av8(ctxt_t *ps_ctxt) +{ + ps_ctxt->pf_sad_8x4 = icv_sad_8x4_av8; + ps_ctxt->pf_variance_8x4 = icv_variance_8x4_av8; + ps_ctxt->pf_spatial_filter = ideint_spatial_filter_av8; + ps_ctxt->pf_cac_8x8 = ideint_cac_8x8_av8; + return; +} + diff --git a/common/arm/ideint_spatial_filter_a9.s b/common/arm/ideint_spatial_filter_a9.s new file mode 100644 index 0000000..f7fa796 --- /dev/null +++ b/common/arm/ideint_spatial_filter_a9.s @@ -0,0 +1,223 @@ +@/****************************************************************************** +@ * +@ * Copyright (C) 2015 The Android Open Source Project +@ * +@ * Licensed under the Apache License, Version 2.0 (the "License"); +@ * you may not use this file except in compliance with the License. +@ * You may obtain a copy of the License at: +@ * +@ * http://www.apache.org/licenses/LICENSE-2.0 +@ * +@ * Unless required by applicable law or agreed to in writing, software +@ * distributed under the License is distributed on an "AS IS" BASIS, +@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ * See the License for the specific language governing permissions and +@ * limitations under the License. +@ * +@ ***************************************************************************** +@ * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +@*/ + +@****************************************************************************** +@* +@* @brief +@* This file contains definitions of routines for spatial filter +@* +@* @author +@* Ittiam +@* +@* @par List of Functions: +@* - ideint_spatial_filter_a9() +@* +@* @remarks +@* None +@* +@******************************************************************************* + + +@****************************************************************************** +@* +@* @brief Performs spatial filtering +@* +@* @par Description +@* This function performs edge adaptive spatial filtering on an 8x8 block +@* +@* @param[in] pu1_src +@* UWORD8 pointer to the source +@* +@* @param[out] pu1_out +@* UWORD8 pointer to the destination +@* +@* @param[in] src_strd +@* source stride +@* +@* @param[in] out_strd +@* destination stride +@* +@* @returns +@* None +@* +@* @remarks +@* +@****************************************************************************** + + .global ideint_spatial_filter_a9 + +ideint_spatial_filter_a9: + + stmfd sp!, {r4-r10, lr} + + vmov.u16 q8, #0 + vmov.u16 q9, #0 + vmov.u16 q10, #0 + + @ Backup r0 + mov r10, r0 + + @ Load from &pu1_row_1[0] + sub r5, r0, #1 + vld1.8 d0, [r0], r2 + + @ Load from &pu1_row_1[-1] + vld1.8 d1, [r5] + add r5, r5, #2 + + @ Load from &pu1_row_1[1] + vld1.8 d2, [r5] + + @ Number of rows + mov r4, #4 + + @ EDGE_BIAS_0 + vmov.u32 d30, #5 + + @ EDGE_BIAS_1 + vmov.u32 d31, #7 + +detect_edge: + @ Load from &pu1_row_2[0] + sub r5, r0, #1 + vld1.8 d3, [r0], r2 + + @ Load from &pu1_row_2[-1] + vld1.8 d4, [r5] + add r5, r5, #2 + + @ Load from &pu1_row_2[1] + vld1.8 d5, [r5] + + @ Calculate absolute differences + @ pu1_row_1[i] - pu1_row_2[i] + vabal.u8 q8, d0, d3 + + @ pu1_row_1[i - 1] - pu1_row_2[i + 1] + vabal.u8 q9, d1, d5 + + @ pu1_row_1[i + 1] - pu1_row_2[i - 1] + vabal.u8 q10, d4, d2 + + vmov d0, d3 + vmov d1, d4 + vmov d2, d5 + + subs r4, r4, #1 + bgt detect_edge + + @ Calculate sum of absolute differences for each edge + 
vpadd.u16 d16, d16, d17 + vpadd.u16 d18, d18, d19 + vpadd.u16 d20, d20, d21 + + vpaddl.u16 d16, d16 + vpaddl.u16 d18, d18 + vpaddl.u16 d20, d20 + + @ adiff[0] *= EDGE_BIAS_0; + vmul.u32 d16, d16, d30 + + @ adiff[1] *= EDGE_BIAS_1; + vmul.u32 d18, d18, d31 + + @ adiff[2] *= EDGE_BIAS_1; + vmul.u32 d20, d20, d31 + + @ Move the differences to ARM registers + + + @ Compute shift for first half of the block +compute_shift_1: + vmov.u32 r5, d16[0] + vmov.u32 r6, d18[0] + vmov.u32 r7, d20[0] + + @ Compute shift + mov r8, #0 + + @ adiff[2] <= adiff[1] + cmp r7, r6 + bgt dir_45_gt_135_1 + + @ adiff[2] <= adiff[0] + cmp r7, r5 + movle r8, #1 + + b compute_shift_2 +dir_45_gt_135_1: + + @ adiff[1] <= adiff[0] + cmp r6, r5 + @ Move -1 if less than or equal to + mvnle r8, #0 + + +compute_shift_2: + @ Compute shift for first half of the block + vmov.u32 r5, d16[1] + vmov.u32 r6, d18[1] + vmov.u32 r7, d20[1] + + @ Compute shift + mov r9, #0 + + @ adiff[2] <= adiff[1] + cmp r7, r6 + bgt dir_45_gt_135_2 + + @ adiff[2] <= adiff[0] + cmp r7, r5 + movle r9, #1 + + b interpolate +dir_45_gt_135_2: + + @ adiff[1] <= adiff[0] + cmp r6, r5 + + @ Move -1 if less than or equal to + mvnle r9, #0 + +interpolate: + add r4, r10, r8 + add r5, r10, r2 + sub r5, r5, r8 + + add r10, r10, #4 + add r6, r10, r9 + add r7, r10, r2 + sub r7, r7, r9 + mov r8, #4 + +filter_loop: + vld1.u32 d0[0], [r4], r2 + vld1.u32 d2[0], [r5], r2 + + vld1.u32 d0[1], [r6], r2 + vld1.u32 d2[1], [r7], r2 + + vrhadd.u8 d4, d0, d2 + vst1.u32 d4, [r1], r3 + + subs r8, #1 + bgt filter_loop + + ldmfd sp!, {r4-r10, pc} diff --git a/common/armv8/icv_platform_macros.h b/common/armv8/icv_platform_macros.h new file mode 100644 index 0000000..3602688 --- /dev/null +++ b/common/armv8/icv_platform_macros.h @@ -0,0 +1,93 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you 
may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_platform_macros.h +* +* @brief +* This header files contains all the platform/toolchain specific macros +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_PLATFORM_MACROS_H__ +#define __ICV_PLATFORM_MACROS_H__ + +#define INLINE inline + +static INLINE UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return (__builtin_clz(u4_word)); + else + return 32; +} + +static __inline UWORD32 CLZNZ(UWORD32 u4_word) +{ + ASSERT(u4_word); + return (__builtin_clz(u4_word)); +} + +#define CLIP_U8(x) ((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)) +#define CLIP_S8(x) ((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)) + +#define CLIP_U12(x) ((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)) +#define CLIP_S12(x) ((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)) + +#define CLIP_U16(x) ((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)) +#define CLIP_S16(x) ((x) > 32767) ? (32767) : (((x) < -32768) ? 
(-32768) : (x)) + +#define ITT_BIG_ENDIAN(x) __asm__("rev %0, %1" : "=r"(x) : "r"(x)); + +#define NOP(nop_cnt) \ +{ \ + UWORD32 nop_i; \ + for (nop_i = 0; nop_i < nop_cnt; nop_i++) \ + __asm__ __volatile__("mov x0, x0"); \ +} + + +#define PREFETCH(x) __builtin_prefetch(x); + +#define DATA_SYNC() __sync_synchronize() + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +#define SHR_NEG(val,shift) (((shift) > 0) ? ( (val) >> (shift)) : ((val) << (-(shift)))) +#define SHL_NEG(val,shift) (((shift) > 0) ? ( (val) >> (-(shift))) : ((val) << (shift))) + +#define INLINE inline + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + + +#endif /* __ICV_PLATFORM_MACROS_H__ */ diff --git a/common/armv8/icv_sad_av8.s b/common/armv8/icv_sad_av8.s new file mode 100644 index 0000000..7bc1ffd --- /dev/null +++ b/common/armv8/icv_sad_av8.s @@ -0,0 +1,100 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ + + +//****************************************************************************** +//* +//* +//* @brief +//* This file contains definitions of routines for SAD calculation +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* - icv_sad_8x4_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* + + +//****************************************************************************** +//* +//* @brief computes distortion (SAD) between 2 8x4 blocks +//* +//* +//* @par Description +//* This function computes SAD between 2 8x4 blocks. +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] pu1_ref +//* UWORD8 pointer to the reference buffer +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] ref_strd +//* integer reference stride +//* +//* @param[in] wd +//* Width (assumed to be 8) +//* +//* @param[in] ht +//* Height (assumed to be 4) +//* +//* @returns +//* SAD value in x0 +//* +//* @remarks +//* +//****************************************************************************** + + .global icv_sad_8x4_av8 + +icv_sad_8x4_av8: + + // Load 8x4 source + ld1 {v0.8b}, [x0], x2 + ld1 {v1.8b}, [x0], x2 + ld1 {v2.8b}, [x0], x2 + ld1 {v3.8b}, [x0], x2 + + // Load 8x4 reference + ld1 {v4.8b}, [x1], x3 + ld1 {v5.8b}, [x1], x3 + ld1 {v6.8b}, [x1], x3 + ld1 {v7.8b}, [x1], x3 + + uabdl v0.8h, v0.8b, v4.8b + uabal v0.8h, v1.8b, v5.8b + uabal v0.8h, v2.8b, v6.8b + uabal v0.8h, v3.8b, v7.8b + + addp v0.8h, v0.8h, v0.8h + addp v0.8h, v0.8h, v0.8h + addp v0.8h, v0.8h, v0.8h + + smov x0, v0.8h[0] + + ret diff --git a/common/armv8/icv_variance_av8.s b/common/armv8/icv_variance_av8.s new file mode 100644 index 0000000..3caa148 --- /dev/null +++ b/common/armv8/icv_variance_av8.s @@ -0,0 +1,118 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project 
+//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ + +//****************************************************************************** +//* +//* +//* @brief +//* This file contains definitions of routines for variance calculation +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* - icv_variance_8x4_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* + + +//****************************************************************************** +//* +//* @brief computes variance of an 8x4 block +//* +//* +//* @par Description +//* This function computes variance of an 8x4 block +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] wd +//* Width (assumed to be 8) +//* +//* @param[in] ht +//* Height (assumed to be 4) +//* +//* @returns +//* variance value in x0 +//* +//* @remarks +//* +//****************************************************************************** + + .global icv_variance_8x4_av8 + +icv_variance_8x4_av8: + + // Load 8x4 source + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x0], x1 + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x0], x1 + + // Calculate Sum(values) + uaddl v4.8h, v0.8b, v1.8b + uaddl v6.8h, v2.8b, 
v3.8b + add v4.8h, v4.8h, v6.8h + + addp v4.8h, v4.8h, v4.8h + addp v4.4h, v4.4h, v4.4h + addp v4.4h, v4.4h, v4.4h + + // Calculate SumOfSquares + umull v20.8h, v0.8b, v0.8b + umull v22.8h, v1.8b, v1.8b + umull v24.8h, v2.8b, v2.8b + umull v26.8h, v3.8b, v3.8b + + uaddl v21.4s, v20.4h, v22.4h + uaddl v25.4s, v24.4h, v26.4h + uaddl2 v20.4s, v20.8h, v22.8h + uaddl2 v24.4s, v24.8h, v26.8h + + add v20.4s, v20.4s, v21.4s + add v22.4s, v24.4s, v25.4s + add v20.4s, v20.4s, v22.4s + addp v20.4s, v20.4s, v20.4s + addp v20.2s, v20.2s, v20.2s + + // Sum(values) + smov x0, v4.4h[0] + + // SumOfSquares + smov x1, v20.2s[0] + + // SquareOfSums + mul x3, x0, x0 + + // SumOfSquares * 8 * 4 - SquareOfSums + sub x1, x3, x1, LSL #5 + neg x0, x1 + + // Divide by 32 * 32 + + ASR x0, x0, #10 + ret diff --git a/common/armv8/ideint_cac_av8.s b/common/armv8/ideint_cac_av8.s new file mode 100644 index 0000000..76c22b7 --- /dev/null +++ b/common/armv8/ideint_cac_av8.s @@ -0,0 +1,225 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ + +//****************************************************************************** +//* +//* @brief +//* This file contains definitions of routines for combing artifact check (CAC) +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* - ideint_cac_8x8_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* + + +//****************************************************************************** +//* +//* @brief Calculates Combing Artifact +//* +//* @par Description +//* This function calculates combing artifact check (CAC) for given two fields +//* +//* @param[in] pu1_top +//* UWORD8 pointer to top field +//* +//* @param[in] pu1_bot +//* UWORD8 pointer to bottom field +//* +//* @param[in] top_strd +//* Top field stride +//* +//* @param[in] bot_strd +//* Bottom field stride +//* +//* @returns +//* 1 in x0 if adj exceeds the biased alt value for either sub-block, 0 otherwise +//* +//* @remarks +//* +//****************************************************************************** + + .global ideint_cac_8x8_av8 + +ideint_cac_8x8_av8: + + // Load first row of top + ld1 {v28.8b}, [x0], x2 + + // Load first row of bottom + ld1 {v29.8b}, [x1], x3 + mov v28.d[1], v29.d[0] + + // Load second row of top + ld1 {v30.8b}, [x0], x2 + + // Load second row of bottom + ld1 {v31.8b}, [x1], x3 + mov v30.d[1], v31.d[0] + + + // Calculate row based adj and alt values + // Get row sums + uaddlp v0.8h, v28.16b + + uaddlp v2.8h, v30.16b + + uaddlp v0.4s, v0.8h + + uaddlp v2.4s, v2.8h + + // Both v0 and v2 have four 32 bit sums corresponding to first 4 rows + // Pack v0 and v2 into a single register (sum does not exceed 16bits) + + shl v16.4s, v2.4s, #16 + orr v16.16b, v0.16b, v16.16b + // v16 now contains 8 sums + + // Load third row of top + ld1 {v24.8b}, [x0], x2 + + // Load third row of bottom + ld1 {v25.8b}, [x1], x3 + mov v24.d[1], v25.d[0] + + // Load fourth row of top + ld1 {v26.8b}, [x0], x2 + + // Load fourth row of bottom + ld1 {v27.8b}, [x1], x3 + mov v26.d[1], 
v27.d[0] + + // Get row sums + uaddlp v4.8h, v24.16b + + uaddlp v6.8h, v26.16b + + uaddlp v4.4s, v4.8h + + uaddlp v6.4s, v6.8h + // Both v4 and v6 have four 32 bit sums corresponding to last 4 rows + // Pack v4 and v6 into a single register (sum does not exceed 16bits) + + shl v18.4s, v6.4s, #16 + orr v18.16b, v4.16b, v18.16b + // v18 now contains 8 sums + + // Compute absolute diff between top and bottom row sums + mov v17.d[0], v16.d[1] + uabd v16.4h, v16.4h, v17.4h + + mov v19.d[0], v18.d[1] + uabd v17.4h, v18.4h, v19.4h + + mov v16.d[1], v17.d[0] + + // RSUM_CSUM_THRESH + movi v18.8h, #20 + + // Eliminate values smaller than RSUM_CSUM_THRESH + cmhs v20.8h, v16.8h, v18.8h + and v20.16b, v16.16b, v20.16b + + // v20 now contains 8 absolute diff of sums above the threshold + + // Compute adj + mov v21.d[0], v20.d[1] + add v20.4h, v20.4h, v21.4h + + // v20 has four adj values for two sub-blocks + + // Compute alt + uabd v0.4s, v0.4s, v2.4s + uabd v4.4s, v4.4s, v6.4s + + add v0.4s, v0.4s, v4.4s + + mov v1.d[0], v0.d[1] + add v21.4s, v0.4s, v1.4s + // d21 has two values for two sub-blocks + + + // Calculate column based adj and alt values + + urhadd v0.16b, v28.16b, v30.16b + urhadd v2.16b, v24.16b, v26.16b + urhadd v0.16b, v0.16b, v2.16b + + mov v1.d[0], v0.d[1] + uabd v0.8b, v0.8b, v1.8b + + // RSUM_CSUM_THRESH >> 2 + movi v22.16b, #5 + + // Eliminate values smaller than RSUM_CSUM_THRESH >> 2 + cmhs v1.16b, v0.16b, v22.16b + and v0.16b, v0.16b, v1.16b + // d0 now contains 8 absolute diff of sums above the threshold + + + uaddlp v0.4h, v0.8b + shl v0.4h, v0.4h,#2 + + // Add row based adj + add v20.4h, v0.4h, v20.4h + + uaddlp v20.2s, v20.4h + // d20 now contains 2 adj values + + + urhadd v0.8b, v28.8b, v29.8b + urhadd v2.8b, v24.8b, v25.8b + urhadd v0.8b, v0.8b, v2.8b + + urhadd v1.8b, v30.8b, v31.8b + urhadd v3.8b, v26.8b, v27.8b + urhadd v1.8b, v1.8b, v3.8b + + uabd v0.8b, v0.8b, v1.8b + uaddlp v0.4h, v0.8b + + shl v0.4h, v0.4h, #2 + uaddlp v0.2s, v0.4h + add 
v21.2s, v0.2s, v21.2s + + + // d21 now contains 2 alt values + + // SAD_BIAS_MULT_SHIFT + ushr v0.2s, v21.2s, #3 + add v21.2s, v21.2s, v0.2s + + // SAD_BIAS_ADDITIVE >> 1 + movi v0.2s, #4 + add v21.2s, v21.2s, v0.2s + + cmhi v0.2s, v20.2s, v21.2s + uaddlp v0.1d, v0.2s + + smov x0, v0.2s[0] + cmp x0, #0 + mov x4, #1 + csel x0, x4, x0, ne + ret diff --git a/common/armv8/ideint_spatial_filter_av8.s b/common/armv8/ideint_spatial_filter_av8.s new file mode 100644 index 0000000..b95e07f --- /dev/null +++ b/common/armv8/ideint_spatial_filter_av8.s @@ -0,0 +1,225 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +//*/ + +//****************************************************************************** +//* +//* @brief +//* This file contains definitions of routines for spatial filter +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* - ideint_spatial_filter_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* + + +//****************************************************************************** +//* +//* @brief Performs spatial filtering +//* +//* @par Description +//* This function performs edge adaptive spatial filtering on an 8x8 block +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_out +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* source stride +//* +//* @param[in] out_strd +//* destination stride +//* +//* @returns +//* None +//* +//* @remarks +//* +//****************************************************************************** + + .global ideint_spatial_filter_av8 + +ideint_spatial_filter_av8: + + movi v16.8h, #0 + movi v18.8h, #0 + movi v20.8h, #0 + + // Backup x0 + mov x10, x0 + + // Load from &pu1_row_1[0] + sub x5, x0, #1 + ld1 {v0.8b}, [x0], x2 + + // Load from &pu1_row_1[-1] + ld1 {v1.8b}, [x5] + add x5, x5, #2 + + // Load from &pu1_row_1[1] + ld1 {v2.8b}, [x5] + + // Number of rows + mov x4, #4 + + // EDGE_BIAS_0 + movi v30.2s, #5 + + // EDGE_BIAS_1 + movi v31.2s, #7 + +detect_edge: + // Load from &pu1_row_2[0] + sub x5, x0, #1 + ld1 {v3.8b}, [x0], x2 + + // Load from &pu1_row_2[-1] + ld1 {v4.8b}, [x5] + add x5, x5, #2 + + // Load from &pu1_row_2[1] + ld1 {v5.8b}, [x5] + + // Calculate absolute differences + // pu1_row_1[i] - pu1_row_2[i] + uabal v16.8h, v0.8b, v3.8b + + // pu1_row_1[i - 1] - pu1_row_2[i + 1] + uabal v18.8h, v1.8b, v5.8b + + // pu1_row_1[i + 1] - pu1_row_2[i - 1] + uabal v20.8h, v2.8b, v4.8b + + mov v0.8b, v3.8b + mov v1.8b, v4.8b + mov v2.8b, v5.8b + + subs x4, x4, #1 + bgt 
detect_edge + + // Calculate sum of absolute differences for each edge + addp v16.8h, v16.8h, v16.8h + addp v18.8h, v18.8h, v18.8h + addp v20.8h, v20.8h, v20.8h + + uaddlp v16.2s, v16.4h + uaddlp v18.2s, v18.4h + uaddlp v20.2s, v20.4h + + // adiff[0] *= EDGE_BIAS_0; + mul v16.2s, v16.2s, v30.2s + + // adiff[1] *= EDGE_BIAS_1; + mul v18.2s, v18.2s, v31.2s + + // adiff[2] *= EDGE_BIAS_1; + mul v20.2s, v20.2s, v31.2s + + // Move the differences to ARM registers + + + // Compute shift for first half of the block +compute_shift_1: + smov x5, v16.2s[0] + smov x6, v18.2s[0] + smov x7, v20.2s[0] + + // Compute shift + mov x8, #0 + + // adiff[2] <= adiff[1] + cmp x7, x6 + bgt dir_45_gt_135_1 + + // adiff[2] <= adiff[0] + cmp x7, x5 + mov x11, #1 + csel x8, x11, x8, le + + b compute_shift_2 +dir_45_gt_135_1: + + // adiff[1] <= adiff[0] + cmp x6, x5 + // Move -1 if less than or equal to + movn x11, #0 + csel x8, x11, x8, le + + +compute_shift_2: + // Compute shift for second half of the block + smov x5, v16.2s[1] + smov x6, v18.2s[1] + smov x7, v20.2s[1] + + // Compute shift + mov x9, #0 + + // adiff[2] <= adiff[1] + cmp x7, x6 + bgt dir_45_gt_135_2 + + // adiff[2] <= adiff[0] + cmp x7, x5 + mov x11, #1 + csel x9, x11, x9, le + + b interpolate + +dir_45_gt_135_2: + // adiff[1] <= adiff[0] + cmp x6, x5 + + // Move -1 if less than or equal to + movn x11, #0 + csel x9, x11, x9, le + +interpolate: + add x4, x10, x8 + add x5, x10, x2 + sub x5, x5, x8 + + add x10, x10, #4 + add x6, x10, x9 + add x7, x10, x2 + sub x7, x7, x9 + mov x8, #4 + +filter_loop: + ld1 {v0.s}[0], [x4], x2 + ld1 {v2.s}[0], [x5], x2 + + ld1 {v0.s}[1], [x6], x2 + ld1 {v2.s}[1], [x7], x2 + + urhadd v4.8b, v0.8b, v2.8b + st1 {v4.2s}, [x1], x3 + + subs x8, x8, #1 + bgt filter_loop + + ret diff --git a/common/icv.h b/common/icv.h new file mode 100644 index 0000000..7ab4645 --- /dev/null +++ b/common/icv.h @@ -0,0 +1,114 @@ +/****************************************************************************** + * + * 
Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv.h +* +* @brief +* This header files contains all the common definitions +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_H__ +#define __ICV_H__ + +/** Color formats */ +typedef enum +{ + /** Dummy candidate */ + ICV_COLOR_NA = 0x7FFFFFFF, + + /** YUV 420 Planar */ + ICV_YUV420P = 0, + + /** YUV 420 Semi Planar UV*/ + ICV_YUV420SP_UV, + + /** YUV 420 Semi Planar VU*/ + ICV_YUV420SP_VU, + +}ICV_COLOR_FMT_T; + +/** Architecture Enumeration */ +typedef enum +{ + ICV_ARCH_NA = 0x7FFFFFFF, + ICV_ARM_NONEON = 0x0, + ICV_ARM_NEONINTR, + ICV_ARM_A9Q, + ICV_ARM_A9A, + ICV_ARM_A9, + ICV_ARM_A7, + ICV_ARM_A5, + ICV_ARM_A15, + ICV_ARMV8_GENERIC = 0x100, + ICV_ARM_A53, + ICV_ARM_A57, + ICV_X86_GENERIC = 0x1000, + ICV_X86_SSSE3, + ICV_X86_SSE42, + ICV_X86_AVX, + ICV_X86_AVX2, + ICV_MIPS_GENERIC = 0x2000, + ICV_MIPS_32, +}ICV_ARCH_T; + +/** SOC Enumeration */ +typedef enum +{ + ICV_SOC_NA = 0x7FFFFFFF, + ICV_SOC_GENERIC = 0x0, +}ICV_SOC_T; + + +/** Max Color components */ +#define 
MAX_COMPONENTS 4 + +/** Structure to define a picture */ +typedef struct +{ + /** Buffer address */ + UWORD8 *apu1_buf[MAX_COMPONENTS]; + + /** Width */ + WORD32 ai4_wd[MAX_COMPONENTS]; + + /** Height */ + WORD32 ai4_ht[MAX_COMPONENTS]; + + /** Stride */ + WORD32 ai4_strd[MAX_COMPONENTS]; + + /** Color Format */ + ICV_COLOR_FMT_T e_color_fmt; + +}icv_pic_t; + + +#endif /* __ICV_H__ */ diff --git a/common/icv_datatypes.h b/common/icv_datatypes.h new file mode 100644 index 0000000..0facc2e --- /dev/null +++ b/common/icv_datatypes.h @@ -0,0 +1,68 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_datatypes.h +* +* @brief +* This file has the definitions of the data types used +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_DATATYPES_H__ +#define __ICV_DATATYPES_H__ + +/*****************************************************************************/ +/* Typedefs */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* Unsigned data types */ +/*****************************************************************************/ +typedef uint8_t UWORD8; +typedef uint16_t UWORD16; +typedef uint32_t UWORD32; +typedef uint64_t UWORD64; + +/*****************************************************************************/ +/* Signed data types */ +/*****************************************************************************/ +typedef int8_t WORD8; +typedef int16_t WORD16; +typedef int32_t WORD32; +typedef int64_t WORD64; + +/*****************************************************************************/ +/* Miscellaneous data types */ +/*****************************************************************************/ +typedef char CHAR; +typedef float FLOAT; +typedef double DOUBLE; + +#endif /*__ICV_DATATYPES_H__*/ diff --git a/common/icv_macros.h b/common/icv_macros.h new file mode 100644 index 0000000..bb86704 --- /dev/null +++ b/common/icv_macros.h @@ -0,0 +1,73 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_macros.h +* +* @brief +* This header files contains all the common macros +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_MACROS_H__ +#define __ICV_MACROS_H__ + +#define ABS(x) ((x) < 0 ? (-1 * (x)) : (x)) + +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +#define MIN(x,y) ((x) < (y) ? (x) : (y)) + +/* Absolute difference */ +#define ABS_DIF(x,y) (((x) > (y)) ? ((x) - (y)) : ((y) - (x))) + +#define MED3(a,b,c) (MIN(MAX( MIN((a),(b)), (c)), MAX((a),(b)))) + +#define AVG(a,b) (((a) + (b) + 1) >> 1) + +#define MEAN(a, b) AVG(a, b) + +#define CLIP3(min, max, x) (((x) > (max)) ? (max) :(((x) < (min))? (min):(x))) +#define SIGN(x) (((x) < 0) ? 
-1 : 1) + + +#define ALIGN128(x) ((((x) + 127) >> 7) << 7) +#define ALIGN64(x) ((((x) + 63) >> 6) << 6) +#define ALIGN32(x) ((((x) + 31) >> 5) << 5) +#define ALIGN16(x) ((((x) + 15) >> 4) << 4) +#define ALIGN8(x) ((((x) + 7) >> 3) << 3) + + +#define RETURN_IF(cond, retval) if(cond) {return (retval);} +#define UNUSED(x) ((void)(x)) + +#define ASSERT(x) assert(x) + + +#endif /* __ICV_MACROS_H__ */ diff --git a/common/icv_sad.c b/common/icv_sad.c new file mode 100644 index 0000000..e43f41d --- /dev/null +++ b/common/icv_sad.c @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_sad.c +* +* @brief +* This file contains the functions to compute SAD +* +* @author +* Ittiam +* +* @par List of Functions: +* sad_8x4() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" + +/** +******************************************************************************* +* +* @brief +* Compute 8x4 SAD +* +* @par Description +* Compute 8x4 sum of absolute differences between source and reference block +* +* @param[in] pu1_src +* Source buffer +* +* @param[in] pu1_ref +* Reference buffer +* +* @param[in] src_strd +* Source stride +* +* @param[in] ref_strd +* Reference stride +* +* @param[in] wd +* Assumed to be 8 +* +* @param[in] ht +* Assumed to be 4 + +* @returns +* SAD +* +* @remarks +* +******************************************************************************* +*/ +WORD32 icv_sad_8x4(UWORD8 *pu1_src, + UWORD8 *pu1_ref, + WORD32 src_strd, + WORD32 ref_strd, + WORD32 wd, + WORD32 ht) +{ + WORD32 sad; + WORD32 i; + WORD32 j; + UNUSED(wd); + UNUSED(ht); + + ASSERT(wd == 8); + ASSERT(ht == 4); + + sad = 0; + + for(j = 0; j < 4; j++) + { + for(i = 0; i < 8; i++) + { + WORD32 src; + WORD32 ref; + + src = *pu1_src++; + ref = *pu1_ref++; + + sad += ABS_DIF(src, ref); + } + pu1_src += (src_strd - 8); + pu1_ref += (ref_strd - 8); + } + + return sad; +} diff --git a/common/icv_sad.h b/common/icv_sad.h new file mode 100644 index 0000000..5b5ac9e --- 
/dev/null +++ b/common/icv_sad.h @@ -0,0 +1,52 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_sad.h +* +* @brief +* This file contains the functions to compute SAD +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_SAD_H__ +#define __ICV_SAD_H__ + +typedef WORD32 icv_sad_8x4_t(UWORD8 *pu1_src, + UWORD8 *pu1_ref, + WORD32 src_strd, + WORD32 ref_strd, + WORD32 wd, + WORD32 ht); + +icv_sad_8x4_t icv_sad_8x4; +icv_sad_8x4_t icv_sad_8x4_a9; +icv_sad_8x4_t icv_sad_8x4_av8; +icv_sad_8x4_t icv_sad_8x4_ssse3; +#endif /* __ICV_SAD_H__ */ diff --git a/common/icv_variance.c b/common/icv_variance.c new file mode 100644 index 0000000..21dedbe --- /dev/null +++ b/common/icv_variance.c @@ -0,0 +1,126 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_variance.c +* +* @brief +* This file contains the functions to compute variance +* +* @author +* Ittiam +* +* @par List of Functions: +* icv_variance_8x4() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" + +/** +******************************************************************************* +* +* @brief +* Computes variance of a given 8x4 block +* +* @par Description +* Compute variance of a given 8x4 block +* +* @param[in] pu1_src +* Source +* +* @param[in] src_strd +* Source stride +* +* @param[in] wd +* Assumed to be 8 +* +* @param[in] ht +* Assumed to be 4 +* +* @returns +* Variance +* +* @remarks +* +******************************************************************************* +*/ +WORD32 icv_variance_8x4(UWORD8 *pu1_src, WORD32 
src_strd, WORD32 wd, WORD32 ht) +{ + WORD32 sum; + WORD32 sum_sqr; + WORD32 blk_sz; + WORD32 vrnc; + WORD32 i; + WORD32 j; + UNUSED(wd); + UNUSED(ht); + + ASSERT(wd == 8); + ASSERT(ht == 4); + + sum = 0; + sum_sqr = 0; + + blk_sz = 8 * 4; + + /*************************************************************************/ + /* variance */ + /* var = (n * SUM(x_i^2) - (SUM(x_i))^2) / (n^2); */ + /*************************************************************************/ + + /*************************************************************************/ + /* The outer-loop runs for BLK_HT/2 times, because it */ + /* calculates the variance only for field area not frame one. */ + /*************************************************************************/ + for(j = 0; j < 4; j ++) + { + for(i = 0; i < 8; i++) + { + sum_sqr += (*pu1_src) * (*pu1_src); + sum += *pu1_src++; + } + pu1_src += (src_strd - 8); + } + + vrnc = ((sum_sqr * blk_sz) - (sum * sum)) / (blk_sz * blk_sz); + + return vrnc; +} + diff --git a/common/icv_variance.h b/common/icv_variance.h new file mode 100644 index 0000000..93c602f --- /dev/null +++ b/common/icv_variance.h @@ -0,0 +1,48 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_variance.h +* +* @brief +* This file contains the functions to compute variance +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef __ICV_VARIANCE_H__ +#define __ICV_VARIANCE_H__ + +typedef WORD32 icv_variance_8x4_t(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 ht); + +icv_variance_8x4_t icv_variance_8x4; +icv_variance_8x4_t icv_variance_8x4_a9; +icv_variance_8x4_t icv_variance_8x4_av8; +icv_variance_8x4_t icv_variance_8x4_ssse3; + +#endif /* __ICV_VARIANCE_H__ */ diff --git a/common/ideint.c b/common/ideint.c new file mode 100644 index 0000000..24e4e72 --- /dev/null +++ b/common/ideint.c @@ -0,0 +1,419 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_api.c +* +* @brief +* This file contains the definitions of the core processing of the de- +* interlacer. +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" + +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + +/** +******************************************************************************* +* +* @brief +* Return deinterlacer context size +* +* @par Description +* Return deinterlacer context size, application will allocate this memory +* and send it as context to process call +* +* @param[in] None +* +* @returns +* Size of deinterlacer context +* +* @remarks +* None +* +******************************************************************************* +*/ +WORD32 ideint_ctxt_size(void) +{ + return sizeof(ctxt_t); +} + +/** +******************************************************************************* +* +* @brief +* Deinterlace given fields and produce a frame +* +* @par Description +* Deinterlacer function that deinterlaces given fields and produces a frame +* +* @param[in] pv_ctxt +* Deinterlacer context returned by ideint_create() +* +* @param[in] ps_prv_fld +* Previous field (can be null, in 
which case spatial filtering is done +* unconditionally) +* +* @param[in] ps_cur_fld +* Current field +* +* @param[in] ps_nxt_fld +* Next field +* +* @param[in] ps_out_frm +* Output frame +* +* @param[in] ps_params +* Parameters +* +* @param[in] start_row +* Start row +* +* @param[in] num_rows +* Number of rows to be processed +* +* @returns +* IDEINT_ERROR_T +* +* @remarks +* +******************************************************************************* +*/ +IDEINT_ERROR_T ideint_process(void *pv_ctxt, + icv_pic_t *ps_prv_fld, + icv_pic_t *ps_cur_fld, + icv_pic_t *ps_nxt_fld, + icv_pic_t *ps_out_frm, + ideint_params_t *ps_params, + WORD32 start_row, + WORD32 num_rows) +{ + ctxt_t *ps_ctxt; + WORD32 num_blks_x, num_blks_y; + WORD32 num_comp; + WORD32 i, row, col; + WORD32 rows_remaining; + + if(NULL == pv_ctxt) + return IDEINT_INVALID_CTXT; + + ps_ctxt = (ctxt_t *)pv_ctxt; + + /* Copy the parameters */ + if(ps_params) + { + ps_ctxt->s_params = *ps_params; + } + else + { + /* Use default params if ps_params is NULL */ + ps_ctxt->s_params.i4_cur_fld_top = 1; + ps_ctxt->s_params.e_mode = IDEINT_MODE_SPATIAL; + ps_ctxt->s_params.e_arch = ideint_default_arch(); + ps_ctxt->s_params.e_soc = ICV_SOC_GENERIC; + ps_ctxt->s_params.i4_disable_weave = 0; + ps_ctxt->s_params.pf_aligned_alloc = NULL; + ps_ctxt->s_params.pf_aligned_free = NULL; + } + + /* Start row has to be multiple of 8 */ + if(start_row & 0x7) + { + return IDEINT_START_ROW_UNALIGNED; + } + + /* Initialize variances */ + ps_ctxt->ai4_vrnc_avg_fb[0] = VAR_AVG_LUMA; + ps_ctxt->ai4_vrnc_avg_fb[1] = VAR_AVG_CHROMA; + ps_ctxt->ai4_vrnc_avg_fb[2] = VAR_AVG_CHROMA; + + ideint_init_function_ptr(ps_ctxt); + + rows_remaining = ps_out_frm->ai4_ht[0] - start_row; + num_rows = MIN(num_rows, + rows_remaining); + + IDEINT_CORRUPT_PIC(ps_out_frm, 0xCD); + + //Weave two fields to get a frame + if(IDEINT_MODE_WEAVE == ps_ctxt->s_params.e_mode) + { + if(0 == ps_ctxt->s_params.i4_disable_weave) + { + 
if(ps_ctxt->s_params.i4_cur_fld_top) + ideint_weave_pic(ps_cur_fld, ps_nxt_fld, ps_out_frm, + start_row, + num_rows); + else + ideint_weave_pic(ps_nxt_fld, ps_cur_fld, ps_out_frm, + start_row, + num_rows); + } + return IDEINT_ERROR_NONE; + } + + num_comp = 3; + + for(i = 0; i < num_comp; i++) + { + UWORD8 *pu1_prv, *pu1_out; + UWORD8 *pu1_top, *pu1_bot, *pu1_dst; + WORD32 cur_strd, out_strd, dst_strd; + + WORD32 st_thresh; + WORD32 vrnc_avg_st; + WORD32 disable_cac_sad; + WORD32 comp_row_start, comp_row_end; + num_blks_x = ALIGN8(ps_out_frm->ai4_wd[i]) >> 3; + num_blks_y = ALIGN8(ps_out_frm->ai4_ht[i]) >> 3; + comp_row_start = start_row; + comp_row_end = comp_row_start + num_rows; + + if(i) + { + comp_row_start >>= 1; + comp_row_end >>= 1; + } + + comp_row_end = MIN(comp_row_end, ps_out_frm->ai4_ht[i]); + + comp_row_start = ALIGN8(comp_row_start) >> 3; + comp_row_end = ALIGN8(comp_row_end) >> 3; + st_thresh = ST_THRESH; + vrnc_avg_st = VAR_AVG_LUMA; + + if(i) + { + st_thresh = ST_THRESH >> 1; + vrnc_avg_st = VAR_AVG_CHROMA; + } + + out_strd = ps_out_frm->ai4_strd[i]; + if(ps_ctxt->s_params.i4_cur_fld_top) + { + cur_strd = ps_cur_fld->ai4_strd[i]; + } + else + { + cur_strd = ps_nxt_fld->ai4_strd[i]; + } + + + disable_cac_sad = 0; + /* If previous field is not provided, then change to SPATIAL mode */ + if(ps_prv_fld->apu1_buf[i] == NULL) + { + disable_cac_sad = 1; + } + + for(row = comp_row_start; row < comp_row_end; row++) + { + pu1_out = ps_out_frm->apu1_buf[i]; + pu1_out += (ps_out_frm->ai4_strd[i] * row << 3); + + pu1_prv = ps_prv_fld->apu1_buf[i]; + pu1_prv += (ps_prv_fld->ai4_strd[i] * row << 2); + + if(ps_ctxt->s_params.i4_cur_fld_top) + { + pu1_top = ps_cur_fld->apu1_buf[i]; + pu1_bot = ps_nxt_fld->apu1_buf[i]; + } + else + { + pu1_top = ps_nxt_fld->apu1_buf[i]; + pu1_bot = ps_cur_fld->apu1_buf[i]; + } + pu1_top += (cur_strd * row << 2); + pu1_bot += (cur_strd * row << 2); + + for(col = 0; col < num_blks_x; col++) + { + WORD32 cac, sad, vrnc; + WORD32 th_num, 
th_den; + UWORD8 au1_dst[BLK_WD * BLK_HT]; + WORD32 blk_wd, blk_ht; + WORD32 input_boundary; + cac = 0; + sad = 0; + th_den = 0; + th_num = st_thresh; + vrnc = 0; + + disable_cac_sad = 0; + /* If previous field is not provided, then change to SPATIAL mode */ + if(ps_prv_fld->apu1_buf[i] == NULL) + { + disable_cac_sad = 1; + } + /* For boundary blocks when input dimensions are not multiple of 8, + * then change to spatial mode */ + input_boundary = 0; + + blk_wd = BLK_WD; + blk_ht = BLK_HT; + + if((((num_blks_x - 1) == col) && (ps_out_frm->ai4_wd[i] & 0x7)) || + (((num_blks_y - 1) == row) && (ps_out_frm->ai4_ht[i] & 0x7))) + { + disable_cac_sad = 1; + input_boundary = 1; + + if(((num_blks_x - 1) == col) && (ps_out_frm->ai4_wd[i] & 0x7)) + blk_wd = (ps_out_frm->ai4_wd[i] & 0x7); + + if(((num_blks_y - 1) == row) && (ps_out_frm->ai4_ht[i] & 0x7)) + blk_ht = (ps_out_frm->ai4_ht[i] & 0x7); + + } + + if(0 == disable_cac_sad) + { + /* Compute SAD */ + PROFILE_DISABLE_SAD + sad = ps_ctxt->pf_sad_8x4(pu1_prv, pu1_bot, cur_strd, + cur_strd, + BLK_WD, + BLK_HT >> 1); + /* Compute Variance */ + PROFILE_DISABLE_VARIANCE + vrnc = ps_ctxt->pf_variance_8x4(pu1_top, cur_strd, BLK_WD, + BLK_HT >> 1); + + th_num = st_thresh; + + th_num *= vrnc_avg_st + + ((MOD_IDX_ST_NUM * vrnc) >> MOD_IDX_ST_SHIFT); + + th_den = vrnc + + ((MOD_IDX_ST_NUM * vrnc_avg_st) >> MOD_IDX_ST_SHIFT); + + if((sad * th_den) <= th_num) + { + /* Calculate Combing Artifact if SAD test fails */ + PROFILE_DISABLE_CAC + cac = ps_ctxt->pf_cac_8x8(pu1_top, pu1_bot, cur_strd, cur_strd); + } + } + + pu1_dst = pu1_out; + dst_strd = out_strd; + + /* In case boundary blocks are not complete (dimensions non-multiple of 8) + * Use intermediate buffer as destination and copy required pixels to output + * buffer later + */ + if(input_boundary) + { + pu1_dst = au1_dst; + dst_strd = BLK_WD; + ideint_weave_blk(pu1_top, pu1_bot, pu1_dst, dst_strd, + cur_strd, blk_wd, blk_ht); + } + + /* Weave the two fields unconditionally */ + if(0 
== ps_ctxt->s_params.i4_disable_weave) + { + ideint_weave_blk(pu1_top, pu1_bot, pu1_dst, dst_strd, + cur_strd, blk_wd, blk_ht); + } + + if(disable_cac_sad || cac || (sad * th_den > th_num)) + { + /* Pad the input fields in an intermediate buffer if required */ + if((0 == row) || (0 == col) || + ((num_blks_x - 1) == col) || ((num_blks_y - 1) == row)) + { + UWORD8 *pu1_dst_top; + UWORD8 au1_pad[(BLK_HT + 4) * (BLK_WD + 4)]; + + ideint_pad_blk(pu1_top, pu1_bot, au1_pad, cur_strd, row, + col, num_blks_y, num_blks_x, blk_wd, blk_ht); + + pu1_dst_top = au1_pad + 2 * (BLK_WD + 4) + 2; + + PROFILE_DISABLE_SPATIAL + ps_ctxt->pf_spatial_filter(pu1_dst_top, pu1_dst + dst_strd, + (BLK_WD + 4) * 2, + dst_strd * 2); + } + else + { + PROFILE_DISABLE_SPATIAL + ps_ctxt->pf_spatial_filter(pu1_top, pu1_dst + dst_strd, + cur_strd, dst_strd * 2); + + } + } + + /* copy required pixels to output buffer for boundary blocks + * when dimensions are not multiple of 8 + */ + if(input_boundary) + { + WORD32 j; + + for(j = 0; j < blk_ht; j++) + { + memcpy(pu1_out + j * out_strd, au1_dst + j * BLK_WD, blk_wd); + } + } + pu1_prv += 8; + pu1_top += 8; + pu1_bot += 8; + pu1_out += 8; + } + } + } + return IDEINT_ERROR_NONE; +} diff --git a/common/ideint.h b/common/ideint.h new file mode 100644 index 0000000..808d675 --- /dev/null +++ b/common/ideint.h @@ -0,0 +1,123 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint.h +* +* @brief +* Deinterlacer API file +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_H__ +#define __IDEINT_H__ + +/** Error codes */ +typedef enum +{ + /** Dummy error code */ + IDEINT_ERROR_NA = 0x7FFFFFFF, + + /** No error */ + IDEINT_ERROR_NONE = 0, + + /** Invalid Context */ + IDEINT_INVALID_CTXT, + + /** Start row not aligned to 8 */ + IDEINT_START_ROW_UNALIGNED, + + +}IDEINT_ERROR_T; + +/** Modes of deinterlacing */ +typedef enum +{ + /** Dummy mode */ + IDEINT_MODE_NA = 0x7FFFFFFF, + + /** Weave two fields to get a frame, no filtering */ + IDEINT_MODE_WEAVE = 0, + + /** Weave two fields in static blocks and + spatial filtering for non-static blocks */ + IDEINT_MODE_SPATIAL, + +}IDEINT_MODE_T; + +/** Deinterlacer parameters */ +typedef struct +{ + /** Mode for deinterlacing */ + IDEINT_MODE_T e_mode; + + /** Flag to indicate if the current field is top field, + * Prev and Next field are assumed to be of opposite parity + */ + WORD32 i4_cur_fld_top; + + /** Flag to signal if weave should be disabled. + * i.e. output already contains weaved fields + */ + WORD32 i4_disable_weave; + + /** CPU Architecture */ + ICV_ARCH_T e_arch; + + /** SOC */ + ICV_SOC_T e_soc; + + /** Pointer to a function for aligned allocation. + * If NULL, then malloc will be used internally + * Module will allocate if any extra memory is needed + */ + void *(*pf_aligned_alloc)(WORD32 alignment, WORD32 size); + + /** Pointer to a function for aligned free. 
+ * If NULL, then free will be used internally + */ + void (*pf_aligned_free)(void *pv_buf); + +}ideint_params_t; + +/** Deinterlacer context size */ +WORD32 ideint_ctxt_size(void); + +/** Deinterlacer process */ +IDEINT_ERROR_T ideint_process(void *pv_ctxt, + icv_pic_t *ps_prv_fld, + icv_pic_t *ps_cur_fld, + icv_pic_t *ps_nxt_fld, + icv_pic_t *ps_out_frm, + ideint_params_t *ps_params, + WORD32 start_row, + WORD32 num_rows); + +#endif /* __IDEINT_H__ */ diff --git a/common/ideint_cac.c b/common/ideint_cac.c new file mode 100644 index 0000000..b128768 --- /dev/null +++ b/common/ideint_cac.c @@ -0,0 +1,365 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_cac.c +* +* @brief +* This file include the definitions of the combing artifact check function +* of the de-interlacer and some variant of that. +* +* @author +* Ittiam +* +* @par List of Functions: +* cac_4x8() +* ideint_cac() +* +* @remarks +* In the de-interlacer workspace, cac is not a seperate assembly module as +* it comes along with the de_int_decision() function. 
But in C-Model, to +* keep the things cleaner, it was made to be a separate function during +* cac experiments long after the assembly was written by Mudit. +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_cac.h" + +/** +******************************************************************************* +* +* @brief +* Combing artifact check function for 8x4 block +* +* @par Description +* Adjacent and alternate SADs are calculated by row based and column-based +* collapsing. 
The adjacent and alternate SADs are then compared with some +* biasing to get CAC +* +* @param[in] pu1_top +* Top field +* +* @param[in] pu1_bot +* Bottom field +* +* @param[in] top_strd +* Top field Stride +* +* @param[in] bot_strd +* Bottom field stride +* +* @param[in] pi4_adj_sad +* Pointer to return adjacent SAD +* +* @param[in] pi4_alt_sad +* Pointer to return alternate SAD +* +* @returns +* combing artifact flag (1 = detected, 0 = not detected) +* +* @remarks +* +******************************************************************************* +*/ +static WORD32 cac_4x8(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + WORD32 top_strd, + WORD32 bot_strd) +{ + WORD32 ca; + WORD32 adj; + WORD32 alt; + UWORD8 *pu1_tmp_top; + UWORD8 *pu1_tmp_bot; + WORD32 i; + WORD32 j; + UWORD8 *pu1_top_0; + UWORD8 *pu1_top_1; + UWORD8 *pu1_top_2; + UWORD8 *pu1_top_3; + UWORD8 *pu1_bot_0; + UWORD8 *pu1_bot_1; + UWORD8 *pu1_bot_2; + UWORD8 *pu1_bot_3; + WORD32 rsum_csum_thresh; + WORD32 sad_bias_mult_shift; + WORD32 sad_bias_additive; + + WORD32 diff_sum; + WORD32 top_row_end_incr; + WORD32 bot_row_end_incr; + + ca = 0; + + adj = 0; + alt = 0; + + rsum_csum_thresh = RSUM_CSUM_THRESH; + sad_bias_additive = SAD_BIAS_ADDITIVE; + sad_bias_mult_shift = SAD_BIAS_MULT_SHIFT; + + /*************************************************************************/ + /* In the adjacent sad calculation by row-method, the absolute */ + /* difference is taken between the adjacent rows. The pixels of the diff */ + /* row, thus obtained, are then summed up. If this sum of absolute */ + /* differace (sad) is greater than a threshold value, it is added to the */ + /* adjcacent SAD value. 
*/ + /*************************************************************************/ + + /*************************************************************************/ + /* Adj dif: Row based */ + /*************************************************************************/ + + pu1_tmp_top = pu1_top; + pu1_tmp_bot = pu1_bot; + + top_row_end_incr = top_strd - SUB_BLK_WD; + bot_row_end_incr = bot_strd - SUB_BLK_WD; + + /*************************************************************************/ + /* The outer-loop runs for BLK_HT/2 times, because one pixel */ + /* is touched only once. */ + /*************************************************************************/ + for(j = 0; j < BLK_HT; j += 4) + { + WORD32 sum_1, sum_2, sum_3, sum_4; + WORD32 sum_diff; + + /*********************************************************************/ + /* Because the 8x4 is split into two halves of 4x4, the width of the */ + /* block is now 4. */ + /*********************************************************************/ + sum_1 = 0; + sum_2 = 0; + + for(i = 0; i < SUB_BLK_WD; i ++) + { + sum_1 += *pu1_tmp_top++; + sum_2 += *pu1_tmp_bot++; + } + + sum_diff = ABS_DIF(sum_1, sum_2); + + /*********************************************************************/ + /* Thresholding. */ + /*********************************************************************/ + if(sum_diff >= rsum_csum_thresh) + adj += sum_diff; + + pu1_tmp_top += top_row_end_incr; + pu1_tmp_bot += bot_row_end_incr; + + + sum_3 = 0; + sum_4 = 0; + + for(i = 0; i < SUB_BLK_WD; i ++) + { + sum_3 += *pu1_tmp_top++; + sum_4 += *pu1_tmp_bot++; + } + + sum_diff = ABS_DIF(sum_3, sum_4); + + /*********************************************************************/ + /* Thresholding. 
*/ + /*********************************************************************/ + if(sum_diff >= rsum_csum_thresh) + adj += sum_diff; + + pu1_tmp_top += top_row_end_incr; + pu1_tmp_bot += bot_row_end_incr; + + /*************************************************************************/ + /* Alt diff : Row based */ + /*************************************************************************/ + alt += ABS_DIF(sum_1, sum_3); + alt += ABS_DIF(sum_2, sum_4); + + } + + /*************************************************************************/ + /* In the adjacent sad calculation by column-method, the rows of both */ + /* the fields are averaged separately and then summed across the column. */ + /* The difference of the two values, thus obtained, is added to the */ + /* adjacent sad value, if it is beyond the threshold. */ + /*************************************************************************/ + + pu1_top_0 = pu1_top; + pu1_top_1 = pu1_top_0 + top_strd; + pu1_top_2 = pu1_top_1 + top_strd; + pu1_top_3 = pu1_top_2 + top_strd; + + pu1_bot_0 = pu1_bot; + pu1_bot_1 = pu1_bot_0 + bot_strd; + pu1_bot_2 = pu1_bot_1 + bot_strd; + pu1_bot_3 = pu1_bot_2 + bot_strd; + + /*************************************************************************/ + /* Adj dif: Col based */ + /*************************************************************************/ + diff_sum = 0; + + /*************************************************************************/ + /* As the DSP implementation of this modules is anyway going to assume */ + /* the size of the block to the fixed (8x4 or two 4x4's), the height of */ + /* block is also kept to be 8, to have a clean implementation. 
*/ + /*************************************************************************/ + for(i = 0; i < SUB_BLK_WD; i ++) + { + WORD32 val_1; + WORD32 val_2; + WORD32 tmp_1, tmp_2; + WORD32 tmp_diff; + + tmp_1 = AVG(pu1_top_0[i], pu1_top_1[i]); + tmp_2 = AVG(pu1_top_2[i], pu1_top_3[i]); + val_1 = AVG(tmp_1, tmp_2); + + tmp_1 = AVG(pu1_bot_0[i], pu1_bot_1[i]); + tmp_2 = AVG(pu1_bot_2[i], pu1_bot_3[i]); + val_2 = AVG(tmp_1, tmp_2); + + tmp_diff = ABS_DIF(val_1, val_2); + + if(tmp_diff >= (rsum_csum_thresh >> 2)) + diff_sum += tmp_diff; + } + + + adj += diff_sum << 2; + + /*************************************************************************/ + /* Alt diff : Col based */ + /*************************************************************************/ + diff_sum = 0; + + for(i = 0; i < SUB_BLK_WD; i ++) + { + WORD32 val_1; + WORD32 val_2; + WORD32 tmp_1, tmp_2; + WORD32 tmp_diff; + + tmp_1 = AVG(pu1_top_0[i], pu1_bot_0[i]); + tmp_2 = AVG(pu1_top_2[i], pu1_bot_2[i]); + val_1 = AVG(tmp_1, tmp_2); + + tmp_1 = AVG(pu1_top_1[i], pu1_bot_1[i]); + tmp_2 = AVG(pu1_top_3[i], pu1_bot_3[i]); + val_2 = AVG(tmp_1, tmp_2); + + tmp_diff = ABS_DIF(val_1, val_2); + + diff_sum += tmp_diff; + } + + /*************************************************************************/ + /* because of the averaging used in place of summation, a factor of 4 is */ + /* needed while adding the the diff_sum to the sad. 
*/ + /*************************************************************************/ + + alt += diff_sum << 2; + + pu1_top += SUB_BLK_WD; + pu1_bot += SUB_BLK_WD; + + alt += (alt >> sad_bias_mult_shift) + (sad_bias_additive >> 1); + ca = (alt < adj); + + return ca; +} + +/** +******************************************************************************* +* +* @brief +* Combing artifact check function for 8x8 block +* +* @par Description +* Determines CAC for 8x8 block by calling 8x4 CAC function +* +* @param[in] pu1_top +* Top field +* +* @param[in] pu1_bot +* Bottom field +* +* @param[in] top_strd +* Top field Stride +* +* @param[in] bot_strd +* Bottom field stride +* +* @returns +* combing artifact flag (1 = detected, 0 = not detected) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ideint_cac_8x8(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + WORD32 top_strd, + WORD32 bot_strd) +{ + WORD32 ca; /* combing artifact result */ + WORD32 k; + + ca = 0; + /*************************************************************************/ + /* This loop runs for the two halves of the 4x8 block. */ + /*************************************************************************/ + for(k = 0; k < 2; k ++) + { + ca |= cac_4x8(pu1_top, pu1_bot, top_strd, bot_strd); + + pu1_top += SUB_BLK_WD; + pu1_bot += SUB_BLK_WD; + + /* If Combing Artifact is detected, then return. Else continue to + * check the next half + */ + if(ca) + return ca; + } + + return ca; +} + diff --git a/common/ideint_cac.h b/common/ideint_cac.h new file mode 100644 index 0000000..bc19ac0 --- /dev/null +++ b/common/ideint_cac.h @@ -0,0 +1,48 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_cac.h +* +* @brief +* Deinterlacer CAC header file +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_CAC_H__ +#define __IDEINT_CAC_H__ + + +ideint_cac_8x8_t ideint_cac_8x8; +ideint_cac_8x8_t ideint_cac_8x8_a9; +ideint_cac_8x8_t ideint_cac_8x8_av8; +ideint_cac_8x8_t ideint_cac_8x8_ssse3; + +#endif /* __IDEINT_CAC_H__ */ diff --git a/common/ideint_debug.c b/common/ideint_debug.c new file mode 100644 index 0000000..0d9a641 --- /dev/null +++ b/common/ideint_debug.c @@ -0,0 +1,110 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_proc_fxns.c +* +* @brief +* This file contains the definitions of the core processing of the de +* interlacer. +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_corrupt_pic() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" + +/** +******************************************************************************* +* +* @brief +* Corrupt a picture with given value +* +* @par Description +* Corrupt a picture with given value +* +* @param[in] ps_pic +* Picture to be corrupted +* +* @param[in] val +* Value to be used to corrupt the picture +* +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ideint_corrupt_pic(icv_pic_t *ps_pic, WORD32 val) +{ + WORD32 i, j; + WORD32 num_comp; + + num_comp = 3; + for (i = 0; i < num_comp; i++) + { + WORD32 wd, ht, strd; + UWORD8 *pu1_buf; + wd = ps_pic->ai4_wd[i]; + ht = 
ps_pic->ai4_ht[i]; + strd = ps_pic->ai4_strd[i]; + pu1_buf = ps_pic->apu1_buf[i]; + + for (j = 0; j < ht; j++) + { + memset(pu1_buf, val, wd); + pu1_buf += strd; + } + + } +} diff --git a/common/ideint_debug.h b/common/ideint_debug.h new file mode 100644 index 0000000..163530c --- /dev/null +++ b/common/ideint_debug.h @@ -0,0 +1,81 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
/**
*******************************************************************************
* @file
*  ideint_debug.h
*
* @brief
*  Contains debug macros
*
* @author
*  Ittiam
*
* @par List of Functions:
*
* @remarks
*  None
*
*******************************************************************************
*/

#ifndef __IDEINT_DEBUG_H__
#define __IDEINT_DEBUG_H__

/* IDEINT_CORRUPT_PIC expands to a single expression in both configurations, */
/* so that "IDEINT_CORRUPT_PIC(pic, val);" is exactly one statement and can  */
/* be used as the body of an unbraced if / else                              */
#ifdef CORRUPT_PIC
void ideint_corrupt_pic(icv_pic_t *ps_pic, WORD32 val);
#define IDEINT_CORRUPT_PIC(ps_pic, val) ideint_corrupt_pic((ps_pic), (val))
#else
#define IDEINT_CORRUPT_PIC(ps_pic, val) ((void)0)
#endif

/* The PROFILE_DISABLE_* macros are statement prefixes. When the matching   */
/* PROFILE_DIS_* switch is defined they expand to "if(0)" and thereby skip  */
/* the single statement that follows, otherwise they expand to nothing      */
#ifdef PROFILE_DIS_SAD
#define PROFILE_DISABLE_SAD if(0)
#else
#define PROFILE_DISABLE_SAD
#endif

#ifdef PROFILE_DIS_VARIANCE
#define PROFILE_DISABLE_VARIANCE if(0)
#else
#define PROFILE_DISABLE_VARIANCE
#endif

#ifdef PROFILE_DIS_CAC
#define PROFILE_DISABLE_CAC if(0)
#else
#define PROFILE_DISABLE_CAC
#endif

#ifdef PROFILE_DIS_SPATIO_TEMPORAL
#define PROFILE_DISABLE_SPATIO_TEMPORAL if(0)
#else
#define PROFILE_DISABLE_SPATIO_TEMPORAL
#endif

#ifdef PROFILE_DIS_SPATIAL
#define PROFILE_DISABLE_SPATIAL if(0)
#else
#define PROFILE_DISABLE_SPATIAL
#endif

#endif /* __IDEINT_DEBUG_H__ */
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_defs.h +* +* @brief +* Contains deinterlacer definitions +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_DEFS_H__ +#define __IDEINT_DEFS_H__ + +#define ADJ_SAD_THRESH (6 * (FLD_BLK_SIZE * 2)) // *2 : 2 way collapsing (col+row) + +#define RSUM_CSUM_THRESH_PER_PEL 5//0 + +/* Block dimensions. All the decisions (which method to be used) are */ +/* made on block basis. The blk level decisions help us in */ +/* reducing the time-complexity of the algorithm. */ +#define BLK_WD_SHIFT 3 +#define BLK_HT_SHIFT 3 + +#define BLK_WD (1 << BLK_WD_SHIFT ) +#define BLK_HT (1 << BLK_HT_SHIFT) + +#define FLD_BLK_SIZE (BLK_WD * (BLK_HT >> 1)) + + +/* Inside the algorithm, the block itself is divided amongst further */ +/* smaller blocks. */ +#define SUB_BLK_WD (BLK_WD >> 1) +#define SUB_BLK_HT (BLK_HT >> 1) /* field dimensions. */ + +#define FLD_SUB_BLK_SIZE (SUB_BLK_WD * SUB_BLK_HT) + + +/*****************************************************************************/ +/* Stationarity check threshold, used in deciding when to weave. */ +/*****************************************************************************/ +#define ST_THRESH ((15 * FLD_BLK_SIZE) >> 1) + +#define MOD_IDX_ST_NUM 3 +#define MOD_IDX_ST_SHIFT 1 + +#define VAR_AVG_LUMA 735 +#define VAR_AVG_CHROMA 38 + +/*****************************************************************************/ +/* Threshold to choose the fallback method out of Bob and 3-field Kernel */ +/* method. 
*/ +/*****************************************************************************/ +#define FB_THRESH (32 * FLD_BLK_SIZE) + +#define MOD_IDX_FB 4 + + +#define EDGE_BIAS_0 5 +#define EDGE_BIAS_1 7 + +/*****************************************************************************/ +/* Adjacent correlation bias, used in biasing the adjacent correlation over */ +/* the alternate one, while comparing the two; in the combing-artifact-check */ +/* function. */ +/*****************************************************************************/ +#define SAD_BIAS_ADDITIVE (FLD_SUB_BLK_SIZE >> 1) + +/*****************************************************************************/ +/* Mult bias is 1.125 = 9/8. Multiplication by this number is done in two */ +/* steps, first multiplication by 9 and then shift by 3. */ +/*****************************************************************************/ +#define SAD_BIAS_MULT_SHIFT 3 + +/*****************************************************************************/ +/* row_sum threshold, used for making the combing artifact check more robust */ +/* against the noise (e.g. ringing) by rejecting insignificant pixel */ +/* difference across two adjacent rows; in the combing artifact check */ +/* function. */ +/*****************************************************************************/ +#define RSUM_CSUM_THRESH (RSUM_CSUM_THRESH_PER_PEL * SUB_BLK_WD) + +/*****************************************************************************/ +/* The 3-field filter is of type [-k 2k -k, 0.5 0.5, -k 2k -k], where k is */ +/* the COEFF_THREE_FIELD defined below. */ +/*****************************************************************************/ +#define COEFF_THREE_FIELD 13 + +/*****************************************************************************/ +/* Definitions used by the variance calculations module. 
*/ +/*****************************************************************************/ +#define SQR_SUB_BLK_SZ (FLD_BLK_SIZE * FLD_BLK_SIZE) +#define SUB_BLK_SZ_SHIFT 5 /* 2^5 = 32 */ +#define SQR_SUB_BLK_SZ_SHIFT (SUB_BLK_SZ_SHIFT << 1) /* 2^10 = 1024 = 32 * 32 */ + + + +#endif /* __IDEINT_DEFS_H__ */ diff --git a/common/ideint_function_selector.h b/common/ideint_function_selector.h new file mode 100644 index 0000000..1b2a0e7 --- /dev/null +++ b/common/ideint_function_selector.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.h +* +* @brief +* Declares the default-architecture query and the function pointer +* initialization routines (dispatcher, generic, a9, av8, ssse3, sse42) +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_FUNCTION_SELECTOR_H__ +#define __IDEINT_FUNCTION_SELECTOR_H__ + +ICV_ARCH_T ideint_default_arch(void); +void ideint_init_function_ptr(ctxt_t *ps_ctxt); +void ideint_init_function_ptr_generic(ctxt_t *ps_ctxt); +void ideint_init_function_ptr_a9(ctxt_t *ps_ctxt); +void ideint_init_function_ptr_av8(ctxt_t *ps_ctxt); + +void ideint_init_function_ptr_ssse3(ctxt_t *ps_ctxt); +void ideint_init_function_ptr_sse42(ctxt_t *ps_ctxt); + +#endif /* __IDEINT_FUNCTION_SELECTOR_H__ */ diff --git a/common/ideint_function_selector_generic.c b/common/ideint_function_selector_generic.c new file mode 100644 index 0000000..5227e66 --- /dev/null +++ b/common/ideint_function_selector_generic.c @@ -0,0 +1,90 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector_generic.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_init_function_ptr_generic +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" + +/** +******************************************************************************* +* +* @brief +* Initialize the function pointers +* +* @par Description +* The current routine initializes the four function pointers of the context +* (pf_sad_8x4, pf_variance_8x4, pf_spatial_filter, pf_cac_8x8) to the generic +* C functions +* +* @param[in] ps_ctxt +* Context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr_generic(ctxt_t *ps_ctxt) +{ + ps_ctxt->pf_sad_8x4 = icv_sad_8x4; + ps_ctxt->pf_variance_8x4 = icv_variance_8x4; + ps_ctxt->pf_spatial_filter = ideint_spatial_filter; + ps_ctxt->pf_cac_8x8 = ideint_cac_8x8; + return; +} diff --git a/common/ideint_structs.h b/common/ideint_structs.h new file mode 100644 index 0000000..e92e1c1 --- /dev/null +++ b/common/ideint_structs.h @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * 
Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_structs.h +* +* @brief +* Deinterlacer structure definitions: the ideint_spatial_filter_t and +* ideint_cac_8x8_t function types and the deinterlacer context ctxt_t +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_STRUCTS_H__ +#define __IDEINT_STRUCTS_H__ + +/** Function type shared by ideint_spatial_filter() and its platform variants */ +typedef void ideint_spatial_filter_t(UWORD8 *pu1_src, + UWORD8 *pu1_out, + WORD32 cur_strd, + WORD32 out_strd); + +/** Function type stored in ctxt_t::pf_cac_8x8 */ +typedef WORD32 ideint_cac_8x8_t(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + WORD32 i4_top_stride, + WORD32 i4_bot_stride); +/** Deinterlacer context */ +typedef struct +{ + /** params */ + ideint_params_t s_params; + + /** Adaptive variance used in spatio temporal filtering */ + WORD32 ai4_vrnc_avg_fb[3]; + + /** Function pointers, filled in by the ideint_init_function_ptr*() routines */ + icv_sad_8x4_t *pf_sad_8x4; + + icv_variance_8x4_t *pf_variance_8x4; + + ideint_spatial_filter_t *pf_spatial_filter; + + ideint_cac_8x8_t *pf_cac_8x8; +}ctxt_t; + +#endif /* __IDEINT_STRUCTS_H__ */ diff --git a/common/ideint_utils.c b/common/ideint_utils.c new file mode 100644 index 0000000..bdd3268 --- /dev/null +++ b/common/ideint_utils.c @@ -0,0 +1,482 @@ 
+/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_utils.c +* +* @brief +* This file contains the definitions of the core processing of the de +* interlacer. 
+* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_weave_pic() +* ideint_weave_blk() +* ideint_pad_blk() +* ideint_spatial_filter() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" + +/** +******************************************************************************* +* +* @brief +* Weaves two fields to produce a frame +* +* @par Description +* Weaves two fields to produce a frame +* +* @param[in] ps_src_top +* Top field source +* +* @param[in] ps_src_bot +* Bottom field source +* +* @param[in] ps_dst_frm +* Destination frame +* +* @param[in] start_row +* First destination frame row to process; halved for components other than +* the first +* +* @param[in] num_rows +* Number of destination frame rows to process from start_row; the end row +* is clamped to the height of the destination component +* +* @returns +* 0 on Success +* +* @remarks +* A component is skipped when source and destination already share the same +* buffer and the source stride is twice the destination stride +* +******************************************************************************* +*/ +WORD32 ideint_weave_pic(icv_pic_t *ps_src_top, + icv_pic_t *ps_src_bot, + icv_pic_t *ps_dst_frm, + WORD32 start_row, + WORD32 num_rows) +{ + UWORD8 *pu1_src, *pu1_dst; + WORD32 i, j, num_comp; + icv_pic_t *ps_src_fld; + WORD32 fld; + icv_pic_t *ps_src_flds[2]; + + num_comp = 3; + ps_src_flds[0] = ps_src_top; + ps_src_flds[1] = ps_src_bot; + + for(fld = 0; fld < 2; fld++) + { + ps_src_fld = ps_src_flds[fld]; + for(i = 0; i < num_comp; i++) + { + WORD32 src_strd; + WORD32 dst_strd; + WORD32 comp_row_start, comp_row_end; + comp_row_start = start_row; + comp_row_end = comp_row_start + num_rows; + if(i) + { + comp_row_start 
>>= 1; + comp_row_end >>= 1; + } + + comp_row_end = MIN(comp_row_end, ps_dst_frm->ai4_ht[i]); + + pu1_src = ps_src_fld->apu1_buf[i]; + pu1_dst = ps_dst_frm->apu1_buf[i]; + + src_strd = ps_src_fld->ai4_strd[i]; + dst_strd = ps_dst_frm->ai4_strd[i]; + + /* If source field is bottom, increment destination */ + pu1_dst += fld * dst_strd; + + /* In case input and output are pointing to same buffer, then no need to copy */ + if((pu1_src != pu1_dst) || ((2 * dst_strd) != src_strd)) + { + pu1_dst += ps_dst_frm->ai4_strd[i] * comp_row_start; + pu1_src += ps_src_fld->ai4_strd[i] * comp_row_start / 2; + + /* One field row is copied to every second destination row */ + for(j = comp_row_start; j < comp_row_end; j += 2) + { + memcpy(pu1_dst, pu1_src, ps_dst_frm->ai4_wd[i]); + pu1_dst += ps_dst_frm->ai4_strd[i] * 2; + pu1_src += ps_src_fld->ai4_strd[i]; + } + } + } + } + return 0; +} + + +/** +******************************************************************************* +* +* @brief +* Weaves a 8x8 block +* +* @par Description +* Weaves a 8x8 block from two fields +* +* @param[in] pu1_top +* Top field source +* +* @param[in] pu1_bot +* Bottom field source +* +* @param[in] pu1_dst +* Destination +* +* @param[in] dst_strd +* Destination stride +* +* @param[in] src_strd +* Source stride +* +* @param[in] wd +* Number of bytes copied for each row +* +* @param[in] ht +* Number of destination rows; rows are written in top/bottom pairs +* +* @returns +* 0 on success +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ideint_weave_blk(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + UWORD8 *pu1_dst, + WORD32 dst_strd, + WORD32 src_strd, + WORD32 wd, + WORD32 ht) +{ + WORD32 j; + + for(j = 0; j < ht; j += 2) + { + memcpy(pu1_dst, pu1_top, wd); + pu1_dst += dst_strd; + pu1_top += src_strd; + + memcpy(pu1_dst, pu1_bot, wd); + pu1_dst += dst_strd; + pu1_bot += src_strd; + } + return 0; +} + +/** +******************************************************************************* +* +* @brief +* Copy a boundary block and pad +* +* @par Description +* Copies a block on one of the boundaries and pads +* +* @param[in] pu1_top +* Top field source +* +* 
@param[in] pu1_bot +* Bottom field source +* +* @param[in] pu1_pad +* Padded destination; rows are (BLK_WD + 4) bytes apart +* +* @param[in] cur_strd +* Stride for pu1_top and pu1_bot +* +* @param[in] row +* Current block's row +* +* @param[in] col +* Current block's column +* +* @param[in] num_blks_y +* Number of blocks in Y direction +* +* @param[in] num_blks_x +* Number of blocks in X direction +* +* @param[in] blk_wd +* Width of the current block; blk_wd + 4 columns are copied before the +* boundary columns are trimmed +* +* @param[in] blk_ht +* Height of the current block; blk_ht + 4 rows are copied before the +* boundary rows are trimmed + +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ideint_pad_blk(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + UWORD8 *pu1_pad, + WORD32 cur_strd, + WORD32 row, + WORD32 col, + WORD32 num_blks_y, + WORD32 num_blks_x, + WORD32 blk_wd, + WORD32 blk_ht) +{ + WORD32 i; + WORD32 num_cols, num_rows; + UWORD8 *pu1_dst; + UWORD8 *pu1_src_top; + UWORD8 *pu1_src_bot; + + num_rows = blk_ht + 4; + num_cols = blk_wd + 4; + + pu1_src_top = pu1_top - cur_strd - 2; + pu1_src_bot = pu1_bot - cur_strd - 2; + pu1_dst = pu1_pad; + + if(0 == col) + { + num_cols -= 2; + pu1_dst += 2; + pu1_src_top += 2; + pu1_src_bot += 2; + } + + if(0 == row) + { + num_rows -= 2; + pu1_dst += 2 * (BLK_WD + 4); + pu1_src_top += cur_strd; + pu1_src_bot += cur_strd; + } + + if((num_blks_x - 1) == col) + num_cols -= 2; + + if((num_blks_y - 1) == row) + num_rows -= 2; + + for(i = 0; i < num_rows; i += 2) + { + memcpy(pu1_dst, pu1_src_top, num_cols); + pu1_dst += (BLK_WD + 4); + + memcpy(pu1_dst, pu1_src_bot, num_cols); + pu1_dst += (BLK_WD + 4); + + pu1_src_top += cur_strd; + pu1_src_bot += cur_strd; + } + + + /* Pad Left */ + if(0 == col) + { + for(i = 0; i < (BLK_HT + 4); i++) + { + WORD32 ofst = i * (BLK_WD + 4) + 2; + pu1_pad[ofst - 1] = pu1_pad[ofst]; + pu1_pad[ofst - 2] = pu1_pad[ofst]; + } + } + + /* Pad right */ + if((num_blks_x - 1) == col) + { + for(i = 0; i < (BLK_HT + 4); i++) + { + WORD32 ofst = i * (BLK_WD + 4) + 2 + blk_wd - 1; + WORD32 size = (BLK_WD - blk_wd) + 2; + /* Padding on right should include padding for boundary + * blocks when width is non-multiple 
of 8 + */ + memset(&pu1_pad[ofst + 1], pu1_pad[ofst], size); + } + } + + /* Pad Top */ + if(0 == row) + { + WORD32 src_ofst = 2 * (BLK_WD + 4); + WORD32 dst_ofst = 0; + memcpy(pu1_pad + dst_ofst, pu1_pad + src_ofst, (BLK_WD + 4)); + src_ofst += (BLK_WD + 4); + dst_ofst += (BLK_WD + 4); + memcpy(pu1_pad + dst_ofst, pu1_pad + src_ofst, (BLK_WD + 4)); + } + + /* Pad Bottom */ + if((num_blks_y - 1) == row) + { + WORD32 src_ofst = (0 + blk_ht) * (BLK_WD + 4); + WORD32 dst_ofst = (1 + blk_ht) * (BLK_WD + 4); + WORD32 size = (BLK_HT - blk_ht) + 2; + + /* Padding on bottom should include padding for boundary + * blocks when height is non-multiple of 8 + */ + for(i = 0; i < size; i++) + { + memcpy(pu1_pad + dst_ofst, pu1_pad + src_ofst, (BLK_WD + 4)); + dst_ofst += (BLK_WD + 4); + } + } +} + +/** +******************************************************************************* +* +* @brief +* Performs spatial edge adaptive filtering +* +* @par Description +* Performs spatial edge adaptive filtering by detecting edge direction +* +* @param[in] pu1_src +* Source buffer +* +* @param[in] pu1_out +* Destination buffer +* +* @param[in] src_strd +* Source stride +* +* @param[in] out_strd +* Destination stride + +* @returns +* None +* +* @remarks +* +******************************************************************************* +*/ +void ideint_spatial_filter(UWORD8 *pu1_src, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 out_strd) +{ + WORD32 i; + WORD32 j; + WORD32 k; + + /*********************************************************************/ + /* This loop is for the two halves inside the 8x4 block. 
*/ + /*********************************************************************/ + for(k = 0; k < 2; k++) + { + WORD32 adiff[3] = {0, 0, 0}; + WORD32 shift; + WORD32 dir_45_le_90, dir_45_le_135, dir_135_le_90; + UWORD8 *pu1_row_1, *pu1_row_2, *pu1_dst; + + /*****************************************************************/ + /* Direction detection */ + /*****************************************************************/ + pu1_row_1 = pu1_src; + pu1_row_2 = pu1_src + src_strd; + + /*****************************************************************/ + /* Calculating the difference along each of the 3 directions. */ + /* Columns i - 1 and i + 1 are read, so one byte to the left and */ + /* right of each half must be readable. */ + /*****************************************************************/ + for(j = 0; j < SUB_BLK_HT; j ++) + { + for(i = 0; i < SUB_BLK_WD; i++) + { + adiff[0] += ABS_DIF(pu1_row_1[i], pu1_row_2[i]); /* 90 */ + + adiff[1] += ABS_DIF(pu1_row_1[i - 1], pu1_row_2[i + 1]); /* 135 */ + + adiff[2] += ABS_DIF(pu1_row_1[i + 1], pu1_row_2[i - 1]); /* 45 */ + } + pu1_row_1 += src_strd; + pu1_row_2 += src_strd; + } + + /*****************************************************************/ + /* Applying bias, to make the diff comparison more robust. */ + /*****************************************************************/ + adiff[0] *= EDGE_BIAS_0; + adiff[1] *= EDGE_BIAS_1; + adiff[2] *= EDGE_BIAS_1; + + /*****************************************************************/ + /* comparing the diffs */ + /*****************************************************************/ + dir_45_le_90 = (adiff[2] <= adiff[0]); + dir_45_le_135 = (adiff[2] <= adiff[1]); + dir_135_le_90 = (adiff[1] <= adiff[0]); + + /*****************************************************************/ + /* Direction selection. 
*/ + /*****************************************************************/ + shift = 0; + if(1 == dir_45_le_135) + { + if(1 == dir_45_le_90) + shift = 1; + } + else + { + if(1 == dir_135_le_90) + shift = -1; + } + + /*****************************************************************/ + /* Directional interpolation */ + /*****************************************************************/ + pu1_row_1 = pu1_src + shift; + pu1_row_2 = pu1_src + src_strd - shift; + pu1_dst = pu1_out; + + for(j = 0; j < SUB_BLK_HT; j++) + { + for(i = 0; i < SUB_BLK_WD; i++) + { + pu1_dst[i] = (UWORD8)AVG(pu1_row_1[i], pu1_row_2[i]); + } + pu1_row_1 += src_strd; + pu1_row_2 += src_strd; + pu1_dst += out_strd; + } + + pu1_out += SUB_BLK_WD; + pu1_src += SUB_BLK_WD; + } +} + diff --git a/common/ideint_utils.h b/common/ideint_utils.h new file mode 100644 index 0000000..570f40a --- /dev/null +++ b/common/ideint_utils.h @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_utils.h +* +* @brief +* Declares the weave, block padding and spatial filter functions used by +* the deinterlacer +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ + +#ifndef __IDEINT_UTILS_H__ +#define __IDEINT_UTILS_H__ + +WORD32 ideint_weave_pic(icv_pic_t *ps_src_top, + icv_pic_t *ps_src_bot, + icv_pic_t *ps_dst_frm, + WORD32 start_row, + WORD32 num_rows); + + +void ideint_pad_blk(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + UWORD8 *pu1_pad, + WORD32 cur_strd, + WORD32 row, + WORD32 col, + WORD32 num_blks_y, + WORD32 num_blks_x, + WORD32 blk_wd, + WORD32 blk_ht); + +WORD32 ideint_weave_blk(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + UWORD8 *pu1_dst, + WORD32 dst_strd, + WORD32 src_strd, + WORD32 wd, + WORD32 ht); + + +/* Spatial filter implementations, all sharing the ideint_spatial_filter_t signature */ +ideint_spatial_filter_t ideint_spatial_filter; +ideint_spatial_filter_t ideint_spatial_filter_a9; +ideint_spatial_filter_t ideint_spatial_filter_av8; +ideint_spatial_filter_t ideint_spatial_filter_ssse3; + +#endif /* __IDEINT_UTILS_H__ */ diff --git a/common/mips/icv_platform_macros.h b/common/mips/icv_platform_macros.h new file mode 100644 index 0000000..61da7f5 --- /dev/null +++ b/common/mips/icv_platform_macros.h @@ -0,0 +1,99 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_platform_macros.h +* +* @brief +* This header file contains all the platform/toolchain specific macros +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _ICV_PLATFORM_MACROS_H_ +#define _ICV_PLATFORM_MACROS_H_ + +/* Count of leading zero bits; returns 32 for a zero word */ +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return(__builtin_clz(u4_word)); + else + return 32; +} + +/* Count of leading zero bits; the word must be non-zero */ +static __inline UWORD32 CLZNZ(UWORD32 u4_word) +{ + ASSERT(u4_word); + return(__builtin_clz(u4_word)); +} + +/* Count of trailing zero bits; returns 31 for a zero word */ +static __inline UWORD32 CTZ(UWORD32 u4_word) +{ + if(0 == u4_word) + return 31; + else + { + unsigned int index; + index = __builtin_ctz(u4_word); + return (UWORD32)index; + } +} + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +/* No trailing semicolon, so that the macro can be used inside an expression */ +#define ITT_BIG_ENDIAN(x) __builtin_bswap32(x) + +#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");} +#define PREFETCH(x) __builtin_prefetch(x); + +#define DATA_SYNC() __sync_synchronize() + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +/* Shifts by a signed count: a negative count shifts in the opposite direction, */ +/* so the shift operator is never given a negative operand */ +#define SHR_NEG(val,shift) (((shift) > 0) ? ( (val) >> (shift)) : ((val) << (-(shift)))) +#define SHL_NEG(val,shift) (((shift) < 0) ? 
( (val) >> (-(shift))) : ((val) << (shift))) + +#define INLINE inline + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + + +#endif /* _ICV_PLATFORM_MACROS_H_ */ diff --git a/common/mips/ideint_function_selector.c b/common/mips/ideint_function_selector.c new file mode 100644 index 0000000..0e3ae70 --- /dev/null +++ b/common/mips/ideint_function_selector.c @@ -0,0 +1,110 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_init_function_ptr +* ideint_default_arch +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + + +/** +******************************************************************************* +* +* @brief +* Call corresponding function pointer initialization function +* +* @par Description +* Calls ideint_init_function_ptr_generic(); this build has no other variant +* +* @param[in] ps_ctxt +* Context +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr(ctxt_t *ps_ctxt) +{ + ideint_init_function_ptr_generic(ps_ctxt); +} + +/** +******************************************************************************* +* +* @brief Returns the default architecture for this build +* +* @par Description: This routine always returns ICV_MIPS_GENERIC +* +* @param[in] void +* +* @returns ICV_ARCH_T +* architecture +* +* @remarks none +* 
+******************************************************************************* +*/ +ICV_ARCH_T ideint_default_arch(void) +{ + return ICV_MIPS_GENERIC; +} diff --git a/common/x86/icv_platform_macros.h b/common/x86/icv_platform_macros.h new file mode 100644 index 0000000..61da7f5 --- /dev/null +++ b/common/x86/icv_platform_macros.h @@ -0,0 +1,99 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_platform_macros.h +* +* @brief +* This header file contains all the platform/toolchain specific macros +* +* @author +* Ittiam +* +* @par List of Functions: +* +* @remarks +* None +* +******************************************************************************* +*/ +#ifndef _ICV_PLATFORM_MACROS_H_ +#define _ICV_PLATFORM_MACROS_H_ + +/* Count of leading zero bits; returns 32 for a zero word */ +static __inline UWORD32 CLZ(UWORD32 u4_word) +{ + if(u4_word) + return(__builtin_clz(u4_word)); + else + return 32; +} + +/* Count of leading zero bits; the word must be non-zero */ +static __inline UWORD32 CLZNZ(UWORD32 u4_word) +{ + ASSERT(u4_word); + return(__builtin_clz(u4_word)); +} + +/* Count of trailing zero bits; returns 31 for a zero word */ +static __inline UWORD32 CTZ(UWORD32 u4_word) +{ + if(0 == u4_word) + return 31; + else + { + unsigned int index; + index = __builtin_ctz(u4_word); + return (UWORD32)index; + } +} + +#define CLIP_U8(x) CLIP3(0, 255, (x)) +#define CLIP_S8(x) CLIP3(-128, 127, (x)) + +#define CLIP_U10(x) CLIP3(0, 1023, (x)) +#define CLIP_S10(x) CLIP3(-512, 511, (x)) + +#define CLIP_U12(x) CLIP3(0, 4095, (x)) +#define CLIP_S12(x) CLIP3(-2048, 2047, (x)) + +#define CLIP_U16(x) CLIP3(0, 65535, (x)) +#define CLIP_S16(x) CLIP3(-32768, 32767, (x)) + +/* No trailing semicolon, so that the macro can be used inside an expression */ +#define ITT_BIG_ENDIAN(x) __builtin_bswap32(x) + +#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++) asm("nop");} +#define PREFETCH(x) __builtin_prefetch(x); + +#define DATA_SYNC() __sync_synchronize() + +#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) +#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) + +/* Shifts by a signed count: a negative count shifts in the opposite direction, */ +/* so the shift operator is never given a negative operand */ +#define SHR_NEG(val,shift) (((shift) > 0) ? ( (val) >> (shift)) : ((val) << (-(shift)))) +#define SHL_NEG(val,shift) (((shift) < 0) ? 
( (val) >> (-(shift))) : ((val) << (shift))) + +#define INLINE inline + +#define MEM_ALIGN8 __attribute__ ((aligned (8))) +#define MEM_ALIGN16 __attribute__ ((aligned (16))) +#define MEM_ALIGN32 __attribute__ ((aligned (32))) + + +#endif /* _ICV_PLATFORM_MACROS_H_ */ diff --git a/common/x86/icv_sad_ssse3.c b/common/x86/icv_sad_ssse3.c new file mode 100644 index 0000000..a76e58d --- /dev/null +++ b/common/x86/icv_sad_ssse3.c @@ -0,0 +1,145 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_sad_ssse3.c +* +* @brief +* This file contains the x86 SIMD function to compute SAD +* +* @author +* Ittiam +* +* @par List of Functions: +* icv_sad_8x4_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <immintrin.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" + +/** +******************************************************************************* +* +* @brief +* Compute 8x4 SAD +* +* @par Description +* Compute 8x4 sum of absolute differences between source and reference block. +* Eight bytes are read from each of four consecutive rows of both buffers. +* +* @param[in] pu1_src +* Source buffer +* +* @param[in] pu1_ref +* Reference buffer +* +* @param[in] src_strd +* Source stride, added to pu1_src after every row +* +* @param[in] ref_strd +* Reference stride, added to pu1_ref after every row +* +* @param[in] wd +* Assumed to be 8; otherwise unused, only checked with ASSERT() +* +* @param[in] ht +* Assumed to be 4; otherwise unused, only checked with ASSERT() +* +* @returns +* SAD of the 32 pixel pairs +* +* @remarks +* +******************************************************************************* +*/ +WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_ref, + WORD32 src_strd, + WORD32 ref_strd, + WORD32 wd, + WORD32 ht) +{ + WORD32 sad; + __m128 src_r0, src_r1; + __m128 ref_r0, ref_r1; + __m128i res_r0, res_r1; + + UNUSED(wd); + UNUSED(ht); + ASSERT(wd == 8); + ASSERT(ht == 4); + + /* Load source: rows 0 and 2 go to the low and high halves of src_r0, rows 1 and 3 to those of src_r1 */ + src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); + pu1_src += src_strd; + + src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); + pu1_src += src_strd; + + src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src)); + pu1_src +=
src_strd; + + src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src)); + pu1_src += src_strd; + + + /* Load reference, packed the same way as the source rows */ + ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); + pu1_ref += ref_strd; + + ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); + pu1_ref += ref_strd; + + ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref)); + pu1_ref += ref_strd; + + ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref)); + pu1_ref += ref_strd; + + /* Compute SAD for each row: every result register holds one partial sum per 64-bit half */ + res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0); + res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1); + + /* Accumulate SAD: add the two registers, then fold the upper 64-bit half onto the lower one */ + res_r0 = _mm_add_epi64(res_r0, res_r1); + res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8)); + + sad = _mm_cvtsi128_si32(res_r0); + + return sad; +} diff --git a/common/x86/icv_variance_ssse3.c b/common/x86/icv_variance_ssse3.c new file mode 100644 index 0000000..725714e --- /dev/null +++ b/common/x86/icv_variance_ssse3.c @@ -0,0 +1,158 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* icv_variance_ssse3.c +* +* @brief +* This file contains the x86 SIMD function to compute variance +* +* @author +* Ittiam +* +* @par List of Functions: +* icv_variance_8x4_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <immintrin.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" + +/** +******************************************************************************* +* +* @brief +* Computes variance of a given 8x4 block +* +* @par Description +* Compute variance of a given 8x4 block from the sum and the sum of squares +* of its 32 pixels, using integer arithmetic only +* +* @param[in] pu1_src +* Source +* +* @param[in] src_strd +* Source stride, added to pu1_src after every row +* +* @param[in] wd +* Assumed to be 8; otherwise unused, only checked with ASSERT() +* +* @param[in] ht +* Assumed to be 4; otherwise unused, only checked with ASSERT() +* +* @returns +* Variance, i.e. ((sum_sqr * 32) - (sum * sum)) / (32 * 32) with integer division +* +* @remarks +* +******************************************************************************* +*/ +WORD32 icv_variance_8x4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 ht) +{ + WORD32 sum; + WORD32 sum_sqr; + WORD32 blk_sz; + WORD32 vrnc; + __m128 src_r0, src_r1; + __m128i ssrc_r0, ssrc_r1, ssrc_r2, ssrc_r3; + __m128i sum_r0, sum_r1; + __m128i sqr_r0, sqr_r1, sqr_r2, sqr_r3; + __m128i vsum, vsum_sqr; + __m128i zero; + UNUSED(wd); + UNUSED(ht); + + ASSERT(wd == 8); + ASSERT(ht == 4); + + sum = 0; + sum_sqr = 0; + + blk_sz = 8 * 4; + + zero = _mm_setzero_si128(); + + /* Load source: rows 0 and 2 go to the low and high halves of src_r0, rows 1 and 3 to those of src_r1 */ + src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); + pu1_src += src_strd; + + src_r1 = (__m128)_mm_loadl_epi64((__m128i
*) (pu1_src)); + pu1_src += src_strd; + + src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src)); + pu1_src += src_strd; + + src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src)); + pu1_src += src_strd; + + /* Compute sum of all elements */ + /* Use SAD with 0, since there is no pairwise addition */ + sum_r0 = _mm_sad_epu8((__m128i)src_r0, zero); + sum_r1 = _mm_sad_epu8((__m128i)src_r1, zero); + + /* Accumulate SAD: add the two registers, then fold the upper 64-bit half onto the lower one */ + vsum = _mm_add_epi64(sum_r0, sum_r1); + vsum = _mm_add_epi64(vsum, _mm_srli_si128(vsum, 8)); + + sum = _mm_cvtsi128_si32(vsum); + + /* Unpack to 16 bits */ + ssrc_r0 = _mm_unpacklo_epi8((__m128i)src_r0, zero); + ssrc_r1 = _mm_unpacklo_epi8((__m128i)src_r1, zero); + ssrc_r2 = _mm_unpackhi_epi8((__m128i)src_r0, zero); + ssrc_r3 = _mm_unpackhi_epi8((__m128i)src_r1, zero); + + /* Compute sum of squares: multiplying each register with itself gives four 32-bit sums of two squares */ + sqr_r0 = _mm_madd_epi16(ssrc_r0, ssrc_r0); + sqr_r1 = _mm_madd_epi16(ssrc_r1, ssrc_r1); + sqr_r2 = _mm_madd_epi16(ssrc_r2, ssrc_r2); + sqr_r3 = _mm_madd_epi16(ssrc_r3, ssrc_r3); + + vsum_sqr = _mm_add_epi32(sqr_r0, sqr_r1); + vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r2); + vsum_sqr = _mm_add_epi32(vsum_sqr, sqr_r3); + + vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 8)); + vsum_sqr = _mm_add_epi32(vsum_sqr, _mm_srli_si128(vsum_sqr, 4)); + sum_sqr = _mm_cvtsi128_si32(vsum_sqr); + + /* Compute variance as (N * sum_sqr - sum * sum) / (N * N) with N = blk_sz; the division truncates */ + vrnc = ((sum_sqr * blk_sz) - (sum * sum)) / (blk_sz * blk_sz); + + return vrnc; +} + diff --git a/common/x86/ideint_cac_ssse3.c b/common/x86/ideint_cac_ssse3.c new file mode 100644 index 0000000..2be8467 --- /dev/null +++ b/common/x86/ideint_cac_ssse3.c @@ -0,0 +1,236 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_cac_ssse3.c +* +* @brief +* This file includes the definition of the x86 SIMD variant of the combing +* artifact check function of the de-interlacer. +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_cac_8x8_ssse3() +* +* +* @remarks +* In the de-interlacer workspace, cac is not a separate assembly module as +* it comes along with the de_int_decision() function. But in the C model, to +* keep things cleaner, it was made a separate function during +* cac experiments long after the assembly was written by Mudit.
+* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <immintrin.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_cac.h" + +/** +******************************************************************************* +* +* @brief +* Combing artifact check function for 8x8 block +* +* @par Description +* Determines CAC for an 8x8 block made of four 8-pixel rows from each field. +* The left and right 4-pixel-wide halves are evaluated separately (index 0 and +* 1 of adj[] and alt[]); the flag is set when either half reports an artifact. +* +* @param[in] pu1_top +* Top field +* +* @param[in] pu1_bot +* Bottom field +* +* @param[in] top_strd +* Top field Stride +* +* @param[in] bot_strd +* Bottom field stride +* +* @returns +* combing artifact flag (1 = detected, 0 = not detected) +* +* @remarks +* +******************************************************************************* +*/ +WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top, + UWORD8 *pu1_bot, + WORD32 top_strd, + WORD32 bot_strd) +{ + WORD32 ca; /* combing artifact result */ + WORD32 i; + WORD32 adj[2] = {0}; + WORD32 alt[2] = {0}; + WORD32 sum_1, sum_2, sum_3, sum_4; + WORD32 sum_diff, diff_sum; + + __m128i top[4]; + __m128i bot[4]; + __m128i sum_t[4]; + __m128i sum_b[4]; + __m128i zero; + + + zero = _mm_setzero_si128(); + + for(i = 0; i < 4; i++) + { + /* Load top */ + top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top)); + pu1_top += top_strd; + + /* Load bottom */ + bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot)); + pu1_bot += bot_strd; + + /* Unpack to 16 bits: the low 64 bits then hold the left four pixels, the high 64 bits the right four */ + top[i] = _mm_unpacklo_epi8(top[i], zero); + bot[i] = _mm_unpacklo_epi8(bot[i], zero); + + /* Compute row
sums */ + sum_t[i] = _mm_sad_epu8(top[i], zero); + sum_b[i] = _mm_sad_epu8(bot[i], zero); + } + + /* Compute row based alt and adj: adj accumulates top-vs-bottom row sum differences that reach RSUM_CSUM_THRESH, alt accumulates differences between rows i and i + 1 of the same field */ + for(i = 0; i < 4; i += 2) + { + sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]); + sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]); + sum_diff = ABS_DIF(sum_1, sum_2); + if(sum_diff >= RSUM_CSUM_THRESH) + adj[0] += sum_diff; + + sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]); + sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]); + sum_diff = ABS_DIF(sum_3, sum_4); + if(sum_diff >= RSUM_CSUM_THRESH) + adj[0] += sum_diff; + + alt[0] += ABS_DIF(sum_1, sum_3); + alt[0] += ABS_DIF(sum_2, sum_4); + + sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8)); + sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8)); + sum_diff = ABS_DIF(sum_1, sum_2); + if(sum_diff >= RSUM_CSUM_THRESH) + adj[1] += sum_diff; + + sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8)); + sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8)); + sum_diff = ABS_DIF(sum_3, sum_4); + if(sum_diff >= RSUM_CSUM_THRESH) + adj[1] += sum_diff; + + alt[1] += ABS_DIF(sum_1, sum_3); + alt[1] += ABS_DIF(sum_2, sum_4); + } + + /* Compute column based adj: per-column averages of the top and bottom field rows are compared, and differences above (RSUM_CSUM_THRESH >> 2) - 1 are added, scaled by 4 */ + { + __m128i avg1, avg2; + __m128i top_avg, bot_avg; + __m128i min, max, diff, thresh; + __m128i mask; + avg1 = _mm_avg_epu8(top[0], top[1]); + avg2 = _mm_avg_epu8(top[2], top[3]); + top_avg = _mm_avg_epu8(avg1, avg2); + + avg1 = _mm_avg_epu8(bot[0], bot[1]); + avg2 = _mm_avg_epu8(bot[2], bot[3]); + bot_avg = _mm_avg_epu8(avg1, avg2); + + min = _mm_min_epu8(top_avg, bot_avg); + max = _mm_max_epu8(top_avg, bot_avg); + + diff = _mm_sub_epi16(max, min); + thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1); + + mask = _mm_cmpgt_epi16(diff, thresh); + diff = _mm_and_si128(diff, mask); + + diff_sum = _mm_extract_epi16(diff, 0); + diff_sum += _mm_extract_epi16(diff, 1); + diff_sum += _mm_extract_epi16(diff, 2); + diff_sum += _mm_extract_epi16(diff, 3); + + adj[0] += diff_sum << 2; + + diff_sum = _mm_extract_epi16(diff, 4); + diff_sum +=
_mm_extract_epi16(diff, 5); + diff_sum += _mm_extract_epi16(diff, 6); + diff_sum += _mm_extract_epi16(diff, 7); + + adj[1] += diff_sum << 2; + + } + + /* Compute column based alt: per-column averages of rows 0 and 2 of both fields are compared with those of rows 1 and 3, and the SAD is added, scaled by 4 */ + { + __m128i avg1, avg2; + __m128i even_avg, odd_avg, diff; + avg1 = _mm_avg_epu8(top[0], bot[0]); + avg2 = _mm_avg_epu8(top[2], bot[2]); + even_avg = _mm_avg_epu8(avg1, avg2); + + avg1 = _mm_avg_epu8(top[1], bot[1]); + avg2 = _mm_avg_epu8(top[3], bot[3]); + odd_avg = _mm_avg_epu8(avg1, avg2); + + diff = _mm_sad_epu8(even_avg, odd_avg); + + + diff_sum = _mm_cvtsi128_si32(diff); + alt[0] += diff_sum << 2; + + diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8)); + alt[1] += diff_sum << 2; + + } + alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); /* bias alt upwards before the comparison */ + alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); + + ca = (alt[0] < adj[0]); /* an artifact is flagged when the biased alt is smaller than adj in either half */ + ca |= (alt[1] < adj[1]); + + return ca; +} + diff --git a/common/x86/ideint_function_selector.c b/common/x86/ideint_function_selector.c new file mode 100644 index 0000000..64d6177 --- /dev/null +++ b/common/x86/ideint_function_selector.c @@ -0,0 +1,126 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_init_function_ptr(), ideint_default_arch() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + + +/** +******************************************************************************* +* +* @brief +* Call corresponding function pointer initialization function +* +* @par Description +* Calls ideint_init_function_ptr_generic() first and then, depending on ps_ctxt->s_params.e_arch, the SSSE3 and SSE4.2 initialization functions +* +* @param[in] ps_ctxt +* Context; s_params.e_arch selects the architecture +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr(ctxt_t *ps_ctxt) +{ + + ideint_init_function_ptr_generic(ps_ctxt); + + switch(ps_ctxt->s_params.e_arch) + { + case ICV_X86_GENERIC: + break; + case ICV_X86_SSSE3: + ideint_init_function_ptr_ssse3(ps_ctxt); + break; + case ICV_X86_SSE42: + default: /* any unrecognised architecture value is handled like ICV_X86_SSE42 */ + ideint_init_function_ptr_ssse3(ps_ctxt); + ideint_init_function_ptr_sse42(ps_ctxt); + break; + } + +} + +/** +******************************************************************************* +* +*
@brief Return the default architecture for the x86 build of the de-interlacer +* +* @par Description: This routine does not probe the processor; it always +* returns ICV_X86_SSE42 +* +* @param[in] void +* +* @returns ICV_ARCH_T +* architecture +* +* @remarks none +* +******************************************************************************* +*/ +ICV_ARCH_T ideint_default_arch(void) +{ + return ICV_X86_SSE42; +} diff --git a/common/x86/ideint_function_selector_sse42.c b/common/x86/ideint_function_selector_sse42.c new file mode 100644 index 0000000..b6cf3f3 --- /dev/null +++ b/common/x86/ideint_function_selector_sse42.c @@ -0,0 +1,89 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt.
Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector_sse42.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_init_function_ptr_sse42() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + + +/** +******************************************************************************* +* +* @brief +* Initialize the function pointers +* +* @par Description +* Placeholder for SSE4.2 specific function pointers; it currently assigns nothing and leaves the context untouched +* +* @param[in] ps_ctxt +* Context pointer (unused) +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr_sse42(ctxt_t *ps_ctxt) +{ + UNUSED(ps_ctxt); + return; +} diff --git a/common/x86/ideint_function_selector_ssse3.c b/common/x86/ideint_function_selector_ssse3.c new file mode 100644 index 0000000..4d06c70 --- /dev/null +++ b/common/x86/ideint_function_selector_ssse3.c @@ -0,0 +1,92 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_function_selector_ssse3.c +* +* @brief +* This file contains the function selector related code +* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_init_function_ptr_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" + +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" +#include "ideint_debug.h" +#include "ideint_function_selector.h" + + +/** +******************************************************************************* +* +* @brief +* Initialize the function pointers +* +* @par Description +* The current routine initializes the function
pointers based on architecture +* +* @param[in] ps_ctxt +* Context pointer +* +* @returns none +* +* @remarks none +* +******************************************************************************* +*/ +void ideint_init_function_ptr_ssse3(ctxt_t *ps_ctxt) +{ /* point the SAD, variance, spatial filter and CAC entries of the context at the *_ssse3 functions */ + ps_ctxt->pf_sad_8x4 = icv_sad_8x4_ssse3; + ps_ctxt->pf_variance_8x4 = icv_variance_8x4_ssse3; + ps_ctxt->pf_spatial_filter = ideint_spatial_filter_ssse3; + ps_ctxt->pf_cac_8x8 = ideint_cac_8x8_ssse3; + return; +} diff --git a/common/x86/ideint_spatial_filter_ssse3.c b/common/x86/ideint_spatial_filter_ssse3.c new file mode 100644 index 0000000..7c2149f --- /dev/null +++ b/common/x86/ideint_spatial_filter_ssse3.c @@ -0,0 +1,240 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +/** +******************************************************************************* +* @file +* ideint_spatial_filter_ssse3.c +* +* @brief +* This file contains the definitions of the core processing of the de +* interlacer.
+* +* @author +* Ittiam +* +* @par List of Functions: +* ideint_spatial_filter_ssse3() +* +* @remarks +* None +* +******************************************************************************* +*/ +/*****************************************************************************/ +/* File Includes */ +/*****************************************************************************/ +/* System include files */ +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <immintrin.h> + +/* User include files */ +#include "icv_datatypes.h" +#include "icv_macros.h" +#include "icv_platform_macros.h" +#include "icv.h" +#include "icv_variance.h" +#include "icv_sad.h" +#include "ideint.h" +#include "ideint_defs.h" +#include "ideint_structs.h" +#include "ideint_utils.h" +#include "ideint_cac.h" + +/** +******************************************************************************* +* +* @brief +* Performs spatial edge adaptive filtering +* +* @par Description +* Performs spatial edge adaptive filtering by detecting edge direction. +* A direction (shift of -1, 0 or +1 pixel) is chosen separately for the left and +* right half of the block; every output row is the average of one source row +* displaced by +shift and the following source row displaced by -shift. +* +* @param[in] pu1_src +* Source buffer; SUB_BLK_HT + 1 rows are read, starting one byte to the left of pu1_src +* +* @param[in] pu1_out +* Destination buffer; SUB_BLK_HT rows of 8 bytes are written +* +* @param[in] src_strd +* Source stride +* +* @param[in] out_strd +* Destination stride +* +* @returns +* None +* +* @remarks +* NOTE(review): the interpolation stage reads the source through UWORD32 pointers built from byte addresses offset by the shifts, so these loads can be unaligned - confirm this is acceptable for all target compilers +* +******************************************************************************* +*/ +void ideint_spatial_filter_ssse3(UWORD8 *pu1_src, + UWORD8 *pu1_out, + WORD32 src_strd, + WORD32 out_strd) +{ + WORD32 i; + + WORD32 adiff[6]; + WORD32 *pi4_diff; + WORD32 shifts[2]; + WORD32 dir_45_le_90, dir_45_le_135, dir_135_le_90; + + __m128i row1_0, row1_m1, row1_p1; + __m128i row2_0, row2_m1, row2_p1; + __m128i diff, diffs[3]; + __m128i zero; + + /*****************************************************************/ + /* Direction detection */ + /*****************************************************************/ + + zero = _mm_setzero_si128(); + diffs[0] = _mm_setzero_si128(); + diffs[1] =
_mm_setzero_si128(); + diffs[2] = _mm_setzero_si128(); + + /* Load source: the first row at horizontal offsets -1, 0 and +1 */ + row1_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1)); + row1_0 = _mm_loadl_epi64((__m128i *) (pu1_src)); + row1_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1)); + pu1_src += src_strd; + + /* Unpack to 16 bits: the low 64 bits then hold the left four pixels, the high 64 bits the right four */ + row1_m1 = _mm_unpacklo_epi8(row1_m1, zero); + row1_0 = _mm_unpacklo_epi8(row1_0, zero); + row1_p1 = _mm_unpacklo_epi8(row1_p1, zero); + + /*****************************************************************/ + /* Calculating the difference along each of the 3 directions. */ + /*****************************************************************/ + for(i = 0; i < SUB_BLK_HT; i ++) + { + row2_m1 = _mm_loadl_epi64((__m128i *) (pu1_src - 1)); + row2_0 = _mm_loadl_epi64((__m128i *) (pu1_src)); + row2_p1 = _mm_loadl_epi64((__m128i *) (pu1_src + 1)); + pu1_src += src_strd; + + /* Unpack to 16 bits */ + row2_m1 = _mm_unpacklo_epi8(row2_m1, zero); + row2_0 = _mm_unpacklo_epi8(row2_0, zero); + row2_p1 = _mm_unpacklo_epi8(row2_p1, zero); + + diff = _mm_sad_epu8(row1_0, row2_0); /* diffs[0]: no horizontal displacement between the two rows */ + diffs[0] = _mm_add_epi64(diffs[0], diff); + + diff = _mm_sad_epu8(row1_m1, row2_p1); /* diffs[1]: upper row at -1 against lower row at +1 */ + diffs[1] = _mm_add_epi64(diffs[1], diff); + + diff = _mm_sad_epu8(row1_p1, row2_m1); /* diffs[2]: upper row at +1 against lower row at -1 */ + diffs[2] = _mm_add_epi64(diffs[2], diff); + + row1_m1 = row2_m1; + row1_0 = row2_0; + row1_p1 = row2_p1; + } + /* Revert pu1_src increment: one row before the loop plus SUB_BLK_HT rows inside it */ + pu1_src -= (SUB_BLK_HT + 1) * src_strd; + + + adiff[0] = _mm_cvtsi128_si32(diffs[0]); /* adiff[0..2]: differences of the left half, adiff[3..5]: those of the right half */ + adiff[1] = _mm_cvtsi128_si32(diffs[1]); + adiff[2] = _mm_cvtsi128_si32(diffs[2]); + adiff[3] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[0], 8)); + adiff[4] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[1], 8)); + adiff[5] = _mm_cvtsi128_si32(_mm_srli_si128(diffs[2], 8)); + pi4_diff = adiff; + + for(i = 0; i < 2; i++) + { + /*****************************************************************/ + /* Applying bias, to make the diff comparison more robust.
*/ + /*****************************************************************/ + pi4_diff[0] *= EDGE_BIAS_0; + pi4_diff[1] *= EDGE_BIAS_1; + pi4_diff[2] *= EDGE_BIAS_1; + + /*****************************************************************/ + /* comparing the diffs */ + /*****************************************************************/ + dir_45_le_90 = (pi4_diff[2] <= pi4_diff[0]); + dir_45_le_135 = (pi4_diff[2] <= pi4_diff[1]); + dir_135_le_90 = (pi4_diff[1] <= pi4_diff[0]); + + /*****************************************************************/ + /* Direction selection. */ + /*****************************************************************/ + shifts[i] = 0; /* default: no displacement */ + if(1 == dir_45_le_135) + { + if(1 == dir_45_le_90) + shifts[i] = 1; /* pi4_diff[2] is not larger than the other two */ + } + else + { + if(1 == dir_135_le_90) + shifts[i] = -1; /* pi4_diff[1] is smaller than pi4_diff[2] and not larger than pi4_diff[0] */ + } + pi4_diff += 3; + } + /*****************************************************************/ + /* Directional interpolation */ + /*****************************************************************/ + for(i = 0; i < SUB_BLK_HT / 2; i++) /* every iteration consumes two source row pairs and writes two output rows */ + { + __m128i dst; + __m128i row1, row2; + + UWORD32 *pu4_row1th, *pu4_row1tl; + UWORD32 *pu4_row2th, *pu4_row2tl; + UWORD32 *pu4_row1bh, *pu4_row1bl; + UWORD32 *pu4_row2bh, *pu4_row2bl; + + pu4_row1th = (UWORD32 *)(pu1_src + shifts[0]); /* upper rows are displaced by +shift, using shifts[0] for the first four bytes and shifts[1] from offset SUB_BLK_WD on */ + pu4_row1tl = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]); + + pu1_src += src_strd; + pu4_row2th = (UWORD32 *)(pu1_src + shifts[0]); + pu4_row2tl = (UWORD32 *)(pu1_src + SUB_BLK_WD + shifts[1]); + + pu4_row1bh = (UWORD32 *)(pu1_src - shifts[0]); /* lower rows are displaced by -shift */ + pu4_row1bl = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]); + + pu1_src += src_strd; + pu4_row2bh = (UWORD32 *)(pu1_src - shifts[0]); + pu4_row2bl = (UWORD32 *)(pu1_src + SUB_BLK_WD - shifts[1]); + + row1 = _mm_set_epi32(*pu4_row1tl, *pu4_row1th, *pu4_row2tl, *pu4_row2th); /* high 64 bits: first row pair, low 64 bits: second row pair */ + row2 = _mm_set_epi32(*pu4_row1bl, *pu4_row1bh, *pu4_row2bl, *pu4_row2bh); + + dst = _mm_avg_epu8(row1, row2); + + _mm_storel_epi64((__m128i *)pu1_out, _mm_srli_si128(dst, 8)); /* the first output row comes from the high 64 bits */ + pu1_out += out_strd; + +
_mm_storel_epi64((__m128i *)pu1_out, dst); + pu1_out += out_strd; + } +} + diff --git a/decoder.arm.mk b/decoder.arm.mk index 5f59681..fb94969 100644 --- a/decoder.arm.mk +++ b/decoder.arm.mk @@ -2,12 +2,18 @@ libmpeg2d_inc_dir_arm += $(LOCAL_PATH)/decoder/arm libmpeg2d_inc_dir_arm += $(LOCAL_PATH)/common/arm libmpeg2d_srcs_c_arm += decoder/arm/impeg2d_function_selector.c +libmpeg2d_srcs_c_arm += common/arm/ideint_function_selector.c libmpeg2d_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC LOCAL_ARM_MODE := arm ifeq ($(ARCH_ARM_HAVE_NEON),true) libmpeg2d_srcs_c_arm += decoder/arm/impeg2d_function_selector_a9q.c +libmpeg2d_srcs_c_arm += common/arm/ideint_function_selector_a9.c +libmpeg2d_srcs_asm_arm += common/arm/icv_sad_a9.s +libmpeg2d_srcs_asm_arm += common/arm/icv_variance_a9.s +libmpeg2d_srcs_asm_arm += common/arm/ideint_spatial_filter_a9.s +libmpeg2d_srcs_asm_arm += common/arm/ideint_cac_a9.s libmpeg2d_srcs_asm_arm += common/arm/impeg2_format_conv.s libmpeg2d_srcs_asm_arm += common/arm/impeg2_idct.s libmpeg2d_srcs_asm_arm += common/arm/impeg2_inter_pred.s diff --git a/decoder.arm64.mk b/decoder.arm64.mk index bcb6d55..a195111 100644 --- a/decoder.arm64.mk +++ b/decoder.arm64.mk @@ -8,6 +8,12 @@ libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector.c ifeq ($(ARCH_ARM_HAVE_NEON),true) libmpeg2d_srcs_c_arm64 += decoder/arm/impeg2d_function_selector_av8.c +libmpeg2d_srcs_c_arm64 += common/arm/ideint_function_selector.c +libmpeg2d_srcs_c_arm64 += common/arm/ideint_function_selector_av8.c +libmpeg2d_srcs_asm_arm64 += common/armv8/icv_sad_av8.s +libmpeg2d_srcs_asm_arm64 += common/armv8/icv_variance_av8.s +libmpeg2d_srcs_asm_arm64 += common/armv8/ideint_spatial_filter_av8.s +libmpeg2d_srcs_asm_arm64 += common/armv8/ideint_cac_av8.s libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_neon_macros.s libmpeg2d_srcs_asm_arm64 += common/armv8/impeg2_format_conv.s diff --git a/decoder.mips.mk b/decoder.mips.mk index 514eb9c..7aaa724 100644 --- a/decoder.mips.mk +++
b/decoder.mips.mk @@ -1,6 +1,6 @@ libmpeg2d_inc_dir_mips += $(LOCAL_PATH)/common/mips libmpeg2d_srcs_c_mips += decoder/mips/impeg2d_function_selector.c - +libmpeg2d_srcs_c_mips += common/mips/ideint_function_selector.c LOCAL_C_INCLUDES_mips += $(libmpeg2d_inc_dir_mips) LOCAL_SRC_FILES_mips += $(libmpeg2d_srcs_c_mips) diff --git a/decoder.mips64.mk b/decoder.mips64.mk index 5a3bdbc..ecd5418 100644 --- a/decoder.mips64.mk +++ b/decoder.mips64.mk @@ -1,6 +1,6 @@ libmpeg2d_inc_dir_mips64 += $(LOCAL_PATH)/common/mips libmpeg2d_srcs_c_mips64 += decoder/mips/impeg2d_function_selector.c - +libmpeg2d_srcs_c_mips64 += common/mips/ideint_function_selector.c LOCAL_C_INCLUDES_mips64 += $(libmpeg2d_inc_dir_mips) LOCAL_SRC_FILES_mips64 += $(libmpeg2d_srcs_c_mips) @@ -40,6 +40,15 @@ libmpeg2d_srcs_c += decoder/impeg2d_pic_proc.c libmpeg2d_srcs_c += decoder/impeg2d_pnb_pic.c libmpeg2d_srcs_c += decoder/impeg2d_vld.c libmpeg2d_srcs_c += decoder/impeg2d_vld_tables.c +libmpeg2d_srcs_c += decoder/impeg2d_deinterlace.c + +libmpeg2d_srcs_c += common/icv_sad.c +libmpeg2d_srcs_c += common/icv_variance.c +libmpeg2d_srcs_c += common/ideint.c +libmpeg2d_srcs_c += common/ideint_cac.c +libmpeg2d_srcs_c += common/ideint_debug.c +libmpeg2d_srcs_c += common/ideint_function_selector_generic.c +libmpeg2d_srcs_c += common/ideint_utils.c LOCAL_SRC_FILES := $(libmpeg2d_srcs_c) $(libmpeg2d_srcs_asm) diff --git a/decoder.x86.mk b/decoder.x86.mk index ff6344c..1b42fed 100644 --- a/decoder.x86.mk +++ b/decoder.x86.mk @@ -7,7 +7,14 @@ libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector.c libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_avx2.c libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_ssse3.c libmpeg2d_srcs_c_x86 += decoder/x86/impeg2d_function_selector_sse42.c - +libmpeg2d_srcs_c_x86 += common/x86/ideint_function_selector.c +libmpeg2d_srcs_c_x86 += common/x86/ideint_function_selector_ssse3.c +libmpeg2d_srcs_c_x86 += common/x86/ideint_function_selector_sse42.c + 
+libmpeg2d_srcs_c_x86 += common/x86/icv_variance_ssse3.c +libmpeg2d_srcs_c_x86 += common/x86/icv_sad_ssse3.c +libmpeg2d_srcs_c_x86 += common/x86/ideint_cac_ssse3.c +libmpeg2d_srcs_c_x86 += common/x86/ideint_spatial_filter_ssse3.c libmpeg2d_srcs_c_x86 += common/x86/impeg2_idct_recon_sse42_intr.c libmpeg2d_srcs_c_x86 += common/x86/impeg2_inter_pred_sse42_intr.c diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk index 72c1820..f2ba61d 100644 --- a/decoder.x86_64.mk +++ b/decoder.x86_64.mk @@ -7,7 +7,14 @@ libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector.c libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_avx2.c libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_ssse3.c libmpeg2d_srcs_c_x86_64 += decoder/x86/impeg2d_function_selector_sse42.c - +libmpeg2d_srcs_c_x86_64 += common/x86/ideint_function_selector.c +libmpeg2d_srcs_c_x86_64 += common/x86/ideint_function_selector_ssse3.c +libmpeg2d_srcs_c_x86_64 += common/x86/ideint_function_selector_sse42.c + +libmpeg2d_srcs_c_x86_64 += common/x86/icv_variance_ssse3.c +libmpeg2d_srcs_c_x86_64 += common/x86/icv_sad_ssse3.c +libmpeg2d_srcs_c_x86_64 += common/x86/ideint_cac_ssse3.c +libmpeg2d_srcs_c_x86_64 += common/x86/ideint_spatial_filter_ssse3.c libmpeg2d_srcs_c_x86_64 += common/x86/impeg2_idct_recon_sse42_intr.c libmpeg2d_srcs_c_x86_64 += common/x86/impeg2_inter_pred_sse42_intr.c diff --git a/decoder/impeg2d.h b/decoder/impeg2d.h index fe38046..5567733 100644 --- a/decoder/impeg2d.h +++ b/decoder/impeg2d.h @@ -198,6 +198,11 @@ typedef struct /* format in which codec has to give out frame data for display */ IV_COLOR_FORMAT_T e_output_format; + /** + * Flag to enable/disable deinterlacing + */ + UWORD32 u4_deinterlace; + } impeg2d_fill_mem_rec_ip_t; typedef struct @@ -230,6 +235,11 @@ typedef struct and application */ UWORD32 u4_share_disp_buf; + /** + * Flag to enable/disable deinterlacing + */ + UWORD32 u4_deinterlace; + } impeg2d_init_ip_t; typedef struct diff --git 
a/decoder/impeg2d_api.h b/decoder/impeg2d_api.h index bf3943e..3fa098b 100644 --- a/decoder/impeg2d_api.h +++ b/decoder/impeg2d_api.h @@ -50,7 +50,7 @@ -#define NUM_MEM_RECORDS 4 *MAX_THREADS+NUM_INT_FRAME_BUFFERS + 5 +#define NUM_MEM_RECORDS 4 * MAX_THREADS + NUM_INT_FRAME_BUFFERS + 5 + 2 #define SETBIT(a,i) ((a) |= (1 << i)) diff --git a/decoder/impeg2d_api_main.c b/decoder/impeg2d_api_main.c index 70baae8..63d8275 100644 --- a/decoder/impeg2d_api_main.c +++ b/decoder/impeg2d_api_main.c @@ -68,6 +68,7 @@ #include "impeg2d_structs.h" #include "impeg2d_mc.h" #include "impeg2d_pic_proc.h" +#include "impeg2d_deinterlace.h" #define NUM_FRAMES_LIMIT_ENABLED 0 @@ -363,7 +364,8 @@ void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, WORD32 i4_num_threads; WORD32 i4_share_disp_buf, i4_chroma_format; WORD32 i4_chroma_size; - + UWORD32 u4_deinterlace; + UNUSED(u4_deinterlace); max_frm_width = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd); max_frm_height = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht); @@ -392,6 +394,15 @@ void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, i4_chroma_format = -1; } + if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size > offsetof(impeg2d_fill_mem_rec_ip_t, u4_deinterlace)) + { + u4_deinterlace = ps_ip->u4_deinterlace; + } + else + { + u4_deinterlace = 0; + } + if( (i4_chroma_format != IV_YUV_420P) && (i4_chroma_format != IV_YUV_420SP_UV) && @@ -400,6 +411,12 @@ void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, i4_share_disp_buf = 0; } + /* Disable deinterlacer in shared mode */ + if(i4_share_disp_buf) + { + u4_deinterlace = 0; + } + /*************************************************************************/ /* Fill the memory requirement XDM Handle */ /*************************************************************************/ @@ -559,6 +576,23 @@ void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip, ps_mem_rec->u4_mem_alignment = 128; ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + 
ps_mem_rec->u4_mem_size = impeg2d_deint_ctxt_size(); + ps_mem_rec++; + u1_no_rec++; + + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; + + if(IV_YUV_420P != i4_chroma_format) + ps_mem_rec->u4_mem_size = max_frm_size; + else + ps_mem_rec->u4_mem_size = 64; + + ps_mem_rec++; + u1_no_rec++; + + ps_mem_rec->u4_mem_alignment = 128; + ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM; ps_mem_rec->u4_mem_size = sizeof(iv_mem_rec_t) * (NUM_MEM_RECORDS); ps_mem_rec++; u1_no_rec++; @@ -910,9 +944,15 @@ IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *ps_dechdl, } - for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++) + if((ps_dec_state->u4_deinterlace) && (NULL != ps_dec_state->ps_deint_pic)) { + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, + ps_dec_state->ps_deint_pic->i4_buf_id, + MPEG2_BUF_MGR_DEINT); + } + for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++) + { ps_dec_state = ps_dec_state_multi_core->ps_dec_state[i4_num_threads]; @@ -924,6 +964,7 @@ IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *ps_dechdl, ps_dec_state->u2_is_mpeg2 = 0; ps_dec_state->aps_ref_pics[0] = NULL; ps_dec_state->aps_ref_pics[1] = NULL; + ps_dec_state->ps_deint_pic = NULL; } } else @@ -1448,6 +1489,7 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, impeg2d_init_op_t *ps_dec_init_op; WORD32 i4_num_threads; UWORD32 u4_share_disp_buf, u4_chroma_format; + UWORD32 u4_deinterlace; ps_dec_init_ip = (impeg2d_init_ip_t *)ps_ip; ps_dec_init_op = (impeg2d_init_op_t *)ps_op; @@ -1470,6 +1512,15 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, u4_chroma_format = ps_dec_init_ip->s_ivd_init_ip_t.e_output_format; + if(ps_dec_init_ip->s_ivd_init_ip_t.u4_size > offsetof(impeg2d_init_ip_t, u4_deinterlace)) + { + u4_deinterlace = ps_dec_init_ip->u4_deinterlace; + } + else + { + u4_deinterlace = 0; + } + if( (u4_chroma_format != IV_YUV_420P) && (u4_chroma_format != 
IV_YUV_420SP_UV) && (u4_chroma_format != IV_YUV_420SP_VU)) @@ -1477,9 +1528,11 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, u4_share_disp_buf = 0; } - - - + /* Disable deinterlacer in shared mode */ + if(u4_share_disp_buf) + { + u4_deinterlace = 0; + } ps_mem_rec = ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location; ps_mem_rec ++; @@ -1672,6 +1725,8 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, ps_dec_state->u2_frame_height = u4_max_frm_height; ps_dec_state->u2_vertical_size = u4_max_frm_height; ps_dec_state->u4_share_disp_buf = u4_share_disp_buf; + ps_dec_state->u4_deinterlace = u4_deinterlace; + ps_dec_state->ps_deint_pic = NULL; } } @@ -1774,8 +1829,15 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, ps_dec_state->pv_jobq = impeg2_jobq_init(ps_dec_state->pv_jobq_buf, ps_dec_state->i4_jobq_buf_size); + ps_dec_state->pv_deinterlacer_ctxt = ps_mem_rec->pv_base; + ps_mem_rec++; + + ps_dec_state->pu1_deint_fmt_buf = ps_mem_rec->pv_base; + ps_mem_rec++; + + /*************************************************************************/ - /* MemTab[12] is used for storing TabRecords */ + /* Last MemTab is used for storing TabRecords */ /*************************************************************************/ ps_dec_state->pv_memTab = (void *)ps_mem_rec->pv_base; memcpy(ps_mem_rec->pv_base,ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location, ps_mem_rec->u4_mem_size); @@ -1790,6 +1852,7 @@ IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl, ps_dec_state->aps_ref_pics[1] = NULL; ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IV_SUCCESS; + impeg2d_init_arch(ps_dec_state); impeg2d_init_function_ptr(ps_dec_state); @@ -3042,9 +3105,38 @@ IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *ps_dechdl, if(fmt_conv == 1) { - impeg2d_format_convert(ps_dec_state, ps_disp_pic, - &(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf), - 0, ps_dec_state->u2_vertical_size); + iv_yuv_buf_t *ps_dst; + + + ps_dst = 
&(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf); + if(ps_dec_state->u4_deinterlace && (0 == ps_dec_state->u2_progressive_frame)) + { + impeg2d_deinterlace(ps_dec_state, + ps_disp_pic, + ps_dst, + 0, + ps_dec_state->u2_vertical_size); + + } + else + { + impeg2d_format_convert(ps_dec_state, + ps_disp_pic, + ps_dst, + 0, + ps_dec_state->u2_vertical_size); + } + } + + if(ps_dec_state->u4_deinterlace) + { + if(ps_dec_state->ps_deint_pic) + { + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, + ps_dec_state->ps_deint_pic->i4_buf_id, + MPEG2_BUF_MGR_DEINT); + } + ps_dec_state->ps_deint_pic = ps_disp_pic; } if(0 == ps_dec_state->u4_share_disp_buf) impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_disp_pic->i4_buf_id, BUF_MGR_DISP); @@ -3249,6 +3341,17 @@ IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *ps_dechdl, } } + if(ps_dec_state->u4_deinterlace) + { + if(ps_dec_state->ps_deint_pic) + { + impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, + ps_dec_state->ps_deint_pic->i4_buf_id, + MPEG2_BUF_MGR_DEINT); + } + ps_dec_state->ps_deint_pic = ps_dec_state->ps_disp_pic; + } + if(1 == ps_dec_op->s_ivd_video_decode_op_t.u4_output_present) { INSERT_LOGO(ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0], diff --git a/decoder/impeg2d_dec_hdr.c b/decoder/impeg2d_dec_hdr.c index d9c87b6..752731d 100644 --- a/decoder/impeg2d_dec_hdr.c +++ b/decoder/impeg2d_dec_hdr.c @@ -40,6 +40,7 @@ #include "impeg2d_structs.h" #include "impeg2_globals.h" #include "impeg2d_pic_proc.h" +#include "impeg2d_deinterlace.h" @@ -889,9 +890,22 @@ void impeg2d_dec_pic_data_thread(dec_state_t *ps_dec) start_row = s_job.i2_start_mb_y << 4; num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size); num_rows -= start_row; - impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, - ps_dec->ps_disp_frm_buf, - start_row, num_rows); + + if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame)) + { + impeg2d_deinterlace(ps_dec, + ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + 
start_row, + num_rows); + + } + else + { + impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + start_row, num_rows); + } break; } @@ -955,18 +969,46 @@ void impeg2d_dec_pic_data_thread(dec_state_t *ps_dec) start_row = s_job.i2_start_mb_y << 4; num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size); num_rows -= start_row; - impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, - ps_dec->ps_disp_frm_buf, - start_row, num_rows); + if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame)) + { + impeg2d_deinterlace(ps_dec, + ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + start_row, + num_rows); + + } + else + { + impeg2d_format_convert(ps_dec, + ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + start_row, + num_rows); + } } } } else { if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat))) - impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, - ps_dec->ps_disp_frm_buf, - 0, ps_dec->u2_vertical_size); + { + if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame)) + { + impeg2d_deinterlace(ps_dec, + ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + 0, + ps_dec->u2_vertical_size); + + } + else + { + impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic, + ps_dec->ps_disp_frm_buf, + 0, ps_dec->u2_vertical_size); + } + } } } @@ -1088,9 +1130,10 @@ static WORD32 impeg2d_init_thread_dec_ctxt(dec_state_t *ps_dec, ps_dec_thd->ps_func_bi_direct = ps_dec->ps_func_bi_direct; ps_dec_thd->ps_func_forw_or_back = ps_dec->ps_func_forw_or_back; + ps_dec_thd->pv_deinterlacer_ctxt = ps_dec->pv_deinterlacer_ctxt; + ps_dec_thd->ps_deint_pic = ps_dec->ps_deint_pic; return 0; - } diff --git a/decoder/impeg2d_deinterlace.c b/decoder/impeg2d_deinterlace.c new file mode 100644 index 0000000..89e5c1a --- /dev/null +++ b/decoder/impeg2d_deinterlace.c @@ -0,0 +1,284 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The 
Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#include <stdio.h> +#include "iv_datatypedef.h" +#include "iv.h" + +#include "icv.h" +#include "ideint.h" + +#include "impeg2_buf_mgr.h" +#include "impeg2_disp_mgr.h" +#include "impeg2_defs.h" +#include "impeg2_platform_macros.h" +#include "impeg2_inter_pred.h" +#include "impeg2_idct.h" +#include "impeg2_globals.h" +#include "impeg2_mem_func.h" +#include "impeg2_format_conv.h" +#include "impeg2_macros.h" + +#include "impeg2d.h" +#include "impeg2d_bitstream.h" +#include "impeg2d_structs.h" +#include "impeg2d_globals.h" +#include "impeg2d_mc.h" +#include "impeg2d_pic_proc.h" +#include "impeg2d_deinterlace.h" + +typedef struct +{ + IVD_ARCH_T ivd_arch; + ICV_ARCH_T icv_arch; +}arch_map_t; + +static const arch_map_t gas_impeg2d_arch_mapping[] = +{ + {ARCH_ARM_NONEON, ICV_ARM_NONEON}, + {ARCH_ARM_A9Q, ICV_ARM_A9Q}, + {ARCH_ARM_A9A, ICV_ARM_A9A}, + {ARCH_ARM_A9, ICV_ARM_A9}, + {ARCH_ARM_A7, ICV_ARM_A7}, + {ARCH_ARM_A5, ICV_ARM_A5}, + {ARCH_ARM_A15, ICV_ARM_A15}, + {ARCH_ARM_NEONINTR, ICV_ARM_NEONINTR}, + {ARCH_ARMV8_GENERIC, ICV_ARMV8_GENERIC}, + {ARCH_X86_GENERIC, ICV_X86_GENERIC}, + {ARCH_X86_SSSE3, ICV_X86_SSSE3}, + {ARCH_X86_SSE42, ICV_X86_SSE42}, + {ARCH_X86_AVX2, ICV_X86_AVX2}, + {ARCH_MIPS_GENERIC, 
ICV_MIPS_GENERIC}, + {ARCH_MIPS_32, ICV_MIPS_32} +}; + +/****************************************************************************** +* Function Name : impeg2d_get_pic +* +* Description : Fills ps_dst as a YUV 4:2:0 planar picture descriptor +* +* Arguments : +* ps_dst : Picture descriptor to be filled +* pu1_buf_y : Luma plane pointer, stored as is +* pu1_buf_u : U plane pointer, stored as is +* pu1_buf_v : V plane pointer, stored as is +* wd : Luma width; both chroma widths are set to wd / 2 +* ht : Luma height; both chroma heights are set to ht / 2 +* strd : Luma stride; both chroma strides are set to strd / 2 +* +* Values Returned : None +******************************************************************************/ +static void impeg2d_get_pic(icv_pic_t *ps_dst, + UWORD8 *pu1_buf_y, + UWORD8 *pu1_buf_u, + UWORD8 *pu1_buf_v, + WORD32 wd, + WORD32 ht, + WORD32 strd) +{ + ps_dst->ai4_wd[0] = wd; + ps_dst->ai4_wd[1] = wd / 2; + ps_dst->ai4_wd[2] = wd / 2; + + ps_dst->ai4_ht[0] = ht; + ps_dst->ai4_ht[1] = ht / 2; + ps_dst->ai4_ht[2] = ht / 2; + + ps_dst->ai4_strd[0] = strd; + ps_dst->ai4_strd[1] = strd / 2; + ps_dst->ai4_strd[2] = strd / 2; + + ps_dst->apu1_buf[0] = pu1_buf_y; + ps_dst->apu1_buf[1] = pu1_buf_u; + ps_dst->apu1_buf[2] = pu1_buf_v; + + ps_dst->e_color_fmt = ICV_YUV420P; +} +static void impeg2d_get_flds(icv_pic_t *ps_frm, + icv_pic_t *ps_top_fld, + icv_pic_t *ps_bot_fld) +{ /* Builds top and bottom field descriptors for frame ps_frm: each field keeps the frame widths, gets half the frame heights and twice the frame strides, and points into the frame's own planes */ + ps_top_fld->ai4_wd[0] = ps_frm->ai4_wd[0]; + ps_top_fld->ai4_wd[1] = ps_frm->ai4_wd[1]; + ps_top_fld->ai4_wd[2] = ps_frm->ai4_wd[2]; + + ps_top_fld->ai4_ht[0] = ps_frm->ai4_ht[0] / 2; + ps_top_fld->ai4_ht[1] = ps_frm->ai4_ht[1] / 2; + ps_top_fld->ai4_ht[2] = ps_frm->ai4_ht[2] / 2; + + ps_top_fld->ai4_strd[0] = ps_frm->ai4_strd[0] * 2; + ps_top_fld->ai4_strd[1] = ps_frm->ai4_strd[1] * 2; + ps_top_fld->ai4_strd[2] = ps_frm->ai4_strd[2] * 2; + + ps_top_fld->e_color_fmt = ps_frm->e_color_fmt; + + /* Copy top field structure to bottom field, since the properties of both fields are the same */ + *ps_bot_fld = *ps_top_fld; + + /* Initialize the addresses for top field: it starts at the first row of each frame plane */ + ps_top_fld->apu1_buf[0] = ps_frm->apu1_buf[0]; + ps_top_fld->apu1_buf[1] = ps_frm->apu1_buf[1]; + ps_top_fld->apu1_buf[2] = ps_frm->apu1_buf[2]; + + /* Initialize the addresses for bottom field: it starts one frame stride after the top field */ + ps_bot_fld->apu1_buf[0] = ps_frm->apu1_buf[0] + ps_frm->ai4_strd[0]; + ps_bot_fld->apu1_buf[1] = ps_frm->apu1_buf[1] + ps_frm->ai4_strd[1]; + ps_bot_fld->apu1_buf[2] = ps_frm->apu1_buf[2] + ps_frm->ai4_strd[2]; + + return; +} + +/****************************************************************************** +* Function Name : impeg2d_get_arch +* +* Description : Looks up e_arch in gas_impeg2d_arch_mapping and returns the +* icv_arch value of the matching entry +* +* Arguments : +* e_arch : Decoder architecture value to be mapped +* +* Values Returned : Mapped ICV_ARCH_T; ICV_ARM_A9 when no entry matches +******************************************************************************/ +static ICV_ARCH_T impeg2d_get_arch(IVD_ARCH_T e_arch) +{ + ICV_ARCH_T ret_arch; + WORD32 num_entries, i; + + ret_arch = ICV_ARM_A9; + num_entries =
sizeof(gas_impeg2d_arch_mapping) / sizeof(gas_impeg2d_arch_mapping[0]); + for(i = 0; i < num_entries; i++) + { + if(e_arch == gas_impeg2d_arch_mapping[i].ivd_arch) + { + ret_arch = gas_impeg2d_arch_mapping[i].icv_arch; + break; + } + } + return ret_arch; +} + +/****************************************************************************** +* Function Name : impeg2d_deint_ctxt_size +* +* Description : Returns the deinterlacer context size reported by +* ideint_ctxt_size() +* +* Arguments : None +* +* Values Returned : Value returned by ideint_ctxt_size() +******************************************************************************/ +WORD32 impeg2d_deint_ctxt_size(void) +{ + return ideint_ctxt_size(); +} + +/****************************************************************************** +* Function Name : impeg2d_deinterlace +* +* Description : Deinterlace current picture +* +* Arguments : +* ps_dec : Decoder Context +* ps_src_pic : Picture to be deinterlaced; NULL (or a NULL luma +* pointer) is treated as an error +* ps_disp_frm_buf : Display frame buffer that receives the output +* start_row : First row to process, passed on to ideint_process +* num_rows : Number of rows to process; 0 is treated as an error +* +* Values Returned : 0 on success, -1 on error +******************************************************************************/ +WORD32 impeg2d_deinterlace(dec_state_t *ps_dec, + pic_buf_t *ps_src_pic, + iv_yuv_buf_t *ps_disp_frm_buf, + WORD32 start_row, + WORD32 num_rows) +{ + icv_pic_t as_inp_flds[3]; + IDEINT_ERROR_T ret; + icv_pic_t s_src_frm; + icv_pic_t s_dst_frm; + UWORD8 *pu1_dst_y, *pu1_dst_u, *pu1_dst_v; + ideint_params_t s_params; + + if((NULL == ps_src_pic) || (NULL == ps_src_pic->pu1_y) || (0 == num_rows)) + return -1; + + s_params.e_arch = impeg2d_get_arch(ps_dec->e_processor_arch); + s_params.e_soc = ICV_SOC_GENERIC; + s_params.e_mode = IDEINT_MODE_SPATIAL; + s_params.i4_cur_fld_top = ps_dec->u2_top_field_first; + s_params.i4_disable_weave = 0; + s_params.pf_aligned_alloc = NULL; + s_params.pf_aligned_free = NULL; + + impeg2d_get_pic(&s_src_frm, ps_src_pic->pu1_y, ps_src_pic->pu1_u, + ps_src_pic->pu1_v, ps_dec->u2_horizontal_size, + ps_dec->u2_vertical_size, ps_dec->u2_frame_width); + impeg2d_get_flds(&s_src_frm, &as_inp_flds[1], &as_inp_flds[2]); + + if(ps_dec->ps_deint_pic)
+ { + icv_pic_t s_prv_frm; + icv_pic_t s_fld; + impeg2d_get_pic(&s_prv_frm, ps_dec->ps_deint_pic->pu1_y, + ps_dec->ps_deint_pic->pu1_u, + ps_dec->ps_deint_pic->pu1_v, ps_dec->u2_horizontal_size, + ps_dec->u2_vertical_size, ps_dec->u2_frame_width); + impeg2d_get_flds(&s_prv_frm, &s_fld, &as_inp_flds[0]); /* only the bottom field of the held picture is kept; its top field (s_fld) is not used */ + } + else + { /* no held picture: only the buffer pointers of as_inp_flds[0] are cleared */ + as_inp_flds[0].apu1_buf[0] = NULL; + as_inp_flds[0].apu1_buf[1] = NULL; + as_inp_flds[0].apu1_buf[2] = NULL; + } + + pu1_dst_y = ps_disp_frm_buf->pv_y_buf; + pu1_dst_u = ps_disp_frm_buf->pv_u_buf; + pu1_dst_v = ps_disp_frm_buf->pv_v_buf; + + /* Use intermediate buffer as output to deinterlacer, + * if color format is not 420P. For the two semi-planar formats only + * chroma goes to the intermediate buffer; luma stays in the display buffer + */ + if(IV_YUV_420P != ps_dec->i4_chromaFormat) + { + UWORD8 *pu1_buf_y; + UWORD8 *pu1_buf_u; + UWORD8 *pu1_buf_v; + WORD32 wd = ALIGN16(ps_dec->u2_horizontal_size); + WORD32 ht = ALIGN16(ps_dec->u2_vertical_size); + + pu1_buf_y = ps_dec->pu1_deint_fmt_buf; + pu1_buf_u = pu1_buf_y + wd * ht; + pu1_buf_v = pu1_buf_u + wd * ht / 4; + + pu1_dst_u = pu1_buf_u; + pu1_dst_v = pu1_buf_v; + + if((ps_dec->i4_chromaFormat != IV_YUV_420SP_UV) && + (ps_dec->i4_chromaFormat != IV_YUV_420SP_VU)) + { + pu1_dst_y = pu1_buf_y; + } + + } + impeg2d_get_pic(&s_dst_frm, pu1_dst_y, pu1_dst_u, pu1_dst_v, + ps_dec->u2_horizontal_size, ps_dec->u2_vertical_size, + ps_dec->u4_frm_buf_stride); + + /* NOTE(review): the intermediate planes above are laid out using + * wd = ALIGN16(u2_horizontal_size), while s_dst_frm is given + * u4_frm_buf_stride as its stride - confirm that the two agree + */ + ret = ideint_process(ps_dec->pv_deinterlacer_ctxt, &as_inp_flds[0], + &as_inp_flds[1], &as_inp_flds[2], &s_dst_frm, + &s_params, start_row, num_rows); + + if(IDEINT_ERROR_NONE != ret) + { + return -1; + } + + /* Format convert deinterlacer output if required */ + if(IV_YUV_420P != ps_dec->i4_chromaFormat) + { + pic_buf_t s_src_pic; + + s_src_pic = *ps_src_pic; + s_src_pic.pu1_y = pu1_dst_y; + s_src_pic.pu1_u = pu1_dst_u; + s_src_pic.pu1_v = pu1_dst_v; + + impeg2d_format_convert(ps_dec, + &s_src_pic, + ps_disp_frm_buf, + start_row, + num_rows); + + } + return 0; + +} diff --git a/decoder/impeg2d_deinterlace.h b/decoder/impeg2d_deinterlace.h new file mode
100644 index 0000000..b2e434c --- /dev/null +++ b/decoder/impeg2d_deinterlace.h @@ -0,0 +1,30 @@ +/****************************************************************************** + * + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +*/ +#ifndef __IMPEG2D_DEINTERLACE_H__ +#define __IMPEG2D_DEINTERLACE_H__ + +WORD32 impeg2d_deint_ctxt_size(void); +WORD32 impeg2d_deinterlace(dec_state_t *ps_dec, + pic_buf_t *ps_src_pic, + iv_yuv_buf_t *ps_disp_frm_buf, + WORD32 start_row, + WORD32 num_rows); + +#endif /* __IMPEG2D_DEINTERLACE_H__ */ diff --git a/decoder/impeg2d_pic_proc.c b/decoder/impeg2d_pic_proc.c index 0aee534..82da137 100644 --- a/decoder/impeg2d_pic_proc.c +++ b/decoder/impeg2d_pic_proc.c @@ -117,9 +117,13 @@ void impeg2d_format_convert(dec_state_t *ps_dec, dest_inc_Y = ps_dec->u4_frm_buf_stride; dest_inc_UV = ((ps_dec->u4_frm_buf_stride + 1) >> 1) << 1; convert_uv_only = 0; + if(1 == ps_dec->u4_share_disp_buf) convert_uv_only = 1; + if(pu1_src_y == pu1_dst_y) + convert_uv_only = 1; + if(ps_dec->i4_chromaFormat == IV_YUV_420SP_UV) { ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv(pu1_src_y, @@ -365,6 +369,8 @@ IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *ps_dec) impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, 
ps_dec->i4_cur_buf_id, BUF_MGR_DISP); impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF); + if(ps_dec->u4_deinterlace) + impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, MPEG2_BUF_MGR_DEINT); ps_pic_buf->u4_ts = ps_dec->u4_inp_ts; ps_pic_buf->e_pic_type = ps_dec->e_pic_type; @@ -406,6 +412,8 @@ IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *ps_dec) } impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_DISP); impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF); + if(ps_dec->u4_deinterlace) + impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, MPEG2_BUF_MGR_DEINT); ps_pic_buf->u4_ts = ps_dec->u4_inp_ts; ps_pic_buf->e_pic_type = ps_dec->e_pic_type; diff --git a/decoder/impeg2d_structs.h b/decoder/impeg2d_structs.h index 9fa2631..743b043 100644 --- a/decoder/impeg2d_structs.h +++ b/decoder/impeg2d_structs.h @@ -25,7 +25,7 @@ to support B pictures. Because of format conversion in a thread, codec delay is To reduce this delay, format conversion has to wait for MB status before converting for B pictures. To avoid this check the delay is increased to 2 and hence number of reference frames minimum is 4. 
Because of temporal dependency in deinterlacer one additional buffer is also needed */ -#define NUM_INT_FRAME_BUFFERS 4 +#define NUM_INT_FRAME_BUFFERS 5 #define MAX_WIDTH 4096 @@ -41,6 +41,9 @@ Because of temporal dependency in deinterlacer one additional buffer is also nee #define MAX_BITSTREAM_BUFFER_SIZE 2000 * 1024 +/* Flag to signal that buffer is held by deinterlacing */ +#define MPEG2_BUF_MGR_DEINT (BUF_MGR_DISP << 1) + typedef enum { CMD_PROCESS, @@ -360,6 +363,18 @@ typedef struct dec_state_struct_t WORD32 i4_frame_decoded; + /** Flag to enable deinterlace */ + UWORD32 u4_deinterlace; + + /** Deinterlacer context */ + void *pv_deinterlacer_ctxt; + + /** Picture buffer held by deinterlacer */ + pic_buf_t *ps_deint_pic; + + /** Buffer used after deinterlacer for format conversion */ + UWORD8 *pu1_deint_fmt_buf; + }dec_state_t; diff --git a/test/decoder/main.c b/test/decoder/main.c index 5930bd1..c344ec0 100644 --- a/test/decoder/main.c +++ b/test/decoder/main.c @@ -184,6 +184,7 @@ typedef struct void *cocodec_obj; UWORD32 share_disp_buf; + UWORD32 deinterlace; UWORD32 num_disp_buf; UWORD32 b_pic_present; WORD32 i4_degrade_type; @@ -255,6 +256,7 @@ typedef enum NUM_CORES, SHARE_DISPLAY_BUF, + DEINTERLACE, LOOPBACK, DISPLAY, FULLSCREEN, @@ -312,7 +314,8 @@ static const argument_t argument_mapping[] = "Number of cores to be used\n" }, { "--", "--share_display_buf", SHARE_DISPLAY_BUF, "Enable shared display buffer mode\n" }, - + { "--", "--deinterlace", DEINTERLACE, + "Enable deinterlacing for interlaced pics\n" }, { "--", "--loopback", LOOPBACK, "Enable playback in a loop\n" }, { "--", "--display", DISPLAY, @@ -350,6 +353,7 @@ static const argument_t argument_mapping[] = #define MAX_REF_FRAMES 16 #define MAX_REORDER_FRAMES 16 #define DEFAULT_SHARE_DISPLAY_BUF 0 +#define DEFAULT_DEINTERLACE 0 #define STRIDE 0 #define DEFAULT_NUM_CORES 1 @@ -1232,6 +1236,9 @@ void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value) case SHARE_DISPLAY_BUF: 
sscanf(value, "%d", &ps_app_ctx->share_disp_buf); break; + case DEINTERLACE: + sscanf(value, "%d", &ps_app_ctx->deinterlace); + break; case LOOPBACK: sscanf(value, "%d", &ps_app_ctx->loopback); break; @@ -1848,6 +1855,7 @@ int main(WORD32 argc, CHAR *argv[]) #ifdef PROFILE_ENABLE memset(peak_window, 0, sizeof(WORD32) * PEAK_WINDOW_SIZE); #endif + s_app_ctx.deinterlace = DEFAULT_DEINTERLACE; s_app_ctx.share_disp_buf = DEFAULT_SHARE_DISPLAY_BUF; s_app_ctx.u4_num_cores = DEFAULT_NUM_CORES; s_app_ctx.i4_degrade_type = 0; @@ -1976,6 +1984,7 @@ int main(WORD32 argc, CHAR *argv[]) s_app_ctx.i4_degrade_pics = 0; s_app_ctx.i4_degrade_type = 0; s_app_ctx.loopback = 0; + s_app_ctx.deinterlace = 0; s_app_ctx.share_disp_buf = 0; s_app_ctx.display = 0; #endif @@ -2118,6 +2127,7 @@ int main(WORD32 argc, CHAR *argv[]) s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; s_fill_mem_rec_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf; + s_fill_mem_rec_ip.u4_deinterlace = s_app_ctx.deinterlace; s_fill_mem_rec_ip.e_output_format = (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; @@ -2183,7 +2193,7 @@ int main(WORD32 argc, CHAR *argv[]) s_init_ip.s_ivd_init_ip_t.u4_frm_max_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht; s_init_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf; - + s_init_ip.u4_deinterlace = s_app_ctx.deinterlace; s_init_ip.s_ivd_init_ip_t.u4_num_mem_rec = u4_num_mem_recs; s_init_ip.s_ivd_init_ip_t.e_output_format = (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format; @@ -2588,6 +2598,8 @@ int main(WORD32 argc, CHAR *argv[]) s_ctl_get_frame_dimensions_op.u4_y_offset[0]); */ } + + /*************************************************************************/ /* Get VUI parameters */ /*************************************************************************/ |