diff options
author | Hamsalekha S <hamsalekha.s@ittiam.com> | 2015-03-13 21:24:58 +0530 |
---|---|---|
committer | Hamsalekha S <hamsalekha.s@ittiam.com> | 2015-04-02 15:59:02 +0530 |
commit | 8d3d303c7942ced6a987a52db8977d768dc3605f (patch) | |
tree | cc806c96794356996b13ba9970941d0aed74a97e /common/armv8/ih264_intra_pred_luma_16x16_av8.s | |
parent | 3956d913d37327dcb340f836e604b04bd478b158 (diff) | |
download | android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2 android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip |
Initial version
Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
Diffstat (limited to 'common/armv8/ih264_intra_pred_luma_16x16_av8.s')
-rwxr-xr-x | common/armv8/ih264_intra_pred_luma_16x16_av8.s | 606 |
1 files changed, 606 insertions, 0 deletions
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s new file mode 100755 index 0000000..a9eb165 --- /dev/null +++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -0,0 +1,606 @@ +//****************************************************************************** +//* +//* Copyright (C) 2015 The Android Open Source Project +//* +//* Licensed under the Apache License, Version 2.0 (the "License"); +//* you may not use this file except in compliance with the License. +//* You may obtain a copy of the License at: +//* +//* http://www.apache.org/licenses/LICENSE-2.0 +//* +//* Unless required by applicable law or agreed to in writing, software +//* distributed under the License is distributed on an "AS IS" BASIS, +//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +//* See the License for the specific language governing permissions and +//* limitations under the License. +//* +//***************************************************************************** +//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore +//*/ +///** +//****************************************************************************** +//* @file +//* ih264_intra_pred_luma_16x16_av8.s +//* +//* @brief +//* Contains function definitions for intra 16x16 Luma prediction . +//* +//* @author +//* Ittiam +//* +//* @par List of Functions: +//* +//* - ih264_intra_pred_luma_16x16_mode_vert_av8() +//* - ih264_intra_pred_luma_16x16_mode_horz_av8() +//* - ih264_intra_pred_luma_16x16_mode_dc_av8() +//* - ih264_intra_pred_luma_16x16_mode_plane_av8() +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ + +///* All the functions here are replicated from ih264_intra_pred_filters.c +// + +///** +///** +///** +// + + +.text +.p2align 2 +.include "ih264_neon_macros.s" +.extern ih264_gai1_intrapred_luma_plane_coeffs + + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_vert +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:vertical +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:Vertical ,described in sec 8.3.3.1 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + + .global ih264_intra_pred_luma_16x16_mode_vert_av8 + +ih264_intra_pred_luma_16x16_mode_vert_av8: + + push_v_regs + + + add x0, x0, #17 + ld1 {v0.8b, v1.8b}, [x0] + + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + st1 {v0.8b, v1.8b}, [x1], x3 + + pop_v_regs + ret + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_horz +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:horizontal +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:horizontal ,described in sec 8.3.3.2 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels(Not used in this function) +//* +//* @returns +//* +//* @remarks +//* None +//* +//******************************************************************************* +//*/ +//void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_16x16_mode_horz_av8 + +ih264_intra_pred_luma_16x16_mode_horz_av8: + + + + push_v_regs + + ld1 {v0.16b}, [x0] + + + + dup v10.16b, v0.b[15] + dup v11.16b, v0.b[14] + dup v12.16b, v0.b[13] + dup v13.16b, v0.b[12] + st1 {v10.16b}, [x1], x3 + dup v14.16b, v0.b[11] + st1 {v11.16b}, [x1], x3 + dup v15.16b, v0.b[10] + st1 {v12.16b}, [x1], x3 + dup v16.16b, v0.b[9] + st1 {v13.16b}, [x1], x3 + dup v17.16b, v0.b[8] + st1 {v14.16b}, [x1], x3 + dup v18.16b, v0.b[7] + st1 {v15.16b}, [x1], x3 + dup v19.16b, v0.b[6] + st1 {v16.16b}, [x1], x3 + dup v20.16b, v0.b[5] + st1 {v17.16b}, [x1], x3 + dup v21.16b, v0.b[4] + st1 {v18.16b}, [x1], x3 + dup v22.16b, v0.b[3] + st1 {v19.16b}, [x1], x3 + dup v23.16b, v0.b[2] + st1 {v20.16b}, [x1], x3 + dup v24.16b, v0.b[1] + st1 {v21.16b}, [x1], x3 + dup v25.16b, v0.b[0] + st1 {v22.16b}, [x1], x3 + st1 {v23.16b}, [x1], x3 + st1 {v24.16b}, [x1], x3 + st1 {v25.16b}, [x1], x3 + + pop_v_regs + ret + + + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_dc +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:DC +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_16x16_mode_dc_av8 + +ih264_intra_pred_luma_16x16_mode_dc_av8: + + + + push_v_regs + stp x19, x20, [sp, #-16]! + + sub v0.16b, v0.16b, v0.16b + sub v1.16b, v1.16b, v1.16b + mov w10, #0 + mov w11 , #3 + ands x6, x4, #0x01 + beq top_available //LEFT NOT AVAILABLE + ld1 {v0.16b}, [x0] + add w10, w10, #8 + add w11, w11, #1 +top_available: + ands x6, x4, #0x04 + beq none_available + add x6, x0, #17 + ld1 {v1.16b}, [x6] + add w10, w10, #8 + add w11, w11, #1 + b summation +none_available: + cmp x4, #0 + bne summation + mov w15, #128 + dup v20.16b, w15 + b store +summation: + uaddl v2.8h, v0.8b, v1.8b + uaddl2 v3.8h, v0.16b, v1.16b + dup v10.8h, w10 + neg w11, w11 + dup v20.8h, w11 + add v0.8h, v2.8h, v3.8h + mov v1.d[0], v0.d[1] + add v0.4h, v0.4h, v1.4h + addp v0.4h, v0.4h , v0.4h + addp v0.4h, v0.4h , v0.4h + add v0.4h, v0.4h, v10.4h + uqshl v0.8h, v0.8h, v20.8h + sqxtun v0.8b, v0.8h + dup v20.16b, v0.b[0] + +store: + + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + st1 { v20.16b}, [x1], x3 + + + +end_func: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + + + + + +///****************************************************************************** + + +///** +//******************************************************************************* +//* +//*ih264_intra_pred_luma_16x16_mode_plane +//* +//* @brief +//* Perform Intra prediction for luma_16x16 mode:PLANE +//* +//* @par Description: +//* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 8.3.3.4 +//* +//* @param[in] pu1_src +//* UWORD8 pointer to the source +//* +//* @param[out] pu1_dst +//* UWORD8 pointer to the destination +//* +//* @param[in] src_strd +//* integer source stride +//* +//* @param[in] dst_strd +//* integer destination stride +//* +//* @param[in] ui_neighboravailability +//* availability of neighbouring pixels +//* +//* @returns +//* +//* @remarks +//* None +//* +//*******************************************************************************/ +//void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src, +// UWORD8 *pu1_dst, +// WORD32 src_strd, +// WORD32 dst_strd, +// WORD32 ui_neighboravailability) + +//**************Variables Vs Registers***************************************** +// x0 => *pu1_src +// x1 => *pu1_dst +// x2 => src_strd +// x3 => dst_strd +// x4 => ui_neighboravailability + + .global ih264_intra_pred_luma_16x16_mode_plane_av8 +ih264_intra_pred_luma_16x16_mode_plane_av8: + + push_v_regs + stp x19, x20, [sp, #-16]! + mov x2, x1 + add x1, x0, #17 + add x0, x0, #15 + mov x8, #9 + sub x1, x1, #1 + mov x10, x1 //top_left + mov x4, #-1 + ld1 {v2.2s}, [x1], x8 + + adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs + ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] + + ld1 {v0.2s}, [x1] + rev64 v2.8b, v2.8b + ld1 {v6.2s, v7.2s}, [x7] + usubl v0.8h, v0.8b, v2.8b + uxtl v16.8h, v6.8b + mul v0.8h, v0.8h , v16.8h + uxtl v18.8h, v7.8b + add x7, x0, x4, lsl #3 + sub x0, x7, x4, lsl #1 + sub x20, x4, #0x0 + neg x14, x20 + addp v0.8h, v0.8h, v1.8h + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + saddlp v0.2s, v0.4h + sub x12, x8, x9 + ldrb w8, [x7], #-1 + sxtw x8, w8 + saddlp v0.1d, v0.2s + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x8, x8, x9 + shl v2.2s, v0.2s, #2 + add x12, x12, x8, lsl #1 + add v0.2s, v0.2s , v2.2s + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + srshr v0.2s, v0.2s, #6 // i_b = D0[0] + sub x8, x8, x9 + ldrb w5, [x7], #-1 + sxtw x5, w5 + add x8, x8, x8, lsl #1 + dup v4.8h, v0.4h[0] + add x12, x12, x8 + ldrb w9, [x0], #1 + sxtw x9, w9 + mul v0.8h, v4.8h , v16.8h + sub x5, x5, x9 + mul v2.8h, v4.8h , v18.8h + add x12, x12, x5, lsl #2 + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x8, x8, x9 + ldrb w5, [x7], #-1 + sxtw x5, w5 + add x8, x8, x8, lsl #2 + ldrb w6, [x0], #1 + sxtw x6, w6 + add x12, x12, x8 + ldrb w8, [x7], #-1 + sxtw x8, w8 + ldrb w9, [x0], #1 + sxtw x9, w9 + sub x5, x5, x6 + sub x8, x8, x9 + add x5, x5, x5, lsl #1 + sub x20, x8, x8, lsl #3 + neg x8, x20 + add x12, x12, x5, lsl #1 + ldrb w5, [x7], #-1 + sxtw x5, w5 + ldrb w6, [x10] //top_left + sxtw x6, w6 + add x12, x12, x8 + sub x9, x5, x6 + ldrb w6, [x1, #7] + sxtw x6, w6 + add x12, x12, x9, lsl #3 // i_c = x12 + add x8, x5, x6 + add x12, x12, x12, lsl #2 + lsl x8, x8, #4 // i_a = x8 + add x12, x12, #0x20 + lsr x12, x12, #6 + shl v28.8h, v4.8h, #3 + dup v6.8h, w12 + dup v30.8h, w8 + shl v26.8h, v6.8h, #3 + sub v30.8h, v30.8h , v28.8h + sub v30.8h, v30.8h , v26.8h + add v28.8h, v30.8h , v6.8h + add v26.8h, v28.8h , v0.8h + add v28.8h, v28.8h , v2.8h + sqrshrun v20.8b, v26.8h, #5 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v20.8b, v26.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + sqrshrun v21.8b, v28.8h, #5 + add v26.8h, v26.8h , v6.8h + add v28.8h, v28.8h , v6.8h + sqrshrun v22.8b, v26.8h, #5 + st1 {v20.2s, v21.2s}, [x2], x3 + sqrshrun v23.8b, v28.8h, #5 + st1 {v22.2s, v23.2s}, [x2], x3 + +end_func_plane: + + ldp x19, x20, [sp], #16 + pop_v_regs + ret + |