summaryrefslogtreecommitdiffstats
path: root/common/armv8/ih264_intra_pred_luma_16x16_av8.s
diff options
context:
space:
mode:
authorHamsalekha S <hamsalekha.s@ittiam.com>2015-03-13 21:24:58 +0530
committerHamsalekha S <hamsalekha.s@ittiam.com>2015-04-02 15:59:02 +0530
commit8d3d303c7942ced6a987a52db8977d768dc3605f (patch)
treecc806c96794356996b13ba9970941d0aed74a97e /common/armv8/ih264_intra_pred_luma_16x16_av8.s
parent3956d913d37327dcb340f836e604b04bd478b158 (diff)
downloadandroid_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.gz
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.tar.bz2
android_external_libavc-8d3d303c7942ced6a987a52db8977d768dc3605f.zip
Initial version
Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
Diffstat (limited to 'common/armv8/ih264_intra_pred_luma_16x16_av8.s')
-rwxr-xr-xcommon/armv8/ih264_intra_pred_luma_16x16_av8.s606
1 files changed, 606 insertions, 0 deletions
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
new file mode 100755
index 0000000..a9eb165
--- /dev/null
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -0,0 +1,606 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_intra_pred_luma_16x16_av8.s
+//*
+//* @brief
+//* Contains function definitions for intra 16x16 luma prediction.
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_intra_pred_luma_16x16_mode_vert_av8()
+//* - ih264_intra_pred_luma_16x16_mode_horz_av8()
+//* - ih264_intra_pred_luma_16x16_mode_dc_av8()
+//* - ih264_intra_pred_luma_16x16_mode_plane_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_intra_pred_filters.c
+//
+
+///**
+///**
+///**
+//
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+.extern ih264_gai1_intrapred_luma_plane_coeffs
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_vert
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:vertical
+//*
+//* @par Description:
+//* Perform intra prediction for luma_16x16 mode: vertical, described in sec 8.3.3.1
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+    .global ih264_intra_pred_luma_16x16_mode_vert_av8
+
+// Vertical 16x16 luma prediction: the 16 bytes at pu1_src + 17 are copied
+// unchanged into each of the 16 destination rows.
+//
+// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd (32-bit signed)
+//        x2 (src_strd) and x4 (ui_neighboravailability) are not read.
+// Out:   16 rows of 16 bytes written at pu1_dst, dst_strd bytes apart.
+// Clobb: x0, x1, x3, v0, v1
+// push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible here);
+// presumably they save/restore the callee-saved SIMD registers.
+ih264_intra_pred_luma_16x16_mode_vert_av8:
+
+    push_v_regs
+    sxtw      x3, w3                      // dst_strd is a 32-bit int: the upper half
+                                          // of x3 is unspecified on entry, so widen
+                                          // it before using it as a post-index
+
+    add       x0, x0, #17                 // x0 = first byte of the predictor row
+    ld1       {v0.8b, v1.8b}, [x0]        // v0:v1 = 16 predictor bytes
+
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 0
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 1
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 2
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 3
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 4
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 5
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 6
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 7
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 8
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 9
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 10
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 11
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 12
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 13
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 14
+    st1       {v0.8b, v1.8b}, [x1], x3    // row 15
+
+    pop_v_regs
+    ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_horz
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:horizontal
+//*
+//* @par Description:
+//* Perform intra prediction for luma_16x16 mode: horizontal, described in sec 8.3.3.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+    .global ih264_intra_pred_luma_16x16_mode_horz_av8
+
+// Horizontal 16x16 luma prediction: each destination row is filled with one
+// byte taken from the 16 bytes at pu1_src. Byte 15 fills row 0 and byte 0
+// fills row 15, i.e. the source bytes are consumed in reverse order.
+//
+// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd (32-bit signed)
+//        x2 (src_strd) and x4 (ui_neighboravailability) are not read.
+// Out:   16 rows of 16 bytes written at pu1_dst, dst_strd bytes apart.
+// Clobb: x1, x3, v0, v10-v25
+// v10-v15 overlap the callee-saved SIMD range; push_v_regs / pop_v_regs
+// (from ih264_neon_macros.s, not visible here) presumably preserve them.
+ih264_intra_pred_luma_16x16_mode_horz_av8:
+
+    push_v_regs
+    sxtw      x3, w3                  // dst_strd is a 32-bit int: the upper half
+                                      // of x3 is unspecified on entry, so widen
+                                      // it before using it as a post-index
+
+    ld1       {v0.16b}, [x0]          // v0 = the 16 predictor bytes
+
+    // Broadcasts are issued a few rows ahead of the stores that consume them.
+    dup       v10.16b, v0.b[15]
+    dup       v11.16b, v0.b[14]
+    dup       v12.16b, v0.b[13]
+    dup       v13.16b, v0.b[12]
+    st1       {v10.16b}, [x1], x3     // row 0
+    dup       v14.16b, v0.b[11]
+    st1       {v11.16b}, [x1], x3     // row 1
+    dup       v15.16b, v0.b[10]
+    st1       {v12.16b}, [x1], x3     // row 2
+    dup       v16.16b, v0.b[9]
+    st1       {v13.16b}, [x1], x3     // row 3
+    dup       v17.16b, v0.b[8]
+    st1       {v14.16b}, [x1], x3     // row 4
+    dup       v18.16b, v0.b[7]
+    st1       {v15.16b}, [x1], x3     // row 5
+    dup       v19.16b, v0.b[6]
+    st1       {v16.16b}, [x1], x3     // row 6
+    dup       v20.16b, v0.b[5]
+    st1       {v17.16b}, [x1], x3     // row 7
+    dup       v21.16b, v0.b[4]
+    st1       {v18.16b}, [x1], x3     // row 8
+    dup       v22.16b, v0.b[3]
+    st1       {v19.16b}, [x1], x3     // row 9
+    dup       v23.16b, v0.b[2]
+    st1       {v20.16b}, [x1], x3     // row 10
+    dup       v24.16b, v0.b[1]
+    st1       {v21.16b}, [x1], x3     // row 11
+    dup       v25.16b, v0.b[0]
+    st1       {v22.16b}, [x1], x3     // row 12
+    st1       {v23.16b}, [x1], x3     // row 13
+    st1       {v24.16b}, [x1], x3     // row 14
+    st1       {v25.16b}, [x1], x3     // row 15
+
+    pop_v_regs
+    ret
+
+
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_dc
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:DC
+//*
+//* @par Description:
+//* Perform intra prediction for luma_16x16 mode: DC, described in sec 8.3.3.3
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+    .global ih264_intra_pred_luma_16x16_mode_dc_av8
+
+// DC 16x16 luma prediction: every destination byte is set to the rounded mean
+// of whichever neighbour groups are flagged available.
+//   bit 0x01 of ui_neighboravailability -> use the 16 bytes at pu1_src
+//   bit 0x04 of ui_neighboravailability -> use the 16 bytes at pu1_src + 17
+//   both:    (sum + 16) >> 5
+//   one:     (sum +  8) >> 4
+//   neither: 128
+//
+// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd (32-bit signed),
+//        w4 = ui_neighboravailability (32-bit). x2 (src_strd) is not read.
+// Out:   16 rows of 16 bytes written at pu1_dst, dst_strd bytes apart.
+// Clobb: x1, x3, x6, x10, x11, x15, v0-v3, v10, v20, flags
+// v10 overlaps the callee-saved SIMD range; push_v_regs / pop_v_regs
+// (from ih264_neon_macros.s, not visible here) presumably preserve it.
+// No callee-saved general-purpose register is touched, so none is spilled.
+ih264_intra_pred_luma_16x16_mode_dc_av8:
+
+    push_v_regs
+    sxtw      x3, w3                      // dst_strd is a 32-bit int: the upper half
+                                          // of x3 is unspecified on entry, so widen
+                                          // it before using it as a post-index
+
+    sub       v0.16b, v0.16b, v0.16b      // v0 = 0 (left bytes if unavailable)
+    sub       v1.16b, v1.16b, v1.16b      // v1 = 0 (top bytes if unavailable)
+    mov       w10, #0                     // w10 = rounding term, +8 per group used
+    mov       w11, #3                     // w11 = shift, +1 per group used
+    tst       w4, #0x01
+    beq       top_available               // LEFT NOT AVAILABLE
+    ld1       {v0.16b}, [x0]              // v0 = left bytes
+    add       w10, w10, #8
+    add       w11, w11, #1
+top_available:
+    tst       w4, #0x04
+    beq       none_available              // TOP NOT AVAILABLE
+    add       x6, x0, #17
+    ld1       {v1.16b}, [x6]              // v1 = top bytes
+    add       w10, w10, #8
+    add       w11, w11, #1
+    b         summation
+none_available:
+    // Top is known to be absent here. Only the left bit decides whether there
+    // is anything to average; other availability bits (and the unspecified
+    // upper half of x4) must not divert this path into the summation, where
+    // an all-zero input would produce 0 instead of the mid-grey default.
+    tst       w4, #0x01
+    bne       summation
+    mov       w15, #128
+    dup       v20.16b, w15                // default prediction value
+    b         store
+summation:
+    uaddl     v2.8h, v0.8b, v1.8b         // widen and add low 8 bytes of each group
+    uaddl2    v3.8h, v0.16b, v1.16b       // widen and add high 8 bytes of each group
+    dup       v10.8h, w10                 // rounding term in every lane
+    neg       w11, w11                    // negative count => uqshl shifts right
+    dup       v20.8h, w11
+    add       v0.8h, v2.8h, v3.8h         // 8 partial sums
+    mov       v1.d[0], v0.d[1]
+    add       v0.4h, v0.4h, v1.4h         // 4 partial sums
+    addp      v0.4h, v0.4h, v0.4h         // 2 partial sums
+    addp      v0.4h, v0.4h, v0.4h         // lane 0 = total of all bytes used
+    add       v0.4h, v0.4h, v10.4h        // + rounding
+    uqshl     v0.8h, v0.8h, v20.8h        // >> (3 + number of groups used)
+    sqxtun    v0.8b, v0.8h                // narrow to bytes
+    dup       v20.16b, v0.b[0]            // broadcast the DC value
+
+store:
+
+    st1       {v20.16b}, [x1], x3         // row 0
+    st1       {v20.16b}, [x1], x3         // row 1
+    st1       {v20.16b}, [x1], x3         // row 2
+    st1       {v20.16b}, [x1], x3         // row 3
+    st1       {v20.16b}, [x1], x3         // row 4
+    st1       {v20.16b}, [x1], x3         // row 5
+    st1       {v20.16b}, [x1], x3         // row 6
+    st1       {v20.16b}, [x1], x3         // row 7
+    st1       {v20.16b}, [x1], x3         // row 8
+    st1       {v20.16b}, [x1], x3         // row 9
+    st1       {v20.16b}, [x1], x3         // row 10
+    st1       {v20.16b}, [x1], x3         // row 11
+    st1       {v20.16b}, [x1], x3         // row 12
+    st1       {v20.16b}, [x1], x3         // row 13
+    st1       {v20.16b}, [x1], x3         // row 14
+    st1       {v20.16b}, [x1], x3         // row 15
+
+end_func:
+
+    pop_v_regs
+    ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_plane
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:PLANE
+//*
+//* @par Description:
+//* Perform intra prediction for luma_16x16 mode: PLANE, described in sec 8.3.3.4
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+    .global ih264_intra_pred_luma_16x16_mode_plane_av8
+
+// Plane 16x16 luma prediction.
+//
+// Neighbour bytes are read from pu1_src at these offsets:
+//   [0..15]  one group of 16 (walked outwards from offsets 7 and 9)
+//   [16]     the byte the code labels top_left
+//   [17..32] the other group of 16
+// Three parameters are derived:
+//   i_b = (5 * H + 32) >> 6, H = weighted sum of differences across [16..32]
+//   i_c = (5 * V + 32) >> 6, V = weighted sum of differences across [0..16]
+//   i_a = 16 * (src[0] + src[32])
+// Row 0 starts from i_a - 8*i_b - 7*i_c + i_b * coeff[x]; i_c is added once per
+// row, and each value is rounded, shifted right by 5 and saturated to a byte.
+// coeff[] is the external table ih264_gai1_intrapred_luma_plane_coeffs
+// (16 bytes are read; presumably 1..16 -- TODO confirm against its definition).
+//
+// In:    x0 = pu1_src, x1 = pu1_dst, w3 = dst_strd (32-bit signed)
+//        x2 (src_strd) and x4 (ui_neighboravailability) are overwritten unread.
+// Out:   16 rows of 16 bytes written at pu1_dst, dst_strd bytes apart.
+// Clobb: x0-x10, x12, v0, v2, v4, v6, v7, v16, v18, v20-v23, v26, v28, v30
+//        x20 is used as scratch and is saved/restored around the body.
+ih264_intra_pred_luma_16x16_mode_plane_av8:
+
+    push_v_regs
+    stp       x19, x20, [sp, #-16]!
+    sxtw      x3, w3                      // dst_strd is a 32-bit int: the upper half
+                                          // of x3 is unspecified on entry, so widen
+                                          // it before using it as a post-index
+    mov       x2, x1                      // x2 = pu1_dst
+    add       x1, x0, #17
+    add       x0, x0, #15
+    mov       x8, #9
+    sub       x1, x1, #1                  // x1 = pu1_src + 16
+    mov       x10, x1                     //top_left
+    mov       x4, #-1
+    ld1       {v2.2s}, [x1], x8           // v2 = src[16..23]; x1 = pu1_src + 25
+
+    adrp      x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
+    ldr       x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
+
+    ld1       {v0.2s}, [x1]               // v0 = src[25..32]
+    rev64     v2.8b, v2.8b                // v2 = src[23], src[22], ..., src[16]
+    ld1       {v6.2s, v7.2s}, [x7]        // v6:v7 = coeff[0..15]
+    usubl     v0.8h, v0.8b, v2.8b         // lane i = src[25 + i] - src[23 - i]
+    uxtl      v16.8h, v6.8b               // v16 = coeff[0..7] widened
+    mul       v0.8h, v0.8h, v16.8h        // weight each difference
+    uxtl      v18.8h, v7.8b               // v18 = coeff[8..15] widened
+    add       x7, x0, x4, lsl #3          // x7 = pu1_src + 7, walks downwards
+    sub       x0, x7, x4, lsl #1          // x0 = pu1_src + 9, walks upwards
+    addp      v0.8h, v0.8h, v1.8h         // only the low 4 lanes are used below
+    ldrb      w8, [x7], #-1
+    sxtw      x8, w8
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    saddlp    v0.2s, v0.4h
+    sub       x12, x8, x9                 // V  = 1 * (src[7] - src[9])
+    ldrb      w8, [x7], #-1
+    sxtw      x8, w8
+    saddlp    v0.1d, v0.2s                // v0 lane 0 = H
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    sub       x8, x8, x9
+    shl       v2.2s, v0.2s, #2
+    add       x12, x12, x8, lsl #1        // V += 2 * (src[6] - src[10])
+    add       v0.2s, v0.2s, v2.2s         // 5 * H
+    ldrb      w8, [x7], #-1
+    sxtw      x8, w8
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    srshr     v0.2s, v0.2s, #6            // i_b = D0[0]
+    sub       x8, x8, x9
+    ldrb      w5, [x7], #-1
+    sxtw      x5, w5
+    add       x8, x8, x8, lsl #1
+    dup       v4.8h, v0.h[0]              // v4 = i_b in every lane
+    add       x12, x12, x8                // V += 3 * (src[5] - src[11])
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    mul       v0.8h, v4.8h, v16.8h        // v0 = i_b * coeff[0..7]
+    sub       x5, x5, x9
+    mul       v2.8h, v4.8h, v18.8h        // v2 = i_b * coeff[8..15]
+    add       x12, x12, x5, lsl #2        // V += 4 * (src[4] - src[12])
+    ldrb      w8, [x7], #-1
+    sxtw      x8, w8
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    sub       x8, x8, x9
+    ldrb      w5, [x7], #-1
+    sxtw      x5, w5
+    add       x8, x8, x8, lsl #2
+    ldrb      w6, [x0], #1
+    sxtw      x6, w6
+    add       x12, x12, x8                // V += 5 * (src[3] - src[13])
+    ldrb      w8, [x7], #-1
+    sxtw      x8, w8
+    ldrb      w9, [x0], #1
+    sxtw      x9, w9
+    sub       x5, x5, x6
+    sub       x8, x8, x9
+    add       x5, x5, x5, lsl #1
+    sub       x20, x8, x8, lsl #3         // x20 = -7 * (src[1] - src[15])
+    neg       x8, x20
+    add       x12, x12, x5, lsl #1        // V += 6 * (src[2] - src[14])
+    ldrb      w5, [x7], #-1               // x5 = src[0]
+    sxtw      x5, w5
+    ldrb      w6, [x10]                   //top_left
+    sxtw      x6, w6
+    add       x12, x12, x8                // V += 7 * (src[1] - src[15])
+    sub       x9, x5, x6
+    ldrb      w6, [x1, #7]                // x6 = src[32]
+    sxtw      x6, w6
+    add       x12, x12, x9, lsl #3        // i_c = x12
+    add       x8, x5, x6
+    add       x12, x12, x12, lsl #2       // 5 * V
+    lsl       x8, x8, #4                  // i_a = x8
+    add       x12, x12, #0x20
+    lsr       x12, x12, #6                // only the low 16 bits are consumed below
+    shl       v28.8h, v4.8h, #3           // 8 * i_b
+    dup       v6.8h, w12                  // v6 = i_c in every lane
+    dup       v30.8h, w8                  // v30 = i_a in every lane
+    shl       v26.8h, v6.8h, #3           // 8 * i_c
+    sub       v30.8h, v30.8h, v28.8h
+    sub       v30.8h, v30.8h, v26.8h      // i_a - 8*i_b - 8*i_c
+    add       v28.8h, v30.8h, v6.8h       // i_a - 8*i_b - 7*i_c
+    add       v26.8h, v28.8h, v0.8h       // row 0, columns 0..7
+    add       v28.8h, v28.8h, v2.8h       // row 0, columns 8..15
+    sqrshrun  v20.8b, v26.8h, #5
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h       // step to the next row: += i_c
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 0
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 1
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 2
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 3
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 4
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 5
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 6
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 7
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 8
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 9
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 10
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 11
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 12
+    sqrshrun  v23.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v20.8b, v26.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 13
+    sqrshrun  v21.8b, v28.8h, #5
+    add       v26.8h, v26.8h, v6.8h
+    add       v28.8h, v28.8h, v6.8h
+    sqrshrun  v22.8b, v26.8h, #5
+    st1       {v20.2s, v21.2s}, [x2], x3  // row 14
+    sqrshrun  v23.8b, v28.8h, #5
+    st1       {v22.2s, v23.2s}, [x2], x3  // row 15
+
+end_func_plane:
+
+    ldp       x19, x20, [sp], #16
+    pop_v_regs
+    ret
+