//****************************************************************************** //* //* Copyright (C) 2015 The Android Open Source Project //* //* Licensed under the Apache License, Version 2.0 (the "License"); //* you may not use this file except in compliance with the License. //* You may obtain a copy of the License at: //* //* http://www.apache.org/licenses/LICENSE-2.0 //* //* Unless required by applicable law or agreed to in writing, software //* distributed under the License is distributed on an "AS IS" BASIS, //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //* See the License for the specific language governing permissions and //* limitations under the License. //* //***************************************************************************** //* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore //*/ ///** // ******************************************************************************* // * @file // * ih264_mem_fns_neon.s // * // * @brief // * Contains function definitions for memory manipulation // * // * @author // * Naveen SR // * // * @par List of Functions: // * - ih264_memcpy_av8() // * - ih264_memcpy_mul_8_av8() // * - ih264_memset_av8() // * - ih264_memset_mul_8_av8() // * - ih264_memset_16bit_mul_8_av8() // * - ih264_memset_16bit_av8() // * // * @remarks // * None // * // ******************************************************************************* //*/ .text .p2align 2 .include "ih264_neon_macros.s" ///** //******************************************************************************* //* //* @brief //* memcpy of a 1d array //* //* @par Description: //* Copies 8-bit data from source to destination; num_bytes is expected to be 8, 16 or 32 //* //* @param[out] pu1_dst //* UWORD8 pointer to the destination //* //* @param[in] pu1_src //* UWORD8 pointer to the source //* //* @param[in] num_bytes //* number of bytes to copy //* @returns //* //* @remarks //* None //* 
//*******************************************************************************
//*/

// Target  : AArch64 (ARMv8-A) with Advanced SIMD, GNU assembler syntax.
// ABI     : AAPCS64. Every routine below is a leaf function that uses only
//           caller-saved registers (x0-x3, v0) and never touches the stack.
// Counts  : The element count arrives in w2. It is declared narrower than
//           64 bits on the C side, so the upper half of x2 is not guaranteed
//           to be zero; all counting is therefore done on w2.
// Common  : A count of zero is a no-op for every routine in this file.

    .text
    .p2align 2


//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst   (advanced past the copied bytes)
//    x1 => *pu1_src   (advanced past the copied bytes)
//    w2 => num_bytes  (expected to be a multiple of 8)
//
// Copies num_bytes bytes in 8-byte chunks. A trailing partial chunk
// (num_bytes not a multiple of 8) is not copied.
// Clobbers: x0, x1, w2, v0, NZCV.

    .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:
    lsr     w2, w2, #3                      // w2 = number of whole 8-byte chunks
    cbz     w2, .Lmemcpy_mul_8_done         // nothing to copy

.Lmemcpy_mul_8_loop:
    ld1     {v0.8b}, [x1], #8               // load 8 bytes, src += 8
    st1     {v0.8b}, [x0], #8               // store 8 bytes, dst += 8
    subs    w2, w2, #1
    b.ne    .Lmemcpy_mul_8_loop

.Lmemcpy_mul_8_done:
    ret


//void ih264_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    w2 => num_bytes  (any value, including 0)
//
// Copies num_bytes bytes: whole 8-byte chunks through NEON first, then the
// remaining 0..7 bytes one at a time.
// Clobbers: x0, x1, w2, w3, v0, NZCV.

    .global ih264_memcpy_av8

ih264_memcpy_av8:
    lsr     w3, w2, #3                      // w3 = whole 8-byte chunks
    and     w2, w2, #7                      // w2 = trailing bytes (0..7)
    cbz     w3, .Lmemcpy_tail

.Lmemcpy_chunk_loop:
    ld1     {v0.8b}, [x1], #8
    st1     {v0.8b}, [x0], #8
    subs    w3, w3, #1
    b.ne    .Lmemcpy_chunk_loop

.Lmemcpy_tail:
    cbz     w2, .Lmemcpy_done               // no leftover bytes

.Lmemcpy_byte_loop:
    ldrb    w3, [x1], #1                    // w3 is free again: reuse as byte temp
    strb    w3, [x0], #1
    subs    w2, w2, #1
    b.ne    .Lmemcpy_byte_loop

.Lmemcpy_done:
    ret


//void ih264_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    w1 => value      (only the low 8 bits are used)
//    w2 => num_bytes  (expected to be a multiple of 8)
//
// Fills num_bytes bytes with value in 8-byte chunks. A trailing partial
// chunk (num_bytes not a multiple of 8) is not written.
// Clobbers: x0, w2, v0, NZCV.

    .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:
    lsr     w2, w2, #3                      // w2 = number of whole 8-byte chunks
    cbz     w2, .Lmemset_mul_8_done
    dup     v0.8b, w1                       // replicate the fill byte into 8 lanes

.Lmemset_mul_8_loop:
    st1     {v0.8b}, [x0], #8               // store 8 bytes, dst += 8
    subs    w2, w2, #1
    b.ne    .Lmemset_mul_8_loop

.Lmemset_mul_8_done:
    ret


//void ih264_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    w1 => value      (only the low 8 bits are used)
//    w2 => num_bytes  (any value, including 0)
//
// Fills num_bytes bytes with value: whole 8-byte chunks through NEON first,
// then the remaining 0..7 bytes one at a time.
// Clobbers: x0, w2, w3, v0, NZCV.

    .global ih264_memset_av8

ih264_memset_av8:
    lsr     w3, w2, #3                      // w3 = whole 8-byte chunks
    and     w2, w2, #7                      // w2 = trailing bytes (0..7)
    cbz     w3, .Lmemset_tail
    dup     v0.8b, w1                       // replicate the fill byte into 8 lanes

.Lmemset_chunk_loop:
    st1     {v0.8b}, [x0], #8               // memset 8 bytes
    subs    w3, w3, #1
    b.ne    .Lmemset_chunk_loop

.Lmemset_tail:
    cbz     w2, .Lmemset_done               // no leftover bytes

.Lmemset_byte_loop:
    strb    w1, [x0], #1
    subs    w2, w2, #1
    b.ne    .Lmemset_byte_loop

.Lmemset_done:
    ret


//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    w1 => value      (only the low 16 bits are used)
//    w2 => num_words  (count of 16-bit elements, expected multiple of 8)
//
// Fills num_words halfwords with value, 8 halfwords (16 bytes) per
// iteration. A trailing partial group of fewer than 8 is not written.
// Clobbers: x0, w2, v0, NZCV.

    .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:
    lsr     w2, w2, #3                      // w2 = number of 8-halfword groups
    cbz     w2, .Lmemset_16bit_mul_8_done
    dup     v0.8h, w1                       // replicate the halfword into 8 lanes

.Lmemset_16bit_mul_8_loop:
    st1     {v0.8h}, [x0], #16              // store 8 halfwords, dst += 16 bytes
    subs    w2, w2, #1
    b.ne    .Lmemset_16bit_mul_8_loop

.Lmemset_16bit_mul_8_done:
    ret


//void ih264_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    w1 => value      (only the low 16 bits are used)
//    w2 => num_words  (count of 16-bit elements, any value including 0)
//
// Fills num_words halfwords with value: groups of 8 halfwords through NEON
// first, then the remaining 0..7 halfwords one at a time.
// Clobbers: x0, w2, w3, v0, NZCV.

    .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
    lsr     w3, w2, #3                      // w3 = whole groups of 8 halfwords
    and     w2, w2, #7                      // w2 = trailing halfwords (0..7)
    cbz     w3, .Lmemset_16bit_tail
    dup     v0.8h, w1                       // replicate the halfword into 8 lanes

.Lmemset_16bit_chunk_loop:
    st1     {v0.8h}, [x0], #16              // memset 8 halfwords
    subs    w3, w3, #1
    b.ne    .Lmemset_16bit_chunk_loop

.Lmemset_16bit_tail:
    cbz     w2, .Lmemset_16bit_done         // no leftover halfwords

.Lmemset_16bit_half_loop:
    strh    w1, [x0], #2
    subs    w2, w2, #1
    b.ne    .Lmemset_16bit_half_loop

.Lmemset_16bit_done:
    ret