/* Copyright (c) 2009, 2010, 2011, 2012, 2013 STMicroelectronics Written by Christophe Lyon Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef _STM_ARM_NEON_REF_H_ #define _STM_ARM_NEON_REF_H_ #if defined(__cplusplus) #include #include #include #else #include #if defined(_MSC_VER) #include "msinttypes.h" #include /* for isnan() ... */ static int32_t _ptrNan[]={0x7fc00000L}; #define NAN (*(float*)_ptrNan) static int32_t _ptrInf[]={0x7f800000L}; #define INFINITY (*(float*)_ptrInf) #define HUGE_VALF INFINITY #else #include #endif #include #endif #define xSTR(X) #X #define STR(X) xSTR(X) #define xNAME1(V,T) V ## _ ## T #define xNAME(V,T) xNAME1(V,T) #define VAR(V,T,W) xNAME(V,T##W) #define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W) #define VECT_NAME(T, W, N) T##W##x##N #define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L #define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t) #define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t) #define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N)) #define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) /* This one is used for padding between input buffers. */ #define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42; /* Array declarations. */ #define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N] #define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4] /* Arrays of vectors. */ #define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L)) #define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L] static int result_idx = 0; #define DUMP(MSG,T,W,N,FMT) \ fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ STR(VECT_VAR(result, T, W, N))); \ for(i=0; i #define THIS_ENDIAN __BYTE_ORDER #else /* __arm__ */ #ifdef __ARMEL__ #define THIS_ENDIAN __LITTLE_ENDIAN #else /* __ARMEL__ */ #define THIS_ENDIAN __BIG_ENDIAN #endif #endif /* __arm__ */ #if THIS_ENDIAN == __LITTLE_ENDIAN typedef union { struct { int _xxx:27; unsigned int QC:1; int V:1; int C:1; int Z:1; int N:1; } b; unsigned int word; } _ARM_FPSCR; #else /* __BIG_ENDIAN */ typedef union { struct { int N:1; int Z:1; int C:1; int V:1; unsigned int QC:1; int _dnm:27; } b; unsigned int word; } _ARM_FPSCR; #endif /* __BIG_ENDIAN */ #ifdef __ARMCC_VERSION register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); # define Neon_Cumulative_Sat _afpscr_for_qc.b.QC # define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);} #else /* GCC/ARM does not know this register */ # define Neon_Cumulative_Sat __read_neon_cumulative_sat() /* We need a fake dependency to ensure correct ordering of asm statements to preset the QC flag value, and Neon operators writing to QC. */ #define Set_Neon_Cumulative_Sat(x, depend) \ __set_neon_cumulative_sat((x), (depend)) # if defined(__aarch64__) static volatile int __read_neon_cumulative_sat (void) { _ARM_FPSCR _afpscr_for_qc; asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); return _afpscr_for_qc.b.QC; } #define __set_neon_cumulative_sat(x, depend) { \ _ARM_FPSCR _afpscr_for_qc; \ asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \ _afpscr_for_qc.b.QC = x; \ asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ } # else static volatile int __read_neon_cumulative_sat (void) { _ARM_FPSCR _afpscr_for_qc; asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); return _afpscr_for_qc.b.QC; } #define __set_neon_cumulative_sat(x, depend) { \ _ARM_FPSCR _afpscr_for_qc; \ asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \ _afpscr_for_qc.b.QC = x; \ asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ } # endif #endif #endif /* STM_ARM_NEON_MODELS */ static void dump_neon_cumulative_sat(const char* msg, const char *name, const char* t1, int w, int n) { fprintf(ref_file, "%s:%d:%s Neon cumulative saturation %d\n", msg, result_idx++, name, Neon_Cumulative_Sat); fprintf(gcc_tests_file, "int VECT_VAR(expected_cumulative_sat,%s,%d,%d) = %d;\n", t1, w, n, Neon_Cumulative_Sat); } /* Clean output buffers before execution */ static void clean_results (void) { result_idx = 0; CLEAN(result, int, 8, 8); CLEAN(result, int, 16, 4); CLEAN(result, int, 32, 2); CLEAN(result, int, 64, 1); CLEAN(result, uint, 8, 8); CLEAN(result, uint, 16, 4); CLEAN(result, uint, 32, 2); CLEAN(result, uint, 64, 1); CLEAN(result, poly, 8, 8); CLEAN(result, poly, 16, 4); CLEAN(result, float, 32, 2); CLEAN(result, int, 8, 16); CLEAN(result, int, 16, 8); CLEAN(result, int, 32, 4); CLEAN(result, int, 64, 2); CLEAN(result, uint, 8, 16); CLEAN(result, uint, 16, 8); CLEAN(result, uint, 32, 4); CLEAN(result, uint, 64, 2); CLEAN(result, poly, 8, 16); CLEAN(result, poly, 16, 8); CLEAN(result, float, 32, 4); } /* Helpers to declare variables of various types */ #define DECL_VARIABLE(VAR, T1, W, N) \ volatile VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N) #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \ DECL_VARIABLE(VAR, int, 8, 8); \ DECL_VARIABLE(VAR, int, 16, 4); \ DECL_VARIABLE(VAR, int, 32, 2); \ DECL_VARIABLE(VAR, int, 64, 1) #define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \ DECL_VARIABLE(VAR, uint, 8, 8); \ DECL_VARIABLE(VAR, uint, 16, 4); \ DECL_VARIABLE(VAR, uint, 32, 2); \ DECL_VARIABLE(VAR, uint, 64, 1) #define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \ DECL_VARIABLE(VAR, int, 8, 16); \ DECL_VARIABLE(VAR, int, 16, 8); \ DECL_VARIABLE(VAR, int, 32, 4); \ DECL_VARIABLE(VAR, int, 64, 2) #define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \ DECL_VARIABLE(VAR, uint, 8, 16); \ DECL_VARIABLE(VAR, uint, 16, 8); \ DECL_VARIABLE(VAR, uint, 32, 4); \ DECL_VARIABLE(VAR, uint, 64, 2) #define DECL_VARIABLE_64BITS_VARIANTS(VAR) \ DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ DECL_VARIABLE(VAR, poly, 8, 8); \ DECL_VARIABLE(VAR, poly, 16, 4); \ DECL_VARIABLE(VAR, float, 32, 2) #define DECL_VARIABLE_128BITS_VARIANTS(VAR) \ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \ DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ DECL_VARIABLE(VAR, poly, 8, 16); \ DECL_VARIABLE(VAR, poly, 16, 8); \ DECL_VARIABLE(VAR, float, 32, 4) #define DECL_VARIABLE_ALL_VARIANTS(VAR) \ DECL_VARIABLE_64BITS_VARIANTS(VAR); \ DECL_VARIABLE_128BITS_VARIANTS(VAR) #define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \ DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) #define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \ DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) /* Helpers to initialize vectors */ #define VDUP(VAR, Q, T1, T2, W, N, V) \ VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) #define TEST_VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ VECT_VAR(VAR, T1, W, N), \ L) /* We need to load initial values first, so rely on VLD1 */ #define VLOAD(VAR, BUF, Q, T1, T2, W, N) \ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)) /* Helpers for macros with 1 constant and 5 variable arguments */ #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ MACRO(VAR, , int, s, 8, 8); \ MACRO(VAR, , int, s, 16, 4); \ MACRO(VAR, , int, s, 32, 2); \ MACRO(VAR, , int, s, 64, 1) #define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \ MACRO(VAR, , uint, u, 8, 8); \ MACRO(VAR, , uint, u, 16, 4); \ MACRO(VAR, , uint, u, 32, 2); \ MACRO(VAR, , uint, u, 64, 1) #define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ MACRO(VAR, q, int, s, 8, 16); \ MACRO(VAR, q, int, s, 16, 8); \ MACRO(VAR, q, int, s, 32, 4); \ MACRO(VAR, q, int, s, 64, 2) #define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \ MACRO(VAR, q, uint, u, 8, 16); \ MACRO(VAR, q, uint, u, 16, 8); \ MACRO(VAR, q, uint, u, 32, 4); \ MACRO(VAR, q, uint, u, 64, 2) #define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \ TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) #define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \ TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) #define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \ TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \ TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) #define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \ TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \ TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) /* Helpers for macros with 2 constant and 5 variable arguments */ #define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ MACRO(VAR1, VAR2, , int, s, 8, 8); \ MACRO(VAR1, VAR2, , int, s, 16, 4); \ MACRO(VAR1, VAR2, , int, s, 32, 2); \ MACRO(VAR1, VAR2 , , int, s, 64, 1) #define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ MACRO(VAR1, VAR2, , uint, u, 8, 8); \ MACRO(VAR1, VAR2, , uint, u, 16, 4); \ MACRO(VAR1, VAR2, , uint, u, 32, 2); \ MACRO(VAR1, VAR2, , uint, u, 64, 1) #define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ MACRO(VAR1, VAR2, q, int, s, 8, 16); \ MACRO(VAR1, VAR2, q, int, s, 16, 8); \ MACRO(VAR1, VAR2, q, int, s, 32, 4); \ MACRO(VAR1, VAR2, q, int, s, 64, 2) #define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ MACRO(VAR1, VAR2, q, uint, u, 8, 16); \ MACRO(VAR1, VAR2, q, uint, u, 16, 8); \ MACRO(VAR1, VAR2, q, uint, u, 32, 4); \ MACRO(VAR1, VAR2, q, uint, u, 64, 2) #define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ MACRO(VAR1, VAR2, , poly, p, 8, 8); \ MACRO(VAR1, VAR2, , poly, p, 16, 4) #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ MACRO(VAR1, VAR2, q, poly, p, 8, 16); \ MACRO(VAR1, VAR2, q, poly, p, 16, 8) #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \ TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \ TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) #define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \ TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) #endif /* _STM_ARM_NEON_REF_H_ */