diff options
author | Christophe Lyon <christophe.lyon@st.com> | 2013-04-11 15:05:18 +0200 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@st.com> | 2013-04-11 15:05:18 +0200 |
commit | 34adaf63011ec336c8fa4594c6bba1c59840e167 (patch) | |
tree | 421db32a0477c74f47a6e51f040cfd60f6db5936 /stm-arm-neon-ref.h | |
parent | 91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff) | |
download | platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2 platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip |
Added support for half-precision (fp16) variants.
Diffstat (limited to 'stm-arm-neon-ref.h')
-rw-r--r-- | stm-arm-neon-ref.h | 74 |
1 file changed, 74 insertions, 0 deletions
diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h index 94cfd11..3ea931f 100644 --- a/stm-arm-neon-ref.h +++ b/stm-arm-neon-ref.h @@ -98,6 +98,23 @@ static int result_idx = 0; } \ fprintf(ref_file, " }\n"); +/* ARMCC has internal knowledge of half-precision type. Define this + alias to avoid having to duplicate declarations. */ +#ifdef __ARMCC_VERSION +#define float16_t __fp16 +#endif + +#define DUMP_FP16(MSG,T,W,N,FMT) \ + fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \ + STR(VECT_VAR(result, T, W, N))); \ + for(i=0; i<N ; i++) \ + { \ + uint##W##_t tmp; \ + tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \ + fprintf(ref_file, "%" FMT ", ", tmp); \ + } \ + fprintf(ref_file, " }\n"); + #define CLEAN_PATTERN_8 0x33 #define CLEAN_PATTERN_16 0x3333 #define CLEAN_PATTERN_32 0x33333333 @@ -142,6 +159,9 @@ extern ARRAY(buffer, uint, 64, 1); extern ARRAY(buffer, poly, 8, 8); extern ARRAY(buffer, poly, 16, 4); extern ARRAY(buffer, float, 32, 2); +#if __ARM_NEON_FP16_INTRINSICS +extern ARRAY(buffer, float, 16, 4); +#endif extern ARRAY(buffer, int, 8, 16); extern ARRAY(buffer, int, 16, 8); extern ARRAY(buffer, int, 32, 4); @@ -153,6 +173,9 @@ extern ARRAY(buffer, uint, 64, 2); extern ARRAY(buffer, poly, 8, 16); extern ARRAY(buffer, poly, 16, 8); extern ARRAY(buffer, float, 32, 4); +#if __ARM_NEON_FP16_INTRINSICS +extern ARRAY(buffer, float, 16, 8); +#endif /* The tests for vld1_dup and vdup expect at least 4 entries in the input buffer, so force 1- and 2-elements initializers to have 4 @@ -168,6 +191,9 @@ extern ARRAY4(buffer_dup, uint, 64, 1); extern ARRAY(buffer_dup, poly, 8, 8); extern ARRAY(buffer_dup, poly, 16, 4); extern ARRAY4(buffer_dup, float, 32, 2); +#if __ARM_NEON_FP16_INTRINSICS +extern ARRAY4(buffer_dup, float, 16, 4); +#endif extern ARRAY(buffer_dup, int, 8, 16); extern ARRAY(buffer_dup, int, 16, 8); extern ARRAY(buffer_dup, int, 32, 4); @@ -179,6 +205,9 @@ extern ARRAY4(buffer_dup, uint, 64, 2); extern ARRAY(buffer_dup, poly, 8, 16); extern 
ARRAY(buffer_dup, poly, 16, 8); extern ARRAY(buffer_dup, float, 32, 4); +#if __ARM_NEON_FP16_INTRINSICS +extern ARRAY(buffer_dup, float, 16, 8); +#endif /* Input buffers for vld2, one of each size */ extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2); @@ -192,6 +221,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2); extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2); extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2); extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2); +#endif extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2); extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2); extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2); @@ -203,6 +235,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2); extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2); extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2); extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2); +#endif /* Input buffers for vld3, one of each size */ extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3); @@ -216,6 +251,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3); extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3); extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3); extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3); +#endif extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3); extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3); extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3); @@ -227,6 +265,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3); extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3); extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3); extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3); +#endif /* Input buffers for vld4, one of each size */ extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4); @@ -240,6 +281,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4); extern 
VECT_ARRAY(buffer_vld4, poly, 8, 8, 4); extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4); extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4); +#endif extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4); extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4); extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4); @@ -251,6 +295,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4); extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4); extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4); extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4); +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4); +#endif /* Input buffers for vld2_lane */ extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2]; @@ -264,6 +311,9 @@ extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2]; +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2]; +#endif /* Input buffers for vld3_lane */ extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3]; @@ -277,6 +327,9 @@ extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3]; +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3]; +#endif /* Input buffers for vld4_lane */ extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4]; @@ -290,6 +343,9 @@ extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4]; +#if __ARM_NEON_FP16_INTRINSICS +extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4]; +#endif /* Output buffers, one of each size */ 
static ARRAY(result, int, 8, 8); @@ -303,6 +359,9 @@ static ARRAY(result, uint, 64, 1); static ARRAY(result, poly, 8, 8); static ARRAY(result, poly, 16, 4); static ARRAY(result, float, 32, 2); +#if __ARM_NEON_FP16_INTRINSICS +static ARRAY(result, float, 16, 4); +#endif static ARRAY(result, int, 8, 16); static ARRAY(result, int, 16, 8); static ARRAY(result, int, 32, 4); @@ -314,6 +373,9 @@ static ARRAY(result, uint, 64, 2); static ARRAY(result, poly, 8, 16); static ARRAY(result, poly, 16, 8); static ARRAY(result, float, 32, 4); +#if __ARM_NEON_FP16_INTRINSICS +static ARRAY(result, float, 16, 8); +#endif /* Dump results (generic function) */ static void dump_results (char *test_name) @@ -333,6 +395,9 @@ static void dump_results (char *test_name) DUMP(test_name, poly, 8, 8, PRIu8); DUMP(test_name, poly, 16, 4, PRIu16); DUMP_FP(test_name, float, 32, 2, PRIx32); +#if __ARM_NEON_FP16_INTRINSICS + DUMP_FP16(test_name, float, 16, 4, PRIu16); +#endif DUMP(test_name, int, 8, 16, PRId8); DUMP(test_name, int, 16, 8, PRId16); @@ -345,6 +410,9 @@ static void dump_results (char *test_name) DUMP(test_name, poly, 8, 16, PRIu8); DUMP(test_name, poly, 16, 8, PRIu16); DUMP_FP(test_name, float, 32, 4, PRIx32); +#if __ARM_NEON_FP16_INTRINSICS + DUMP_FP16(test_name, float, 16, 8, PRIu16); +#endif } /* Dump results in hex (generic function) */ @@ -365,6 +433,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment) DUMP(test_name, poly, 8, 8, PRIx8); DUMP(test_name, poly, 16, 4, PRIx16); DUMP_FP(test_name, float, 32, 2, PRIx32); +#if __ARM_NEON_FP16_INTRINSICS + DUMP_FP16(test_name, float, 16, 4, PRIx16); +#endif DUMP(test_name, int, 8, 16, PRIx8); DUMP(test_name, int, 16, 8, PRIx16); @@ -377,6 +448,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment) DUMP(test_name, poly, 8, 16, PRIx8); DUMP(test_name, poly, 16, 8, PRIx16); DUMP_FP(test_name, float, 32, 4, PRIx32); +#if __ARM_NEON_FP16_INTRINSICS + DUMP_FP16(test_name, float, 16, 8, 
PRIx16); +#endif } static void dump_results_hex (const char *test_name) |