aboutsummaryrefslogtreecommitdiffstats
path: root/stm-arm-neon-ref.h
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@st.com>2013-04-11 15:05:18 +0200
committerChristophe Lyon <christophe.lyon@st.com>2013-04-11 15:05:18 +0200
commit34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree421db32a0477c74f47a6e51f040cfd60f6db5936 /stm-arm-neon-ref.h
parent91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
downloadplatform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip
Added support for half-precision (fp16) variants.
Diffstat (limited to 'stm-arm-neon-ref.h')
-rw-r--r--stm-arm-neon-ref.h74
1 files changed, 74 insertions, 0 deletions
diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h
index 94cfd11..3ea931f 100644
--- a/stm-arm-neon-ref.h
+++ b/stm-arm-neon-ref.h
@@ -98,6 +98,23 @@ static int result_idx = 0;
} \
fprintf(ref_file, " }\n");
+/* ARMCC has built-in knowledge of the half-precision type (__fp16).
+ Alias float16_t to it so that declarations need not be duplicated. */
+#ifdef __ARMCC_VERSION
+#define float16_t __fp16
+#endif
+/* DUMP_FP16: write a "MSG:index:name [] = { ... }" line to ref_file (post-incrementing result_idx), printing each of the N elements of VECT_VAR(result, T, W, N) cast to uint<W>_t with format FMT; expands to several statements and uses a loop variable `i` that must already be in scope. NOTE(review): the cast converts by value, not by bit pattern, if the element type is a real floating-point type -- confirm this is intended. */
+#define DUMP_FP16(MSG,T,W,N,FMT) \
+ fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, \
+ STR(VECT_VAR(result, T, W, N))); \
+ for(i=0; i<N ; i++) \
+ { \
+ uint##W##_t tmp; \
+ tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; \
+ fprintf(ref_file, "%" FMT ", ", tmp); \
+ } \
+ fprintf(ref_file, " }\n");
+
#define CLEAN_PATTERN_8 0x33
#define CLEAN_PATTERN_16 0x3333
#define CLEAN_PATTERN_32 0x33333333
@@ -142,6 +159,9 @@ extern ARRAY(buffer, uint, 64, 1);
extern ARRAY(buffer, poly, 8, 8);
extern ARRAY(buffer, poly, 16, 4);
extern ARRAY(buffer, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer, float, 16, 4);
+#endif
extern ARRAY(buffer, int, 8, 16);
extern ARRAY(buffer, int, 16, 8);
extern ARRAY(buffer, int, 32, 4);
@@ -153,6 +173,9 @@ extern ARRAY(buffer, uint, 64, 2);
extern ARRAY(buffer, poly, 8, 16);
extern ARRAY(buffer, poly, 16, 8);
extern ARRAY(buffer, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer, float, 16, 8);
+#endif
/* The tests for vld1_dup and vdup expect at least 4 entries in the
input buffer, so force 1- and 2-elements initializers to have 4
@@ -168,6 +191,9 @@ extern ARRAY4(buffer_dup, uint, 64, 1);
extern ARRAY(buffer_dup, poly, 8, 8);
extern ARRAY(buffer_dup, poly, 16, 4);
extern ARRAY4(buffer_dup, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY4(buffer_dup, float, 16, 4);
+#endif
extern ARRAY(buffer_dup, int, 8, 16);
extern ARRAY(buffer_dup, int, 16, 8);
extern ARRAY(buffer_dup, int, 32, 4);
@@ -179,6 +205,9 @@ extern ARRAY4(buffer_dup, uint, 64, 2);
extern ARRAY(buffer_dup, poly, 8, 16);
extern ARRAY(buffer_dup, poly, 16, 8);
extern ARRAY(buffer_dup, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer_dup, float, 16, 8);
+#endif
/* Input buffers for vld2, one of each size */
extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
@@ -192,6 +221,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
+#endif
extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
@@ -203,6 +235,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
+#endif
/* Input buffers for vld3, one of each size */
extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
@@ -216,6 +251,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
+#endif
extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
@@ -227,6 +265,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
+#endif
/* Input buffers for vld4, one of each size */
extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
@@ -240,6 +281,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
+#endif
extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
@@ -251,6 +295,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
+#endif
/* Input buffers for vld2_lane */
extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
@@ -264,6 +311,9 @@ extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
+#endif
/* Input buffers for vld3_lane */
extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
@@ -277,6 +327,9 @@ extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
+#endif
/* Input buffers for vld4_lane */
extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
@@ -290,6 +343,9 @@ extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
+#endif
/* Output buffers, one of each size */
static ARRAY(result, int, 8, 8);
@@ -303,6 +359,9 @@ static ARRAY(result, uint, 64, 1);
static ARRAY(result, poly, 8, 8);
static ARRAY(result, poly, 16, 4);
static ARRAY(result, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+static ARRAY(result, float, 16, 4);
+#endif
static ARRAY(result, int, 8, 16);
static ARRAY(result, int, 16, 8);
static ARRAY(result, int, 32, 4);
@@ -314,6 +373,9 @@ static ARRAY(result, uint, 64, 2);
static ARRAY(result, poly, 8, 16);
static ARRAY(result, poly, 16, 8);
static ARRAY(result, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+static ARRAY(result, float, 16, 8);
+#endif
/* Dump results (generic function) */
static void dump_results (char *test_name)
@@ -333,6 +395,9 @@ static void dump_results (char *test_name)
DUMP(test_name, poly, 8, 8, PRIu8);
DUMP(test_name, poly, 16, 4, PRIu16);
DUMP_FP(test_name, float, 32, 2, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+ DUMP_FP16(test_name, float, 16, 4, PRIu16);
+#endif
DUMP(test_name, int, 8, 16, PRId8);
DUMP(test_name, int, 16, 8, PRId16);
@@ -345,6 +410,9 @@ static void dump_results (char *test_name)
DUMP(test_name, poly, 8, 16, PRIu8);
DUMP(test_name, poly, 16, 8, PRIu16);
DUMP_FP(test_name, float, 32, 4, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+ DUMP_FP16(test_name, float, 16, 8, PRIu16);
+#endif
}
/* Dump results in hex (generic function) */
@@ -365,6 +433,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment)
DUMP(test_name, poly, 8, 8, PRIx8);
DUMP(test_name, poly, 16, 4, PRIx16);
DUMP_FP(test_name, float, 32, 2, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+ DUMP_FP16(test_name, float, 16, 4, PRIx16);
+#endif
DUMP(test_name, int, 8, 16, PRIx8);
DUMP(test_name, int, 16, 8, PRIx16);
@@ -377,6 +448,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment)
DUMP(test_name, poly, 8, 16, PRIx8);
DUMP(test_name, poly, 16, 8, PRIx16);
DUMP_FP(test_name, float, 32, 4, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+ DUMP_FP16(test_name, float, 16, 8, PRIx16);
+#endif
}
static void dump_results_hex (const char *test_name)