Added support for half-precision (fp16) variants.

author: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
committer: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
commit: 34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree: 421db32a0477c74f47a6e51f040cfd60f6db5936 /stm-arm-neon-ref.h
parent: 91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
download: platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip
1 files changed, 74 insertions, 0 deletions
diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h
index 94cfd11..3ea931f 100644
--- a/stm-arm-neon-ref.h
+++ b/stm-arm-neon-ref.h
@@ -98,6 +98,23 @@ static int result_idx = 0;
     }									\
   fprintf(ref_file, " }\n");
 
+/* ARMCC has built-in knowledge of the half-precision type (__fp16).
+   Define this alias so that declarations need not be duplicated.  */
+#ifdef __ARMCC_VERSION
+#define float16_t __fp16
+#endif
+
+#define DUMP_FP16(MSG,T,W,N,FMT) /* Dump the N elements of VECT_VAR(result, T, W, N) to ref_file. */	\
+  fprintf(ref_file, "%s:%d:%s [] = { ", MSG, result_idx++, /* header line; post-increments result_idx */	\
+	  STR(VECT_VAR(result, T, W, N)));				\
+  for(i=0; i<N ; i++) /* 'i' is not declared here: it must exist in the expanding scope */	\
+    {									\
+      uint##W##_t tmp;							\
+      tmp = (uint##W##_t)VECT_VAR(result, T, W, N)[i]; /* NOTE(review): value cast, not a bit copy, if the element type is a real FP type -- confirm */	\
+      fprintf(ref_file, "%" FMT ", ", tmp); /* FMT is an <inttypes.h>-style conversion, e.g. PRIx16 */	\
+    }									\
+  fprintf(ref_file, " }\n");
+
 #define CLEAN_PATTERN_8  0x33
 #define CLEAN_PATTERN_16 0x3333
 #define CLEAN_PATTERN_32 0x33333333
@@ -142,6 +159,9 @@ extern ARRAY(buffer, uint, 64, 1);
 extern ARRAY(buffer, poly, 8, 8);
 extern ARRAY(buffer, poly, 16, 4);
 extern ARRAY(buffer, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer, float, 16, 4);
+#endif
 extern ARRAY(buffer, int, 8, 16);
 extern ARRAY(buffer, int, 16, 8);
 extern ARRAY(buffer, int, 32, 4);
@@ -153,6 +173,9 @@ extern ARRAY(buffer, uint, 64, 2);
 extern ARRAY(buffer, poly, 8, 16);
 extern ARRAY(buffer, poly, 16, 8);
 extern ARRAY(buffer, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer, float, 16, 8);
+#endif
 
 /* The tests for vld1_dup and vdup expect at least 4 entries in the
    input buffer, so force 1- and 2-elements initializers to have 4
@@ -168,6 +191,9 @@ extern ARRAY4(buffer_dup, uint, 64, 1);
 extern ARRAY(buffer_dup, poly, 8, 8);
 extern ARRAY(buffer_dup, poly, 16, 4);
 extern ARRAY4(buffer_dup, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY4(buffer_dup, float, 16, 4);
+#endif
 extern ARRAY(buffer_dup, int, 8, 16);
 extern ARRAY(buffer_dup, int, 16, 8);
 extern ARRAY(buffer_dup, int, 32, 4);
@@ -179,6 +205,9 @@ extern ARRAY4(buffer_dup, uint, 64, 2);
 extern ARRAY(buffer_dup, poly, 8, 16);
 extern ARRAY(buffer_dup, poly, 16, 8);
 extern ARRAY(buffer_dup, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern ARRAY(buffer_dup, float, 16, 8);
+#endif
 
 /* Input buffers for vld2, one of each size */
 extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
@@ -192,6 +221,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
 extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2);
 extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2);
 extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2);
+#endif
 extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
 extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
 extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
@@ -203,6 +235,9 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
 extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2);
 extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2);
 extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2);
+#endif
 
 /* Input buffers for vld3, one of each size */
 extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
@@ -216,6 +251,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
 extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3);
 extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3);
 extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3);
+#endif
 extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
 extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
 extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
@@ -227,6 +265,9 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
 extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3);
 extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3);
 extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3);
+#endif
 
 /* Input buffers for vld4, one of each size */
 extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
@@ -240,6 +281,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
 extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4);
 extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4);
 extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4);
+#endif
 extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
 extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
 extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
@@ -251,6 +295,9 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
 extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4);
 extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4);
 extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4);
+#endif
 
 /* Input buffers for vld2_lane */
 extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
@@ -264,6 +311,9 @@ extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
 extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2];
 extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2];
 extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2];
+#endif
 
 /* Input buffers for vld3_lane */
 extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
@@ -277,6 +327,9 @@ extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
 extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3];
 extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3];
 extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3];
+#endif
 
 /* Input buffers for vld4_lane */
 extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
@@ -290,6 +343,9 @@ extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
 extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4];
 extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4];
 extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
+#if __ARM_NEON_FP16_INTRINSICS
+extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4];
+#endif
 
 /* Output buffers, one of each size */
 static ARRAY(result, int, 8, 8);
@@ -303,6 +359,9 @@ static ARRAY(result, uint, 64, 1);
 static ARRAY(result, poly, 8, 8);
 static ARRAY(result, poly, 16, 4);
 static ARRAY(result, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+static ARRAY(result, float, 16, 4);
+#endif
 static ARRAY(result, int, 8, 16);
 static ARRAY(result, int, 16, 8);
 static ARRAY(result, int, 32, 4);
@@ -314,6 +373,9 @@ static ARRAY(result, uint, 64, 2);
 static ARRAY(result, poly, 8, 16);
 static ARRAY(result, poly, 16, 8);
 static ARRAY(result, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+static ARRAY(result, float, 16, 8);
+#endif
 
 /* Dump results (generic function) */
 static void dump_results (char *test_name)
@@ -333,6 +395,9 @@ static void dump_results (char *test_name)
   DUMP(test_name, poly, 8, 8, PRIu8);
   DUMP(test_name, poly, 16, 4, PRIu16);
   DUMP_FP(test_name, float, 32, 2, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+  DUMP_FP16(test_name, float, 16, 4, PRIu16);
+#endif
 
   DUMP(test_name, int, 8, 16, PRId8);
   DUMP(test_name, int, 16, 8, PRId16);
@@ -345,6 +410,9 @@ static void dump_results (char *test_name)
   DUMP(test_name, poly, 8, 16, PRIu8);
   DUMP(test_name, poly, 16, 8, PRIu16);
   DUMP_FP(test_name, float, 32, 4, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+  DUMP_FP16(test_name, float, 16, 8, PRIu16);
+#endif
 }
 
 /* Dump results in hex (generic function) */
@@ -365,6 +433,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment)
   DUMP(test_name, poly, 8, 8, PRIx8);
   DUMP(test_name, poly, 16, 4, PRIx16);
   DUMP_FP(test_name, float, 32, 2, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+  DUMP_FP16(test_name, float, 16, 4, PRIx16);
+#endif
 
   DUMP(test_name, int, 8, 16, PRIx8);
   DUMP(test_name, int, 16, 8, PRIx16);
@@ -377,6 +448,9 @@ static void dump_results_hex2 (const char *test_name, const char* comment)
   DUMP(test_name, poly, 8, 16, PRIx8);
   DUMP(test_name, poly, 16, 8, PRIx16);
   DUMP_FP(test_name, float, 32, 4, PRIx32);
+#if __ARM_NEON_FP16_INTRINSICS
+  DUMP_FP16(test_name, float, 16, 8, PRIx16);
+#endif
 }
 
 static void dump_results_hex (const char *test_name)
author	Christophe Lyon <christophe.lyon@st.com>	2013-04-11 15:05:18 +0200
committer	Christophe Lyon <christophe.lyon@st.com>	2013-04-11 15:05:18 +0200
commit	34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree	421db32a0477c74f47a6e51f040cfd60f6db5936 /stm-arm-neon-ref.h
parent	91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
download	platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2 platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip