Added support for half-precision (fp16) variants.

author: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
committer: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
commit: 34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree: 421db32a0477c74f47a6e51f040cfd60f6db5936 /ref_vcvt.c
parent: 91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
download: platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip
1 files changed, 75 insertions, 1 deletions
diff --git a/ref_vcvt.c b/ref_vcvt.c
index 898ebf0..c09d8b1 100644
--- a/ref_vcvt.c
+++ b/ref_vcvt.c
@@ -1,6 +1,6 @@
 /*
 
-Copyright (c) 2009, 2010, 2011 STMicroelectronics
+Copyright (c) 2009, 2010, 2011, 2013 STMicroelectronics
 Written by Christophe Lyon
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -30,6 +30,7 @@ THE SOFTWARE.
 #endif
 
 #include "stm-arm-neon-ref.h"
+#include <math.h>
 
 #define TEST_MSG "VCVT/VCVTQ"
 void exec_vcvt (void)
@@ -51,6 +52,22 @@ void exec_vcvt (void)
 		    VECT_VAR(vector_res, T1, W, N));		\
   DUMP_FP(TEST_MSG, T1, W, N, PRIx##W);
 
+#if __ARM_NEON_FP16_INTRINSICS
+#define TEST_VCVT_FP16(T1, T2, W1, W2, N)			\
+  VECT_VAR(vector_res, T1, W1, N) =				\
+    vcvt_##T2##W1##_##T2##W2(VECT_VAR(vector, T1, W2, N));	\
+  vst1q_##T2##W1(VECT_VAR(result, T1, W1, N),			\
+		 VECT_VAR(vector_res, T1, W1, N));		\
+  DUMP_FP(TEST_MSG, T1, W1, N, PRIx##W1);
+
+#define TEST_VCVT_2FP16(T1, T2, W1, W2, N)			\
+  VECT_VAR(vector_res, T1, W1, N) =				\
+    vcvt_##T2##W1##_##T2##W2(VECT_VAR(vector, T1, W2, N));	\
+  vst1_##T2##W1(VECT_VAR(result, T1, W1, N),			\
+		 VECT_VAR(vector_res, T1, W1, N));		\
+  DUMP_FP16(TEST_MSG, T1, W1, N, PRIx##W1);
+#endif
+
 #define TEST_VCVT_N(Q, T1, T2, W, N, TS1, TS2, V)			\
   VECT_VAR(vector_res, T1, W, N) =					\
     vcvt##Q##_n_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N), V);	\
@@ -69,6 +86,14 @@ void exec_vcvt (void)
      statement */
   DECL_VARIABLE_ALL_VARIANTS(vector);
   DECL_VARIABLE_ALL_VARIANTS(vector_res);
+#if __ARM_NEON_FP16_INTRINSICS
+  DECL_VARIABLE(vector_init, uint, 16, 4);
+  DECL_VARIABLE(vector_init, uint, 16, 8);
+  DECL_VARIABLE(vector, float, 16, 4);
+  DECL_VARIABLE(vector, float, 16, 8);
+  DECL_VARIABLE(vector_res, float, 16, 4);
+  DECL_VARIABLE(vector_res, float, 16, 8);
+#endif
 
   clean_results ();
 
@@ -77,7 +102,12 @@ void exec_vcvt (void)
   TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLOAD, vector, buffer);
   TEST_VLOAD(vector, buffer, , float, f, 32, 2);
   TEST_VLOAD(vector, buffer, q, float, f, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+  TEST_VLOAD(vector, buffer, , float, f, 16, 4);
+  TEST_VLOAD(vector, buffer, q, float, f, 16, 8);
+#endif
 
+#if __ARM_NEON_FP16_INTRINSICS
   /* Make sure some elements have a fractional part, to exercise
      integer conversions */
   TEST_VSET_LANE(vector, , float, f, 32, 2, 0, -15.3f);
@@ -85,6 +115,19 @@ void exec_vcvt (void)
   TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f);
   TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3f);
 
+  /* FP16 tests.  */
+  /* There is no vdup_n_f16, so build the fp16 bit patterns in a u16
+     vector and reinterpret it as f16.  */
+  /* Use all lanes with values of different kinds: first, a "standard"
+     positive number, a "standard" negative one, and +0 and -0.  */
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 0, 0x4b90 /* 15.125 */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 1, 0xcb90 /* -15.125 */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 2, 0 /* 0 */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 3, 0x8000 /* -0 */);
+  VECT_VAR(vector, float, 16, 4) =
+    vreinterpret_f16_u16(VECT_VAR(vector_init, uint, 16, 4));
+#endif
+
   /* The same result buffers are used multiple times, so we output
      them before overwriting them  */
   fprintf(ref_file, "\n%s output:\n", TEST_MSG);
@@ -92,6 +135,9 @@ void exec_vcvt (void)
   /* vcvt_f32_xx */
   TEST_VCVT_FP(, float, f, 32, 2, int, s);
   TEST_VCVT_FP(, float, f, 32, 2, uint, u);
+#if __ARM_NEON_FP16_INTRINSICS
+  TEST_VCVT_FP16(float, f, 32, 16, 4);
+#endif
 
   /* vcvtq_f32_xx */
   TEST_VCVT_FP(q, float, f, 32, 4, int, s);
@@ -101,6 +147,14 @@ void exec_vcvt (void)
   TEST_VCVT(, int, s, 32, 2, float, f);
   TEST_VCVT(, uint, u, 32, 2, float, f);
 
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 0, 0.0f);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f);
+#if __ARM_NEON_FP16_INTRINSICS
+  TEST_VCVT_2FP16(float, f, 16, 32, 4);
+#endif
+
   /* vcvtq_xx_f32 */
   TEST_VCVT(q, int, s, 32, 4, float, f);
   TEST_VCVT(q, uint, u, 32, 4, float, f);
@@ -157,4 +211,24 @@ void exec_vcvt (void)
   TEST_VCVT_N(, int, s, 32, 2, float, f, 31);
   /* vcvtq_n_xx_f32 */
   TEST_VCVT_N(q, int, s, 32, 4, float, f, 31);
+
+#if __ARM_NEON_FP16_INTRINSICS
+#undef TEST_MSG
+#define TEST_MSG "VCVT FP16"
+  fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check fp16-fp32 inf/nan/denormal)");
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 0, 0x0390 /* DENORMAL */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 1, 0x7c00 /* inf */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 2, 0x7e00 /* nan */);
+  TEST_VSET_LANE(vector_init, , uint, u, 16, 4, 3, 0xfc00 /* -inf */);
+  VECT_VAR(vector, float, 16, 4) =
+    vreinterpret_f16_u16(VECT_VAR(vector_init, uint, 16, 4));
+  TEST_VCVT_FP16(float, f, 32, 16, 4);
+
+  fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check fp32-fp16 inf/nan)");
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 0, NAN);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 1, HUGE_VALF);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -HUGE_VALF);
+  TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, -0.0f);
+  TEST_VCVT_2FP16(float, f, 16, 32, 4);
+#endif
 }
author	Christophe Lyon <christophe.lyon@st.com>	2013-04-11 15:05:18 +0200
committer	Christophe Lyon <christophe.lyon@st.com>	2013-04-11 15:05:18 +0200
commit	34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree	421db32a0477c74f47a6e51f040cfd60f6db5936 /ref_vcvt.c
parent	91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
download	platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2 platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip