about | summary | refs | log | tree | commit | diff | stats
path: root/ref_vstX_lane.c
diff options
context:
space:
mode:
author: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
committer: Christophe Lyon <christophe.lyon@st.com> 2013-04-11 15:05:18 +0200
commit: 34adaf63011ec336c8fa4594c6bba1c59840e167 (patch)
tree: 421db32a0477c74f47a6e51f040cfd60f6db5936 /ref_vstX_lane.c
parent: 91ae809096ef1dae9756ada87efe475ce79e4bb5 (diff)
download: platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.gz
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.tar.bz2
platform_external_arm-neon-tests-34adaf63011ec336c8fa4594c6bba1c59840e167.zip
Added support for half-precision (fp16) variants.
Diffstat (limited to 'ref_vstX_lane.c')
-rw-r--r-- ref_vstX_lane.c 54
1 files changed, 54 insertions, 0 deletions
diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c
index d8965c9..7d14c6c 100644
--- a/ref_vstX_lane.c
+++ b/ref_vstX_lane.c
@@ -92,6 +92,12 @@ void exec_vstX_lane (void)
DECL_VSTX_LANE(poly, 16, 8, X); \
DECL_VSTX_LANE(float, 32, 4, X)
+#if __ARM_NEON_FP16_INTRINSICS
+#define DECL_ALL_VSTX_LANE_FP16(X) \
+ DECL_VSTX_LANE(float, 16, 4, X); \
+ DECL_VSTX_LANE(float, 16, 8, X)
+#endif
+
#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L]
/* Use the same lanes regardless of the size of the array (X), for
@@ -113,6 +119,12 @@ void exec_vstX_lane (void)
TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \
TEST_VSTX_LANE(q, float, f, 32, 4, X, 2)
+#if __ARM_NEON_FP16_INTRINSICS
+#define TEST_ALL_VSTX_LANE_FP16(X) \
+ TEST_VSTX_LANE(, float, f, 16, 4, X, 3); \
+ TEST_VSTX_LANE(q, float, f, 16, 8, X, 6)
+#endif
+
#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
@@ -130,10 +142,21 @@ void exec_vstX_lane (void)
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
+#if __ARM_NEON_FP16_INTRINSICS
+#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \
+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y)
+#endif
+
/* Declare the temporary buffers / variables */
DECL_ALL_VSTX_LANE(2);
DECL_ALL_VSTX_LANE(3);
DECL_ALL_VSTX_LANE(4);
+#if __ARM_NEON_FP16_INTRINSICS
+ DECL_ALL_VSTX_LANE_FP16(2);
+ DECL_ALL_VSTX_LANE_FP16(3);
+ DECL_ALL_VSTX_LANE_FP16(4);
+#endif
/* Define dummy input arrays, large enough for x4 vectors */
DUMMY_ARRAY(buffer_src, int, 8, 8, 4);
@@ -151,14 +174,24 @@ void exec_vstX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
DUMMY_ARRAY(buffer_src, float, 32, 4, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+ DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
+ DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
+#endif
/* Check vst2_lane/vst2q_lane */
clean_results ();
#define TEST_MSG "VST2_LANE/VST2Q_LANE"
TEST_ALL_VSTX_LANE(2);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_VSTX_LANE_FP16(2);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(2, 1);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(2, 1);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 1");
/* Check vst3_lane/vst3q_lane */
@@ -166,11 +199,20 @@ void exec_vstX_lane (void)
#undef TEST_MSG
#define TEST_MSG "VST3_LANE/VST3Q_LANE"
TEST_ALL_VSTX_LANE(3);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_VSTX_LANE_FP16(3);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(3, 1);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(3, 1);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 1");
TEST_ALL_EXTRA_CHUNKS(3, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(3, 2);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 2");
/* Check vst4_lane/vst4q_lane */
@@ -178,12 +220,24 @@ void exec_vstX_lane (void)
#undef TEST_MSG
#define TEST_MSG "VST4_LANE/VST4Q_LANE"
TEST_ALL_VSTX_LANE(4);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_VSTX_LANE_FP16(4);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(4, 1);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(4, 1);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 1");
TEST_ALL_EXTRA_CHUNKS(4, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(4, 2);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 2");
TEST_ALL_EXTRA_CHUNKS(4, 3);
+#if __ARM_NEON_FP16_INTRINSICS
+ TEST_ALL_EXTRA_CHUNKS_FP16(4, 3);
+#endif
dump_results_hex2 (TEST_MSG, " chunk 3");
}