diff options
Diffstat (limited to 'ref_vstX_lane.c')
-rw-r--r-- | ref_vstX_lane.c | 54 |
1 file changed, 54 insertions, 0 deletions
diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c index d8965c9..7d14c6c 100644 --- a/ref_vstX_lane.c +++ b/ref_vstX_lane.c @@ -92,6 +92,12 @@ void exec_vstX_lane (void) DECL_VSTX_LANE(poly, 16, 8, X); \ DECL_VSTX_LANE(float, 32, 4, X) +#if __ARM_NEON_FP16_INTRINSICS +#define DECL_ALL_VSTX_LANE_FP16(X) \ + DECL_VSTX_LANE(float, 16, 4, X); \ + DECL_VSTX_LANE(float, 16, 8, X) +#endif + #define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] /* Use the same lanes regardless of the size of the array (X), for @@ -113,6 +119,12 @@ void exec_vstX_lane (void) TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) +#if __ARM_NEON_FP16_INTRINSICS +#define TEST_ALL_VSTX_LANE_FP16(X) \ + TEST_VSTX_LANE(, float, f, 16, 4, X, 3); \ + TEST_VSTX_LANE(q, float, f, 16, 8, X, 6) +#endif + #define TEST_ALL_EXTRA_CHUNKS(X, Y) \ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ @@ -130,10 +142,21 @@ void exec_vstX_lane (void) TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) +#if __ARM_NEON_FP16_INTRINSICS +#define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ + TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ + TEST_EXTRA_CHUNK(float, 16, 8, X, Y) +#endif + /* Declare the temporary buffers / variables */ DECL_ALL_VSTX_LANE(2); DECL_ALL_VSTX_LANE(3); DECL_ALL_VSTX_LANE(4); +#if __ARM_NEON_FP16_INTRINSICS + DECL_ALL_VSTX_LANE_FP16(2); + DECL_ALL_VSTX_LANE_FP16(3); + DECL_ALL_VSTX_LANE_FP16(4); +#endif /* Define dummy input arrays, large enough for x4 vectors */ DUMMY_ARRAY(buffer_src, int, 8, 8, 4); @@ -151,14 +174,24 @@ void exec_vstX_lane (void) DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); DUMMY_ARRAY(buffer_src, float, 32, 4, 4); +#if __ARM_NEON_FP16_INTRINSICS + DUMMY_ARRAY(buffer_src, float, 16, 4, 4); + DUMMY_ARRAY(buffer_src, float, 16, 8, 4); +#endif /* Check vst2_lane/vst2q_lane */ clean_results (); #define TEST_MSG "VST2_LANE/VST2Q_LANE" TEST_ALL_VSTX_LANE(2); +#if 
__ARM_NEON_FP16_INTRINSICS + TEST_ALL_VSTX_LANE_FP16(2); +#endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); +#endif dump_results_hex2 (TEST_MSG, " chunk 1"); /* Check vst3_lane/vst3q_lane */ @@ -166,11 +199,20 @@ void exec_vstX_lane (void) #undef TEST_MSG #define TEST_MSG "VST3_LANE/VST3Q_LANE" TEST_ALL_VSTX_LANE(3); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_VSTX_LANE_FP16(3); +#endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); +#endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); +#endif dump_results_hex2 (TEST_MSG, " chunk 2"); /* Check vst4_lane/vst4q_lane */ @@ -178,12 +220,24 @@ void exec_vstX_lane (void) #undef TEST_MSG #define TEST_MSG "VST4_LANE/VST4Q_LANE" TEST_ALL_VSTX_LANE(4); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_VSTX_LANE_FP16(4); +#endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); +#endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); +#endif dump_results_hex2 (TEST_MSG, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); +#if __ARM_NEON_FP16_INTRINSICS + TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); +#endif dump_results_hex2 (TEST_MSG, " chunk 3"); } |