author     Christophe Lyon <christophe.lyon@st.com>   2013-01-17 17:23:11 +0100
committer  Christophe Lyon <christophe.lyon@st.com>   2013-01-17 17:51:15 +0100
commit     01af0a532c91523692d7b32ed08e0aace8123cba (patch)
tree       199360c2ab91c91a7c1382c676982a09ea251ef2
parent     302cedf48e55284564aae5256adfdebb1a7a6a0c (diff)
Reorganize input data buffers to try to detect some out-of-bounds accesses.
Add dedicated inputs for vdup, vld1_dup, vldX, vldX_dup, vldX_lane and vstX_lane.
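
The detection idea, sketched minimally below with hypothetical names (not code from this patch): every input buffer is followed by a small pad holding a sentinel value, so an intrinsic that loads past the end of its exactly-sized buffer picks up the sentinel (and the result then diverges from the reference output), while a store overrun clobbers the sentinel itself.

/* Minimal sketch of the padding scheme; the patch implements it via
   the PAD() macro in stm-arm-neon-ref.h.  Adjacency of successive
   globals is toolchain-dependent, hence "try to detect". */
#include <stdint.h>
#include <stdio.h>

int16_t buffer[4] = { -16, -15, -14, -13 };
char buffer_pad = 42;            /* sentinel placed right after the buffer */

int check_pad(void) {
  if (buffer_pad != 42) {        /* overwritten => out-of-bounds store */
    fprintf(stderr, "overrun detected after 'buffer'\n");
    return 1;
  }
  return 0;
}
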
-rw-r--r--  compute_ref.axf          bin  3246928 -> 3175368 bytes
-rw-r--r--  compute_ref.c              3
-rwxr-xr-x  compute_ref.gccarm       bin  4291760 -> 3967245 bytes
-rw-r--r--  compute_ref.gccarm-rvct  bin  4498672 -> 4438112 bytes
-rw-r--r--  compute_ref_data.c       322
-rw-r--r--  ref-rvct-neon.txt        118
-rw-r--r--  ref_vdup.c                 4
-rw-r--r--  ref_vld1_dup.c             6
-rw-r--r--  ref_vldX.c                 3
-rw-r--r--  ref_vldX_dup.c             3
-rw-r--r--  ref_vldX_lane.c           11
-rw-r--r--  ref_vstX_lane.c            3
-rw-r--r--  stm-arm-neon-ref.h       176
13 files changed, 557 insertions, 92 deletions
diff --git a/compute_ref.axf b/compute_ref.axf
index ab1ae89..c1e139e 100644
--- a/compute_ref.axf
+++ b/compute_ref.axf
Binary files differ
diff --git a/compute_ref.c b/compute_ref.c
index 47f51cb..6d4c1e9 100644
--- a/compute_ref.c
+++ b/compute_ref.c
@@ -187,6 +187,8 @@ extern void exec_dsp(void); /* DSP (non-NEON) intrinsics */
extern void exec_dspfns(void); /* DSP FNS (non-NEON/ITU) intrinsics */
#endif
+#include "compute_ref_data.c"
+
int main (void)
{
#if defined(_MSC_VER)
@@ -339,7 +341,6 @@ int main (void)
exec_vrecpe ();
exec_vrsqrte ();
-
exec_vcage ();
exec_vcale ();
exec_vcagt ();
diff --git a/compute_ref.gccarm b/compute_ref.gccarm
index b42db61..61a18d8 100755
--- a/compute_ref.gccarm
+++ b/compute_ref.gccarm
Binary files differ
diff --git a/compute_ref.gccarm-rvct b/compute_ref.gccarm-rvct
index 1bfc143..3587a97 100644
--- a/compute_ref.gccarm-rvct
+++ b/compute_ref.gccarm-rvct
Binary files differ
diff --git a/compute_ref_data.c b/compute_ref_data.c
new file mode 100644
index 0000000..48e8025
--- /dev/null
+++ b/compute_ref_data.c
@@ -0,0 +1,322 @@
+
+#ifdef __arm__
+#include <arm_neon.h>
+#else
+#include "stm-arm-neon.h"
+#endif
+#include "stm-arm-neon-ref.h"
+
+/* Initialization helpers; up to 4 slices are needed, for vld2, vld3
+   and vld4. */
+#define MY_INIT_TAB(T,W,N) xNAME(INIT_TAB,N)(T##W##_t)
+#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
+#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
+#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)
+
+/* Initialized input buffers. */
+#define VECT_VAR_DECL_INIT(V, T, W, N) \
+ VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) };
+
+/* Specialized initializer with 4 entries, as used by the vldX_dup and
+   vdup tests, which iterate 4 times over their input buffers. */
+#define VECT_VAR_DECL_INIT4(V, T, W, N) \
+ VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,4) };
+
+/* Initializers for arrays of vectors. */
+#define VECT_ARRAY_INIT2(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] = \
+ { MY_INIT_TAB(T,W,N) \
+ MY_INIT_TAB2(T,W,N) };
+
+#define VECT_ARRAY_INIT3(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] = \
+ { MY_INIT_TAB(T,W,N) \
+ MY_INIT_TAB2(T,W,N) \
+ MY_INIT_TAB3(T,W,N) };
+
+#define VECT_ARRAY_INIT4(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] = \
+ { MY_INIT_TAB(T,W,N) \
+ MY_INIT_TAB2(T,W,N) \
+ MY_INIT_TAB3(T,W,N) \
+ MY_INIT_TAB4(T,W,N) };
+
+/* Sample initialization vectors. */
+#define INIT_TAB_1(T) \
+ (T)-16,
+#define INIT_TAB2_1(T) \
+ (T)-15,
+#define INIT_TAB3_1(T) \
+ (T)-14,
+#define INIT_TAB4_1(T) \
+ (T)-13,
+
+#define INIT_TAB_2(T) \
+ (T)-16, (T)-15,
+#define INIT_TAB2_2(T) \
+ (T)-14, (T)-13,
+#define INIT_TAB3_2(T) \
+ (T)-12, (T)-11,
+#define INIT_TAB4_2(T) \
+ (T)-10, (T)-9,
+
+/* Initializer for vld3_lane tests. */
+#define INIT_TAB_3(T) \
+ (T)-16, (T)-15, (T)-14,
+
+#define INIT_TAB_4(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13,
+#define INIT_TAB2_4(T) \
+ (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TAB3_4(T) \
+ (T)-8, (T)-7, (T)-6, (T)-5,
+#define INIT_TAB4_4(T) \
+ (T)-4, (T)-3, (T)-2, (T)-1,
+
+#define INIT_TAB_8(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TAB2_8(T) \
+ (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TAB3_8(T) \
+ (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
+#define INIT_TAB4_8(T) \
+ (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+
+#define INIT_TAB_16(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, \
+ (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TAB2_16(T) \
+ (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, \
+ (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+#define INIT_TAB3_16(T) \
+ (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23, \
+ (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
+#define INIT_TAB4_16(T) \
+ (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39, \
+ (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,
+
+/* Input buffers, one of each size. */
+/* Insert some padding to try to expose out-of-bounds accesses. */
+VECT_VAR_DECL_INIT(buffer, int, 8, 8);
+PAD(buffer_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer, int, 16, 4);
+PAD(buffer_pad, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer, int, 32, 2);
+PAD(buffer_pad, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 64, 1);
+PAD(buffer_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
+PAD(buffer_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
+PAD(buffer_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
+PAD(buffer_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
+PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 32, 2);
+PAD(buffer_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 8, 16);
+PAD(buffer_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer, int, 16, 8);
+PAD(buffer_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer, int, 32, 4);
+PAD(buffer_pad, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer, int, 64, 2);
+PAD(buffer_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
+PAD(buffer_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
+PAD(buffer_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
+PAD(buffer_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
+PAD(buffer_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer, float, 32, 4);
+PAD(buffer_pad, float, 32, 4);
+
+/* The tests for vld1_dup and vdup expect at least 4 entries in the
+   input buffer, so force the 1- and 2-element initializers to have 4
+   entries. */
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
+
+/* Input buffers for vld2, one of each size. */
+VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8);
+PAD(buffer_vld2_pad, int, 8, 8);
+VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4);
+PAD(buffer_vld2_pad, int, 16, 4);
+VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2);
+PAD(buffer_vld2_pad, int, 32, 2);
+VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1);
+PAD(buffer_vld2_pad, int, 64, 1);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8);
+PAD(buffer_vld2_pad, uint, 8, 8);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4);
+PAD(buffer_vld2_pad, uint, 16, 4);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2);
+PAD(buffer_vld2_pad, uint, 32, 2);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1);
+PAD(buffer_vld2_pad, uint, 64, 1);
+VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2);
+PAD(buffer_vld2_pad, float, 32, 2);
+VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16);
+PAD(buffer_vld2_pad, int, 8, 16);
+VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8);
+PAD(buffer_vld2_pad, int, 16, 8);
+VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4);
+PAD(buffer_vld2_pad, int, 32, 4);
+VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2);
+PAD(buffer_vld2_pad, int, 64, 2);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16);
+PAD(buffer_vld2_pad, uint, 8, 16);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8);
+PAD(buffer_vld2_pad, uint, 16, 8);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4);
+PAD(buffer_vld2_pad, uint, 32, 4);
+VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2);
+PAD(buffer_vld2_pad, uint, 64, 2);
+VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4);
+PAD(buffer_vld2_pad, float, 32, 4);
+
+/* Input buffers for vld3, one of each size. */
+VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8);
+PAD(buffer_vld3_pad, int, 8, 8);
+VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4);
+PAD(buffer_vld3_pad, int, 16, 4);
+VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2);
+PAD(buffer_vld3_pad, int, 32, 2);
+VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1);
+PAD(buffer_vld3_pad, int, 64, 1);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8);
+PAD(buffer_vld3_pad, uint, 8, 8);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4);
+PAD(buffer_vld3_pad, uint, 16, 4);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2);
+PAD(buffer_vld3_pad, uint, 32, 2);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1);
+PAD(buffer_vld3_pad, uint, 64, 1);
+VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2);
+PAD(buffer_vld3_pad, float, 32, 2);
+VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16);
+PAD(buffer_vld3_pad, int, 8, 16);
+VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8);
+PAD(buffer_vld3_pad, int, 16, 8);
+VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4);
+PAD(buffer_vld3_pad, int, 32, 4);
+VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2);
+PAD(buffer_vld3_pad, int, 64, 2);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16);
+PAD(buffer_vld3_pad, uint, 8, 16);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8);
+PAD(buffer_vld3_pad, uint, 16, 8);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4);
+PAD(buffer_vld3_pad, uint, 32, 4);
+VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2);
+PAD(buffer_vld3_pad, uint, 64, 2);
+VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4);
+PAD(buffer_vld3_pad, float, 32, 4);
+
+/* Input buffers for vld4, one of each size. */
+VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8);
+PAD(buffer_vld4_pad, int, 8, 8);
+VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4);
+PAD(buffer_vld4_pad, int, 16, 4);
+VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2);
+PAD(buffer_vld4_pad, int, 32, 2);
+VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1);
+PAD(buffer_vld4_pad, int, 64, 1);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8);
+PAD(buffer_vld4_pad, uint, 8, 8);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4);
+PAD(buffer_vld4_pad, uint, 16, 4);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2);
+PAD(buffer_vld4_pad, uint, 32, 2);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1);
+PAD(buffer_vld4_pad, uint, 64, 1);
+VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2);
+PAD(buffer_vld4_pad, float, 32, 2);
+VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16);
+PAD(buffer_vld4_pad, int, 8, 16);
+VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8);
+PAD(buffer_vld4_pad, int, 16, 8);
+VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4);
+PAD(buffer_vld4_pad, int, 32, 4);
+VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2);
+PAD(buffer_vld4_pad, int, 64, 2);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16);
+PAD(buffer_vld4_pad, uint, 8, 16);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8);
+PAD(buffer_vld4_pad, uint, 16, 8);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4);
+PAD(buffer_vld4_pad, uint, 32, 4);
+VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2);
+PAD(buffer_vld4_pad, uint, 64, 2);
+VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4);
+PAD(buffer_vld4_pad, float, 32, 4);
+
+/* Input buffers for vld2_lane */
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2);
+
+/* Input buffers for vld3_lane */
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3);
+
+/* Input buffers for vld4_lane */
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4);
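
For reference, here is roughly what two of the initializers above expand to, assuming the suite's usual VECT_VAR naming; the expanded identifiers below are illustrative, not copied from the patch:

#include <stdint.h>

/* VECT_VAR_DECL_INIT(buffer, int, 16, 4); PAD(buffer_pad, int, 16, 4); */
int16_t buffer_int16x4[] = { (int16_t)-16, (int16_t)-15,
                             (int16_t)-14, (int16_t)-13 };
char buffer_pad_int16x4 = 42;            /* sentinel guarding the buffer end */

/* VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4); -- two consecutive slices */
int16_t buffer_vld2_int16x4x2[] = { (int16_t)-16, (int16_t)-15,
                                    (int16_t)-14, (int16_t)-13,   /* slice 1 */
                                    (int16_t)-12, (int16_t)-11,
                                    (int16_t)-10, (int16_t)-9 };  /* slice 2 */
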
diff --git a/ref-rvct-neon.txt b/ref-rvct-neon.txt
index 010e138..8ef013c 100644
--- a/ref-rvct-neon.txt
+++ b/ref-rvct-neon.txt
@@ -2014,11 +2014,11 @@ VLD3/VLD3Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, }
VLD3/VLD3Q:42:result_uint32x2 [] = { fffffff4, fffffff5, }
VLD3/VLD3Q:43:result_uint64x1 [] = { fffffffffffffff2, }
VLD3/VLD3Q:44:result_float32x2 [] = { c1400000 -0x1.8000000p+3 -12, c1300000 -0x1.6000000p+3 -11, }
-VLD3/VLD3Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, }
+VLD3/VLD3Q:45:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, }
VLD3/VLD3Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, }
VLD3/VLD3Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, }
VLD3/VLD3Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VLD3/VLD3Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, }
+VLD3/VLD3Q:49:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, }
VLD3/VLD3Q:50:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, }
VLD3/VLD3Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, }
VLD3/VLD3Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
@@ -2074,11 +2074,11 @@ VLD4/VLD4Q:41:result_uint16x4 [] = { fff8, fff9, fffa, fffb, }
VLD4/VLD4Q:42:result_uint32x2 [] = { fffffff4, fffffff5, }
VLD4/VLD4Q:43:result_uint64x1 [] = { fffffffffffffff2, }
VLD4/VLD4Q:44:result_float32x2 [] = { c1400000 -0x1.8000000p+3 -12, c1300000 -0x1.6000000p+3 -11, }
-VLD4/VLD4Q:45:result_int8x16 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, fffffff4, ffffffff, fffffff5, ffffffff, fffffff6, ffffffff, fffffff7, ffffffff, }
+VLD4/VLD4Q:45:result_int8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, }
VLD4/VLD4Q:46:result_int16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, }
VLD4/VLD4Q:47:result_int32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, }
VLD4/VLD4Q:48:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VLD4/VLD4Q:49:result_uint8x16 [] = { f0, ff, f1, ff, f2, ff, f3, ff, f4, ff, f5, ff, f6, ff, f7, ff, }
+VLD4/VLD4Q:49:result_uint8x16 [] = { 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1a, 1b, 1c, 1d, 1e, 1f, }
VLD4/VLD4Q:50:result_uint16x8 [] = { 0, 1, 2, 3, 4, 5, 6, 7, }
VLD4/VLD4Q:51:result_uint32x4 [] = { fffffff8, fffffff9, fffffffa, fffffffb, }
VLD4/VLD4Q:52:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
@@ -2094,11 +2094,11 @@ VLD4/VLD4Q:59:result_uint16x4 [] = { fffc, fffd, fffe, ffff, }
VLD4/VLD4Q:60:result_uint32x2 [] = { fffffff6, fffffff7, }
VLD4/VLD4Q:61:result_uint64x1 [] = { fffffffffffffff3, }
VLD4/VLD4Q:62:result_float32x2 [] = { c1200000 -0x1.4000000p+3 -10, c1100000 -0x1.2000000p+3 -9, }
-VLD4/VLD4Q:63:result_int8x16 [] = { fffffff8, ffffffff, fffffff9, ffffffff, fffffffa, ffffffff, fffffffb, ffffffff, fffffffc, ffffffff, fffffffd, ffffffff, fffffffe, ffffffff, ffffffff, ffffffff, }
+VLD4/VLD4Q:63:result_int8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, }
VLD4/VLD4Q:64:result_int16x8 [] = { 8, 9, a, b, c, d, e, f, }
VLD4/VLD4Q:65:result_int32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, }
VLD4/VLD4Q:66:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VLD4/VLD4Q:67:result_uint8x16 [] = { f8, ff, f9, ff, fa, ff, fb, ff, fc, ff, fd, ff, fe, ff, ff, ff, }
+VLD4/VLD4Q:67:result_uint8x16 [] = { 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 2a, 2b, 2c, 2d, 2e, 2f, }
VLD4/VLD4Q:68:result_uint16x8 [] = { 8, 9, a, b, c, d, e, f, }
VLD4/VLD4Q:69:result_uint32x4 [] = { fffffffc, fffffffd, fffffffe, ffffffff, }
VLD4/VLD4Q:70:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
@@ -2831,24 +2831,24 @@ VTRN/VTRNQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
VTRN/VTRNQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1700000 -0x1.e000000p+3 -15, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, }
VTRN/VTRNQ chunk 1 output:
-VTRN/VTRNQ:18:result_int8x8 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, }
-VTRN/VTRNQ:19:result_int16x4 [] = { fffffff1, 22, 22, fffffff2, }
-VTRN/VTRNQ:20:result_int32x2 [] = { fffffff1, 33, }
+VTRN/VTRNQ:18:result_int8x8 [] = { fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, }
+VTRN/VTRNQ:19:result_int16x4 [] = { fffffff2, fffffff3, 22, 22, }
+VTRN/VTRNQ:20:result_int32x2 [] = { 33, 33, }
VTRN/VTRNQ:21:result_int64x1 [] = { 3333333333333333, }
-VTRN/VTRNQ:22:result_uint8x8 [] = { f1, 55, 55, f2, f3, 55, 55, f4, }
-VTRN/VTRNQ:23:result_uint16x4 [] = { fff1, 66, 66, fff2, }
-VTRN/VTRNQ:24:result_uint32x2 [] = { fffffff1, 77, }
+VTRN/VTRNQ:22:result_uint8x8 [] = { f4, f5, 55, 55, f6, f7, 55, 55, }
+VTRN/VTRNQ:23:result_uint16x4 [] = { fff2, fff3, 66, 66, }
+VTRN/VTRNQ:24:result_uint32x2 [] = { 77, 77, }
VTRN/VTRNQ:25:result_uint64x1 [] = { 3333333333333333, }
-VTRN/VTRNQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, }
-VTRN/VTRNQ:27:result_int8x16 [] = { fffffff1, 11, 11, fffffff2, fffffff3, 11, 11, fffffff4, fffffff5, 11, 11, fffffff6, fffffff7, 11, 11, fffffff8, }
-VTRN/VTRNQ:28:result_int16x8 [] = { fffffff1, 22, 22, fffffff2, fffffff3, 22, 22, fffffff4, }
-VTRN/VTRNQ:29:result_int32x4 [] = { fffffff1, 33, 33, fffffff2, }
+VTRN/VTRNQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, }
+VTRN/VTRNQ:27:result_int8x16 [] = { fffffff8, fffffff9, 11, 11, fffffffa, fffffffb, 11, 11, fffffffc, fffffffd, 11, 11, fffffffe, ffffffff, 11, 11, }
+VTRN/VTRNQ:28:result_int16x8 [] = { fffffff4, fffffff5, 22, 22, fffffff6, fffffff7, 22, 22, }
+VTRN/VTRNQ:29:result_int32x4 [] = { fffffff2, fffffff3, 33, 33, }
VTRN/VTRNQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VTRN/VTRNQ:31:result_uint8x16 [] = { f1, 55, 55, f2, f3, 55, 55, f4, f5, 55, 55, f6, f7, 55, 55, f8, }
-VTRN/VTRNQ:32:result_uint16x8 [] = { fff1, 66, 66, fff2, fff3, 66, 66, fff4, }
-VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff1, 77, 77, fffffff2, }
+VTRN/VTRNQ:31:result_uint8x16 [] = { f8, f9, 55, 55, fa, fb, 55, 55, fc, fd, 55, 55, fe, ff, 55, 55, }
+VTRN/VTRNQ:32:result_uint16x8 [] = { fff4, fff5, 66, 66, fff6, fff7, 66, 66, }
+VTRN/VTRNQ:33:result_uint32x4 [] = { fffffff2, fffffff3, 77, 77, }
VTRN/VTRNQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
-VTRN/VTRNQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, c1600000 -0x1.c000000p+3 -14, }
+VTRN/VTRNQ:35:result_float32x4 [] = { c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, }
VUZP/VUZPQ chunk 0 output:
VUZP/VUZPQ:0:result_int8x8 [] = { fffffff0, fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, }
@@ -2871,24 +2871,24 @@ VUZP/VUZPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
VUZP/VUZPQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1700000 -0x1.e000000p+3 -15, c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, }
VUZP/VUZPQ chunk 1 output:
-VUZP/VUZPQ:18:result_int8x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 11, }
-VUZP/VUZPQ:19:result_int16x4 [] = { fffffff1, fffffff2, fffffff3, 22, }
-VUZP/VUZPQ:20:result_int32x2 [] = { fffffff1, 33, }
+VUZP/VUZPQ:18:result_int8x8 [] = { 11, 11, 11, 11, 11, 11, 11, 11, }
+VUZP/VUZPQ:19:result_int16x4 [] = { 22, 22, 22, 22, }
+VUZP/VUZPQ:20:result_int32x2 [] = { 33, 33, }
VUZP/VUZPQ:21:result_int64x1 [] = { 3333333333333333, }
-VUZP/VUZPQ:22:result_uint8x8 [] = { f1, f2, f3, f4, f5, f6, f7, 55, }
-VUZP/VUZPQ:23:result_uint16x4 [] = { fff1, fff2, fff3, 66, }
-VUZP/VUZPQ:24:result_uint32x2 [] = { fffffff1, 77, }
+VUZP/VUZPQ:22:result_uint8x8 [] = { 55, 55, 55, 55, 55, 55, 55, 55, }
+VUZP/VUZPQ:23:result_uint16x4 [] = { 66, 66, 66, 66, }
+VUZP/VUZPQ:24:result_uint32x2 [] = { 77, 77, }
VUZP/VUZPQ:25:result_uint64x1 [] = { 3333333333333333, }
-VUZP/VUZPQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, }
-VUZP/VUZPQ:27:result_int8x16 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, fffffff8, fffffff9, fffffffa, fffffffb, fffffffc, fffffffd, fffffffe, ffffffff, 11, }
-VUZP/VUZPQ:28:result_int16x8 [] = { fffffff1, fffffff2, fffffff3, fffffff4, fffffff5, fffffff6, fffffff7, 22, }
-VUZP/VUZPQ:29:result_int32x4 [] = { fffffff1, fffffff2, fffffff3, 33, }
+VUZP/VUZPQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, }
+VUZP/VUZPQ:27:result_int8x16 [] = { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, }
+VUZP/VUZPQ:28:result_int16x8 [] = { 22, 22, 22, 22, 22, 22, 22, 22, }
+VUZP/VUZPQ:29:result_int32x4 [] = { 33, 33, 33, 33, }
VUZP/VUZPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VUZP/VUZPQ:31:result_uint8x16 [] = { f1, f2, f3, f4, f5, f6, f7, f8, f9, fa, fb, fc, fd, fe, ff, 55, }
-VUZP/VUZPQ:32:result_uint16x8 [] = { fff1, fff2, fff3, fff4, fff5, fff6, fff7, 66, }
-VUZP/VUZPQ:33:result_uint32x4 [] = { fffffff1, fffffff2, fffffff3, 77, }
+VUZP/VUZPQ:31:result_uint8x16 [] = { 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, }
+VUZP/VUZPQ:32:result_uint16x8 [] = { 66, 66, 66, 66, 66, 66, 66, 66, }
+VUZP/VUZPQ:33:result_uint32x4 [] = { 77, 77, 77, 77, }
VUZP/VUZPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
-VUZP/VUZPQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1600000 -0x1.c000000p+3 -14, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, }
+VUZP/VUZPQ:35:result_float32x4 [] = { 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, }
VZIP/VZIPQ chunk 0 output:
VZIP/VZIPQ:0:result_int8x8 [] = { fffffff0, fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, }
@@ -2911,24 +2911,24 @@ VZIP/VZIPQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
VZIP/VZIPQ:17:result_float32x4 [] = { c1800000 -0x1.0000000p+4 -16, c1600000 -0x1.c000000p+3 -14, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, }
VZIP/VZIPQ chunk 1 output:
-VZIP/VZIPQ:18:result_int8x8 [] = { fffffff4, 11, 11, fffffff1, fffffff5, 11, 11, fffffff2, }
-VZIP/VZIPQ:19:result_int16x4 [] = { fffffff2, 22, 22, fffffff1, }
-VZIP/VZIPQ:20:result_int32x2 [] = { fffffff1, 33, }
+VZIP/VZIPQ:18:result_int8x8 [] = { fffffff2, fffffff6, 11, 11, fffffff3, fffffff7, 11, 11, }
+VZIP/VZIPQ:19:result_int16x4 [] = { fffffff1, fffffff3, 22, 22, }
+VZIP/VZIPQ:20:result_int32x2 [] = { 33, 33, }
VZIP/VZIPQ:21:result_int64x1 [] = { 3333333333333333, }
-VZIP/VZIPQ:22:result_uint8x8 [] = { f4, 55, 55, f1, f5, 55, 55, f2, }
-VZIP/VZIPQ:23:result_uint16x4 [] = { fff2, 66, 66, fff1, }
-VZIP/VZIPQ:24:result_uint32x2 [] = { fffffff1, 77, }
+VZIP/VZIPQ:22:result_uint8x8 [] = { f2, f6, 55, 55, f3, f7, 55, 55, }
+VZIP/VZIPQ:23:result_uint16x4 [] = { fff1, fff3, 66, 66, }
+VZIP/VZIPQ:24:result_uint32x2 [] = { 77, 77, }
VZIP/VZIPQ:25:result_uint64x1 [] = { 3333333333333333, }
-VZIP/VZIPQ:26:result_float32x2 [] = { c1700000 -0x1.e000000p+3 -15, 42066666 0x1.0ccccc0p+5 33.6, }
-VZIP/VZIPQ:27:result_int8x16 [] = { fffffff8, 11, 11, fffffff1, fffffff9, 11, 11, fffffff2, fffffffa, 11, 11, fffffff3, fffffffb, 11, 11, fffffff4, }
-VZIP/VZIPQ:28:result_int16x8 [] = { fffffff4, 22, 22, fffffff1, fffffff5, 22, 22, fffffff2, }
-VZIP/VZIPQ:29:result_int32x4 [] = { fffffff2, 33, 33, fffffff1, }
+VZIP/VZIPQ:26:result_float32x2 [] = { 42066666 0x1.0ccccc0p+5 33.6, 42066666 0x1.0ccccc0p+5 33.6, }
+VZIP/VZIPQ:27:result_int8x16 [] = { fffffff4, fffffffc, 11, 11, fffffff5, fffffffd, 11, 11, fffffff6, fffffffe, 11, 11, fffffff7, ffffffff, 11, 11, }
+VZIP/VZIPQ:28:result_int16x8 [] = { fffffff2, fffffff6, 22, 22, fffffff3, fffffff7, 22, 22, }
+VZIP/VZIPQ:29:result_int32x4 [] = { fffffff1, fffffff3, 33, 33, }
VZIP/VZIPQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
-VZIP/VZIPQ:31:result_uint8x16 [] = { f8, 55, 55, f1, f9, 55, 55, f2, fa, 55, 55, f3, fb, 55, 55, f4, }
-VZIP/VZIPQ:32:result_uint16x8 [] = { fff4, 66, 66, fff1, fff5, 66, 66, fff2, }
-VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff2, 77, 77, fffffff1, }
+VZIP/VZIPQ:31:result_uint8x16 [] = { f4, fc, 55, 55, f5, fd, 55, 55, f6, fe, 55, 55, f7, ff, 55, 55, }
+VZIP/VZIPQ:32:result_uint16x8 [] = { fff2, fff6, 66, 66, fff3, fff7, 66, 66, }
+VZIP/VZIPQ:33:result_uint32x4 [] = { fffffff1, fffffff3, 77, 77, }
VZIP/VZIPQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
-VZIP/VZIPQ:35:result_float32x4 [] = { c1600000 -0x1.c000000p+3 -14, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, c1700000 -0x1.e000000p+3 -15, }
+VZIP/VZIPQ:35:result_float32x4 [] = { c1700000 -0x1.e000000p+3 -15, c1500000 -0x1.a000000p+3 -13, 42073333 0x1.0e66660p+5 33.8, 42073333 0x1.0e66660p+5 33.8, }
VREINTERPRET/VREINTERPRETQ output:
VREINTERPRET/VREINTERPRETQ:0:result_int8x8 [] = { fffffff0, ffffffff, fffffff1, ffffffff, fffffff2, ffffffff, fffffff3, ffffffff, }
@@ -5390,6 +5390,26 @@ VCLZ/VCLZQ:15:result_uint32x4 [] = { 1f, 1f, 1f, 1f, }
VCLZ/VCLZQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
VCLZ/VCLZQ:17:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, }
+VCLZ/VCLZQ (input=0) output:
+VCLZ/VCLZQ:18:result_int8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, }
+VCLZ/VCLZQ:19:result_int16x4 [] = { 10, 10, 10, 10, }
+VCLZ/VCLZQ:20:result_int32x2 [] = { 20, 20, }
+VCLZ/VCLZQ:21:result_int64x1 [] = { 3333333333333333, }
+VCLZ/VCLZQ:22:result_uint8x8 [] = { 8, 8, 8, 8, 8, 8, 8, 8, }
+VCLZ/VCLZQ:23:result_uint16x4 [] = { 10, 10, 10, 10, }
+VCLZ/VCLZQ:24:result_uint32x2 [] = { 20, 20, }
+VCLZ/VCLZQ:25:result_uint64x1 [] = { 3333333333333333, }
+VCLZ/VCLZQ:26:result_float32x2 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, }
+VCLZ/VCLZQ:27:result_int8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }
+VCLZ/VCLZQ:28:result_int16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, }
+VCLZ/VCLZQ:29:result_int32x4 [] = { 20, 20, 20, 20, }
+VCLZ/VCLZQ:30:result_int64x2 [] = { 3333333333333333, 3333333333333333, }
+VCLZ/VCLZQ:31:result_uint8x16 [] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }
+VCLZ/VCLZQ:32:result_uint16x8 [] = { 10, 10, 10, 10, 10, 10, 10, 10, }
+VCLZ/VCLZQ:33:result_uint32x4 [] = { 20, 20, 20, 20, }
+VCLZ/VCLZQ:34:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
+VCLZ/VCLZQ:35:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, }
+
VCLS/VCLSQ (positive input) output:
VCLS/VCLSQ:0:result_int8x8 [] = { 6, 6, 6, 6, 6, 6, 6, 6, }
VCLS/VCLSQ:1:result_int16x4 [] = { 2, 2, 2, 2, }
@@ -5410,7 +5430,7 @@ VCLS/VCLSQ:15:result_uint32x4 [] = { 33333333, 33333333, 33333333, 33333333, }
VCLS/VCLSQ:16:result_uint64x2 [] = { 3333333333333333, 3333333333333333, }
VCLS/VCLSQ:17:result_float32x4 [] = { 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, 33333333 0x1.6666660p-25 4.172325e-08, }
-VCLS/VCLSQ (positive input) output:
+VCLS/VCLSQ (negative input) output:
VCLS/VCLSQ:18:result_int8x8 [] = { 7, 7, 7, 7, 7, 7, 7, 7, }
VCLS/VCLSQ:19:result_int16x4 [] = { 1, 1, 1, 1, }
VCLS/VCLSQ:20:result_int32x2 [] = { 1, 1, }
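
The new VCLZ/VCLZQ (input=0) block above records that counting leading zeros of zero yields the full element width, printed in hex (8, 10, 20). A standalone sanity check of that property, independent of the harness:

#include <arm_neon.h>
#include <assert.h>

int main(void) {
  /* vclz of an all-zero vector returns the element width in every lane. */
  assert(vget_lane_s8(vclz_s8(vdup_n_s8(0)), 0) == 8);      /*  8 ->  8   */
  assert(vget_lane_s16(vclz_s16(vdup_n_s16(0)), 0) == 16);  /* 16 -> 0x10 */
  assert(vget_lane_s32(vclz_s32(vdup_n_s32(0)), 0) == 32);  /* 32 -> 0x20 */
  return 0;
}
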
diff --git a/ref_vdup.c b/ref_vdup.c
index 63eda9f..01d580a 100644
--- a/ref_vdup.c
+++ b/ref_vdup.c
@@ -40,13 +40,13 @@ void exec_vdup (void)
#undef TEST_VDUP
#define TEST_VDUP(Q, T1, T2, W, N) \
VECT_VAR(vector, T1, W, N) = \
- vdup##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \
+ vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
/* Basic test: vec=vmov(x), then store the result. */
#define TEST_VMOV(Q, T1, T2, W, N) \
VECT_VAR(vector, T1, W, N) = \
- vmov##Q##_n_##T2##W(VECT_VAR(buffer, T1, W, N)[i]); \
+ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector, T1, W, N))
/* With ARM RVCT, we need to declare variables before any executable
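
As a sketch, one instantiation of the updated TEST_VDUP now reads from the dup buffer; the expanded names below follow the suite's VECT_VAR convention and are assumed, not taken from the patch:

#include <arm_neon.h>
#include <stdint.h>

extern int32_t buffer_dup_int32x2[4];   /* forced to 4 entries, see above */
int32_t result_int32x2[2];

void sketch_test_vdup_s32(int i) {
  /* TEST_VDUP(, int, s, 32, 2) after the change: */
  int32x2_t vector_int32x2 = vdup_n_s32(buffer_dup_int32x2[i]);
  vst1_s32(result_int32x2, vector_int32x2);
}
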
diff --git a/ref_vld1_dup.c b/ref_vld1_dup.c
index 2115f96..5134da1 100644
--- a/ref_vld1_dup.c
+++ b/ref_vld1_dup.c
@@ -50,10 +50,10 @@ void exec_vld1_dup (void)
for (i=0; i<3; i++) {
clean_results ();
- TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer);
+ TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup);
- TEST_VLD1_DUP(vector, buffer, , float, f, 32, 2);
- TEST_VLD1_DUP(vector, buffer, q, float, f, 32, 4);
+ TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2);
+ TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4);
dump_results_hex (TEST_MSG);
}
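
The dup buffers matter here because the loop above runs i = 0..2 and each iteration loads the single element at index i; the int64x1 case shows why a 1-entry buffer would overflow (expanded names assumed, as before):

#include <arm_neon.h>
#include <stdint.h>

extern int64_t buffer_dup_int64x1[4];   /* 1 entry would overflow at i = 1 */
int64_t result_int64x1[1];

void sketch_test_vld1_dup_s64(int i) {
  /* Load buffer_dup_int64x1[i] and replicate it into the single lane. */
  int64x1_t vector_int64x1 = vld1_dup_s64(&buffer_dup_int64x1[i]);
  vst1_s64(result_int64x1, vector_int64x1);
}
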
diff --git a/ref_vldX.c b/ref_vldX.c
index 09830c5..ef7209e 100644
--- a/ref_vldX.c
+++ b/ref_vldX.c
@@ -46,7 +46,8 @@ void exec_vldX (void)
result. */
#define TEST_VLDX(Q, T1, T2, W, N, X) \
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
- vld##X##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
+ /* Use the dedicated init buffer of size X. */ \
+ vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \
vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
VECT_ARRAY_VAR(vector, T1, W, N, X)); \
memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
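
One instantiation of the updated TEST_VLDX, with assumed expanded names; the dedicated buffer holds exactly X slices of N elements, so vld2 reads nothing beyond it:

#include <arm_neon.h>
#include <stdint.h>
#include <string.h>

extern int16_t buffer_vld2_int16x4x2[4 * 2];   /* 2 slices of 4 elements */
int16_t result_bis_2_int16x4[4 * 2];
int16_t result_int16x4[4];

void sketch_test_vld2_s16(void) {
  /* TEST_VLDX(, int, s, 16, 4, 2) after the change: */
  int16x4x2_t vector_int16x4x2 = vld2_s16(buffer_vld2_int16x4x2);
  vst2_s16(result_bis_2_int16x4, vector_int16x4x2);
  memcpy(result_int16x4, result_bis_2_int16x4, sizeof(result_int16x4));
}
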
diff --git a/ref_vldX_dup.c b/ref_vldX_dup.c
index 5f9b1eb..1b8a3e8 100644
--- a/ref_vldX_dup.c
+++ b/ref_vldX_dup.c
@@ -47,7 +47,8 @@ void exec_vldX_dup (void)
/* Fill vector with buffer item #i */
#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
- vld##X##Q##_dup_##T2##W(VECT_VAR(buffer, T1, W, N)); \
+ /* Use the dedicated init buffer of size X. */ \
+ vld##X##Q##_dup_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X)); \
\
vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
VECT_ARRAY_VAR(vector, T1, W, N, X)); \
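
Note that the dup variant reuses the vldX_lane buffers rather than getting its own: vldX_dup reads exactly X consecutive elements, which is precisely the size of buffer_vldX_lane. A sketch with assumed expanded names:

#include <arm_neon.h>
#include <stdint.h>

extern int32_t buffer_vld2_lane_int32x2[2];  /* vld2_dup reads exactly 2 elements */

int32x2x2_t sketch_test_vld2_dup_s32(void) {
  /* Each loaded element is replicated across one of the two result vectors. */
  return vld2_dup_s32(buffer_vld2_lane_int32x2);
}
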
diff --git a/ref_vldX_lane.c b/ref_vldX_lane.c
index 0b86971..7ad3d31 100644
--- a/ref_vldX_lane.c
+++ b/ref_vldX_lane.c
@@ -53,7 +53,8 @@ void exec_vldX_lane (void)
vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
\
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
- vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \
+ /* Use the dedicated init buffer of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
L); \
vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
@@ -85,7 +86,13 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(float, 32, 2, X); \
DECL_VLDX_LANE(float, 32, 4, X)
-#define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L]
+ /* Add some padding to try to catch out-of-bounds accesses. */
+ /* Use an array instead of a plain char to comply with RVCT
+    constraints. */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+ ARRAY1(V##_pad,T,W,N)
/* Use the same lanes regardless of the size of the array (X), for
simplicity */
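
Putting the two hunks together: the load now reads exactly X elements from the dedicated lane buffer, and DUMMY_ARRAY plants a sentinel array right after each scratch buffer (expanded names assumed, as before):

#include <arm_neon.h>
#include <stdint.h>

/* DUMMY_ARRAY(buffer_src, int, 16, 4, 2) roughly expands to: */
int16_t buffer_src_int16x4[4 * 2] = {0};
int16_t buffer_src_pad_int16x4[1] = {42};   /* sentinel; RVCT wants an array */

extern int16_t buffer_vld2_lane_int16x2[2];

int16x4x2_t sketch_test_vld2_lane_s16(int16x4x2_t vector_src_int16x4x2) {
  /* Read exactly 2 elements from the dedicated buffer into lane 2. */
  return vld2_lane_s16(buffer_vld2_lane_int16x2, vector_src_int16x4x2, 2);
}
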
diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c
index e0c3ce7..2d15d34 100644
--- a/ref_vstX_lane.c
+++ b/ref_vstX_lane.c
@@ -55,7 +55,8 @@ void exec_vstX_lane (void)
vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
\
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
- vld##X##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \
+ /* Use the dedicated init buffer of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
L); \
vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h
index 2d38742..dc53ef3 100644
--- a/stm-arm-neon-ref.h
+++ b/stm-arm-neon-ref.h
@@ -62,10 +62,17 @@ static int32_t _ptrInf[]={0x7f800000L};
#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
-#define VECT_VAR_DECL_INIT(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N) INIT_TAB(T##W##_t)
+
+/* This one is used for padding between input buffers. */
+#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42;
+
+/* Array declarations. */
#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
+#define ARRAY4(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[4]
+/* Arrays of vectors. */
#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
+#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
static int result_idx = 0;
#define DUMP(MSG,T,W,N,FMT) \
@@ -123,37 +130,142 @@ static int result_idx = 0;
extern FILE* log_file;
extern FILE* ref_file;
-/* Sample initialization vectors. For simplicity, use the same one for
- each vector size (it's not a problem if it's too large), and have
- it large enough for the vld4 input samples. */
-#define INIT_TAB(T) [] = { \
- (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1, \
- (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15, \
- (T)16, (T)17,(T)18,(T)19,(T)20, (T)21, (T)22, (T)23, (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31, \
- (T)32, (T)33,(T)34,(T)35,(T)36, (T)37, (T)38, (T)39, (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47, \
- }
-
-/* Input buffers, 1 of each size */
-static VECT_VAR_DECL_INIT(buffer, int, 8, 8);
-static VECT_VAR_DECL_INIT(buffer, int, 16, 4);
-static VECT_VAR_DECL_INIT(buffer, int, 32, 2);
-static VECT_VAR_DECL_INIT(buffer, int, 64, 1);
-static VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
-static VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
-static VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
-static VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
-static VECT_VAR_DECL_INIT(buffer, float, 32, 2);
-static VECT_VAR_DECL_INIT(buffer, int, 8, 16);
-static VECT_VAR_DECL_INIT(buffer, int, 16, 8);
-static VECT_VAR_DECL_INIT(buffer, int, 32, 4);
-static VECT_VAR_DECL_INIT(buffer, int, 64, 2);
-static VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
-static VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
-static VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
-static VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
-static VECT_VAR_DECL_INIT(buffer, float, 32, 4);
-
-/* Output buffers, 1 of each size */
+/* Input buffers, one of each size */
+extern ARRAY(buffer, int, 8, 8);
+extern ARRAY(buffer, int, 16, 4);
+extern ARRAY(buffer, int, 32, 2);
+extern ARRAY(buffer, int, 64, 1);
+extern ARRAY(buffer, uint, 8, 8);
+extern ARRAY(buffer, uint, 16, 4);
+extern ARRAY(buffer, uint, 32, 2);
+extern ARRAY(buffer, uint, 64, 1);
+extern ARRAY(buffer, float, 32, 2);
+extern ARRAY(buffer, int, 8, 16);
+extern ARRAY(buffer, int, 16, 8);
+extern ARRAY(buffer, int, 32, 4);
+extern ARRAY(buffer, int, 64, 2);
+extern ARRAY(buffer, uint, 8, 16);
+extern ARRAY(buffer, uint, 16, 8);
+extern ARRAY(buffer, uint, 32, 4);
+extern ARRAY(buffer, uint, 64, 2);
+extern ARRAY(buffer, float, 32, 4);
+
+/* The tests for vld1_dup and vdup expect at least 4 entries in the
+   input buffer, so force the 1- and 2-element initializers to have 4
+   entries. */
+extern ARRAY(buffer_dup, int, 8, 8);
+extern ARRAY(buffer_dup, int, 16, 4);
+extern ARRAY4(buffer_dup, int, 32, 2);
+extern ARRAY4(buffer_dup, int, 64, 1);
+extern ARRAY(buffer_dup, uint, 8, 8);
+extern ARRAY(buffer_dup, uint, 16, 4);
+extern ARRAY4(buffer_dup, uint, 32, 2);
+extern ARRAY4(buffer_dup, uint, 64, 1);
+extern ARRAY4(buffer_dup, float, 32, 2);
+extern ARRAY(buffer_dup, int, 8, 16);
+extern ARRAY(buffer_dup, int, 16, 8);
+extern ARRAY(buffer_dup, int, 32, 4);
+extern ARRAY4(buffer_dup, int, 64, 2);
+extern ARRAY(buffer_dup, uint, 8, 16);
+extern ARRAY(buffer_dup, uint, 16, 8);
+extern ARRAY(buffer_dup, uint, 32, 4);
+extern ARRAY4(buffer_dup, uint, 64, 2);
+extern ARRAY(buffer_dup, float, 32, 4);
+
+/* Input buffers for vld2, one of each size */
+extern VECT_ARRAY(buffer_vld2, int, 8, 8, 2);
+extern VECT_ARRAY(buffer_vld2, int, 16, 4, 2);
+extern VECT_ARRAY(buffer_vld2, int, 32, 2, 2);
+extern VECT_ARRAY(buffer_vld2, int, 64, 1, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 8, 8, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 16, 4, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 32, 2, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2);
+extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2);
+extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2);
+extern VECT_ARRAY(buffer_vld2, int, 16, 8, 2);
+extern VECT_ARRAY(buffer_vld2, int, 32, 4, 2);
+extern VECT_ARRAY(buffer_vld2, int, 64, 2, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 8, 16, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 16, 8, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 32, 4, 2);
+extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2);
+extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2);
+
+/* Input buffers for vld3, one of each size */
+extern VECT_ARRAY(buffer_vld3, int, 8, 8, 3);
+extern VECT_ARRAY(buffer_vld3, int, 16, 4, 3);
+extern VECT_ARRAY(buffer_vld3, int, 32, 2, 3);
+extern VECT_ARRAY(buffer_vld3, int, 64, 1, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 8, 8, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 16, 4, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 32, 2, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3);
+extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3);
+extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3);
+extern VECT_ARRAY(buffer_vld3, int, 16, 8, 3);
+extern VECT_ARRAY(buffer_vld3, int, 32, 4, 3);
+extern VECT_ARRAY(buffer_vld3, int, 64, 2, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 8, 16, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 16, 8, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 32, 4, 3);
+extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3);
+extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3);
+
+/* Input buffers for vld4, one of each size */
+extern VECT_ARRAY(buffer_vld4, int, 8, 8, 4);
+extern VECT_ARRAY(buffer_vld4, int, 16, 4, 4);
+extern VECT_ARRAY(buffer_vld4, int, 32, 2, 4);
+extern VECT_ARRAY(buffer_vld4, int, 64, 1, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 8, 8, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 16, 4, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 32, 2, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4);
+extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4);
+extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4);
+extern VECT_ARRAY(buffer_vld4, int, 16, 8, 4);
+extern VECT_ARRAY(buffer_vld4, int, 32, 4, 4);
+extern VECT_ARRAY(buffer_vld4, int, 64, 2, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 8, 16, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 16, 8, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 32, 4, 4);
+extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4);
+extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4);
+
+/* Input buffers for vld2_lane */
+extern VECT_VAR_DECL(buffer_vld2_lane, int, 8, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, int, 16, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, int, 32, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, int, 64, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, uint, 8, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, uint, 16, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, uint, 32, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2];
+extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2];
+
+/* Input buffers for vld3_lane */
+extern VECT_VAR_DECL(buffer_vld3_lane, int, 8, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, int, 16, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, int, 32, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, int, 64, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, uint, 8, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, uint, 16, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, uint, 32, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3];
+extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3];
+
+/* Input buffers for vld4_lane */
+extern VECT_VAR_DECL(buffer_vld4_lane, int, 8, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, int, 16, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, int, 32, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, int, 64, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, uint, 8, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, uint, 16, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, uint, 32, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4];
+extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4];
+
+/* Output buffers, one of each size */
static ARRAY(result, int, 8, 8);
static ARRAY(result, int, 16, 4);
static ARRAY(result, int, 32, 2);