From c1cc7826d74587e0dc1c855810633a219b161ab3 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Tue, 20 Jan 2015 16:04:24 +0100 Subject: __set_neon_cumulative_sat() modifies the contents on the QC flag, and some intrinsics do so too: this patch adds the explicit dependency on the asm statement, to avoid code reordering or removal. When writing QC, the asm statement now has a fake input dependency, which is the output of the intrinsic being tested. Modifying the __set_neon_cumulative_sat macro is necessary, to be able to accept all the possible input types. Update the generic code in ref_v_binary_sat_op.c and ref_v_unary_sat_op.c accordingly, as well as all the tests involving QC. --- ref_v_binary_sat_op.c | 2 +- ref_v_unary_sat_op.c | 2 +- ref_vqdmlal.c | 2 +- ref_vqdmlal_lane.c | 2 +- ref_vqdmlal_n.c | 2 +- ref_vqdmulh.c | 2 +- ref_vqdmulh_lane.c | 2 +- ref_vqdmulh_n.c | 2 +- ref_vqdmull.c | 2 +- ref_vqdmull_lane.c | 2 +- ref_vqdmull_n.c | 2 +- ref_vqmovn.c | 2 +- ref_vqmovun.c | 2 +- ref_vqrdmulh.c | 16 ++++++++-------- ref_vqrdmulh_lane.c | 2 +- ref_vqrdmulh_n.c | 16 ++++++++-------- ref_vqrshl.c | 2 +- ref_vqrshrn_n.c | 2 +- ref_vqrshrun_n.c | 16 ++++++++-------- ref_vqshl.c | 18 +++++++++--------- ref_vqshl_n.c | 2 +- ref_vqshlu_n.c | 18 +++++++++--------- ref_vqshrn_n.c | 2 +- ref_vqshrun_n.c | 18 +++++++++--------- stm-arm-neon-ref.h | 37 +++++++++++++++++++++---------------- 25 files changed, 90 insertions(+), 85 deletions(-) diff --git a/ref_v_binary_sat_op.c b/ref_v_binary_sat_op.c index 71af870..532da16 100644 --- a/ref_v_binary_sat_op.c +++ b/ref_v_binary_sat_op.c @@ -41,7 +41,7 @@ FNNAME (INSN_NAME) /* vector_res = OP(vector1,vector2), then store the result. */ #define TEST_BINARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ diff --git a/ref_v_unary_sat_op.c b/ref_v_unary_sat_op.c index 7e6673e..b9fea48 100644 --- a/ref_v_unary_sat_op.c +++ b/ref_v_unary_sat_op.c @@ -40,7 +40,7 @@ FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ #define TEST_UNARY_SAT_OP1(INSN, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ diff --git a/ref_vqdmlal.c b/ref_vqdmlal.c index d51d568..59c3672 100644 --- a/ref_vqdmlal.c +++ b/ref_vqdmlal.c @@ -45,7 +45,7 @@ FNNAME (INSN_NAME) /* vector_res = OP(vector, vector3, vector4), then store the result. */ #define TEST_VQDMLXL1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector3, T1, W2, N), \ diff --git a/ref_vqdmlal_lane.c b/ref_vqdmlal_lane.c index 53073d8..e7d42f7 100644 --- a/ref_vqdmlal_lane.c +++ b/ref_vqdmlal_lane.c @@ -44,7 +44,7 @@ FNNAME (INSN_NAME) /* vector_res = vqdmlxl_lane(vector, vector3, vector4, lane), then store the result. 
*/ #define TEST_VQDMLXL_LANE1(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector3, T1, W2, N), \ diff --git a/ref_vqdmlal_n.c b/ref_vqdmlal_n.c index 318a4ea..86f1e30 100644 --- a/ref_vqdmlal_n.c +++ b/ref_vqdmlal_n.c @@ -44,7 +44,7 @@ FNNAME (INSN_NAME) /* vector_res = vqdmlxl_n(vector, vector3, val), then store the result. */ #define TEST_VQDMLXL_N1(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##_##T2##W2(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector3, T1, W2, N), \ diff --git a/ref_vqdmulh.c b/ref_vqdmulh.c index f78b649..59ff820 100644 --- a/ref_vqdmulh.c +++ b/ref_vqdmulh.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* vector_res = vqdmulh(vector,vector2,lane), then store the result. */ #define TEST_VQDMULH2(INSN, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ diff --git a/ref_vqdmulh_lane.c b/ref_vqdmulh_lane.c index 93db623..ae52667 100644 --- a/ref_vqdmulh_lane.c +++ b/ref_vqdmulh_lane.c @@ -40,7 +40,7 @@ FNNAME (INSN) { /* vector_res = vqdmulh_lane(vector,vector2,lane), then store the result. */ #define TEST_VQDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N2), \ diff --git a/ref_vqdmulh_n.c b/ref_vqdmulh_n.c index 60716f7..376259e 100644 --- a/ref_vqdmulh_n.c +++ b/ref_vqdmulh_n.c @@ -42,7 +42,7 @@ FNNAME (INSN) /* vector_res = vqdmulh_n(vector,val), then store the result. */ #define TEST_VQDMULH_N2(INSN, Q, T1, T2, W, N, L) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ L); \ diff --git a/ref_vqdmull.c b/ref_vqdmull.c index f97a6c8..478181d 100644 --- a/ref_vqdmull.c +++ b/ref_vqdmull.c @@ -40,7 +40,7 @@ FNNAME (INSN) { /* Basic test: y=vqdmull(x,x), then store the result. */ #define TEST_VQDMULL2(INSN, T1, T2, W, W2, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N)); \ diff --git a/ref_vqdmull_lane.c b/ref_vqdmull_lane.c index b2ee183..bf92c6b 100644 --- a/ref_vqdmull_lane.c +++ b/ref_vqdmull_lane.c @@ -42,7 +42,7 @@ FNNAME (INSN) /* vector_res = vqdmull_lane(vector,vector2,lane), then store the result. */ #define TEST_VQDMULL_LANE2(INSN, T1, T2, W, W2, N, L) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N), \ diff --git a/ref_vqdmull_n.c b/ref_vqdmull_n.c index 92b1e48..7a482b2 100644 --- a/ref_vqdmull_n.c +++ b/ref_vqdmull_n.c @@ -42,7 +42,7 @@ FNNAME (INSN) /* vector_res = vqdmull_n(vector,val), then store the result. 
*/ #define TEST_VQDMULL_N2(INSN, T1, T2, W, W2, N, L) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ L); \ diff --git a/ref_vqmovn.c b/ref_vqmovn.c index 87e119c..0f7c933 100644 --- a/ref_vqmovn.c +++ b/ref_vqmovn.c @@ -41,7 +41,7 @@ FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ #define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ diff --git a/ref_vqmovun.c b/ref_vqmovun.c index a898d77..5582cea 100644 --- a/ref_vqmovun.c +++ b/ref_vqmovun.c @@ -41,7 +41,7 @@ FNNAME (INSN_NAME) { /* Basic test: y=OP(x), then store the result. */ #define TEST_UNARY_OP1(INSN, T1, T2, W, W2, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##_s##W2(VECT_VAR(vector, int, W2, N)); \ vst1##_##T2##W(VECT_VAR(result, T1, W, N), \ diff --git a/ref_vqrdmulh.c b/ref_vqrdmulh.c index 37193b7..f499b47 100644 --- a/ref_vqrdmulh.c +++ b/ref_vqrdmulh.c @@ -40,14 +40,14 @@ THE SOFTWARE. FNNAME (INSN) { /* vector_res = vqrdmulh(vector,vector2), then store the result. */ -#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector2, T1, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ +#define TEST_VQRDMULH2(INSN, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ diff --git a/ref_vqrdmulh_lane.c b/ref_vqrdmulh_lane.c index 807f5c2..b2c37db 100644 --- a/ref_vqrdmulh_lane.c +++ b/ref_vqrdmulh_lane.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* vector_res = vqrdmulh_lane(vector,vector2,lane), then store the result. */ #define TEST_VQRDMULH_LANE2(INSN, Q, T1, T2, W, N, N2, L) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector2, T1, W, N2), \ diff --git a/ref_vqrdmulh_n.c b/ref_vqrdmulh_n.c index 92b79b6..01f0e0b 100644 --- a/ref_vqrdmulh_n.c +++ b/ref_vqrdmulh_n.c @@ -42,14 +42,14 @@ FNNAME (INSN) int i; /* vector_res = vqrdmulh_n(vector,val), then store the result. 
*/ -#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ - L); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ +#define TEST_VQRDMULH_N2(INSN, Q, T1, T2, W, N, L) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + L); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ diff --git a/ref_vqrshl.c b/ref_vqrshl.c index 5028bf9..e4a33e5 100644 --- a/ref_vqrshl.c +++ b/ref_vqrshl.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* Basic test: v3=vqrshl(v1,v2), then store the result. */ #define TEST_VQRSHL2(INSN, T3, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ VECT_VAR(vector_shift, T3, W, N)); \ diff --git a/ref_vqrshrn_n.c b/ref_vqrshrn_n.c index 2126d3a..34bf082 100644 --- a/ref_vqrshrn_n.c +++ b/ref_vqrshrn_n.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* Basic test: y=vqrshrn_n(x,v), then store the result. */ #define TEST_VQRSHRN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ diff --git a/ref_vqrshrun_n.c b/ref_vqrshrun_n.c index 3ef1322..53d11f5 100644 --- a/ref_vqrshrun_n.c +++ b/ref_vqrshrun_n.c @@ -40,14 +40,14 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqrshrun_n(x,v), then store the result. */ -#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, uint, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_u##W2(VECT_VAR(result, uint, W2, N), \ - VECT_VAR(vector_res, uint, W2, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ +#define TEST_VQRSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ diff --git a/ref_vqshl.c b/ref_vqshl.c index 84ca9a0..a9d29d7 100644 --- a/ref_vqshl.c +++ b/ref_vqshl.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: v3=vqshl(v1,v2), then store the result. 
*/ -#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, T1, W, N) = \ - INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ - VECT_VAR(vector_shift, T3, W, N)); \ - vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ - VECT_VAR(vector_res, T1, W, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQSHL2(INSN, T3, Q, T1, T2, W, N) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector_shift, T3, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ + VECT_VAR(vector_res, T1, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHL1(INSN, T3, Q, T1, T2, W, N) \ diff --git a/ref_vqshl_n.c b/ref_vqshl_n.c index 263e661..3ee26b5 100644 --- a/ref_vqshl_n.c +++ b/ref_vqshl_n.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* Basic test: v2=vqshl_n(v1,v), then store the result. */ #define TEST_VQSHL_N2(INSN, Q, T1, T2, W, N, V) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W, N)); \ VECT_VAR(vector_res, T1, W, N) = \ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c index b72261c..27d53de 100644 --- a/ref_vqshlu_n.c +++ b/ref_vqshlu_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: v2=vqshlu_n(v1,v), then store the result. */ -#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, T3, W, N) = \ - INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ - VECT_VAR(vector_res, T3, W, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \ + VECT_VAR(vector_res, T3, W, N) = \ + INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \ + VECT_VAR(vector_res, T3, W, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \ diff --git a/ref_vqshrn_n.c b/ref_vqshrn_n.c index 9e61ccb..96b8d61 100644 --- a/ref_vqshrn_n.c +++ b/ref_vqshrn_n.c @@ -41,7 +41,7 @@ FNNAME (INSN) { /* Basic test: y=vqshrn_n(x,v), then store the result. */ #define TEST_VQSHRN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T1, W2, N)); \ VECT_VAR(vector_res, T1, W2, N) = \ INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ V); \ diff --git a/ref_vqshrun_n.c b/ref_vqshrun_n.c index c2a2e15..871da96 100644 --- a/ref_vqshrun_n.c +++ b/ref_vqshrun_n.c @@ -40,15 +40,15 @@ THE SOFTWARE. FNNAME (INSN) { /* Basic test: y=vqshrun_n(x,v), then store the result. 
*/ -#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ - Set_Neon_Cumulative_Sat(0); \ - VECT_VAR(vector_res, uint, W2, N) = \ - INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ - V); \ - vst1_u##W2(VECT_VAR(result, uint, W2, N), \ - VECT_VAR(vector_res, uint, W2, N)); \ - dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ - xSTR(T1), W, N) +#define TEST_VQSHRUN_N2(INSN, T1, T2, W, W2, N, V) \ + Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, uint, W2, N)); \ + VECT_VAR(vector_res, uint, W2, N) = \ + INSN##_##T2##W(VECT_VAR(vector, T1, W, N), \ + V); \ + vst1_u##W2(VECT_VAR(result, uint, W2, N), \ + VECT_VAR(vector_res, uint, W2, N)); \ + dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##_##T2##W), \ + xSTR(T1), W, N) /* Two auxliary macros are necessary to expand INSN */ #define TEST_VQSHRUN_N1(INSN, T1, T2, W, W2, N, V) \ diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h index 33677fd..f7c7cc6 100644 --- a/stm-arm-neon-ref.h +++ b/stm-arm-neon-ref.h @@ -576,11 +576,15 @@ typedef union { #ifdef __ARMCC_VERSION register _ARM_FPSCR _afpscr_for_qc __asm("fpscr"); # define Neon_Cumulative_Sat _afpscr_for_qc.b.QC -# define Set_Neon_Cumulative_Sat(x) {Neon_Cumulative_Sat = (x);} +# define Set_Neon_Cumulative_Sat(x, depend) {Neon_Cumulative_Sat = (x);} #else /* GCC/ARM does not know this register */ # define Neon_Cumulative_Sat __read_neon_cumulative_sat() -# define Set_Neon_Cumulative_Sat(x) __set_neon_cumulative_sat((x)) +/* We need a fake dependency to ensure correct ordering of asm + statements to preset the QC flag value, and Neon operators writing + to QC. */ +#define Set_Neon_Cumulative_Sat(x, depend) \ + __set_neon_cumulative_sat((x), (depend)) # if defined(__aarch64__) static volatile int __read_neon_cumulative_sat (void) { @@ -588,13 +592,14 @@ static volatile int __read_neon_cumulative_sat (void) { asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); return _afpscr_for_qc.b.QC; } -static void __set_neon_cumulative_sat (int x) { - _ARM_FPSCR _afpscr_for_qc; - asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); - _afpscr_for_qc.b.QC = x; - asm volatile ("msr fpsr,%0" : : "r" (_afpscr_for_qc)); - return; -} + +#define __set_neon_cumulative_sat(x, depend) { \ + _ARM_FPSCR _afpscr_for_qc; \ + asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc)); \ + _afpscr_for_qc.b.QC = x; \ + asm volatile ("msr fpsr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } + # else static volatile int __read_neon_cumulative_sat (void) { _ARM_FPSCR _afpscr_for_qc; @@ -602,13 +607,13 @@ static volatile int __read_neon_cumulative_sat (void) { return _afpscr_for_qc.b.QC; } -static void __set_neon_cumulative_sat (int x) { - _ARM_FPSCR _afpscr_for_qc; - asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); - _afpscr_for_qc.b.QC = x; - asm volatile ("vmsr fpscr,%0" : : "r" (_afpscr_for_qc)); - return; -} +#define __set_neon_cumulative_sat(x, depend) { \ + _ARM_FPSCR _afpscr_for_qc; \ + asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc)); \ + _afpscr_for_qc.b.QC = x; \ + asm volatile ("vmsr fpscr,%1" : "=X" (depend) : "r" (_afpscr_for_qc)); \ + } + # endif #endif -- cgit v1.2.3
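The core of the change is the fake-dependency trick in __set_neon_cumulative_sat(): the asm statement that presets the QC (cumulative saturation) bit now names the intrinsic's result variable as a dummy output operand, so the compiler can neither remove that write nor reorder it with the saturating intrinsic under test. The sketch below illustrates the idea in isolation; it is not part of the patch, the function name and the choice of vqadd_s8 are illustrative only, and it assumes GCC targeting 32-bit ARM with NEON (the AArch64 variant in the patch uses mrs/msr on fpsr instead of vmrs/vmsr on fpscr).

#include <arm_neon.h>

#define FPSCR_QC (1u << 27)	/* cumulative saturation (QC) bit of FPSCR */

/* Illustrative only: clear QC, then run a saturating add whose QC side
   effect is meant to be observed afterwards.  */
static int8x8_t preset_qc_then_vqadd (int8x8_t a, int8x8_t b)
{
  int8x8_t res;
  unsigned int fpscr;

  /* Read-modify-write FPSCR to clear the QC flag.  */
  asm volatile ("vmrs %0, fpscr" : "=r" (fpscr));
  fpscr &= ~FPSCR_QC;
  /* The "=X" dummy output is the fake dependency: this asm now appears
     to write 'res', so it must stay ordered before the intrinsic below,
     which also writes 'res' and may set QC.  Without it, the volatile
     asm could not be deleted, but it could still be scheduled after the
     vqadd, leaving QC preset too late.  */
  asm volatile ("vmsr fpscr, %1" : "=X" (res) : "r" (fpscr));

  res = vqadd_s8 (a, b);	/* saturates (and sets QC) on overflow */
  return res;
}

Turning __set_neon_cumulative_sat into a macro rather than keeping it a function is what lets the 'depend' argument take whatever vector type each test produces (int8x8_t, uint16x8_t, and so on), which is why every Set_Neon_Cumulative_Sat(0) call site in the tests gains the extra VECT_VAR(vector_res, ...) argument.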