diff options
-rw-r--r-- | compute_ref.axf | bin | 3554024 -> 3551484 bytes | |||
-rw-r--r-- | compute_ref_data.c | 26 | ||||
-rw-r--r-- | ref_vcombine.c | 8 | ||||
-rw-r--r-- | ref_vcreate.c | 8 | ||||
-rw-r--r-- | ref_vcvt.c | 14 | ||||
-rw-r--r-- | ref_vget_high.c | 6 | ||||
-rw-r--r-- | ref_vget_low.c | 6 | ||||
-rw-r--r-- | ref_vld1.c | 4 | ||||
-rw-r--r-- | ref_vld1_dup.c | 4 | ||||
-rw-r--r-- | ref_vld1_lane.c | 12 | ||||
-rw-r--r-- | ref_vldX.c | 26 | ||||
-rw-r--r-- | ref_vldX_dup.c | 26 | ||||
-rw-r--r-- | ref_vldX_lane.c | 28 | ||||
-rw-r--r-- | ref_vreinterpret.c | 8 | ||||
-rw-r--r-- | ref_vst1_lane.c | 6 | ||||
-rw-r--r-- | ref_vstX_lane.c | 28 | ||||
-rw-r--r-- | stm-arm-neon-ref.h | 40 |
17 files changed, 125 insertions, 125 deletions
diff --git a/compute_ref.axf b/compute_ref.axf Binary files differindex df8be34..e7035b9 100644 --- a/compute_ref.axf +++ b/compute_ref.axf diff --git a/compute_ref_data.c b/compute_ref_data.c index ef7a6e0..8dbf727 100644 --- a/compute_ref_data.c +++ b/compute_ref_data.c @@ -143,7 +143,7 @@ VECT_VAR_DECL_INIT(buffer, uint, 64, 1); PAD(buffer_pad, uint, 64, 1); VECT_VAR_DECL_INIT(buffer, float, 32, 2); PAD(buffer_pad, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) /* We need a different initialization for ARMCC, because the compiler performs the conversion to half-precision internal representation. */ @@ -177,7 +177,7 @@ VECT_VAR_DECL_INIT(buffer, poly, 16, 8); PAD(buffer_pad, poly, 16, 8); VECT_VAR_DECL_INIT(buffer, float, 32, 4); PAD(buffer_pad, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; #else @@ -214,7 +214,7 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4); VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2); VECT_VAR_DECL(buffer_dup_pad, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_dup_float16x4[4] = {-16, -15, -14, -13}; #else @@ -245,7 +245,7 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8); VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4); VECT_VAR_DECL(buffer_dup_pad, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_dup_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; #else @@ -280,7 +280,7 @@ VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4); PAD(buffer_vld2_pad, poly, 16, 4); VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2); PAD(buffer_vld2_pad, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld2_float16x4x2[4*2] = {-16, -15, -14, -13, -12, -11, -10, -9}; #else @@ -313,7 +313,7 @@ VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8); PAD(buffer_vld2_pad, poly, 16, 8); VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4); PAD(buffer_vld2_pad, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld2_float16x8x2[8*2] = {-16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1}; @@ -353,7 +353,7 @@ VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4); PAD(buffer_vld3_pad, poly, 16, 4); VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2); PAD(buffer_vld3_pad, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld3_float16x4x3[4*3] = {-16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5}; @@ -389,7 +389,7 @@ VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8); PAD(buffer_vld3_pad, poly, 16, 8); VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4); PAD(buffer_vld3_pad, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld3_float16x8x3[8*3] = {-16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, @@ -434,7 +434,7 @@ VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4); PAD(buffer_vld4_pad, poly, 16, 4); VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2); PAD(buffer_vld4_pad, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld4_float16x4x4[4*4] = {-16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1}; @@ -472,7 +472,7 @@ VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8); PAD(buffer_vld4_pad, poly, 16, 8); VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4); PAD(buffer_vld4_pad, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld4_float16x8x4[8*4] = {-16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, @@ -511,7 +511,7 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld2_lane_float16x2[2] = {-16, -15}; #else @@ -532,7 +532,7 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld3_lane_float16x3[3] = {-16, -15, -14}; #else @@ -554,7 +554,7 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #ifdef __ARMCC_VERSION __fp16 buffer_vld4_lane_float16x4[4] = {-16, -15, -14, -13}; #else diff --git a/ref_vcombine.c b/ref_vcombine.c index 6cca113..cfe5c1a 100644 --- a/ref_vcombine.c +++ b/ref_vcombine.c @@ -46,7 +46,7 @@ void exec_vcombine (void) DECL_VARIABLE_64BITS_VARIANTS(vector64_a); DECL_VARIABLE_64BITS_VARIANTS(vector64_b); DECL_VARIABLE_128BITS_VARIANTS(vector128); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector64_a, float, 16, 4); DECL_VARIABLE(vector64_b, float, 16, 4); DECL_VARIABLE(vector64_b_init, uint, 16, 4); @@ -55,7 +55,7 @@ void exec_vcombine (void) TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer); VLOAD(vector64_a, buffer, , float, f, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VLOAD(vector64_a, buffer, , float, f, 16, 4); #endif @@ -71,7 +71,7 @@ void exec_vcombine (void) VDUP(vector64_b, , poly, p, 16, 4, 0x66); VDUP(vector64_b, , float, f, 32, 2, 3.3f); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) /* There is no vdup_n_f16, so we need another initialization method. */ VDUP(vector64_b_init, , uint, u, 16, 4, 0x4b80 /* 15 */); @@ -92,7 +92,7 @@ void exec_vcombine (void) TEST_VCOMBINE(poly, p, 8, 8, 16); TEST_VCOMBINE(poly, p, 16, 4, 8); TEST_VCOMBINE(float, f, 32, 2, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VCOMBINE(float, f, 16, 4, 8); #endif diff --git a/ref_vcreate.c b/ref_vcreate.c index e11cceb..cfd50d7 100644 --- a/ref_vcreate.c +++ b/ref_vcreate.c @@ -63,7 +63,7 @@ FNNAME (INSN_NAME) DECL_VAL(val, uint, 64, 1); DECL_VAL(val, poly, 8, 8); DECL_VAL(val, poly, 16, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VAL(val, float, 16, 4); #endif @@ -72,7 +72,7 @@ FNNAME (INSN_NAME) DECL_VARIABLE(vector_res, int, 32, 2); DECL_VARIABLE(vector_res, int, 64, 1); DECL_VARIABLE(vector_res, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector_res, float, 16, 4); #endif DECL_VARIABLE(vector_res, uint, 8, 8); @@ -90,7 +90,7 @@ FNNAME (INSN_NAME) VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL; VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL; VECT_VAR(val, float, 32, 2) = 0x123456789abcdef0LL; -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VECT_VAR(val, float, 16, 4) = 0x123456789abcdef0LL; #endif VECT_VAR(val, uint, 8, 8) = 0x123456789abcdef0ULL; @@ -104,7 +104,7 @@ FNNAME (INSN_NAME) TEST_VCREATE(int, s, 16, 4); TEST_VCREATE(int, s, 32, 2); TEST_VCREATE(float, f, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VCREATE(float, f, 16, 4); #endif TEST_VCREATE(int, s, 64, 1); @@ -52,7 +52,7 @@ void exec_vcvt (void) VECT_VAR(vector_res, T1, W, N)); \ DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_VCVT_FP16(T1, T2, W1, W2, N) \ VECT_VAR(vector_res, T1, W1, N) = \ vcvt_##T2##W1##_##T2##W2(VECT_VAR(vector, T1, W2, N)); \ @@ -86,7 +86,7 @@ void exec_vcvt (void) statement */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector_res); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector_init, uint, 16, 4); DECL_VARIABLE(vector_init, uint, 16, 8); DECL_VARIABLE(vector, float, 16, 4); @@ -102,7 +102,7 @@ void exec_vcvt (void) TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); #endif @@ -114,7 +114,7 @@ void exec_vcvt (void) TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f); TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, 5.3f); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) /* FP16 tests. */ /* There is no vdup_n_f16, so we need another initialization method. */ @@ -136,7 +136,7 @@ void exec_vcvt (void) /* vcvt_f32_xx */ TEST_VCVT_FP(, float, f, 32, 2, int, s); TEST_VCVT_FP(, float, f, 32, 2, uint, u); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VCVT_FP16(float, f, 32, 16, 4); #endif @@ -152,7 +152,7 @@ void exec_vcvt (void) TEST_VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f); TEST_VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f); TEST_VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VCVT_2FP16(float, f, 16, 32, 4); #endif @@ -214,7 +214,7 @@ void exec_vcvt (void) /* vcvtq_n_xx_f32 */ TEST_VCVT_N(q, int, s, 32, 4, float, f, 31); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #undef TEST_MSG #define TEST_MSG "VCVT FP16" fprintf(ref_file, "\n%s output:\n", TEST_MSG " (check fp16-fp32 inf/nan/denormal)"); diff --git a/ref_vget_high.c b/ref_vget_high.c index a30ea89..3c894ae 100644 --- a/ref_vget_high.c +++ b/ref_vget_high.c @@ -44,14 +44,14 @@ void exec_vget_high (void) statement */ DECL_VARIABLE_64BITS_VARIANTS(vector64); DECL_VARIABLE_128BITS_VARIANTS(vector128); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector64, float, 16, 4); DECL_VARIABLE(vector128, float, 16, 8); #endif TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); VLOAD(vector128, buffer, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VLOAD(vector128, buffer, q, float, f, 16, 8); #endif @@ -68,7 +68,7 @@ void exec_vget_high (void) TEST_VGET_HIGH(poly, p, 8, 8, 16); TEST_VGET_HIGH(poly, p, 16, 4, 8); TEST_VGET_HIGH(float, f, 32, 2, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VGET_HIGH(float, f, 16, 4, 8); #endif diff --git a/ref_vget_low.c b/ref_vget_low.c index c724fcb..1ed87c9 100644 --- a/ref_vget_low.c +++ b/ref_vget_low.c @@ -44,14 +44,14 @@ void exec_vget_low (void) statement */ DECL_VARIABLE_64BITS_VARIANTS(vector64); DECL_VARIABLE_128BITS_VARIANTS(vector128); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector64, float, 16, 4); DECL_VARIABLE(vector128, float, 16, 8); #endif TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); VLOAD(vector128, buffer, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VLOAD(vector128, buffer, q, float, f, 16, 8); #endif @@ -68,7 +68,7 @@ void exec_vget_low (void) TEST_VGET_LOW(poly, p, 8, 8, 16); TEST_VGET_LOW(poly, p, 16, 4, 8); TEST_VGET_LOW(float, f, 32, 2, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VGET_LOW(float, f, 16, 4, 8); #endif @@ -43,7 +43,7 @@ void exec_vld1 (void) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif @@ -55,7 +55,7 @@ void exec_vld1 (void) TEST_VLD1(vector, buffer, , float, f, 32, 2); TEST_VLD1(vector, buffer, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VLD1(vector, buffer, , float, f, 16, 4); TEST_VLD1(vector, buffer, q, float, f, 16, 8); #endif diff --git a/ref_vld1_dup.c b/ref_vld1_dup.c index 66fe6a2..d5eb80d 100644 --- a/ref_vld1_dup.c +++ b/ref_vld1_dup.c @@ -45,7 +45,7 @@ void exec_vld1_dup (void) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif @@ -59,7 +59,7 @@ void exec_vld1_dup (void) TEST_VLD1_DUP(vector, buffer_dup, , float, f, 32, 2); TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VLD1_DUP(vector, buffer_dup, , float, f, 16, 4); TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 16, 8); #endif diff --git a/ref_vld1_lane.c b/ref_vld1_lane.c index f4a9af6..9736748 100644 --- a/ref_vld1_lane.c +++ b/ref_vld1_lane.c @@ -47,12 +47,12 @@ void exec_vld1_lane (void) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif DECL_VARIABLE_ALL_VARIANTS(vector_src); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector_src, float, 16, 4); DECL_VARIABLE(vector_src, float, 16, 8); #endif @@ -68,7 +68,7 @@ void exec_vld1_lane (void) ARRAY(buffer_src, poly, 8, 8); ARRAY(buffer_src, poly, 16, 4); ARRAY(buffer_src, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) ARRAY(buffer_src, float, 16, 4); #endif @@ -83,7 +83,7 @@ void exec_vld1_lane (void) ARRAY(buffer_src, poly, 8, 16); ARRAY(buffer_src, poly, 16, 8); ARRAY(buffer_src, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) ARRAY(buffer_src, float, 16, 8); #endif @@ -101,7 +101,7 @@ void exec_vld1_lane (void) TEST_VLD1_LANE(, poly, p, 8, 8, 7); TEST_VLD1_LANE(, poly, p, 16, 4, 3); TEST_VLD1_LANE(, float, f, 32, 2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VLD1_LANE(, float, f, 16, 4, 2); #endif @@ -116,7 +116,7 @@ void exec_vld1_lane (void) TEST_VLD1_LANE(q, poly, p, 8, 16, 12); TEST_VLD1_LANE(q, poly, p, 16, 8, 6); TEST_VLD1_LANE(q, float, f, 32, 4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VLD1_LANE(q, float, f, 16, 8, 5); #endif @@ -85,7 +85,7 @@ void exec_vldX (void) DECL_VLDX(poly, 16, 8, X); \ DECL_VLDX(float, 32, 4, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define DECL_ALL_VLDX_FP16(X) \ DECL_VLDX(float, 16, 4, X); \ DECL_VLDX(float, 16, 8, X) @@ -113,7 +113,7 @@ void exec_vldX (void) TEST_VLDX(q, poly, p, 16, 8, X); \ TEST_VLDX(q, float, f, 32, 4, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_VLDX_FP16(X) \ TEST_VLDX(, float, f, 16, 4, X); \ TEST_VLDX(q, float, f, 16, 8, X) @@ -141,7 +141,7 @@ void exec_vldX (void) TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 16, 8, X, Y) @@ -151,7 +151,7 @@ void exec_vldX (void) DECL_ALL_VLDX(3); DECL_ALL_VLDX(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_ALL_VLDX_FP16(2); DECL_ALL_VLDX_FP16(3); DECL_ALL_VLDX_FP16(4); @@ -161,13 +161,13 @@ void exec_vldX (void) clean_results (); #define TEST_MSG "VLD2/VLD2Q" TEST_ALL_VLDX(2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_FP16(2); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); @@ -177,18 +177,18 @@ void exec_vldX (void) #undef TEST_MSG #define TEST_MSG "VLD3/VLD3Q" TEST_ALL_VLDX(3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_FP16(3); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); @@ -198,23 +198,23 @@ void exec_vldX (void) #undef TEST_MSG #define TEST_MSG "VLD4/VLD4Q" TEST_ALL_VLDX(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_FP16(4); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); #endif dump_results_hex2 (TEST_MSG, " chunk 3"); diff --git a/ref_vldX_dup.c b/ref_vldX_dup.c index 6d1c180..15ba2c8 100644 --- a/ref_vldX_dup.c +++ b/ref_vldX_dup.c @@ -75,7 +75,7 @@ void exec_vldX_dup (void) DECL_VLDX_DUP(poly, 16, 4, X); \ DECL_VLDX_DUP(float, 32, 2, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define DECL_ALL_VLDX_DUP_FP16(X) \ DECL_VLDX_DUP(float, 16, 4, X) #endif @@ -93,7 +93,7 @@ void exec_vldX_dup (void) TEST_VLDX_DUP(, poly, p, 16, 4, X); \ TEST_VLDX_DUP(, float, f, 32, 2, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_VLDX_DUP_FP16(X) \ TEST_VLDX_DUP(, float, f, 16, 4, X) #endif @@ -111,7 +111,7 @@ void exec_vldX_dup (void) TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ TEST_EXTRA_CHUNK(float, 16, 4, X, Y) #endif @@ -120,7 +120,7 @@ void exec_vldX_dup (void) DECL_ALL_VLDX_DUP(2); DECL_ALL_VLDX_DUP(3); DECL_ALL_VLDX_DUP(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_ALL_VLDX_DUP_FP16(2); DECL_ALL_VLDX_DUP_FP16(3); DECL_ALL_VLDX_DUP_FP16(4); @@ -130,12 +130,12 @@ void exec_vldX_dup (void) clean_results (); #define TEST_MSG "VLD2_DUP/VLD2Q_DUP" TEST_ALL_VLDX_DUP(2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_DUP_FP16(2); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); @@ -145,17 +145,17 @@ void exec_vldX_dup (void) #undef TEST_MSG #define TEST_MSG "VLD3_DUP/VLD3Q_DUP" TEST_ALL_VLDX_DUP(3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_DUP_FP16(3); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); @@ -165,22 +165,22 @@ void exec_vldX_dup (void) #undef TEST_MSG #define TEST_MSG "VLD4_DUP/VLD4Q_DUP" TEST_ALL_VLDX_DUP(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_DUP_FP16(4); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); #endif dump_results_hex2 (TEST_MSG, " chunk 3"); diff --git a/ref_vldX_lane.c b/ref_vldX_lane.c index c0d5a34..7a74232 100644 --- a/ref_vldX_lane.c +++ b/ref_vldX_lane.c @@ -89,7 +89,7 @@ void exec_vldX_lane (void) DECL_VLDX_LANE(float, 32, 2, X); \ DECL_VLDX_LANE(float, 32, 4, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define DECL_ALL_VLDX_LANE_FP16(X) \ DECL_VLDX_LANE(float, 16, 4, X); \ DECL_VLDX_LANE(float, 16, 8, X) @@ -122,7 +122,7 @@ void exec_vldX_lane (void) TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_VLDX_LANE_FP16(X) \ TEST_VLDX_LANE(, float, f, 16, 4, X, 0); \ TEST_VLDX_LANE(q, float, f, 16, 8, X, 2) @@ -145,7 +145,7 @@ void exec_vldX_lane (void) TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 16, 8, X, Y) @@ -155,7 +155,7 @@ void exec_vldX_lane (void) DECL_ALL_VLDX_LANE(2); DECL_ALL_VLDX_LANE(3); DECL_ALL_VLDX_LANE(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_ALL_VLDX_LANE_FP16(2); DECL_ALL_VLDX_LANE_FP16(3); DECL_ALL_VLDX_LANE_FP16(4); @@ -177,7 +177,7 @@ void exec_vldX_lane (void) DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); DUMMY_ARRAY(buffer_src, float, 32, 2, 4); DUMMY_ARRAY(buffer_src, float, 32, 4, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMMY_ARRAY(buffer_src, float, 16, 4, 4); DUMMY_ARRAY(buffer_src, float, 16, 8, 4); #endif @@ -186,12 +186,12 @@ void exec_vldX_lane (void) clean_results (); #define TEST_MSG "VLD2_LANE/VLD2Q_LANE" TEST_ALL_VLDX_LANE(2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_LANE_FP16(2); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); @@ -201,17 +201,17 @@ void exec_vldX_lane (void) #undef TEST_MSG #define TEST_MSG "VLD3_LANE/VLD3Q_LANE" TEST_ALL_VLDX_LANE(3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_LANE_FP16(3); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); @@ -221,22 +221,22 @@ void exec_vldX_lane (void) #undef TEST_MSG #define TEST_MSG "VLD4_LANE/VLD4Q_LANE" TEST_ALL_VLDX_LANE(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VLDX_LANE_FP16(4); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); #endif dump_results_hex2 (TEST_MSG, " chunk 3"); diff --git a/ref_vreinterpret.c b/ref_vreinterpret.c index 02b3e82..1b3241c 100644 --- a/ref_vreinterpret.c +++ b/ref_vreinterpret.c @@ -59,7 +59,7 @@ void exec_vreinterpret (void) VECT_VAR(vector_res, T1, W, N)); \ DUMP_FP(TEST_MSG, T1, W, N, PRIx##W); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_VREINTERPRET_FP16(Q, T1, T2, W, N, TS1, TS2, WS, NS) \ VECT_VAR(vector_res, T1, W, N) = \ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vector, TS1, WS, NS)); \ @@ -72,7 +72,7 @@ void exec_vreinterpret (void) statement */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector_res); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector_res, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); @@ -86,7 +86,7 @@ void exec_vreinterpret (void) TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); #endif @@ -342,7 +342,7 @@ void exec_vreinterpret (void) TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4); TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) /* vreinterpret_f16_xx */ TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 8, 8); TEST_VREINTERPRET_FP16(, float, f, 16, 4, int, s, 16, 4); diff --git a/ref_vst1_lane.c b/ref_vst1_lane.c index c7e533f..2225749 100644 --- a/ref_vst1_lane.c +++ b/ref_vst1_lane.c @@ -43,7 +43,7 @@ void exec_vst1_lane (void) /* With ARM RVCT, we need to declare variables before any executable statement */ DECL_VARIABLE_ALL_VARIANTS(vector); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_VARIABLE(vector, float, 16, 4); DECL_VARIABLE(vector, float, 16, 8); #endif @@ -62,7 +62,7 @@ void exec_vst1_lane (void) TEST_VST1_LANE(, poly, p, 8, 8, 6); TEST_VST1_LANE(, poly, p, 16, 4, 2); TEST_VST1_LANE(, float, f, 32, 2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VST1_LANE(, float, f, 16, 4, 2); #endif @@ -77,7 +77,7 @@ void exec_vst1_lane (void) TEST_VST1_LANE(q, poly, p, 8, 16, 10); TEST_VST1_LANE(q, poly, p, 16, 8, 4); TEST_VST1_LANE(q, float, f, 32, 4, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_VST1_LANE(q, float, f, 16, 8, 5); #endif diff --git a/ref_vstX_lane.c b/ref_vstX_lane.c index 3c0f97a..50dd045 100644 --- a/ref_vstX_lane.c +++ b/ref_vstX_lane.c @@ -92,7 +92,7 @@ void exec_vstX_lane (void) DECL_VSTX_LANE(poly, 16, 8, X); \ DECL_VSTX_LANE(float, 32, 4, X) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define DECL_ALL_VSTX_LANE_FP16(X) \ DECL_VSTX_LANE(float, 16, 4, X); \ DECL_VSTX_LANE(float, 16, 8, X) @@ -119,7 +119,7 @@ void exec_vstX_lane (void) TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_VSTX_LANE_FP16(X) \ TEST_VSTX_LANE(, float, f, 16, 4, X, 3); \ TEST_VSTX_LANE(q, float, f, 16, 8, X, 6) @@ -142,7 +142,7 @@ void exec_vstX_lane (void) TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define TEST_ALL_EXTRA_CHUNKS_FP16(X, Y) \ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ TEST_EXTRA_CHUNK(float, 16, 8, X, Y) @@ -152,7 +152,7 @@ void exec_vstX_lane (void) DECL_ALL_VSTX_LANE(2); DECL_ALL_VSTX_LANE(3); DECL_ALL_VSTX_LANE(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DECL_ALL_VSTX_LANE_FP16(2); DECL_ALL_VSTX_LANE_FP16(3); DECL_ALL_VSTX_LANE_FP16(4); @@ -174,7 +174,7 @@ void exec_vstX_lane (void) DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); DUMMY_ARRAY(buffer_src, float, 32, 4, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMMY_ARRAY(buffer_src, float, 16, 4, 4); DUMMY_ARRAY(buffer_src, float, 16, 8, 4); #endif @@ -183,13 +183,13 @@ void exec_vstX_lane (void) clean_results (); #define TEST_MSG "VST2_LANE/VST2Q_LANE" TEST_ALL_VSTX_LANE(2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VSTX_LANE_FP16(2); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(2, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(2, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); @@ -199,18 +199,18 @@ void exec_vstX_lane (void) #undef TEST_MSG #define TEST_MSG "VST3_LANE/VST3Q_LANE" TEST_ALL_VSTX_LANE(3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VSTX_LANE_FP16(3); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(3, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(3, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(3, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); @@ -220,23 +220,23 @@ void exec_vstX_lane (void) #undef TEST_MSG #define TEST_MSG "VST4_LANE/VST4Q_LANE" TEST_ALL_VSTX_LANE(4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_VSTX_LANE_FP16(4); #endif dump_results_hex2 (TEST_MSG, " chunk 0"); TEST_ALL_EXTRA_CHUNKS(4, 1); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 1); #endif dump_results_hex2 (TEST_MSG, " chunk 1"); TEST_ALL_EXTRA_CHUNKS(4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 2); #endif dump_results_hex2 (TEST_MSG, " chunk 2"); TEST_ALL_EXTRA_CHUNKS(4, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) TEST_ALL_EXTRA_CHUNKS_FP16(4, 3); #endif dump_results_hex2 (TEST_MSG, " chunk 3"); diff --git a/stm-arm-neon-ref.h b/stm-arm-neon-ref.h index f7c7cc6..2f2d255 100644 --- a/stm-arm-neon-ref.h +++ b/stm-arm-neon-ref.h @@ -150,7 +150,7 @@ static int result_idx = 0; fprintf(gcc_tests_file, " };\n"); \ } -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) #define float16_t __fp16 #define DUMP_FP16(MSG,T,W,N,FMT) \ @@ -226,7 +226,7 @@ extern ARRAY(buffer, uint, 64, 1); extern ARRAY(buffer, poly, 8, 8); extern ARRAY(buffer, poly, 16, 4); extern ARRAY(buffer, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern ARRAY(buffer, float, 16, 4); #endif extern ARRAY(buffer, int, 8, 16); @@ -240,7 +240,7 @@ extern ARRAY(buffer, uint, 64, 2); extern ARRAY(buffer, poly, 8, 16); extern ARRAY(buffer, poly, 16, 8); extern ARRAY(buffer, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern ARRAY(buffer, float, 16, 8); #endif @@ -258,7 +258,7 @@ extern ARRAY4(buffer_dup, uint, 64, 1); extern ARRAY(buffer_dup, poly, 8, 8); extern ARRAY(buffer_dup, poly, 16, 4); extern ARRAY4(buffer_dup, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern ARRAY4(buffer_dup, float, 16, 4); #endif extern ARRAY(buffer_dup, int, 8, 16); @@ -272,7 +272,7 @@ extern ARRAY4(buffer_dup, uint, 64, 2); extern ARRAY(buffer_dup, poly, 8, 16); extern ARRAY(buffer_dup, poly, 16, 8); extern ARRAY(buffer_dup, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern ARRAY(buffer_dup, float, 16, 8); #endif @@ -288,7 +288,7 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 1, 2); extern VECT_ARRAY(buffer_vld2, poly, 8, 8, 2); extern VECT_ARRAY(buffer_vld2, poly, 16, 4, 2); extern VECT_ARRAY(buffer_vld2, float, 32, 2, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld2, float, 16, 4, 2); #endif extern VECT_ARRAY(buffer_vld2, int, 8, 16, 2); @@ -302,7 +302,7 @@ extern VECT_ARRAY(buffer_vld2, uint, 64, 2, 2); extern VECT_ARRAY(buffer_vld2, poly, 8, 16, 2); extern VECT_ARRAY(buffer_vld2, poly, 16, 8, 2); extern VECT_ARRAY(buffer_vld2, float, 32, 4, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld2, float, 16, 8, 2); #endif @@ -318,7 +318,7 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 1, 3); extern VECT_ARRAY(buffer_vld3, poly, 8, 8, 3); extern VECT_ARRAY(buffer_vld3, poly, 16, 4, 3); extern VECT_ARRAY(buffer_vld3, float, 32, 2, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld3, float, 16, 4, 3); #endif extern VECT_ARRAY(buffer_vld3, int, 8, 16, 3); @@ -332,7 +332,7 @@ extern VECT_ARRAY(buffer_vld3, uint, 64, 2, 3); extern VECT_ARRAY(buffer_vld3, poly, 8, 16, 3); extern VECT_ARRAY(buffer_vld3, poly, 16, 8, 3); extern VECT_ARRAY(buffer_vld3, float, 32, 4, 3); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld3, float, 16, 8, 3); #endif @@ -348,7 +348,7 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 1, 4); extern VECT_ARRAY(buffer_vld4, poly, 8, 8, 4); extern VECT_ARRAY(buffer_vld4, poly, 16, 4, 4); extern VECT_ARRAY(buffer_vld4, float, 32, 2, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld4, float, 16, 4, 4); #endif extern VECT_ARRAY(buffer_vld4, int, 8, 16, 4); @@ -362,7 +362,7 @@ extern VECT_ARRAY(buffer_vld4, uint, 64, 2, 4); extern VECT_ARRAY(buffer_vld4, poly, 8, 16, 4); extern VECT_ARRAY(buffer_vld4, poly, 16, 8, 4); extern VECT_ARRAY(buffer_vld4, float, 32, 4, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_ARRAY(buffer_vld4, float, 16, 8, 4); #endif @@ -378,7 +378,7 @@ extern VECT_VAR_DECL(buffer_vld2_lane, uint, 64, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, poly, 8, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, poly, 16, 2)[2]; extern VECT_VAR_DECL(buffer_vld2_lane, float, 32, 2)[2]; -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2)[2]; #endif @@ -394,7 +394,7 @@ extern VECT_VAR_DECL(buffer_vld3_lane, uint, 64, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, poly, 8, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, poly, 16, 3)[3]; extern VECT_VAR_DECL(buffer_vld3_lane, float, 32, 3)[3]; -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3)[3]; #endif @@ -410,7 +410,7 @@ extern VECT_VAR_DECL(buffer_vld4_lane, uint, 64, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, poly, 8, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, poly, 16, 4)[4]; extern VECT_VAR_DECL(buffer_vld4_lane, float, 32, 4)[4]; -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) extern VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4)[4]; #endif @@ -426,7 +426,7 @@ static ARRAY(result, uint, 64, 1); static ARRAY(result, poly, 8, 8); static ARRAY(result, poly, 16, 4); static ARRAY(result, float, 32, 2); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) static ARRAY(result, float, 16, 4); #endif static ARRAY(result, int, 8, 16); @@ -440,7 +440,7 @@ static ARRAY(result, uint, 64, 2); static ARRAY(result, poly, 8, 16); static ARRAY(result, poly, 16, 8); static ARRAY(result, float, 32, 4); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) static ARRAY(result, float, 16, 8); #endif @@ -463,7 +463,7 @@ static void dump_results (char *test_name) DUMP_POLY(test_name, poly, 8, 8, PRIu8); DUMP_POLY(test_name, poly, 16, 4, PRIu16); DUMP_FP(test_name, float, 32, 2, PRIx32); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMP_FP16(test_name, float, 16, 4, PRIu16); #endif @@ -478,7 +478,7 @@ static void dump_results (char *test_name) DUMP_POLY(test_name, poly, 8, 16, PRIu8); DUMP_POLY(test_name, poly, 16, 8, PRIu16); DUMP_FP(test_name, float, 32, 4, PRIx32); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMP_FP16(test_name, float, 16, 8, PRIu16); #endif } @@ -502,7 +502,7 @@ static void dump_results_hex2 (const char *test_name, const char* comment) DUMP_POLY(test_name, poly, 8, 8, PRIx8); DUMP_POLY(test_name, poly, 16, 4, PRIx16); DUMP_FP(test_name, float, 32, 2, PRIx32); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMP_FP16(test_name, float, 16, 4, PRIx16); #endif @@ -517,7 +517,7 @@ static void dump_results_hex2 (const char *test_name, const char* comment) DUMP_POLY(test_name, poly, 8, 16, PRIx8); DUMP_POLY(test_name, poly, 16, 8, PRIx16); DUMP_FP(test_name, float, 32, 4, PRIx32); -#if defined(__ARM_FP16_FORMAT_IEEE) +#if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) DUMP_FP16(test_name, float, 16, 8, PRIx16); #endif } |