From 71959ce8f47f676a26bb21da7117101d9d73867e Mon Sep 17 00:00:00 2001 From: Yunlian Jiang Date: Tue, 24 Apr 2018 22:15:37 -0700 Subject: gcc: backport patch to support vrnd<*>_f64 aarch64 intrinsics. This backports upstream patch to add support for vrnd<*>_f64 intrinsics for aarch64. This is needed because glibc 2.27 uses some of them. Author: mshawcroft Date: Tue Apr 22 08:39:48 2014 +0000 [AArch64] vrnd<*>_f64 patch This patch adds vrnd<*>_f64 aarch64 intrinsics. A testcase for those intrinsics is added. Run a complete LE and BE regression run with no regressions. BUG=chromium:834385 TEST=./setup_board --board kevin --nousepkg cross-aarch64-cros-linux-gnu-glibc builds for glibc 2.27 Change-Id: Iaf9508a433e35d3304e44ac4b6c93071ff6c5ec0 --- gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 2 + .../gcc/config/aarch64/aarch64-simd-builtins.def | 2 +- gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 2 +- gcc-4.9/gcc/config/aarch64/aarch64.md | 2 +- gcc-4.9/gcc/config/aarch64/arm_neon.h | 43 ++++++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) (limited to 'gcc-4.9/gcc/config') diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c index a5af874bf..eea05cdb2 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -311,6 +311,8 @@ aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) #define BUILTIN_VDQF(T, N, MAP) \ VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQF_DF(T, N, MAP) \ + VAR4 (T, N, MAP, v2sf, v4sf, v2df, df) #define BUILTIN_VDQH(T, N, MAP) \ VAR2 (T, N, MAP, v4hi, v8hi) #define BUILTIN_VDQHS(T, N, MAP) \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def index c9b7570e5..c5e3b3e9f 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def +++ 
b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def @@ -265,7 +265,7 @@ BUILTIN_VDQF (UNOP, nearbyint, 2) BUILTIN_VDQF (UNOP, rint, 2) BUILTIN_VDQF (UNOP, round, 2) - BUILTIN_VDQF (UNOP, frintn, 2) + BUILTIN_VDQF_DF (UNOP, frintn, 2) /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><fcvt_target>2. */ VAR1 (UNOP, lbtruncv2sf, 2, v2si) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 7626ed31f..9ccf484c7 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -1576,7 +1576,7 @@ ) ;; Vector versions of the floating-point frint patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. (define_insn "<frint_pattern><mode>2" [(set (match_operand:VDQF 0 "register_operand" "=w") (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index fe68bfea1..cab3dfc0f 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -3229,7 +3229,7 @@ ;; ------------------------------------------------------------------- ;; frint floating-point round to integral standard patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
(define_insn "<frint_pattern><mode>2" [(set (match_operand:GPF 0 "register_operand" "=w") diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index ae0ae9c1b..03addc955 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -22469,6 +22469,12 @@ vrnd_f32 (float32x2_t __a) return __builtin_aarch64_btruncv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnd_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndq_f32 (float32x4_t __a) { @@ -22489,6 +22495,12 @@ vrnda_f32 (float32x2_t __a) return __builtin_aarch64_roundv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnda_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndaq_f32 (float32x4_t __a) { @@ -22509,6 +22521,12 @@ vrndi_f32 (float32x2_t __a) return __builtin_aarch64_nearbyintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndi_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndiq_f32 (float32x4_t __a) { @@ -22529,6 +22547,12 @@ vrndm_f32 (float32x2_t __a) return __builtin_aarch64_floorv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndm_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndmq_f32 (float32x4_t __a) { @@ -22548,6 +22572,13 @@ vrndn_f32 (float32x2_t __a) { return __builtin_aarch64_frintnv2sf (__a); } + +__extension__ static 
__inline float64x1_t __attribute__ ((__always_inline__)) +vrndn_f64 (float64x1_t __a) +{ + return __builtin_aarch64_frintndf (__a); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndnq_f32 (float32x4_t __a) { @@ -22568,6 +22599,12 @@ vrndp_f32 (float32x2_t __a) return __builtin_aarch64_ceilv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndp_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndpq_f32 (float32x4_t __a) { @@ -22588,6 +22625,12 @@ vrndx_f32 (float32x2_t __a) return __builtin_aarch64_rintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndx_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndxq_f32 (float32x4_t __a) { -- cgit v1.2.3