Diffstat (limited to 'gcc-4.8/gcc/config/aarch64')
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def |   4
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd.md           | 185
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.c                 |   9
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.h                 |   6
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.md                |   2
-rw-r--r--  gcc-4.8/gcc/config/aarch64/arm_neon.h                |  68
-rw-r--r--  gcc-4.8/gcc/config/aarch64/iterators.md              |  61
-rw-r--r--  gcc-4.8/gcc/config/aarch64/predicates.md             |   5
8 files changed, 247 insertions, 93 deletions
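
The patch, as read from the hunks below: the AdvSIMD compare patterns become plain RTL comparisons wrapped in neg instead of UNSPECs, the unsigned builtins cmhs/cmhi are renamed cmgeu/cmgtu (with arm_neon.h updated to match), DI-mode scalar variants and an explicit cmtst pattern are added, TARGET_SIMD/TARGET_FLOAT now also honour the ISA flags, and a register-move-cost bug is fixed. The intrinsics keep their lane-wise contract: a result lane is all-ones where the comparison holds and all-zeros where it does not. A minimal sketch of that contract using standard arm_neon.h intrinsics (illustrative only, not part of the patch):

    #include <arm_neon.h>
    #include <assert.h>

    int
    main (void)
    {
      uint32x2_t a = {1, 7};
      uint32x2_t b = {5, 5};
      /* vcge_u32 now expands through __builtin_aarch64_cmgeuv2si
         (formerly __builtin_aarch64_cmhsv2si); the generated
         instruction is still CMHS.  */
      uint32x2_t m = vcge_u32 (a, b);
      assert (vget_lane_u32 (m, 0) == 0);
      assert (vget_lane_u32 (m, 1) == ~0u);
      return 0;
    }
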
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
index 955da265a..ed73c15d7 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -217,8 +217,8 @@
   BUILTIN_VSDQ_I_DI (BINOP, cmle)
   BUILTIN_VSDQ_I_DI (BINOP, cmlt)
   /* Implemented by aarch64_cm<cmp><mode>.  */
-  BUILTIN_VSDQ_I_DI (BINOP, cmhs)
-  BUILTIN_VSDQ_I_DI (BINOP, cmhi)
+  BUILTIN_VSDQ_I_DI (BINOP, cmgeu)
+  BUILTIN_VSDQ_I_DI (BINOP, cmgtu)
   BUILTIN_VSDQ_I_DI (BINOP, cmtst)
 
   /* Implemented by aarch64_<fmaxmin><mode>. */
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
index 00f3c3121..481222cf5 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
@@ -21,7 +21,7 @@
 
 ; Main data types used by the insntructions
 
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
   (const_string "unknown"))
 
 
@@ -1548,12 +1548,12 @@
 
     case LTU:
     case GEU:
-      emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
       break;
 
     case LEU:
     case GTU:
-      emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
+      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
       break;
 
     case NE:
@@ -3034,48 +3034,181 @@
 )
 
-;; cm(eq|ge|le|lt|gt)
+;; cm(eq|ge|gt|lt|le)
+;; Note, we have constraints for Dz and Z as different expanders
+;; have different ideas of what should be passed to this pattern.
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
-	(unspec:<V_cmp_result>
-	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
-	   (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
-	  VCMP_S))]
+	(neg:<V_cmp_result>
+	  (COMPARISONS:<V_cmp_result>
+	    (match_operand:VDQ 1 "register_operand" "w,w")
+	    (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
+	  )))]
   "TARGET_SIMD"
   "@
-  cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
-  cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
+  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
   [(set_attr "simd_type" "simd_cmp")
    (set_attr "simd_mode" "<MODE>")]
 )
 
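
The pattern above is the heart of the change: the compare is now expressed as (neg (COMPARISONS ...)) rather than an opaque unspec, which gives combine and the other RTL optimizers something they can reason about. Per lane this is just two's-complement negation of a 0/1 test; a scalar sketch in plain C (illustrative, not GCC internals):

    #include <stdint.h>

    /* One lane of CMHS (now spelled cmgeu in the patterns):
       0 -> 0x00000000, 1 -> 0xffffffff.  */
    static inline uint32_t
    cmgeu_lane (uint32_t a, uint32_t b)
    {
      return -(uint32_t) (a >= b);    /* (neg (geu a b)) */
    }
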
-;; cm(hs|hi|tst)
+(define_insn_and_split "aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
+	(neg:DI
+	  (COMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w,w,r")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
+	  )))]
+  "TARGET_SIMD"
+  "@
+  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+  cm<optab>\t%d0, %d1, #0
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_dup 1)
+	  (match_dup 2)))
+   (set (match_dup 0)
+	(neg:DI
+	  (COMPARISONS:DI
+	    (match_operand 3 "cc_register" "")
+	    (const_int 0))))]
+  {
+    enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
+
+;; cm(hs|hi)
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
-	(unspec:<V_cmp_result>
-	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-	   (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
-	  VCMP_U))]
+	(neg:<V_cmp_result>
+	  (UCOMPARISONS:<V_cmp_result>
+	    (match_operand:VDQ 1 "register_operand" "w")
+	    (match_operand:VDQ 2 "register_operand" "w")
+	  )))]
   "TARGET_SIMD"
-  "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
   [(set_attr "simd_type" "simd_cmp")
    (set_attr "simd_mode" "<MODE>")]
 )
 
-;; fcm(eq|ge|le|lt|gt)
+(define_insn_and_split "aarch64_cm<optab>di"
+  [(set (match_operand:DI 0 "register_operand" "=w,r")
+	(neg:DI
+	  (UCOMPARISONS:DI
+	    (match_operand:DI 1 "register_operand" "w,r")
+	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
+	  )))]
+  "TARGET_SIMD"
+  "@
+  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_dup 1)
+	  (match_dup 2)))
+   (set (match_dup 0)
+	(neg:DI
+	  (UCOMPARISONS:DI
+	    (match_operand 3 "cc_register" "")
+	    (const_int 0))))]
+  {
+    enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+    rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+    rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
+
+;; cmtst
+
+(define_insn "aarch64_cmtst<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+	(neg:<V_cmp_result>
+	  (ne:<V_cmp_result>
+	    (and:VDQ
+	      (match_operand:VDQ 1 "register_operand" "w")
+	      (match_operand:VDQ 2 "register_operand" "w"))
+	    (vec_duplicate:<V_cmp_result> (const_int 0)))))]
+  "TARGET_SIMD"
+  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "aarch64_cmtstdi"
+  [(set (match_operand:DI 0 "register_operand" "=w,r")
+	(neg:DI
+	  (ne:DI
+	    (and:DI
+	      (match_operand:DI 1 "register_operand" "w,r")
+	      (match_operand:DI 2 "register_operand" "w,r"))
+	    (const_int 0))))]
+  "TARGET_SIMD"
+  "@
+  cmtst\t%d0, %d1, %d2
+  #"
+  "reload_completed
+   /* We need to prevent the split from
+      happening in the 'w' constraint cases.  */
+   && GP_REGNUM_P (REGNO (operands[0]))
+   && GP_REGNUM_P (REGNO (operands[1]))"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	  (and:DI (match_dup 1)
+		  (match_dup 2))
+	  (const_int 0)))
+   (set (match_dup 0)
+	(neg:DI
+	  (ne:DI
+	    (match_operand 3 "cc_register" "")
+	    (const_int 0))))]
+  {
+    rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
+    enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
+    rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+    rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
+    emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_cmp")
+   (set_attr "simd_mode" "DI")]
+)
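
The new cmtst patterns give TST-style bit tests the same explicit form: a lane is all-ones exactly when the AND of the two operands is non-zero, which is what the (neg (ne (and ...) 0)) RTL above spells out. A scalar sketch (illustrative only):

    #include <stdint.h>

    /* One lane of CMTST.  */
    static inline uint64_t
    cmtst_lane (uint64_t a, uint64_t b)
    {
      return -(uint64_t) ((a & b) != 0);   /* (neg (ne (and a b) 0)) */
    }
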
+
+;; fcm(eq|ge|gt|le|lt)
 
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
-	(unspec:<V_cmp_result>
-	  [(match_operand:VDQF 1 "register_operand" "w,w")
-	   (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
-	  VCMP_S))]
+	(neg:<V_cmp_result>
+	  (COMPARISONS:<V_cmp_result>
+	    (match_operand:VALLF 1 "register_operand" "w,w")
+	    (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+	  )))]
   "TARGET_SIMD"
   "@
-  fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
-  fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
+  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
   [(set_attr "simd_type" "simd_fcmp")
    (set_attr "simd_mode" "<MODE>")]
 )
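
Note the second alternative of the fcm pattern: when operand 2 is a floating-point zero (the Y/Dz constraint alternatives), the compare-against-zero form "fcm<op> ..., 0" is emitted. Source code that can take that form, assuming the standard intrinsics (whether the zero form is actually chosen is the compiler's decision):

    #include <arm_neon.h>

    /* Mask of lanes where x > 0.0f; a candidate for
       "fcmgt v0.4s, v1.4s, 0".  */
    uint32x4_t
    positive_mask (float32x4_t x)
    {
      return vcgtq_f32 (x, vdupq_n_f32 (0.0f));
    }
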
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.c b/gcc-4.8/gcc/config/aarch64/aarch64.c
index 4d5166618..0d4b26515 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.c
@@ -4568,9 +4568,11 @@ aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
 }
 
 static int
-aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
-			    reg_class_t from, reg_class_t to)
+aarch64_register_move_cost (enum machine_mode mode,
+			    reg_class_t from_i, reg_class_t to_i)
 {
+  enum reg_class from = (enum reg_class) from_i;
+  enum reg_class to = (enum reg_class) to_i;
   const struct cpu_regmove_cost *regmove_cost
     = aarch64_tune_params->regmove_cost;
 
@@ -4586,8 +4588,7 @@ aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
      secondary reload.  A general register is used as a scratch to move
      the upper DI value and the lower DI value is moved directly,
      hence the cost is the sum of three moves. */
-
-  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
+  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
 
   return regmove_cost->FP2FP;
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.h b/gcc-4.8/gcc/config/aarch64/aarch64.h
index c3efd2a88..19ac5ebc4 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.h
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.h
@@ -73,9 +73,9 @@
 #define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN)
 
 /* AdvSIMD is supported in the default configuration, unless disabled by
-   -mgeneral-regs-only.  */
-#define TARGET_SIMD !TARGET_GENERAL_REGS_ONLY
-#define TARGET_FLOAT !TARGET_GENERAL_REGS_ONLY
+   -mgeneral-regs-only or the +nosimd extension.  */
+#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD)
+#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP)
 
 #define UNITS_PER_WORD 8
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.md b/gcc-4.8/gcc/config/aarch64/aarch64.md
index 04a5e01f9..c5cfedb8f 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.md
@@ -2211,7 +2211,7 @@
    (set_attr "mode" "SI")]
 )
 
-(define_insn "*cstore<mode>_neg"
+(define_insn "cstore<mode>_neg"
   [(set (match_operand:ALLI 0 "register_operand" "=r")
 	(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
 		  [(match_operand 2 "cc_register" "") (const_int 0)])))]
diff --git a/gcc-4.8/gcc/config/aarch64/arm_neon.h b/gcc-4.8/gcc/config/aarch64/arm_neon.h
index b083eb4e6..73c7e7d63 100644
--- a/gcc-4.8/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.8/gcc/config/aarch64/arm_neon.h
@@ -19551,28 +19551,28 @@ vcge_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
+  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
+  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
+  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
						(int64x1_t) __b);
 }
 
@@ -19603,28 +19603,28 @@ vcgeq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
+  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
+  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
+  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
+  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
 }
 
@@ -19637,7 +19637,7 @@ vcged_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
						(int64x1_t) __b);
 }
@@ -19676,28 +19676,28 @@ vcgt_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
+  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
+  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
+  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
						(int64x1_t) __b);
 }
 
@@ -19728,28 +19728,28 @@ vcgtq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
+  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
+  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
+  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
+  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
 }
 
@@ -19762,7 +19762,7 @@ vcgtd_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
						(int64x1_t) __b);
 }
@@ -19801,28 +19801,28 @@ vcle_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
+  return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
+  return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
+  return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
+  return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __b,
						(int64x1_t) __a);
 }
 
@@ -19853,28 +19853,28 @@ vcleq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
+  return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
+  return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
+  return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
+  return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
 }
 
@@ -19919,28 +19919,28 @@ vclt_s64 (int64x1_t __a, int64x1_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
+  return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
						 (int8x8_t) __a);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
+  return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
						  (int16x4_t) __a);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
+  return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
						  (int32x2_t) __a);
 }
 
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
 {
-  return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
+  return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __b,
						(int64x1_t) __a);
 }
 
@@ -19971,28 +19971,28 @@ vcltq_s64 (int64x2_t __a, int64x2_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
+  return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
						   (int8x16_t) __a);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
+  return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
						  (int16x8_t) __a);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
+  return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
						  (int32x4_t) __a);
 }
 
 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
 {
-  return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
+  return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
						  (int64x2_t) __a);
 }
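
Throughout the vcle/vclt rewrites above, note the operand order: the builtins are called with __b first. AArch64 has no three-operand CMLE/CMLT (and no unsigned lower forms at all), so an unsigned less-than is an unsigned greater-than with the sources swapped, mirroring the cmp_1/cmp_2 attributes added to iterators.md below. In scalar terms (illustrative only):

    #include <stdint.h>

    /* vclt_u32-style lane: LTU(a, b) == GTU(b, a).  */
    static inline uint32_t
    cmltu_lane (uint32_t a, uint32_t b)
    {
      return -(uint32_t) (b > a);   /* emitted as CMHI with b, a */
    }
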
diff --git a/gcc-4.8/gcc/config/aarch64/iterators.md b/gcc-4.8/gcc/config/aarch64/iterators.md
index ce81ac5ce..d19b26a64 100644
--- a/gcc-4.8/gcc/config/aarch64/iterators.md
+++ b/gcc-4.8/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
 ;; Vector Float modes.
 (define_mode_iterator VDQF [V2SF V4SF V2DF])
 
+;; All Float modes.
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
+
 ;; Vector Float modes with 2 elements.
 (define_mode_iterator V2F [V2SF V2DF])
 
@@ -213,13 +216,6 @@
     UNSPEC_URSHL	; Used in aarch64-simd.md.
     UNSPEC_SQRSHL	; Used in aarch64-simd.md.
     UNSPEC_UQRSHL	; Used in aarch64-simd.md.
-    UNSPEC_CMEQ		; Used in aarch64-simd.md.
-    UNSPEC_CMLE		; Used in aarch64-simd.md.
-    UNSPEC_CMLT		; Used in aarch64-simd.md.
-    UNSPEC_CMGE		; Used in aarch64-simd.md.
-    UNSPEC_CMGT		; Used in aarch64-simd.md.
-    UNSPEC_CMHS		; Used in aarch64-simd.md.
-    UNSPEC_CMHI		; Used in aarch64-simd.md.
     UNSPEC_SSLI		; Used in aarch64-simd.md.
    UNSPEC_USLI		; Used in aarch64-simd.md.
     UNSPEC_SSRI		; Used in aarch64-simd.md.
@@ -227,7 +223,6 @@
     UNSPEC_SSHLL	; Used in aarch64-simd.md.
     UNSPEC_USHLL	; Used in aarch64-simd.md.
     UNSPEC_ADDP		; Used in aarch64-simd.md.
-    UNSPEC_CMTST	; Used in aarch64-simd.md.
     UNSPEC_FMAX		; Used in aarch64-simd.md.
     UNSPEC_FMIN		; Used in aarch64-simd.md.
     UNSPEC_BSL		; Used in aarch64-simd.md.
@@ -251,6 +246,7 @@
 
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
+		     (SF "s") (DF "d")
		     (V8QI "") (V16QI "")
		     (V4HI "") (V8HI "")
		     (V2SI "") (V4SI  "")
@@ -305,7 +301,8 @@
			 (V4SF ".4s") (V2DF ".2d")
			 (DI   "")   (SI   "")
			 (HI   "")   (QI   "")
-			 (TI   "")])
+			 (TI   "")   (SF   "")
+			 (DF   "")])
 
 ;; Register suffix narrowed modes for VQN.
 (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
@@ -444,7 +441,8 @@
				(V2SI "V2SI") (V4SI  "V4SI")
				(DI   "DI")   (V2DI  "V2DI")
				(V2SF "V2SI") (V4SF  "V4SI")
-				(V2DF "V2DI")])
+				(V2DF "V2DI") (DF    "DI")
+				(SF   "SI")])
 
 ;; Lower case mode of results of comparison operations.
 (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
@@ -452,7 +450,8 @@
				(V2SI "v2si") (V4SI  "v4si")
				(DI   "di")   (V2DI  "v2di")
				(V2SF "v2si") (V4SF  "v4si")
-				(V2DF "v2di")])
+				(V2DF "v2di") (DF    "di")
+				(SF   "si")])
 
 ;; Vm for lane instructions is restricted to FP_LO_REGS.
 (define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
@@ -543,6 +542,12 @@
 ;; Code iterator for signed variants of vector saturating binary ops.
 (define_code_iterator SBINQOPS [ss_plus ss_minus])
 
+;; Comparison operators for <F>CM.
+(define_code_iterator COMPARISONS [lt le eq ge gt])
+
+;; Unsigned comparison operators.
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
+
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -571,7 +576,28 @@
			 (eq "eq")
			 (ne "ne")
			 (lt "lt")
-			 (ge "ge")])
+			 (ge "ge")
+			 (le "le")
+			 (gt "gt")
+			 (ltu "ltu")
+			 (leu "leu")
+			 (geu "geu")
+			 (gtu "gtu")])
+
+;; For comparison operators we use the FCM* and CM* instructions.
+;; As there are no CMLE or CMLT instructions which act on 3 vector
+;; operands, we must use CMGE or CMGT and swap the order of the
+;; source operands.
+
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
+			   (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
+(define_code_attr cmp_1   [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
+			   (ltu "2") (leu "2") (geu "1") (gtu "1")])
+(define_code_attr cmp_2   [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
+			   (ltu "1") (leu "1") (geu "2") (gtu "2")])
+
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
+		       (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
 
 ;; Optab prefix for sign/zero-extending operations
 (define_code_attr su_optab [(sign_extend "") (zero_extend "u")
@@ -680,11 +706,6 @@
				      UNSPEC_SQSHRN UNSPEC_UQSHRN
				      UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
 
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
-			     UNSPEC_CMLE UNSPEC_CMLT])
-
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
-
 (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
			      UNSPEC_TRN1 UNSPEC_TRN2
			      UNSPEC_UZP1 UNSPEC_UZP2])
@@ -768,12 +789,6 @@
			      (UNSPEC_RADDHN2 "add")
			      (UNSPEC_RSUBHN2 "sub")])
 
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
-		      (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
-		      (UNSPEC_CMEQ "eq")
-		      (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
-		      (UNSPEC_CMTST "tst")])
-
 (define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
			   (UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
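
The CMP attribute above gives each RTL code its rtx-code spelling so the DI splitters in aarch64-simd.md can rebuild the comparison with gen_rtx_<CMP> and hand it to cstoredi_neg (the aarch64.md pattern un-starred earlier precisely so it can be generated by name). At the source level this covers scalar mask idioms; a sketch (illustrative; the exact sequence, e.g. a compare followed by a negated conditional set, is the compiler's choice):

    #include <stdint.h>

    /* All-ones/all-zero mask from a scalar compare.  With the new
       aarch64_cm<optab>di patterns this can stay in general registers
       instead of bouncing through the vector side.  */
    uint64_t
    mask_gtu (uint64_t a, uint64_t b)
    {
      return -(uint64_t) (a > b);
    }
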
diff --git a/gcc-4.8/gcc/config/aarch64/predicates.md b/gcc-4.8/gcc/config/aarch64/predicates.md
index 8f80b2028..8514e8f8f 100644
--- a/gcc-4.8/gcc/config/aarch64/predicates.md
+++ b/gcc-4.8/gcc/config/aarch64/predicates.md
@@ -31,6 +31,11 @@
   (ior (match_operand 0 "register_operand")
        (match_test "op == const0_rtx"))))
 
+(define_predicate "aarch64_reg_or_fp_zero"
+  (and (match_code "reg,subreg,const_double")
+       (ior (match_operand 0 "register_operand")
+	    (match_test "aarch64_float_const_zero_rtx_p (op)"))))
+
 (define_predicate "aarch64_reg_zero_or_m1_or_1"
   (and (match_code "reg,subreg,const_int")
        (ior (match_operand 0 "register_operand")
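
Finally, the new aarch64_reg_or_fp_zero predicate admits a register or a literal floating-point zero (via aarch64_float_const_zero_rtx_p), which is what lets float compares against zero take the immediate form while any other constant falls back to the register alternative. A sketch (illustrative only):

    #include <arm_neon.h>

    uint32x4_t
    cmp_zero (float32x4_t x)
    {
      /* 0.0f passes the zero test: eligible for "fcmeq ..., 0".  */
      return vceqq_f32 (x, vdupq_n_f32 (0.0f));
    }

    uint32x4_t
    cmp_one (float32x4_t x)
    {
      /* 1.0f is not zero: the constant must be materialised in a
         register first.  */
      return vceqq_f32 (x, vdupq_n_f32 (1.0f));
    }
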