From e3cc64dec20832769406aa38cde83c7dd4194bf4 Mon Sep 17 00:00:00 2001
From: Ben Cheng
Date: Tue, 22 Apr 2014 13:33:12 -0700
Subject: [4.9] GCC 4.9.0 official release refresh

Change-Id: Ic99a7da8b44b789a48aeec93b33e93944d6e6767
---
 gcc-4.9/gcc/config/i386/avx512fintrin.h |  83 +++++++++++++
 gcc-4.9/gcc/config/i386/bmiintrin.h     |  48 +++++++-
 gcc-4.9/gcc/config/i386/constraints.md  |   7 ++
 gcc-4.9/gcc/config/i386/i386.c          |   6 +-
 gcc-4.9/gcc/config/i386/i386.md         | 176 +++++++++++++++++++++++----
 gcc-4.9/gcc/config/i386/predicates.md   |  14 +++
 gcc-4.9/gcc/config/i386/sse.md          | 207 ++++++++++++++++++++++++--------
 7 files changed, 459 insertions(+), 82 deletions(-)

diff --git a/gcc-4.9/gcc/config/i386/avx512fintrin.h b/gcc-4.9/gcc/config/i386/avx512fintrin.h
index 960286618..314895ad7 100644
--- a/gcc-4.9/gcc/config/i386/avx512fintrin.h
+++ b/gcc-4.9/gcc/config/i386/avx512fintrin.h
@@ -39,6 +39,8 @@ typedef double __v8df __attribute__ ((__vector_size__ (64)));
 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
 typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 
 /* The Intel API is flexible enough that we must allow aliasing
    with other vector types, and their scalar components.  */
@@ -130,6 +132,32 @@ _mm512_undefined_si512 (void)
   return __Y;
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi8 (char __A)
+{
+  return __extension__ (__m512i)(__v64qi)
+	 { __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi16 (short __A)
+{
+  return __extension__ (__m512i)(__v32hi)
+	 { __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_set1_pd (double __A)
@@ -152,6 +180,54 @@ _mm512_set1_ps (float __A)
 				     (__mmask16) -1);
 }
 
+/* Create the vector [A B C D A B C D A B C D A B C D].  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
+{
+  return __extension__ (__m512i)(__v16si)
+	 { __D, __C, __B, __A, __D, __C, __B, __A,
+	   __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+		   long long __D)
+{
+  return __extension__ (__m512i) (__v8di)
+	 { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_pd (double __A, double __B, double __C, double __D)
+{
+  return __extension__ (__m512d)
+	 { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_ps (float __A, float __B, float __C, float __D)
+{
+  return __extension__ (__m512)
+	 { __D, __C, __B, __A, __D, __C, __B, __A,
+	   __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+#define _mm512_setr4_epi64(e0,e1,e2,e3)	\
+  _mm512_set4_epi64(e3,e2,e1,e0)
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3)	\
+  _mm512_set4_epi32(e3,e2,e1,e0)
+
+#define _mm512_setr4_pd(e0,e1,e2,e3)	\
+  _mm512_set4_pd(e3,e2,e1,e0)
+
+#define _mm512_setr4_ps(e0,e1,e2,e3)	\
+  _mm512_set4_ps(e3,e2,e1,e0)
+
 extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_ps (void)
@@ -167,6 +243,13 @@ _mm512_setzero_pd (void)
   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_epi32 (void)
+{
+  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_si512 (void)
diff --git a/gcc-4.9/gcc/config/i386/bmiintrin.h b/gcc-4.9/gcc/config/i386/bmiintrin.h
index b86adf179..b2d7c60ea 100644
--- a/gcc-4.9/gcc/config/i386/bmiintrin.h
+++ b/gcc-4.9/gcc/config/i386/bmiintrin.h
@@ -40,7 +40,6 @@ __tzcnt_u16 (unsigned short __X)
   return __builtin_ctzs (__X);
 }
 
-
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
@@ -65,18 +64,35 @@ __blsi_u32 (unsigned int __X)
   return __X & -__X;
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u32 (unsigned int __X)
+{
+  return __blsi_u32 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsmsk_u32 (unsigned int __X)
 {
   return __X ^ (__X - 1);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u32 (unsigned int __X)
+{
+  return __blsmsk_u32 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsr_u32 (unsigned int __X)
 {
   return __X & (__X - 1);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u32 (unsigned int __X)
+{
+  return __blsr_u32 (__X);
+}
 
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __tzcnt_u32 (unsigned int __X)
 {
@@ -84,6 +100,12 @@ __tzcnt_u32 (unsigned int __X)
   return __builtin_ctz (__X);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u32 (unsigned int __X)
+{
+  return __builtin_ctz (__X);
+}
+
 #ifdef __x86_64__
 
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -110,24 +132,48 @@ __blsi_u64 (unsigned long long __X)
   return __X & -__X;
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u64 (unsigned long long __X)
+{
+  return __blsi_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsmsk_u64 (unsigned long long __X)
 {
   return __X ^ (__X - 1);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u64 (unsigned long long __X)
+{
+  return __blsmsk_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsr_u64 (unsigned long long __X)
 {
   return __X & (__X - 1);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u64 (unsigned long long __X)
+{
+  return __blsr_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __tzcnt_u64 (unsigned long long __X)
 {
   return __builtin_ctzll (__X);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u64 (unsigned long long __X)
+{
+  return __builtin_ctzll (__X);
+}
+
 #endif /* __x86_64__ */
 
 #ifdef __DISABLE_BMI__
diff --git a/gcc-4.9/gcc/config/i386/constraints.md b/gcc-4.9/gcc/config/i386/constraints.md
index 65335f128..567e70564 100644
--- a/gcc-4.9/gcc/config/i386/constraints.md
+++ b/gcc-4.9/gcc/config/i386/constraints.md
@@ -220,6 +220,13 @@
 ;; We use W prefix to denote any number of
 ;; constant-or-symbol-reference constraints
 
+(define_constraint "We"
+  "32-bit signed integer constant, or a symbolic reference known
+   to fit that range (for sign-extending conversion operations that
+   require non-VOIDmode immediate operands)."
+  (and (match_operand 0 "x86_64_immediate_operand")
+       (match_test "GET_MODE (op) != VOIDmode")))
+
 (define_constraint "Wz"
   "32-bit unsigned integer constant, or a symbolic reference known
    to fit that range (for zero-extending conversion operations that
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index 842be686d..3eefe4ac5 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -13925,13 +13925,13 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
       if (GET_CODE (XEXP (x, 0)) == MULT)
 	{
 	  changed = 1;
-	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
 	}
 
       if (GET_CODE (XEXP (x, 1)) == MULT)
 	{
 	  changed = 1;
-	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
 	}
 
       if (changed
@@ -22755,7 +22755,7 @@ counter_mode (rtx count_exp)
 static rtx
 ix86_copy_addr_to_reg (rtx addr)
 {
-  if (GET_MODE (addr) == Pmode)
+  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
     return copy_addr_to_reg (addr);
   else
     {
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 4a8b46388..25e2e93e3 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -971,6 +971,15 @@
 	  (DI "x86_64_general_operand")
 	  (TI "x86_64_general_operand")])
 
+;; General sign extend operand predicate for integer modes,
+;; which disallows VOIDmode operands and thus it is suitable
+;; for use inside sign_extend.
+(define_mode_attr general_sext_operand
+	[(QI "sext_operand")
+	 (HI "sext_operand")
+	 (SI "x86_64_sext_operand")
+	 (DI "x86_64_sext_operand")])
+
 ;; General sign/zero extend operand predicate for integer modes.
 (define_mode_attr general_szext_operand
 	[(QI "general_operand")
@@ -4730,8 +4739,13 @@
 	     && X87_ENABLE_FLOAT (mode, mode)")
 	 (eq_attr "alternative" "1")
+	   /* ??? For sched1 we need constrain_operands to be able to
+	      select an alternative.  Leave this enabled before RA.  */
 	   (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS
-			|| optimize_function_for_size_p (cfun)")
+			|| optimize_function_for_size_p (cfun)
+			|| !(reload_completed
+			     || reload_in_progress
+			     || lra_in_progress)")
 	]
 	(symbol_ref "true")))
 ])
@@ -5821,10 +5835,11 @@
 	  (eq:CCO (plus:
 		     (sign_extend:
 			(match_operand:SWI 1 "nonimmediate_operand"))
-		     (sign_extend:
-			(match_operand:SWI 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (plus:SWI (match_dup 1) (match_dup 2)))))
+		     (plus:SWI (match_dup 1)
+			       (match_operand:SWI 2
+				  "")))))
    (set (match_operand:SWI 0 "register_operand")
 	(plus:SWI (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
 	      (eq (reg:CCO FLAGS_REG) (const_int 0))
 	      (label_ref (match_operand 3))
 	      (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (PLUS, mode, operands);")
+{
+  ix86_fixup_binary_operands_no_copy (PLUS, mode, operands);
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*addv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (plus:
 		   (sign_extend:
 		      (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
-		   (sign_extend:
-		      (match_operand:SWI 2 "" ",")))
+		   (sign_extend:
+		      (match_operand:SWI 2 ""
+					 "mWe,We")))
 		(sign_extend:
 		   (plus:SWI (match_dup 1) (match_dup 2)))))
    (set (match_operand:SWI 0 "nonimmediate_operand" "=,m")
@@ -5850,6 +5872,31 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "")])
 
+(define_insn "*addv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (plus:
+		   (sign_extend:
+		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
+		   (match_operand: 3 "const_int_operand" "i"))
+		(sign_extend:
+		   (plus:SWI (match_dup 1)
+			     (match_operand:SWI 2 "x86_64_immediate_operand"
+						"")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m")
+	(plus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, mode, operands)
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "add{}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 ;; The lea patterns for modes less than 32 bits need to be matched by
 ;; several insns converted to real lea by splitters.
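
The addv4 pattern above, like the subv4 and mulv4 patterns that follow, encodes a single overflow test: perform the operation in the narrow mode and in the double-width mode, and declare overflow when the two disagree. A minimal sketch of that condition in plain C, for 32-bit operands widened to 64 bits (the helper name is illustrative, not something in GCC):

#include <stdint.h>

/* eq:CCO compares the widened sum of the sign-extended inputs with the
   sign-extension of the narrow sum; overflow is flagged when they differ.  */
static int
add32_overflows (int32_t a, int32_t b)
{
  int64_t wide = (int64_t) a + (int64_t) b;                  /* plus in the wider mode */
  int32_t narrow = (int32_t) ((uint32_t) a + (uint32_t) b);  /* plus in the narrow mode */
  return wide != (int64_t) narrow;                           /* mismatch == signed overflow */
}

None of the widening survives in the generated code: the insn emits a plain add, and the eq:CCO comparison collapses into a branch on the overflow flag.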
@@ -6093,10 +6140,11 @@
 	  (eq:CCO (minus:
 		     (sign_extend:
 			(match_operand:SWI 1 "nonimmediate_operand"))
-		     (sign_extend:
-			(match_operand:SWI 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (minus:SWI (match_dup 1) (match_dup 2)))))
+		     (minus:SWI (match_dup 1)
+				(match_operand:SWI 2
+				   "")))))
    (set (match_operand:SWI 0 "register_operand")
 	(minus:SWI (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
@@ -6104,7 +6152,13 @@
 	      (label_ref (match_operand 3))
 	      (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands);")
+{
+  ix86_fixup_binary_operands_no_copy (MINUS, mode, operands);
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*subv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (minus:
 		   (sign_extend:
 		      (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
-		   (sign_extend:
-		      (match_operand:SWI 2 "" ",m")))
+		   (sign_extend:
+		      (match_operand:SWI 2 ""
+					 "We,m")))
 		(sign_extend:
 		   (minus:SWI (match_dup 1) (match_dup 2)))))
    (set (match_operand:SWI 0 "nonimmediate_operand" "=m,")
@@ -6122,6 +6177,31 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "")])
 
+(define_insn "*subv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (minus:
+		   (sign_extend:
+		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
+		   (match_operand: 3 "const_int_operand" "i"))
+		(sign_extend:
+		   (minus:SWI (match_dup 1)
+			      (match_operand:SWI 2 "x86_64_immediate_operand"
+						 "")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m")
+	(minus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, mode, operands)
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "sub{}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 (define_insn "*sub_3"
   [(set (reg FLAGS_REG)
 	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
@@ -6442,52 +6522,98 @@
 	  (eq:CCO (mult:
 		     (sign_extend:
 			(match_operand:SWI48 1 "register_operand"))
-		     (sign_extend:
-			(match_operand:SWI48 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (mult:SWI48 (match_dup 1) (match_dup 2)))))
+		     (mult:SWI48 (match_dup 1)
+				 (match_operand:SWI48 2
+				    "")))))
    (set (match_operand:SWI48 0 "register_operand")
 	(mult:SWI48 (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
 	      (eq (reg:CCO FLAGS_REG) (const_int 0))
 	      (label_ref (match_operand 3))
-	      (pc)))])
+	      (pc)))]
+  ""
+{
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*mulv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (mult:
 		   (sign_extend:
-		      (match_operand:SWI 1 "nonimmediate_operand" "%rm,rm,0"))
+		      (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
 		   (sign_extend:
-		      (match_operand:SWI 2 "" "K,,mr")))
+		      (match_operand:SWI48 2 ""
+					   "We,mr")))
 		(sign_extend:
-		   (mult:SWI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI 0 "register_operand" "=r,r,r")
-	(mult:SWI (match_dup 1) (match_dup 2)))]
+		   (mult:SWI48 (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(mult:SWI48 (match_dup 1) (match_dup 2)))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
-   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
-  (set_attr "prefix_0f" "0,0,1")
+  (set_attr "prefix_0f" "0,1")
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
-	       (eq_attr "alternative" "1")
+	       (eq_attr "alternative" "0")
		  (const_string "vector")
-	       (and (eq_attr "alternative" "2")
+	       (and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
-	(cond [(and (eq_attr "alternative" "0,1")
+	(cond [(and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "")])
 
+(define_insn "*mulv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (mult:
+		   (sign_extend:
+		      (match_operand:SWI48 1 "nonimmediate_operand" "rm,rm"))
+		   (match_operand: 3 "const_int_operand" "K,i"))
+		(sign_extend:
+		   (mult:SWI48 (match_dup 1)
+			       (match_operand:SWI 2 "x86_64_immediate_operand"
+						  "K,")))))
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(mult:SWI48 (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "@
+   imul{}\t{%2, %1, %0|%0, %1, %2}
+   imul{}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "imul")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(match_operand 1 "memory_operand")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "bdver1_decode" "direct")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 (define_expand "mul3"
   [(parallel [(set (match_operand: 0 "register_operand")
 		   (mult:
diff --git a/gcc-4.9/gcc/config/i386/predicates.md b/gcc-4.9/gcc/config/i386/predicates.md
index 0492241fd..2ef138424 100644
--- a/gcc-4.9/gcc/config/i386/predicates.md
+++ b/gcc-4.9/gcc/config/i386/predicates.md
@@ -338,6 +338,20 @@
 	   (match_operand 0 "x86_64_immediate_operand"))
       (match_operand 0 "general_operand")))
 
+;; Return true if OP is non-VOIDmode general operand representable
+;; on x86_64.  This predicate is used in sign-extending conversion
+;; operations that require non-VOIDmode immediate operands.
+(define_predicate "x86_64_sext_operand"
+  (and (match_test "GET_MODE (op) != VOIDmode")
+       (match_operand 0 "x86_64_general_operand")))
+
+;; Return true if OP is non-VOIDmode general operand.  This predicate
+;; is used in sign-extending conversion operations that require
+;; non-VOIDmode immediate operands.
+(define_predicate "sext_operand"
+  (and (match_test "GET_MODE (op) != VOIDmode")
+       (match_operand 0 "general_operand")))
+
 ;; Return true if OP is representable on x86_64 as zero-extended operand.
 ;; This predicate is used in zero-extending conversion operations that
 ;; require non-VOIDmode immediate operands.
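
The We constraint and the sext_operand/x86_64_sext_operand predicates above rest on one encoding fact: x86-64 ALU instructions sign-extend their 32-bit immediate field, so a 64-bit constant is directly usable only if that sign-extension reproduces it. A standalone sketch of the check (the helper name is ours, not GCC's):

#include <stdbool.h>
#include <stdint.h>

/* True iff VAL survives a truncate-to-32-bits, sign-extend-back round
   trip, i.e. it could be encoded as the imm32 of add/sub/imul.  */
static bool
fits_signed_imm32 (int64_t val)
{
  return val == (int64_t) (int32_t) (uint32_t) val;
}

For example, 0x7fffffff passes while 0x80000000 does not (it would come back as 0xffffffff80000000), so such constants get loaded into a register first. The additional GET_MODE (op) != VOIDmode test is needed because a CONST_INT carries no mode and therefore may not sit directly inside a sign_extend.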
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index f30b27e86..72a4d6d07 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -2712,50 +2712,46 @@
 	  (fma:FMAMODEM
 	    (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "")
+	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
 
 (define_expand "fms4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "")
+	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
 
 (define_expand "fnma4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "")
+	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
 
 (define_expand "fnms4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "")
+	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
 
 ;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-			       (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-			       (V4SF "TARGET_FMA || TARGET_FMA4")
-			       (V2DF "TARGET_FMA || TARGET_FMA4")
-			       (V8SF "TARGET_FMA || TARGET_FMA4")
-			       (V4DF "TARGET_FMA || TARGET_FMA4")
-			       (V16SF "TARGET_AVX512F")
-			       (V8DF "TARGET_AVX512F")])
+(define_mode_iterator FMAMODE
+  [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (V4SF "TARGET_FMA || TARGET_FMA4")
+   (V2DF "TARGET_FMA || TARGET_FMA4")
+   (V8SF "TARGET_FMA || TARGET_FMA4")
+   (V4DF "TARGET_FMA || TARGET_FMA4")
+   (V16SF "TARGET_AVX512F")
+   (V8DF "TARGET_AVX512F")])
 
 (define_expand "fma4i_fmadd_"
   [(set (match_operand:FMAMODE 0 "register_operand")
 	(fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")
 		     (match_operand:FMAMODE 2 "nonimmediate_operand")
-		     (match_operand:FMAMODE 3 "nonimmediate_operand")))]
-  "")
+		     (match_operand:FMAMODE 3 "nonimmediate_operand")))])
 
 (define_expand "avx512f_fmadd__maskz"
   [(match_operand:VF_512 0 "register_operand")
@@ -2771,12 +2767,20 @@
   DONE;
 })
 
+(define_mode_iterator FMAMODE_NOVF512
+  [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (V4SF "TARGET_FMA || TARGET_FMA4")
+   (V2DF "TARGET_FMA || TARGET_FMA4")
+   (V8SF "TARGET_FMA || TARGET_FMA4")
+   (V4DF "TARGET_FMA || TARGET_FMA4")])
+
 (define_insn "fma_fmadd_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "" "%0,0,v,x,x")
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (match_operand:FMAMODE 3 "" "v,,0,xm,x")))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x")
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x")))]
   " && "
   "@
   vfmadd132\t{%2, %3, %0|%0, %3, %2}
@@ -2788,6 +2792,21 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (match_operand:VF_512 1 "" "%0,0,v")
+	  (match_operand:VF_512 2 "" ",v,")
+	  (match_operand:VF_512 3 "" "v,,0")))]
+  " && "
+  "@
+   vfmadd132\t{%2, %3, %0|%0, %3, %2}
+   vfmadd213\t{%3, %2, %0|%0, %2, %3}
+   vfmadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2821,12 +2840,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fmsub_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "" "%0, 0, v, x,x")
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "" "v,,0,xm,x"))))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x")
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x"))))]
   " && "
   "@
   vfmsub132\t{%2, %3, %0|%0, %3, %2}
@@ -2838,6 +2857,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (match_operand:VF_512 1 "" "%0,0,v")
+	  (match_operand:VF_512 2 "" ",v,")
+	  (neg:VF_512
+	    (match_operand:VF_512 3 "" "v,,0"))))]
+  " && "
+  "@
+   vfmsub132\t{%2, %3, %0|%0, %3, %2}
+   vfmsub213\t{%3, %2, %0|%0, %2, %3}
+   vfmsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2873,12 +2908,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fnmadd_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "" "%0,0,v,x,x"))
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (match_operand:FMAMODE 3 "" "v,,0,xm,x")))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x")))]
   " && "
   "@
   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
@@ -2890,6 +2925,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fnmadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (neg:VF_512
+	    (match_operand:VF_512 1 "" "%0,0,v"))
+	  (match_operand:VF_512 2 "" ",v,")
+	  (match_operand:VF_512 3 "" "v,,0")))]
+  " && "
+  "@
+   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
+   vfnmadd213\t{%3, %2, %0|%0, %2, %3}
+   vfnmadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fnmadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2925,13 +2976,13 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fnmsub_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "" "%0,0,v,x,x"))
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "" "v,,0,xm,x"))))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x"))))]
   " && "
   "@
   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
@@ -2943,6 +2994,23 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fnmsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (neg:VF_512
+	    (match_operand:VF_512 1 "" "%0,0,v"))
+	  (match_operand:VF_512 2 "" ",v,")
+	  (neg:VF_512
+	    (match_operand:VF_512 3 "" "v,,0"))))]
+  " && "
+  "@
+   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
+   vfnmsub213\t{%3, %2, %0|%0, %2, %3}
+   vfnmsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fnmsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -3014,11 +3082,11 @@
 })
 
 (define_insn "fma_fmaddsub_"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "" "%0,0,v,x,x")
-	   (match_operand:VF 2 "" ",v,,x,m")
-	   (match_operand:VF 3 "" "v,,0,xm,x")]
+  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_128_256
+	  [(match_operand:VF_128_256 1 "" "%0,0,v,x,x")
+	   (match_operand:VF_128_256 2 "" ",v,,x,m")
+	   (match_operand:VF_128_256 3 "" "v,,0,xm,x")]
 	  UNSPEC_FMADDSUB))]
   "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && && "
   "@
@@ -3031,6 +3099,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmaddsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "" "%0,0,v")
+	   (match_operand:VF_512 2 "" ",v,")
+	   (match_operand:VF_512 3 "" "v,,0")]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_AVX512F && && "
+  "@
+   vfmaddsub132\t{%2, %3, %0|%0, %3, %2}
+   vfmaddsub213\t{%3, %2, %0|%0, %2, %3}
+   vfmaddsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmaddsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -3066,12 +3150,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fmsubadd_"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "" "%0,0,v,x,x")
-	   (match_operand:VF 2 "" ",v,,x,m")
-	   (neg:VF
-	     (match_operand:VF 3 "" "v,,0,xm,x"))]
+  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_128_256
+	  [(match_operand:VF_128_256 1 "" "%0,0,v,x,x")
+	   (match_operand:VF_128_256 2 "" ",v,,x,m")
+	   (neg:VF_128_256
+	     (match_operand:VF_128_256 3 "" "v,,0,xm,x"))]
 	  UNSPEC_FMADDSUB))]
   "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && && "
   "@
@@ -3084,6 +3168,23 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmsubadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "" "%0,0,v")
+	   (match_operand:VF_512 2 "" ",v,")
+	   (neg:VF_512
+	     (match_operand:VF_512 3 "" "v,,0"))]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_AVX512F && && "
+  "@
+   vfmsubadd132\t{%2, %3, %0|%0, %3, %2}
+   vfmsubadd213\t{%3, %2, %0|%0, %2, %3}
+   vfmsubadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmsubadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
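
To see the new 512-bit FMA patterns at work, no intrinsics are required; GCC's generic vector extensions suffice. A sketch, assuming -O2 -mavx512f and a -ffp-contract setting that permits fusion (the typedef name is ours):

typedef float v16sf __attribute__ ((__vector_size__ (64)));

/* GCC may contract the separate multiply and add into a single
   vfmadd132ps/vfmadd213ps/vfmadd231ps on %zmm registers.  */
v16sf
fused_madd (v16sf a, v16sf b, v16sf c)
{
  return a * b + c;
}

Which of the three forms is emitted depends on which input the register allocator overlaps with the destination; that choice is exactly what the three constraint alternatives in the VF_512 define_insns above expose.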