diff options
author | Ben Cheng <bccheng@google.com> | 2014-04-22 13:33:12 -0700 |
---|---|---|
committer | Ben Cheng <bccheng@google.com> | 2014-04-22 13:33:12 -0700 |
commit | e3cc64dec20832769406aa38cde83c7dd4194bf4 (patch) | |
tree | ef8e39be37cfe0cb69d850043b7924389ff17164 /gcc-4.9/gcc/config/i386 | |
parent | f33c7b3122b1d7950efa88067c9a156229ba647b (diff) | |
download | toolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.tar.gz toolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.tar.bz2 toolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.zip |
[4.9] GCC 4.9.0 official release refresh
Change-Id: Ic99a7da8b44b789a48aeec93b33e93944d6e6767
Diffstat (limited to 'gcc-4.9/gcc/config/i386')
-rw-r--r-- | gcc-4.9/gcc/config/i386/avx512fintrin.h | 83 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/bmiintrin.h | 48 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/constraints.md | 7 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.c | 6 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.md | 176 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/predicates.md | 14 | ||||
-rw-r--r-- | gcc-4.9/gcc/config/i386/sse.md | 207 |
7 files changed, 459 insertions, 82 deletions
diff --git a/gcc-4.9/gcc/config/i386/avx512fintrin.h b/gcc-4.9/gcc/config/i386/avx512fintrin.h index 960286618..314895ad7 100644 --- a/gcc-4.9/gcc/config/i386/avx512fintrin.h +++ b/gcc-4.9/gcc/config/i386/avx512fintrin.h @@ -39,6 +39,8 @@ typedef double __v8df __attribute__ ((__vector_size__ (64))); typedef float __v16sf __attribute__ ((__vector_size__ (64))); typedef long long __v8di __attribute__ ((__vector_size__ (64))); typedef int __v16si __attribute__ ((__vector_size__ (64))); +typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef char __v64qi __attribute__ ((__vector_size__ (64))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ @@ -130,6 +132,32 @@ _mm512_undefined_si512 (void) return __Y; } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_epi8 (char __A) +{ + return __extension__ (__m512i)(__v64qi) + { __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A }; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set1_epi16 (short __A) +{ + return __extension__ (__m512i)(__v32hi) + { __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A }; +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_set1_pd (double __A) @@ -152,6 +180,54 @@ _mm512_set1_ps (float __A) (__mmask16) -1); } +/* Create the vector [A B C D A B C D A B C D A B C D]. */ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set4_epi32 (int __A, int __B, int __C, int __D) +{ + return __extension__ (__m512i)(__v16si) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set4_epi64 (long long __A, long long __B, long long __C, + long long __D) +{ + return __extension__ (__m512i) (__v8di) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set4_pd (double __A, double __B, double __C, double __D) +{ + return __extension__ (__m512d) + { __D, __C, __B, __A, __D, __C, __B, __A }; +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_set4_ps (float __A, float __B, float __C, float __D) +{ + return __extension__ (__m512) + { __D, __C, __B, __A, __D, __C, __B, __A, + __D, __C, __B, __A, __D, __C, __B, __A }; +} + +#define _mm512_setr4_epi64(e0,e1,e2,e3) \ + _mm512_set4_epi64(e3,e2,e1,e0) + +#define _mm512_setr4_epi32(e0,e1,e2,e3) \ + _mm512_set4_epi32(e3,e2,e1,e0) + +#define _mm512_setr4_pd(e0,e1,e2,e3) \ + _mm512_set4_pd(e3,e2,e1,e0) + +#define _mm512_setr4_ps(e0,e1,e2,e3) \ + _mm512_set4_ps(e3,e2,e1,e0) + extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_setzero_ps (void) @@ -169,6 +245,13 @@ _mm512_setzero_pd (void) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_setzero_epi32 (void) +{ + return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_setzero_si512 (void) { return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; diff --git a/gcc-4.9/gcc/config/i386/bmiintrin.h b/gcc-4.9/gcc/config/i386/bmiintrin.h index b86adf179..b2d7c60ea 100644 --- a/gcc-4.9/gcc/config/i386/bmiintrin.h +++ b/gcc-4.9/gcc/config/i386/bmiintrin.h @@ -40,7 +40,6 @@ __tzcnt_u16 (unsigned short __X) return __builtin_ctzs (__X); } - extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u32 (unsigned int __X, unsigned int __Y) { @@ -66,17 +65,34 @@ __blsi_u32 (unsigned int __X) } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u32 (unsigned int __X) +{ + return __blsi_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u32 (unsigned int __X) { return __X ^ (__X - 1); } extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u32 (unsigned int __X) +{ + return __blsmsk_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsr_u32 (unsigned int __X) { return __X & (__X - 1); } +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u32 (unsigned int __X) +{ + return __blsr_u32 (__X); +} extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u32 (unsigned int __X) @@ -84,6 +100,12 @@ __tzcnt_u32 (unsigned int __X) return __builtin_ctz (__X); } +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u32 (unsigned int __X) +{ + return __builtin_ctz (__X); +} + #ifdef __x86_64__ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -111,23 +133,47 @@ __blsi_u64 (unsigned long long __X) } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u64 (unsigned long long __X) +{ + return __blsi_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsmsk_u64 (unsigned long long __X) { return __X ^ (__X - 1); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u64 (unsigned long long __X) +{ + return __blsmsk_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __blsr_u64 (unsigned long long __X) { return __X & (__X - 1); } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u64 (unsigned long long __X) +{ + return __blsr_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __tzcnt_u64 (unsigned long long __X) { return __builtin_ctzll (__X); } +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ctzll (__X); +} + #endif /* __x86_64__ */ #ifdef __DISABLE_BMI__ diff --git a/gcc-4.9/gcc/config/i386/constraints.md b/gcc-4.9/gcc/config/i386/constraints.md index 65335f128..567e70564 100644 --- a/gcc-4.9/gcc/config/i386/constraints.md +++ b/gcc-4.9/gcc/config/i386/constraints.md @@ -220,6 +220,13 @@ ;; We use W prefix to denote any number of ;; constant-or-symbol-reference constraints +(define_constraint "We" + "32-bit signed integer constant, or a symbolic reference known + to fit that range (for sign-extending conversion operations that + require non-VOIDmode immediate operands)." + (and (match_operand 0 "x86_64_immediate_operand") + (match_test "GET_MODE (op) != VOIDmode"))) + (define_constraint "Wz" "32-bit unsigned integer constant, or a symbolic reference known to fit that range (for zero-extending conversion operations that diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index 842be686d..3eefe4ac5 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -13925,13 +13925,13 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (XEXP (x, 0)) == MULT) { changed = 1; - XEXP (x, 0) = force_operand (XEXP (x, 0), 0); + XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); } if (GET_CODE (XEXP (x, 1)) == MULT) { changed = 1; - XEXP (x, 1) = force_operand (XEXP (x, 1), 0); + XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); } if (changed @@ -22755,7 +22755,7 @@ counter_mode (rtx count_exp) static rtx ix86_copy_addr_to_reg (rtx addr) { - if (GET_MODE (addr) == Pmode) + if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) return copy_addr_to_reg (addr); else { diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 4a8b46388..25e2e93e3 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -971,6 +971,15 @@ (DI "x86_64_general_operand") (TI "x86_64_general_operand")]) +;; General sign extend operand predicate for integer modes, +;; which disallows VOIDmode operands and thus it is suitable +;; for use inside sign_extend. +(define_mode_attr general_sext_operand + [(QI "sext_operand") + (HI "sext_operand") + (SI "x86_64_sext_operand") + (DI "x86_64_sext_operand")]) + ;; General sign/zero extend operand predicate for integer modes. (define_mode_attr general_szext_operand [(QI "general_operand") @@ -4730,8 +4739,13 @@ && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode)") (eq_attr "alternative" "1") + /* ??? For sched1 we need constrain_operands to be able to + select an alternative. Leave this enabled before RA. */ (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS - || optimize_function_for_size_p (cfun)") + || optimize_function_for_size_p (cfun) + || !(reload_completed + || reload_in_progress + || lra_in_progress)") ] (symbol_ref "true"))) ]) @@ -5821,10 +5835,11 @@ (eq:CCO (plus:<DWI> (sign_extend:<DWI> (match_operand:SWI 1 "nonimmediate_operand")) - (sign_extend:<DWI> - (match_operand:SWI 2 "<general_operand>"))) + (match_dup 4)) (sign_extend:<DWI> - (plus:SWI (match_dup 1) (match_dup 2))))) + (plus:SWI (match_dup 1) + (match_operand:SWI 2 + "<general_operand>"))))) (set (match_operand:SWI 0 "register_operand") (plus:SWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else @@ -5832,7 +5847,13 @@ (label_ref (match_operand 3)) (pc)))] "" - "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") +{ + ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); + if (CONST_INT_P (operands[2])) + operands[4] = operands[2]; + else + operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); +}) (define_insn "*addv<mode>4" [(set (reg:CCO FLAGS_REG) @@ -5840,7 +5861,8 @@ (sign_extend:<DWI> (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) (sign_extend:<DWI> - (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))) + (match_operand:SWI 2 "<general_sext_operand>" + "<r>mWe,<r>We"))) (sign_extend:<DWI> (plus:SWI (match_dup 1) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m") @@ -5850,6 +5872,31 @@ [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) +(define_insn "*addv<mode>4_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (plus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "0")) + (match_operand:<DWI> 3 "const_int_operand" "i")) + (sign_extend:<DWI> + (plus:SWI (match_dup 1) + (match_operand:SWI 2 "x86_64_immediate_operand" + "<i>"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (plus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[3])" + "add{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (match_test "<MODE_SIZE> == 8") + (const_string "4")] + (const_string "<MODE_SIZE>")))]) + ;; The lea patterns for modes less than 32 bits need to be matched by ;; several insns converted to real lea by splitters. @@ -6093,10 +6140,11 @@ (eq:CCO (minus:<DWI> (sign_extend:<DWI> (match_operand:SWI 1 "nonimmediate_operand")) - (sign_extend:<DWI> - (match_operand:SWI 2 "<general_operand>"))) + (match_dup 4)) (sign_extend:<DWI> - (minus:SWI (match_dup 1) (match_dup 2))))) + (minus:SWI (match_dup 1) + (match_operand:SWI 2 + "<general_operand>"))))) (set (match_operand:SWI 0 "register_operand") (minus:SWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else @@ -6104,7 +6152,13 @@ (label_ref (match_operand 3)) (pc)))] "" - "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") +{ + ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands); + if (CONST_INT_P (operands[2])) + operands[4] = operands[2]; + else + operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); +}) (define_insn "*subv<mode>4" [(set (reg:CCO FLAGS_REG) @@ -6112,7 +6166,8 @@ (sign_extend:<DWI> (match_operand:SWI 1 "nonimmediate_operand" "0,0")) (sign_extend:<DWI> - (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) + (match_operand:SWI 2 "<general_sext_operand>" + "<r>We,<r>m"))) (sign_extend:<DWI> (minus:SWI (match_dup 1) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") @@ -6122,6 +6177,31 @@ [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) +(define_insn "*subv<mode>4_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "0")) + (match_operand:<DWI> 3 "const_int_operand" "i")) + (sign_extend:<DWI> + (minus:SWI (match_dup 1) + (match_operand:SWI 2 "x86_64_immediate_operand" + "<i>"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[3])" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (match_test "<MODE_SIZE> == 8") + (const_string "4")] + (const_string "<MODE_SIZE>")))]) + (define_insn "*sub<mode>_3" [(set (reg FLAGS_REG) (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0") @@ -6442,52 +6522,98 @@ (eq:CCO (mult:<DWI> (sign_extend:<DWI> (match_operand:SWI48 1 "register_operand")) - (sign_extend:<DWI> - (match_operand:SWI48 2 "<general_operand>"))) + (match_dup 4)) (sign_extend:<DWI> - (mult:SWI48 (match_dup 1) (match_dup 2))))) + (mult:SWI48 (match_dup 1) + (match_operand:SWI48 2 + "<general_operand>"))))) (set (match_operand:SWI48 0 "register_operand") (mult:SWI48 (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) - (pc)))]) + (pc)))] + "" +{ + if (CONST_INT_P (operands[2])) + operands[4] = operands[2]; + else + operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]); +}) (define_insn "*mulv<mode>4" [(set (reg:CCO FLAGS_REG) (eq:CCO (mult:<DWI> (sign_extend:<DWI> - (match_operand:SWI 1 "nonimmediate_operand" "%rm,rm,0")) + (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0")) (sign_extend:<DWI> - (match_operand:SWI 2 "<general_operand>" "K,<i>,mr"))) + (match_operand:SWI48 2 "<general_sext_operand>" + "We,mr"))) (sign_extend:<DWI> - (mult:SWI (match_dup 1) (match_dup 2))))) - (set (match_operand:SWI 0 "register_operand" "=r,r,r") - (mult:SWI (match_dup 1) (match_dup 2)))] + (mult:SWI48 (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI48 0 "register_operand" "=r,r") + (mult:SWI48 (match_dup 1) (match_dup 2)))] "!(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} - imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} imul{<imodesuffix>}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") - (set_attr "prefix_0f" "0,0,1") + (set_attr "prefix_0f" "0,1") (set (attr "athlon_decode") (cond [(eq_attr "cpu" "athlon") (const_string "vector") - (eq_attr "alternative" "1") + (eq_attr "alternative" "0") (const_string "vector") - (and (eq_attr "alternative" "2") + (and (eq_attr "alternative" "1") (match_operand 1 "memory_operand")) (const_string "vector")] (const_string "direct"))) (set (attr "amdfam10_decode") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "1") (match_operand 1 "memory_operand")) (const_string "vector")] (const_string "direct"))) (set_attr "bdver1_decode" "direct") (set_attr "mode" "<MODE>")]) +(define_insn "*mulv<mode>4_1" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (mult:<DWI> + (sign_extend:<DWI> + (match_operand:SWI48 1 "nonimmediate_operand" "rm,rm")) + (match_operand:<DWI> 3 "const_int_operand" "K,i")) + (sign_extend:<DWI> + (mult:SWI48 (match_dup 1) + (match_operand:SWI 2 "x86_64_immediate_operand" + "K,<i>"))))) + (set (match_operand:SWI48 0 "register_operand" "=r,r") + (mult:SWI48 (match_dup 1) (match_dup 2)))] + "!(MEM_P (operands[1]) && MEM_P (operands[2])) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == INTVAL (operands[3])" + "@ + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(match_operand 1 "memory_operand") + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "<MODE>") + (set (attr "length_immediate") + (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") + (const_string "1") + (match_test "<MODE_SIZE> == 8") + (const_string "4")] + (const_string "<MODE_SIZE>")))]) + (define_expand "<u>mul<mode><dwi>3" [(parallel [(set (match_operand:<DWI> 0 "register_operand") (mult:<DWI> diff --git a/gcc-4.9/gcc/config/i386/predicates.md b/gcc-4.9/gcc/config/i386/predicates.md index 0492241fd..2ef138424 100644 --- a/gcc-4.9/gcc/config/i386/predicates.md +++ b/gcc-4.9/gcc/config/i386/predicates.md @@ -338,6 +338,20 @@ (match_operand 0 "x86_64_immediate_operand")) (match_operand 0 "general_operand"))) +;; Return true if OP is non-VOIDmode general operand representable +;; on x86_64. This predicate is used in sign-extending conversion +;; operations that require non-VOIDmode immediate operands. +(define_predicate "x86_64_sext_operand" + (and (match_test "GET_MODE (op) != VOIDmode") + (match_operand 0 "x86_64_general_operand"))) + +;; Return true if OP is non-VOIDmode general operand. This predicate +;; is used in sign-extending conversion operations that require +;; non-VOIDmode immediate operands. +(define_predicate "sext_operand" + (and (match_test "GET_MODE (op) != VOIDmode") + (match_operand 0 "general_operand"))) + ;; Return true if OP is representable on x86_64 as zero-extended operand. ;; This predicate is used in zero-extending conversion operations that ;; require non-VOIDmode immediate operands. diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md index f30b27e86..72a4d6d07 100644 --- a/gcc-4.9/gcc/config/i386/sse.md +++ b/gcc-4.9/gcc/config/i386/sse.md @@ -2712,50 +2712,46 @@ (fma:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") - (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "") + (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) (define_expand "fms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") (fma:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") - (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "") + (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) (define_expand "fnma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") (fma:FMAMODEM (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") - (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "") + (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) (define_expand "fnms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") (fma:FMAMODEM (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") - (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "") + (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) ;; The builtins for intrinsics are not constrained by SSE math enabled. -(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") - (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") - (V4SF "TARGET_FMA || TARGET_FMA4") - (V2DF "TARGET_FMA || TARGET_FMA4") - (V8SF "TARGET_FMA || TARGET_FMA4") - (V4DF "TARGET_FMA || TARGET_FMA4") - (V16SF "TARGET_AVX512F") - (V8DF "TARGET_AVX512F")]) +(define_mode_iterator FMAMODE + [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4") + (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "fma4i_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") - (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "") + (match_operand:FMAMODE 3 "nonimmediate_operand")))]) (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>" [(match_operand:VF_512 0 "register_operand") @@ -2771,12 +2767,20 @@ DONE; }) +(define_mode_iterator FMAMODE_NOVF512 + [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4")]) + (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x") - (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))] + [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x") + (fma:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x") + (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))] "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -2788,6 +2792,21 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (fma:VF_512 + (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fmadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 @@ -2821,12 +2840,12 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0, 0, v, x,x") - (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))] + [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x") + (fma:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x") + (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (neg:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))] "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -2838,6 +2857,22 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (fma:VF_512 + (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (neg:VF_512 + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fmsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 @@ -2873,12 +2908,12 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x")) - (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))] + [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x") + (fma:FMAMODE_NOVF512 + (neg:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")) + (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))] "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -2890,6 +2925,22 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")) + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fnmadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 @@ -2925,13 +2976,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x")) - (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))] + [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x") + (fma:FMAMODE_NOVF512 + (neg:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")) + (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (neg:FMAMODE_NOVF512 + (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))] "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -2943,6 +2994,23 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (fma:VF_512 + (neg:VF_512 + (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")) + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (neg:VF_512 + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] + "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fnmsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 @@ -3014,11 +3082,11 @@ }) (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") - (unspec:VF - [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x") - (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")] + [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x") + (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")] UNSPEC_FMADDSUB))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ @@ -3031,6 +3099,22 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (unspec:VF_512 + [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] + UNSPEC_FMADDSUB))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 @@ -3066,12 +3150,12 @@ (set_attr "mode" "<MODE>")]) (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") - (unspec:VF - [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x") - (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") - (neg:VF - (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))] + [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x") + (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m") + (neg:VF_128_256 + (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))] UNSPEC_FMADDSUB))] "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ @@ -3084,6 +3168,23 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" + [(set (match_operand:VF_512 0 "register_operand" "=v,v,v") + (unspec:VF_512 + [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v") + (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (neg:VF_512 + (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] + UNSPEC_FMADDSUB))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" + "@ + vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} + vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} + vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" + [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>" [(set (match_operand:VF_512 0 "register_operand" "=v,v") (vec_merge:VF_512 |