aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc/config/i386
diff options
context:
space:
mode:
authorBen Cheng <bccheng@google.com>2014-04-22 13:33:12 -0700
committerBen Cheng <bccheng@google.com>2014-04-22 13:33:12 -0700
commite3cc64dec20832769406aa38cde83c7dd4194bf4 (patch)
treeef8e39be37cfe0cb69d850043b7924389ff17164 /gcc-4.9/gcc/config/i386
parentf33c7b3122b1d7950efa88067c9a156229ba647b (diff)
downloadtoolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.tar.gz
toolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.tar.bz2
toolchain_gcc-e3cc64dec20832769406aa38cde83c7dd4194bf4.zip
[4.9] GCC 4.9.0 official release refresh
Change-Id: Ic99a7da8b44b789a48aeec93b33e93944d6e6767
Diffstat (limited to 'gcc-4.9/gcc/config/i386')
-rw-r--r--gcc-4.9/gcc/config/i386/avx512fintrin.h83
-rw-r--r--gcc-4.9/gcc/config/i386/bmiintrin.h48
-rw-r--r--gcc-4.9/gcc/config/i386/constraints.md7
-rw-r--r--gcc-4.9/gcc/config/i386/i386.c6
-rw-r--r--gcc-4.9/gcc/config/i386/i386.md176
-rw-r--r--gcc-4.9/gcc/config/i386/predicates.md14
-rw-r--r--gcc-4.9/gcc/config/i386/sse.md207
7 files changed, 459 insertions, 82 deletions
diff --git a/gcc-4.9/gcc/config/i386/avx512fintrin.h b/gcc-4.9/gcc/config/i386/avx512fintrin.h
index 960286618..314895ad7 100644
--- a/gcc-4.9/gcc/config/i386/avx512fintrin.h
+++ b/gcc-4.9/gcc/config/i386/avx512fintrin.h
@@ -39,6 +39,8 @@ typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
@@ -130,6 +132,32 @@ _mm512_undefined_si512 (void)
return __Y;
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi8 (char __A)
+{
+ return __extension__ (__m512i)(__v64qi)
+ { __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi16 (short __A)
+{
+ return __extension__ (__m512i)(__v32hi)
+ { __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
@@ -152,6 +180,54 @@ _mm512_set1_ps (float __A)
(__mmask16) -1);
}
+/* Create the vector [A B C D A B C D A B C D A B C D]. */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
+{
+ return __extension__ (__m512i)(__v16si)
+ { __D, __C, __B, __A, __D, __C, __B, __A,
+ __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m512i) (__v8di)
+ { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m512d)
+ { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_ps (float __A, float __B, float __C, float __D)
+{
+ return __extension__ (__m512)
+ { __D, __C, __B, __A, __D, __C, __B, __A,
+ __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+#define _mm512_setr4_epi64(e0,e1,e2,e3) \
+ _mm512_set4_epi64(e3,e2,e1,e0)
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3) \
+ _mm512_set4_epi32(e3,e2,e1,e0)
+
+#define _mm512_setr4_pd(e0,e1,e2,e3) \
+ _mm512_set4_pd(e3,e2,e1,e0)
+
+#define _mm512_setr4_ps(e0,e1,e2,e3) \
+ _mm512_set4_ps(e3,e2,e1,e0)
+
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
@@ -169,6 +245,13 @@ _mm512_setzero_pd (void)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_epi32 (void)
+{
+ return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
diff --git a/gcc-4.9/gcc/config/i386/bmiintrin.h b/gcc-4.9/gcc/config/i386/bmiintrin.h
index b86adf179..b2d7c60ea 100644
--- a/gcc-4.9/gcc/config/i386/bmiintrin.h
+++ b/gcc-4.9/gcc/config/i386/bmiintrin.h
@@ -40,7 +40,6 @@ __tzcnt_u16 (unsigned short __X)
return __builtin_ctzs (__X);
}
-
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u32 (unsigned int __X, unsigned int __Y)
{
@@ -66,17 +65,34 @@ __blsi_u32 (unsigned int __X)
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u32 (unsigned int __X)
+{
+ return __blsi_u32 (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u32 (unsigned int __X)
{
return __X ^ (__X - 1);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u32 (unsigned int __X)
+{
+ return __blsmsk_u32 (__X);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u32 (unsigned int __X)
{
return __X & (__X - 1);
}
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u32 (unsigned int __X)
+{
+ return __blsr_u32 (__X);
+}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u32 (unsigned int __X)
@@ -84,6 +100,12 @@ __tzcnt_u32 (unsigned int __X)
return __builtin_ctz (__X);
}
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u32 (unsigned int __X)
+{
+ return __builtin_ctz (__X);
+}
+
#ifdef __x86_64__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -111,23 +133,47 @@ __blsi_u64 (unsigned long long __X)
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u64 (unsigned long long __X)
+{
+ return __blsi_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u64 (unsigned long long __X)
{
return __X ^ (__X - 1);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u64 (unsigned long long __X)
+{
+ return __blsmsk_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u64 (unsigned long long __X)
{
return __X & (__X - 1);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u64 (unsigned long long __X)
+{
+ return __blsr_u64 (__X);
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u64 (unsigned long long __X)
{
return __builtin_ctzll (__X);
}
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u64 (unsigned long long __X)
+{
+ return __builtin_ctzll (__X);
+}
+
#endif /* __x86_64__ */
#ifdef __DISABLE_BMI__
diff --git a/gcc-4.9/gcc/config/i386/constraints.md b/gcc-4.9/gcc/config/i386/constraints.md
index 65335f128..567e70564 100644
--- a/gcc-4.9/gcc/config/i386/constraints.md
+++ b/gcc-4.9/gcc/config/i386/constraints.md
@@ -220,6 +220,13 @@
;; We use W prefix to denote any number of
;; constant-or-symbol-reference constraints
+(define_constraint "We"
+ "32-bit signed integer constant, or a symbolic reference known
+ to fit that range (for sign-extending conversion operations that
+ require non-VOIDmode immediate operands)."
+ (and (match_operand 0 "x86_64_immediate_operand")
+ (match_test "GET_MODE (op) != VOIDmode")))
+
(define_constraint "Wz"
"32-bit unsigned integer constant, or a symbolic reference known
to fit that range (for zero-extending conversion operations that
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index 842be686d..3eefe4ac5 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -13925,13 +13925,13 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (GET_CODE (XEXP (x, 0)) == MULT)
{
changed = 1;
- XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
}
if (GET_CODE (XEXP (x, 1)) == MULT)
{
changed = 1;
- XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
}
if (changed
@@ -22755,7 +22755,7 @@ counter_mode (rtx count_exp)
static rtx
ix86_copy_addr_to_reg (rtx addr)
{
- if (GET_MODE (addr) == Pmode)
+ if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
return copy_addr_to_reg (addr);
else
{
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 4a8b46388..25e2e93e3 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -971,6 +971,15 @@
(DI "x86_64_general_operand")
(TI "x86_64_general_operand")])
+;; General sign extend operand predicate for integer modes,
+;; which disallows VOIDmode operands and thus it is suitable
+;; for use inside sign_extend.
+(define_mode_attr general_sext_operand
+ [(QI "sext_operand")
+ (HI "sext_operand")
+ (SI "x86_64_sext_operand")
+ (DI "x86_64_sext_operand")])
+
;; General sign/zero extend operand predicate for integer modes.
(define_mode_attr general_szext_operand
[(QI "general_operand")
@@ -4730,8 +4739,13 @@
&& X87_ENABLE_FLOAT (<MODEF:MODE>mode,
<SWI48:MODE>mode)")
(eq_attr "alternative" "1")
+ /* ??? For sched1 we need constrain_operands to be able to
+ select an alternative. Leave this enabled before RA. */
(symbol_ref "TARGET_INTER_UNIT_CONVERSIONS
- || optimize_function_for_size_p (cfun)")
+ || optimize_function_for_size_p (cfun)
+ || !(reload_completed
+ || reload_in_progress
+ || lra_in_progress)")
]
(symbol_ref "true")))
])
@@ -5821,10 +5835,11 @@
(eq:CCO (plus:<DWI>
(sign_extend:<DWI>
(match_operand:SWI 1 "nonimmediate_operand"))
- (sign_extend:<DWI>
- (match_operand:SWI 2 "<general_operand>")))
+ (match_dup 4))
(sign_extend:<DWI>
- (plus:SWI (match_dup 1) (match_dup 2)))))
+ (plus:SWI (match_dup 1)
+ (match_operand:SWI 2
+ "<general_operand>")))))
(set (match_operand:SWI 0 "register_operand")
(plus:SWI (match_dup 1) (match_dup 2)))])
(set (pc) (if_then_else
@@ -5832,7 +5847,13 @@
(label_ref (match_operand 3))
(pc)))]
""
- "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
+{
+ ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+ if (CONST_INT_P (operands[2]))
+ operands[4] = operands[2];
+ else
+ operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
+})
(define_insn "*addv<mode>4"
[(set (reg:CCO FLAGS_REG)
@@ -5840,7 +5861,8 @@
(sign_extend:<DWI>
(match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>")))
+ (match_operand:SWI 2 "<general_sext_operand>"
+ "<r>mWe,<r>We")))
(sign_extend:<DWI>
(plus:SWI (match_dup 1) (match_dup 2)))))
(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
@@ -5850,6 +5872,31 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "*addv<mode>4_1"
+ [(set (reg:CCO FLAGS_REG)
+ (eq:CCO (plus:<DWI>
+ (sign_extend:<DWI>
+ (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (match_operand:<DWI> 3 "const_int_operand" "i"))
+ (sign_extend:<DWI>
+ (plus:SWI (match_dup 1)
+ (match_operand:SWI 2 "x86_64_immediate_operand"
+ "<i>")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (plus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ && CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == INTVAL (operands[3])"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length_immediate")
+ (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+ (const_string "1")
+ (match_test "<MODE_SIZE> == 8")
+ (const_string "4")]
+ (const_string "<MODE_SIZE>")))])
+
;; The lea patterns for modes less than 32 bits need to be matched by
;; several insns converted to real lea by splitters.
@@ -6093,10 +6140,11 @@
(eq:CCO (minus:<DWI>
(sign_extend:<DWI>
(match_operand:SWI 1 "nonimmediate_operand"))
- (sign_extend:<DWI>
- (match_operand:SWI 2 "<general_operand>")))
+ (match_dup 4))
(sign_extend:<DWI>
- (minus:SWI (match_dup 1) (match_dup 2)))))
+ (minus:SWI (match_dup 1)
+ (match_operand:SWI 2
+ "<general_operand>")))))
(set (match_operand:SWI 0 "register_operand")
(minus:SWI (match_dup 1) (match_dup 2)))])
(set (pc) (if_then_else
@@ -6104,7 +6152,13 @@
(label_ref (match_operand 3))
(pc)))]
""
- "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
+{
+ ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+ if (CONST_INT_P (operands[2]))
+ operands[4] = operands[2];
+ else
+ operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
+})
(define_insn "*subv<mode>4"
[(set (reg:CCO FLAGS_REG)
@@ -6112,7 +6166,8 @@
(sign_extend:<DWI>
(match_operand:SWI 1 "nonimmediate_operand" "0,0"))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+ (match_operand:SWI 2 "<general_sext_operand>"
+ "<r>We,<r>m")))
(sign_extend:<DWI>
(minus:SWI (match_dup 1) (match_dup 2)))))
(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
@@ -6122,6 +6177,31 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "*subv<mode>4_1"
+ [(set (reg:CCO FLAGS_REG)
+ (eq:CCO (minus:<DWI>
+ (sign_extend:<DWI>
+ (match_operand:SWI 1 "nonimmediate_operand" "0"))
+ (match_operand:<DWI> 3 "const_int_operand" "i"))
+ (sign_extend:<DWI>
+ (minus:SWI (match_dup 1)
+ (match_operand:SWI 2 "x86_64_immediate_operand"
+ "<i>")))))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (minus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+ && CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == INTVAL (operands[3])"
+ "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length_immediate")
+ (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+ (const_string "1")
+ (match_test "<MODE_SIZE> == 8")
+ (const_string "4")]
+ (const_string "<MODE_SIZE>")))])
+
(define_insn "*sub<mode>_3"
[(set (reg FLAGS_REG)
(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
@@ -6442,52 +6522,98 @@
(eq:CCO (mult:<DWI>
(sign_extend:<DWI>
(match_operand:SWI48 1 "register_operand"))
- (sign_extend:<DWI>
- (match_operand:SWI48 2 "<general_operand>")))
+ (match_dup 4))
(sign_extend:<DWI>
- (mult:SWI48 (match_dup 1) (match_dup 2)))))
+ (mult:SWI48 (match_dup 1)
+ (match_operand:SWI48 2
+ "<general_operand>")))))
(set (match_operand:SWI48 0 "register_operand")
(mult:SWI48 (match_dup 1) (match_dup 2)))])
(set (pc) (if_then_else
(eq (reg:CCO FLAGS_REG) (const_int 0))
(label_ref (match_operand 3))
- (pc)))])
+ (pc)))]
+ ""
+{
+ if (CONST_INT_P (operands[2]))
+ operands[4] = operands[2];
+ else
+ operands[4] = gen_rtx_SIGN_EXTEND (<DWI>mode, operands[2]);
+})
(define_insn "*mulv<mode>4"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI 1 "nonimmediate_operand" "%rm,rm,0"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
(sign_extend:<DWI>
- (match_operand:SWI 2 "<general_operand>" "K,<i>,mr")))
+ (match_operand:SWI48 2 "<general_sext_operand>"
+ "We,mr")))
(sign_extend:<DWI>
- (mult:SWI (match_dup 1) (match_dup 2)))))
- (set (match_operand:SWI 0 "register_operand" "=r,r,r")
- (mult:SWI (match_dup 1) (match_dup 2)))]
+ (mult:SWI48 (match_dup 1) (match_dup 2)))))
+ (set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (mult:SWI48 (match_dup 1) (match_dup 2)))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
imul{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
+ (set_attr "prefix_0f" "0,1")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
- (eq_attr "alternative" "1")
+ (eq_attr "alternative" "0")
(const_string "vector")
- (and (eq_attr "alternative" "2")
+ (and (eq_attr "alternative" "1")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
(set (attr "amdfam10_decode")
- (cond [(and (eq_attr "alternative" "0,1")
+ (cond [(and (eq_attr "alternative" "1")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "<MODE>")])
+(define_insn "*mulv<mode>4_1"
+ [(set (reg:CCO FLAGS_REG)
+ (eq:CCO (mult:<DWI>
+ (sign_extend:<DWI>
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm,rm"))
+ (match_operand:<DWI> 3 "const_int_operand" "K,i"))
+ (sign_extend:<DWI>
+ (mult:SWI48 (match_dup 1)
+ (match_operand:SWI 2 "x86_64_immediate_operand"
+ "K,<i>")))))
+ (set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (mult:SWI48 (match_dup 1) (match_dup 2)))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == INTVAL (operands[3])"
+ "@
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(match_operand 1 "memory_operand")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "bdver1_decode" "direct")
+ (set_attr "mode" "<MODE>")
+ (set (attr "length_immediate")
+ (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+ (const_string "1")
+ (match_test "<MODE_SIZE> == 8")
+ (const_string "4")]
+ (const_string "<MODE_SIZE>")))])
+
(define_expand "<u>mul<mode><dwi>3"
[(parallel [(set (match_operand:<DWI> 0 "register_operand")
(mult:<DWI>
diff --git a/gcc-4.9/gcc/config/i386/predicates.md b/gcc-4.9/gcc/config/i386/predicates.md
index 0492241fd..2ef138424 100644
--- a/gcc-4.9/gcc/config/i386/predicates.md
+++ b/gcc-4.9/gcc/config/i386/predicates.md
@@ -338,6 +338,20 @@
(match_operand 0 "x86_64_immediate_operand"))
(match_operand 0 "general_operand")))
+;; Return true if OP is non-VOIDmode general operand representable
+;; on x86_64. This predicate is used in sign-extending conversion
+;; operations that require non-VOIDmode immediate operands.
+(define_predicate "x86_64_sext_operand"
+ (and (match_test "GET_MODE (op) != VOIDmode")
+ (match_operand 0 "x86_64_general_operand")))
+
+;; Return true if OP is non-VOIDmode general operand. This predicate
+;; is used in sign-extending conversion operations that require
+;; non-VOIDmode immediate operands.
+(define_predicate "sext_operand"
+ (and (match_test "GET_MODE (op) != VOIDmode")
+ (match_operand 0 "general_operand")))
+
;; Return true if OP is representable on x86_64 as zero-extended operand.
;; This predicate is used in zero-extending conversion operations that
;; require non-VOIDmode immediate operands.
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index f30b27e86..72a4d6d07 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -2712,50 +2712,46 @@
(fma:FMAMODEM
(match_operand:FMAMODEM 1 "nonimmediate_operand")
(match_operand:FMAMODEM 2 "nonimmediate_operand")
- (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
- "")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
(define_expand "fms<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
(fma:FMAMODEM
(match_operand:FMAMODEM 1 "nonimmediate_operand")
(match_operand:FMAMODEM 2 "nonimmediate_operand")
- (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
- "")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
(define_expand "fnma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
(fma:FMAMODEM
(neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
(match_operand:FMAMODEM 2 "nonimmediate_operand")
- (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
- "")
+ (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
(define_expand "fnms<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
(fma:FMAMODEM
(neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
(match_operand:FMAMODEM 2 "nonimmediate_operand")
- (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
- "")
+ (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (V4SF "TARGET_FMA || TARGET_FMA4")
- (V2DF "TARGET_FMA || TARGET_FMA4")
- (V8SF "TARGET_FMA || TARGET_FMA4")
- (V4DF "TARGET_FMA || TARGET_FMA4")
- (V16SF "TARGET_AVX512F")
- (V8DF "TARGET_AVX512F")])
+(define_mode_iterator FMAMODE
+ [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (V4SF "TARGET_FMA || TARGET_FMA4")
+ (V2DF "TARGET_FMA || TARGET_FMA4")
+ (V8SF "TARGET_FMA || TARGET_FMA4")
+ (V4DF "TARGET_FMA || TARGET_FMA4")
+ (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand")
(match_operand:FMAMODE 2 "nonimmediate_operand")
- (match_operand:FMAMODE 3 "nonimmediate_operand")))]
- "")
+ (match_operand:FMAMODE 3 "nonimmediate_operand")))])
(define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
[(match_operand:VF_512 0 "register_operand")
@@ -2771,12 +2767,20 @@
DONE;
})
+(define_mode_iterator FMAMODE_NOVF512
+ [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (V4SF "TARGET_FMA || TARGET_FMA4")
+ (V2DF "TARGET_FMA || TARGET_FMA4")
+ (V8SF "TARGET_FMA || TARGET_FMA4")
+ (V4DF "TARGET_FMA || TARGET_FMA4")])
+
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x")
- (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
+ [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+ (fma:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
+ (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2788,6 +2792,21 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (fma:VF_512
+ (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512
@@ -2821,12 +2840,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0, 0, v, x,x")
- (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
+ [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+ (fma:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
+ (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (neg:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2838,6 +2857,22 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (fma:VF_512
+ (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512
@@ -2873,12 +2908,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
- (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
+ [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+ (fma:FMAMODE_NOVF512
+ (neg:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
+ (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2890,6 +2925,22 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512
@@ -2925,13 +2976,13 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
- (match_operand:FMAMODE 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
+ [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+ (fma:FMAMODE_NOVF512
+ (neg:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
+ (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (neg:FMAMODE_NOVF512
+ (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2943,6 +2994,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512
@@ -3014,11 +3082,11 @@
})
(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
- (unspec:VF
- [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x")
- (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
+ [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+ (unspec:VF_128_256
+ [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
+ (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
UNSPEC_FMADDSUB))]
"(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3031,6 +3099,22 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512
@@ -3066,12 +3150,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
- (unspec:VF
- [(match_operand:VF 1 "<round_nimm_predicate>" "%0,0,v,x,x")
- (match_operand:VF 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
- (neg:VF
- (match_operand:VF 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
+ [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+ (unspec:VF_128_256
+ [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
+ (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
+ (neg:VF_128_256
+ (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
UNSPEC_FMADDSUB))]
"(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3084,6 +3168,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v,v")
(vec_merge:VF_512