From e3cc64dec20832769406aa38cde83c7dd4194bf4 Mon Sep 17 00:00:00 2001
From: Ben Cheng
Date: Tue, 22 Apr 2014 13:33:12 -0700
Subject: [4.9] GCC 4.9.0 official release refresh

Change-Id: Ic99a7da8b44b789a48aeec93b33e93944d6e6767
---
 gcc-4.9/gcc/config/i386/avx512fintrin.h |  83 +++++++++++++
 gcc-4.9/gcc/config/i386/bmiintrin.h     |  48 +++++++-
 gcc-4.9/gcc/config/i386/constraints.md  |   7 ++
 gcc-4.9/gcc/config/i386/i386.c          |   6 +-
 gcc-4.9/gcc/config/i386/i386.md         | 176 +++++++++++++++++++++++----
 gcc-4.9/gcc/config/i386/predicates.md   |  14 +++
 gcc-4.9/gcc/config/i386/sse.md          | 207 ++++++++++++++++++++++++--------
 7 files changed, 459 insertions(+), 82 deletions(-)

diff --git a/gcc-4.9/gcc/config/i386/avx512fintrin.h b/gcc-4.9/gcc/config/i386/avx512fintrin.h
index 960286618..314895ad7 100644
--- a/gcc-4.9/gcc/config/i386/avx512fintrin.h
+++ b/gcc-4.9/gcc/config/i386/avx512fintrin.h
@@ -39,6 +39,8 @@ typedef double __v8df __attribute__ ((__vector_size__ (64)));
 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
 typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 
 /* The Intel API is flexible enough that we must allow aliasing
    with other vector types, and their scalar components.  */
@@ -130,6 +132,32 @@ _mm512_undefined_si512 (void)
   return __Y;
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi8 (char __A)
+{
+  return __extension__ (__m512i)(__v64qi)
+	 { __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_epi16 (short __A)
+{
+  return __extension__ (__m512i)(__v32hi)
+	 { __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A,
+	   __A, __A, __A, __A, __A, __A, __A, __A };
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_set1_pd (double __A)
@@ -152,6 +180,54 @@ _mm512_set1_ps (float __A)
 				     (__mmask16) -1);
 }
 
+/* Create the vector [A B C D A B C D A B C D A B C D].  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
+{
+  return __extension__ (__m512i)(__v16si)
+	 { __D, __C, __B, __A, __D, __C, __B, __A,
+	   __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_epi64 (long long __A, long long __B, long long __C,
+		   long long __D)
+{
+  return __extension__ (__m512i) (__v8di)
+	 { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_pd (double __A, double __B, double __C, double __D)
+{
+  return __extension__ (__m512d)
+	 { __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set4_ps (float __A, float __B, float __C, float __D)
+{
+  return __extension__ (__m512)
+	 { __D, __C, __B, __A, __D, __C, __B, __A,
+	   __D, __C, __B, __A, __D, __C, __B, __A };
+}
+
+#define _mm512_setr4_epi64(e0,e1,e2,e3)	\
+  _mm512_set4_epi64(e3,e2,e1,e0)
+
+#define _mm512_setr4_epi32(e0,e1,e2,e3)	\
+  _mm512_set4_epi32(e3,e2,e1,e0)
+
+#define _mm512_setr4_pd(e0,e1,e2,e3)	\
+  _mm512_set4_pd(e3,e2,e1,e0)
+
+#define _mm512_setr4_ps(e0,e1,e2,e3)	\
+  _mm512_set4_ps(e3,e2,e1,e0)
+
 extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_ps (void)
@@ -167,6 +243,13 @@ _mm512_setzero_pd (void)
   return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_epi32 (void)
+{
+  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_setzero_si512 (void)
diff --git a/gcc-4.9/gcc/config/i386/bmiintrin.h b/gcc-4.9/gcc/config/i386/bmiintrin.h
index b86adf179..b2d7c60ea 100644
--- a/gcc-4.9/gcc/config/i386/bmiintrin.h
+++ b/gcc-4.9/gcc/config/i386/bmiintrin.h
@@ -40,7 +40,6 @@ __tzcnt_u16 (unsigned short __X)
   return __builtin_ctzs (__X);
 }
 
-
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
@@ -65,18 +64,35 @@ __blsi_u32 (unsigned int __X)
   return __X & -__X;
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u32 (unsigned int __X)
+{
+  return __blsi_u32 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsmsk_u32 (unsigned int __X)
 {
   return __X ^ (__X - 1);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u32 (unsigned int __X)
+{
+  return __blsmsk_u32 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsr_u32 (unsigned int __X)
 {
   return __X & (__X - 1);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u32 (unsigned int __X)
+{
+  return __blsr_u32 (__X);
+}
 
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __tzcnt_u32 (unsigned int __X)
 {
@@ -84,6 +100,12 @@ __tzcnt_u32 (unsigned int __X)
   return __builtin_ctz (__X);
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u32 (unsigned int __X)
+{
+  return __builtin_ctz (__X);
+}
+
 #ifdef __x86_64__
 
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -110,24 +132,48 @@ __blsi_u64 (unsigned long long __X)
   return __X & -__X;
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsi_u64 (unsigned long long __X)
+{
+  return __blsi_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsmsk_u64 (unsigned long long __X)
 {
   return __X ^ (__X - 1);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsmsk_u64 (unsigned long long __X)
+{
+  return __blsmsk_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __blsr_u64 (unsigned long long __X)
 {
   return __X & (__X - 1);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_blsr_u64 (unsigned long long __X)
+{
+  return __blsr_u64 (__X);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __tzcnt_u64 (unsigned long long __X)
 {
   return __builtin_ctzll (__X);
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tzcnt_u64 (unsigned long long __X)
+{
+  return __builtin_ctzll (__X);
+}
+
 #endif /* __x86_64__ */
 
 #ifdef __DISABLE_BMI__
diff --git a/gcc-4.9/gcc/config/i386/constraints.md b/gcc-4.9/gcc/config/i386/constraints.md
index 65335f128..567e70564 100644
--- a/gcc-4.9/gcc/config/i386/constraints.md
+++ b/gcc-4.9/gcc/config/i386/constraints.md
@@ -220,6 +220,13 @@
 ;; We use W prefix to denote any number of
 ;; constant-or-symbol-reference constraints
 
+(define_constraint "We"
+  "32-bit signed integer constant, or a symbolic reference known
+   to fit that range (for sign-extending conversion operations that
+   require non-VOIDmode immediate operands)."
+  (and (match_operand 0 "x86_64_immediate_operand")
+       (match_test "GET_MODE (op) != VOIDmode")))
+
 (define_constraint "Wz"
   "32-bit unsigned integer constant, or a symbolic reference known
    to fit that range (for zero-extending conversion operations that
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index 842be686d..3eefe4ac5 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -13925,13 +13925,13 @@ ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
       if (GET_CODE (XEXP (x, 0)) == MULT)
 	{
 	  changed = 1;
-	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
 	}
 
       if (GET_CODE (XEXP (x, 1)) == MULT)
 	{
 	  changed = 1;
-	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
 	}
 
       if (changed
@@ -22755,7 +22755,7 @@ counter_mode (rtx count_exp)
 static rtx
 ix86_copy_addr_to_reg (rtx addr)
 {
-  if (GET_MODE (addr) == Pmode)
+  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
     return copy_addr_to_reg (addr);
   else
     {
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 4a8b46388..25e2e93e3 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -971,6 +971,15 @@
 	  (DI "x86_64_general_operand")
 	  (TI "x86_64_general_operand")])
 
+;; General sign extend operand predicate for integer modes,
+;; which disallows VOIDmode operands and thus it is suitable
+;; for use inside sign_extend.
+(define_mode_attr general_sext_operand
+	[(QI "sext_operand")
+	 (HI "sext_operand")
+	 (SI "x86_64_sext_operand")
+	 (DI "x86_64_sext_operand")])
+
 ;; General sign/zero extend operand predicate for integer modes.
 (define_mode_attr general_szext_operand
 	[(QI "general_operand")
@@ -4730,8 +4739,13 @@
 	     && X87_ENABLE_FLOAT (mode, mode)")
 	 (eq_attr "alternative" "1")
+	   /* ??? For sched1 we need constrain_operands to be able to
+	      select an alternative.  Leave this enabled before RA.  */
 	   (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS
-			|| optimize_function_for_size_p (cfun)")
+			|| optimize_function_for_size_p (cfun)
+			|| !(reload_completed
+			     || reload_in_progress
+			     || lra_in_progress)")
 	]
 	(symbol_ref "true")))
 ])
@@ -5821,10 +5835,11 @@
 	  (eq:CCO (plus:
 		     (sign_extend:
 			(match_operand:SWI 1 "nonimmediate_operand"))
-		     (sign_extend:
-			(match_operand:SWI 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (plus:SWI (match_dup 1) (match_dup 2)))))
+		     (plus:SWI (match_dup 1)
+			       (match_operand:SWI 2
+				  "")))))
    (set (match_operand:SWI 0 "register_operand")
 	(plus:SWI (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
 	      (eq (reg:CCO FLAGS_REG) (const_int 0))
 	      (label_ref (match_operand 3))
 	      (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (PLUS, mode, operands);")
+{
+  ix86_fixup_binary_operands_no_copy (PLUS, mode, operands);
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*addv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (plus:
 		   (sign_extend:
 		      (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))
-		   (sign_extend:
-		      (match_operand:SWI 2 "" ",")))
+		   (sign_extend:
+		      (match_operand:SWI 2 ""
+					 "mWe,We")))
 		(sign_extend:
 		   (plus:SWI (match_dup 1) (match_dup 2)))))
    (set (match_operand:SWI 0 "nonimmediate_operand" "=,m")
@@ -5850,6 +5872,31 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "")])
 
+(define_insn "*addv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (plus:
+		   (sign_extend:
+		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
+		   (match_operand: 3 "const_int_operand" "i"))
+		(sign_extend:
+		   (plus:SWI (match_dup 1)
+			     (match_operand:SWI 2 "x86_64_immediate_operand"
+						"")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m")
+	(plus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, mode, operands)
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "add{}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 ;; The lea patterns for modes less than 32 bits need to be matched by
 ;; several insns converted to real lea by splitters.
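
The addv4 pattern above, like the subv4 and mulv4 patterns that follow, encodes a single overflow test: perform the operation in the narrow mode and in the double-width mode, and declare overflow when the two disagree. A minimal sketch of that condition in plain C, for 32-bit operands widened to 64 bits (the helper name is illustrative, not something in GCC):

#include <stdint.h>

/* eq:CCO compares the widened sum of the sign-extended inputs with the
   sign-extension of the narrow sum; overflow is flagged when they differ.  */
static int
add32_overflows (int32_t a, int32_t b)
{
  int64_t wide = (int64_t) a + (int64_t) b;                  /* plus in the wider mode */
  int32_t narrow = (int32_t) ((uint32_t) a + (uint32_t) b);  /* plus in the narrow mode */
  return wide != (int64_t) narrow;                           /* mismatch == signed overflow */
}

None of the widening survives in the generated code: the insn emits a plain add, and the eq:CCO comparison collapses into a branch on the overflow flag.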
@@ -6093,10 +6140,11 @@
 	  (eq:CCO (minus:
 		     (sign_extend:
 			(match_operand:SWI 1 "nonimmediate_operand"))
-		     (sign_extend:
-			(match_operand:SWI 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (minus:SWI (match_dup 1) (match_dup 2)))))
+		     (minus:SWI (match_dup 1)
+				(match_operand:SWI 2
+				   "")))))
    (set (match_operand:SWI 0 "register_operand")
 	(minus:SWI (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
@@ -6104,7 +6152,13 @@
 	      (label_ref (match_operand 3))
 	      (pc)))]
   ""
-  "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands);")
+{
+  ix86_fixup_binary_operands_no_copy (MINUS, mode, operands);
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*subv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (minus:
 		   (sign_extend:
 		      (match_operand:SWI 1 "nonimmediate_operand" "0,0"))
-		   (sign_extend:
-		      (match_operand:SWI 2 "" ",m")))
+		   (sign_extend:
+		      (match_operand:SWI 2 ""
+					 "We,m")))
 		(sign_extend:
 		   (minus:SWI (match_dup 1) (match_dup 2)))))
    (set (match_operand:SWI 0 "nonimmediate_operand" "=m,")
@@ -6122,6 +6177,31 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "")])
 
+(define_insn "*subv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (minus:
+		   (sign_extend:
+		      (match_operand:SWI 1 "nonimmediate_operand" "0"))
+		   (match_operand: 3 "const_int_operand" "i"))
+		(sign_extend:
+		   (minus:SWI (match_dup 1)
+			      (match_operand:SWI 2 "x86_64_immediate_operand"
+						 "")))))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=m")
+	(minus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (MINUS, mode, operands)
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "sub{}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 (define_insn "*sub_3"
   [(set (reg FLAGS_REG)
 	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
@@ -6442,52 +6522,98 @@
 	  (eq:CCO (mult:
 		     (sign_extend:
 			(match_operand:SWI48 1 "register_operand"))
-		     (sign_extend:
-			(match_operand:SWI48 2 "")))
+		     (match_dup 4))
 		  (sign_extend:
-		     (mult:SWI48 (match_dup 1) (match_dup 2)))))
+		     (mult:SWI48 (match_dup 1)
+				 (match_operand:SWI48 2
+				    "")))))
    (set (match_operand:SWI48 0 "register_operand")
 	(mult:SWI48 (match_dup 1) (match_dup 2)))])
    (set (pc) (if_then_else
 	      (eq (reg:CCO FLAGS_REG) (const_int 0))
 	      (label_ref (match_operand 3))
-	      (pc)))])
+	      (pc)))]
+  ""
+{
+  if (CONST_INT_P (operands[2]))
+    operands[4] = operands[2];
+  else
+    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
+})
 
 (define_insn "*mulv4"
   [(set (reg:CCO FLAGS_REG)
 	(eq:CCO (mult:
 		   (sign_extend:
-		      (match_operand:SWI 1 "nonimmediate_operand" "%rm,rm,0"))
+		      (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
 		   (sign_extend:
-		      (match_operand:SWI 2 "" "K,,mr")))
+		      (match_operand:SWI48 2 ""
+					   "We,mr")))
 		(sign_extend:
-		   (mult:SWI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:SWI 0 "register_operand" "=r,r,r")
-	(mult:SWI (match_dup 1) (match_dup 2)))]
+		   (mult:SWI48 (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(mult:SWI48 (match_dup 1) (match_dup 2)))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
-   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
-  (set_attr "prefix_0f" "0,0,1")
+  (set_attr "prefix_0f" "0,1")
   (set (attr "athlon_decode")
	(cond [(eq_attr "cpu" "athlon")
		  (const_string "vector")
-	       (eq_attr "alternative" "1")
+	       (eq_attr "alternative" "0")
		  (const_string "vector")
-	       (and (eq_attr "alternative" "2")
+	       (and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set (attr "amdfam10_decode")
-	(cond [(and (eq_attr "alternative" "0,1")
+	(cond [(and (eq_attr "alternative" "1")
		    (match_operand 1 "memory_operand"))
		  (const_string "vector")]
	      (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "")])
 
+(define_insn "*mulv4_1"
+  [(set (reg:CCO FLAGS_REG)
+	(eq:CCO (mult:
+		   (sign_extend:
+		      (match_operand:SWI48 1 "nonimmediate_operand" "rm,rm"))
+		   (match_operand: 3 "const_int_operand" "K,i"))
+		(sign_extend:
+		   (mult:SWI48 (match_dup 1)
+			       (match_operand:SWI 2 "x86_64_immediate_operand"
+						  "K,")))))
+   (set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(mult:SWI48 (match_dup 1) (match_dup 2)))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && CONST_INT_P (operands[2])
+   && INTVAL (operands[2]) == INTVAL (operands[3])"
+  "@
+   imul{}\t{%2, %1, %0|%0, %1, %2}
+   imul{}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "imul")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "cpu" "athlon")
+		  (const_string "vector")
+	       (eq_attr "alternative" "1")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(match_operand 1 "memory_operand")
+		  (const_string "vector")]
+	      (const_string "direct")))
+   (set_attr "bdver1_decode" "direct")
+   (set_attr "mode" "")
+   (set (attr "length_immediate")
+	(cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
+		 (const_string "1")
+	       (match_test " == 8")
+		 (const_string "4")]
+	      (const_string "")))])
+
 (define_expand "mul3"
   [(parallel [(set (match_operand: 0 "register_operand")
 		   (mult:
diff --git a/gcc-4.9/gcc/config/i386/predicates.md b/gcc-4.9/gcc/config/i386/predicates.md
index 0492241fd..2ef138424 100644
--- a/gcc-4.9/gcc/config/i386/predicates.md
+++ b/gcc-4.9/gcc/config/i386/predicates.md
@@ -338,6 +338,20 @@
 	   (match_operand 0 "x86_64_immediate_operand"))
       (match_operand 0 "general_operand")))
 
+;; Return true if OP is non-VOIDmode general operand representable
+;; on x86_64.  This predicate is used in sign-extending conversion
+;; operations that require non-VOIDmode immediate operands.
+(define_predicate "x86_64_sext_operand"
+  (and (match_test "GET_MODE (op) != VOIDmode")
+       (match_operand 0 "x86_64_general_operand")))
+
+;; Return true if OP is non-VOIDmode general operand.  This predicate
+;; is used in sign-extending conversion operations that require
+;; non-VOIDmode immediate operands.
+(define_predicate "sext_operand"
+  (and (match_test "GET_MODE (op) != VOIDmode")
+       (match_operand 0 "general_operand")))
+
 ;; Return true if OP is representable on x86_64 as zero-extended operand.
 ;; This predicate is used in zero-extending conversion operations that
 ;; require non-VOIDmode immediate operands.
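
The We constraint and the sext_operand/x86_64_sext_operand predicates above rest on one encoding fact: x86-64 ALU instructions sign-extend their 32-bit immediate field, so a 64-bit constant is directly usable only if that sign-extension reproduces it. A standalone sketch of the check (the helper name is ours, not GCC's):

#include <stdbool.h>
#include <stdint.h>

/* True iff VAL survives a truncate-to-32-bits, sign-extend-back round
   trip, i.e. it could be encoded as the imm32 of add/sub/imul.  */
static bool
fits_signed_imm32 (int64_t val)
{
  return val == (int64_t) (int32_t) (uint32_t) val;
}

For example, 0x7fffffff passes while 0x80000000 does not (it would come back as 0xffffffff80000000), so such constants get loaded into a register first. The additional GET_MODE (op) != VOIDmode test is needed because a CONST_INT carries no mode and therefore may not sit directly inside a sign_extend.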
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index f30b27e86..72a4d6d07 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -2712,50 +2712,46 @@
 	  (fma:FMAMODEM
 	    (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "")
+	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
 
 (define_expand "fms4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "")
+	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
 
 (define_expand "fnma4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "")
+	    (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
 
 (define_expand "fnms4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
 	  (fma:FMAMODEM
 	    (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	    (match_operand:FMAMODEM 2 "nonimmediate_operand")
-	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "")
+	    (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
 
 ;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-			       (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-			       (V4SF "TARGET_FMA || TARGET_FMA4")
-			       (V2DF "TARGET_FMA || TARGET_FMA4")
-			       (V8SF "TARGET_FMA || TARGET_FMA4")
-			       (V4DF "TARGET_FMA || TARGET_FMA4")
-			       (V16SF "TARGET_AVX512F")
-			       (V8DF "TARGET_AVX512F")])
+(define_mode_iterator FMAMODE
+  [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (V4SF "TARGET_FMA || TARGET_FMA4")
+   (V2DF "TARGET_FMA || TARGET_FMA4")
+   (V8SF "TARGET_FMA || TARGET_FMA4")
+   (V4DF "TARGET_FMA || TARGET_FMA4")
+   (V16SF "TARGET_AVX512F")
+   (V8DF "TARGET_AVX512F")])
 
 (define_expand "fma4i_fmadd_"
   [(set (match_operand:FMAMODE 0 "register_operand")
 	(fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")
 		     (match_operand:FMAMODE 2 "nonimmediate_operand")
-		     (match_operand:FMAMODE 3 "nonimmediate_operand")))]
-  "")
+		     (match_operand:FMAMODE 3 "nonimmediate_operand")))])
 
 (define_expand "avx512f_fmadd__maskz"
   [(match_operand:VF_512 0 "register_operand")
@@ -2771,12 +2767,20 @@
   DONE;
 })
 
+(define_mode_iterator FMAMODE_NOVF512
+  [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+   (V4SF "TARGET_FMA || TARGET_FMA4")
+   (V2DF "TARGET_FMA || TARGET_FMA4")
+   (V8SF "TARGET_FMA || TARGET_FMA4")
+   (V4DF "TARGET_FMA || TARGET_FMA4")])
+
 (define_insn "fma_fmadd_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "" "%0,0,v,x,x")
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (match_operand:FMAMODE 3 "" "v,,0,xm,x")))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x")
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x")))]
   " && "
   "@
   vfmadd132\t{%2, %3, %0|%0, %3, %2}
@@ -2788,6 +2792,21 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (match_operand:VF_512 1 "" "%0,0,v")
+	  (match_operand:VF_512 2 "" ",v,")
+	  (match_operand:VF_512 3 "" "v,,0")))]
+  " && "
+  "@
+   vfmadd132\t{%2, %3, %0|%0, %3, %2}
+   vfmadd213\t{%3, %2, %0|%0, %2, %3}
+   vfmadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2821,12 +2840,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fmsub_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "" "%0, 0, v, x,x")
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "" "v,,0,xm,x"))))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x")
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x"))))]
   " && "
   "@
   vfmsub132\t{%2, %3, %0|%0, %3, %2}
@@ -2838,6 +2857,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (match_operand:VF_512 1 "" "%0,0,v")
+	  (match_operand:VF_512 2 "" ",v,")
+	  (neg:VF_512
+	    (match_operand:VF_512 3 "" "v,,0"))))]
+  " && "
+  "@
+   vfmsub132\t{%2, %3, %0|%0, %3, %2}
+   vfmsub213\t{%3, %2, %0|%0, %2, %3}
+   vfmsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2873,12 +2908,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fnmadd_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "" "%0,0,v,x,x"))
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (match_operand:FMAMODE 3 "" "v,,0,xm,x")))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x")))]
   " && "
   "@
   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
@@ -2890,6 +2925,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fnmadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (neg:VF_512
+	    (match_operand:VF_512 1 "" "%0,0,v"))
+	  (match_operand:VF_512 2 "" ",v,")
+	  (match_operand:VF_512 3 "" "v,,0")))]
+  " && "
+  "@
+   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
+   vfnmadd213\t{%3, %2, %0|%0, %2, %3}
+   vfnmadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fnmadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -2925,13 +2976,13 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fnmsub_"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "" "%0,0,v,x,x"))
-	  (match_operand:FMAMODE 2 "" ",v,,x,m")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "" "v,,0,xm,x"))))]
+  [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
+	(fma:FMAMODE_NOVF512
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 1 "" "%0,0,v,x,x"))
+	  (match_operand:FMAMODE_NOVF512 2 "" ",v,,x,m")
+	  (neg:FMAMODE_NOVF512
+	    (match_operand:FMAMODE_NOVF512 3 "" "v,,0,xm,x"))))]
   " && "
   "@
   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
@@ -2943,6 +2994,23 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fnmsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(fma:VF_512
+	  (neg:VF_512
+	    (match_operand:VF_512 1 "" "%0,0,v"))
+	  (match_operand:VF_512 2 "" ",v,")
+	  (neg:VF_512
+	    (match_operand:VF_512 3 "" "v,,0"))))]
+  " && "
+  "@
+   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
+   vfnmsub213\t{%3, %2, %0|%0, %2, %3}
+   vfnmsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fnmsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -3014,11 +3082,11 @@
 })
 
 (define_insn "fma_fmaddsub_"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "" "%0,0,v,x,x")
-	   (match_operand:VF 2 "" ",v,,x,m")
-	   (match_operand:VF 3 "" "v,,0,xm,x")]
+  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_128_256
+	  [(match_operand:VF_128_256 1 "" "%0,0,v,x,x")
+	   (match_operand:VF_128_256 2 "" ",v,,x,m")
+	   (match_operand:VF_128_256 3 "" "v,,0,xm,x")]
 	  UNSPEC_FMADDSUB))]
   "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && && "
   "@
@@ -3031,6 +3099,22 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmaddsub_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "" "%0,0,v")
+	   (match_operand:VF_512 2 "" ",v,")
+	   (match_operand:VF_512 3 "" "v,,0")]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_AVX512F && && "
+  "@
+   vfmaddsub132\t{%2, %3, %0|%0, %3, %2}
+   vfmaddsub213\t{%3, %2, %0|%0, %2, %3}
+   vfmaddsub231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmaddsub__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
@@ -3066,12 +3150,12 @@
    (set_attr "mode" "")])
 
 (define_insn "fma_fmsubadd_"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "" "%0,0,v,x,x")
-	   (match_operand:VF 2 "" ",v,,x,m")
-	   (neg:VF
-	     (match_operand:VF 3 "" "v,,0,xm,x"))]
+  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_128_256
+	  [(match_operand:VF_128_256 1 "" "%0,0,v,x,x")
+	   (match_operand:VF_128_256 2 "" ",v,,x,m")
+	   (neg:VF_128_256
+	     (match_operand:VF_128_256 3 "" "v,,0,xm,x"))]
 	  UNSPEC_FMADDSUB))]
   "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && && "
   "@
@@ -3084,6 +3168,23 @@
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "")])
 
+(define_insn "fma_fmsubadd_"
+  [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "" "%0,0,v")
+	   (match_operand:VF_512 2 "" ",v,")
+	   (neg:VF_512
+	     (match_operand:VF_512 3 "" "v,,0"))]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_AVX512F && && "
+  "@
+   vfmsubadd132\t{%2, %3, %0|%0, %3, %2}
+   vfmsubadd213\t{%3, %2, %0|%0, %2, %3}
+   vfmsubadd231\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
+   (set_attr "type" "ssemuladd")
+   (set_attr "mode" "")])
+
 (define_insn "avx512f_fmsubadd__mask"
   [(set (match_operand:VF_512 0 "register_operand" "=v,v")
 	(vec_merge:VF_512
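
To see the new 512-bit FMA patterns at work, no intrinsics are required; GCC's generic vector extensions suffice. A sketch, assuming -O2 -mavx512f and a -ffp-contract setting that permits fusion (the typedef name is ours):

typedef float v16sf __attribute__ ((__vector_size__ (64)));

/* GCC may contract the separate multiply and add into a single
   vfmadd132ps/vfmadd213ps/vfmadd231ps on %zmm registers.  */
v16sf
fused_madd (v16sf a, v16sf b, v16sf c)
{
  return a * b + c;
}

Which of the three forms is emitted depends on which input the register allocator overlaps with the destination; that choice is exactly what the three constraint alternatives in the VF_512 define_insns above expose.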