Diffstat (limited to 'gcc-4.8/gcc/config')
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def | 4
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64-simd.md | 185
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.c | 9
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.h | 6
-rw-r--r--  gcc-4.8/gcc/config/aarch64/aarch64.md | 2
-rw-r--r--  gcc-4.8/gcc/config/aarch64/arm_neon.h | 68
-rw-r--r--  gcc-4.8/gcc/config/aarch64/iterators.md | 61
-rw-r--r--  gcc-4.8/gcc/config/aarch64/predicates.md | 5
-rw-r--r--  gcc-4.8/gcc/config/arm/arm-ldmstm.ml | 7
-rw-r--r--  gcc-4.8/gcc/config/arm/arm.c | 74
-rw-r--r--  gcc-4.8/gcc/config/arm/arm.h | 17
-rw-r--r--  gcc-4.8/gcc/config/arm/arm.md | 6
-rw-r--r--  gcc-4.8/gcc/config/arm/iterators.md | 4
-rw-r--r--  gcc-4.8/gcc/config/arm/ldmstm.md | 342
-rw-r--r--  gcc-4.8/gcc/config/arm/predicates.md | 12
-rw-r--r--  gcc-4.8/gcc/config/arm/vfp.md | 7
-rw-r--r--  gcc-4.8/gcc/config/avr/avr.c | 25
-rw-r--r--  gcc-4.8/gcc/config/avr/avr.md | 8
-rw-r--r--  gcc-4.8/gcc/config/cris/cris.md | 27
-rw-r--r--  gcc-4.8/gcc/config/cris/predicates.md | 4
-rw-r--r--  gcc-4.8/gcc/config/darwin.c | 54
-rw-r--r--  gcc-4.8/gcc/config/i386/driver-i386.c | 63
-rw-r--r--  gcc-4.8/gcc/config/i386/f16cintrin.h | 2
-rw-r--r--  gcc-4.8/gcc/config/i386/i386.c | 614
-rw-r--r--  gcc-4.8/gcc/config/i386/i386.h | 60
-rw-r--r--  gcc-4.8/gcc/config/i386/i386.md | 91
-rw-r--r--  gcc-4.8/gcc/config/i386/sse.md | 44
-rw-r--r--  gcc-4.8/gcc/config/i386/t-rtems | 7
-rw-r--r--  gcc-4.8/gcc/config/i386/winnt.c | 5
-rw-r--r--  gcc-4.8/gcc/config/i386/xopintrin.h | 8
-rw-r--r--  gcc-4.8/gcc/config/m68k/m68k.c | 10
-rw-r--r--  gcc-4.8/gcc/config/microblaze/microblaze.c | 146
-rw-r--r--  gcc-4.8/gcc/config/microblaze/microblaze.h | 9
-rw-r--r--  gcc-4.8/gcc/config/microblaze/microblaze.md | 96
-rw-r--r--  gcc-4.8/gcc/config/microblaze/predicates.md | 8
-rw-r--r--  gcc-4.8/gcc/config/mips/driver-native.c | 10
-rw-r--r--  gcc-4.8/gcc/config/mips/mips.c | 22
-rw-r--r--  gcc-4.8/gcc/config/mips/mips.h | 5
-rw-r--r--  gcc-4.8/gcc/config/mips/mips.md | 79
-rw-r--r--  gcc-4.8/gcc/config/pa/pa.c | 34
-rw-r--r--  gcc-4.8/gcc/config/pa/pa.h | 9
-rw-r--r--  gcc-4.8/gcc/config/rs6000/altivec.h | 50
-rw-r--r--  gcc-4.8/gcc/config/rs6000/altivec.md | 1928
-rw-r--r--  gcc-4.8/gcc/config/rs6000/constraints.md | 58
-rw-r--r--  gcc-4.8/gcc/config/rs6000/crypto.md | 101
-rw-r--r--  gcc-4.8/gcc/config/rs6000/darwin.h | 3
-rw-r--r--  gcc-4.8/gcc/config/rs6000/dfp.md | 406
-rw-r--r--  gcc-4.8/gcc/config/rs6000/htm.md | 366
-rw-r--r--  gcc-4.8/gcc/config/rs6000/htmintrin.h | 131
-rw-r--r--  gcc-4.8/gcc/config/rs6000/htmxlintrin.h | 213
-rw-r--r--  gcc-4.8/gcc/config/rs6000/linux64.h | 30
-rw-r--r--  gcc-4.8/gcc/config/rs6000/option-defaults.h | 1
-rw-r--r--  gcc-4.8/gcc/config/rs6000/power8.md | 373
-rw-r--r--  gcc-4.8/gcc/config/rs6000/ppc-asm.h | 25
-rw-r--r--  gcc-4.8/gcc/config/rs6000/predicates.md | 319
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-builtin.def | 536
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-c.c | 934
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-cpus.def | 28
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-modes.def | 6
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-opts.h | 11
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000-protos.h | 19
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000.c | 5895
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000.h | 304
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000.md | 4027
-rw-r--r--  gcc-4.8/gcc/config/rs6000/rs6000.opt | 81
-rw-r--r--  gcc-4.8/gcc/config/rs6000/sync.md | 365
-rw-r--r--  gcc-4.8/gcc/config/rs6000/sysv4.h | 32
-rw-r--r--  gcc-4.8/gcc/config/rs6000/sysv4le.h | 7
-rw-r--r--  gcc-4.8/gcc/config/rs6000/t-linux64 | 10
-rw-r--r--  gcc-4.8/gcc/config/rs6000/t-linux64bele | 7
-rw-r--r--  gcc-4.8/gcc/config/rs6000/t-linux64le | 3
-rw-r--r--  gcc-4.8/gcc/config/rs6000/t-linux64lebe | 7
-rw-r--r--  gcc-4.8/gcc/config/rs6000/t-rs6000 | 3
-rw-r--r--  gcc-4.8/gcc/config/rs6000/vector.md | 130
-rw-r--r--  gcc-4.8/gcc/config/rs6000/vsx.md | 1206
-rw-r--r--  gcc-4.8/gcc/config/s390/htmxlintrin.h | 25
-rw-r--r--  gcc-4.8/gcc/config/s390/s390-protos.h | 1
-rw-r--r--  gcc-4.8/gcc/config/s390/s390.c | 302
-rw-r--r--  gcc-4.8/gcc/config/s390/s390.h | 5
-rw-r--r--  gcc-4.8/gcc/config/s390/s390.md | 32
-rw-r--r--  gcc-4.8/gcc/config/s390/s390.opt | 8
-rw-r--r--  gcc-4.8/gcc/config/sh/constraints.md | 7
-rw-r--r--  gcc-4.8/gcc/config/sh/predicates.md | 11
-rw-r--r--  gcc-4.8/gcc/config/sh/sh-protos.h | 2
-rw-r--r--  gcc-4.8/gcc/config/sh/sh.c | 28
-rw-r--r--  gcc-4.8/gcc/config/sh/sh.md | 108
-rw-r--r--  gcc-4.8/gcc/config/sh/sh.opt | 2
-rw-r--r--  gcc-4.8/gcc/config/sparc/leon.md | 66
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc-opts.h | 1
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc-protos.h | 3
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc.c | 515
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc.h | 46
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc.md | 427
-rw-r--r--  gcc-4.8/gcc/config/sparc/sparc.opt | 17
-rw-r--r--  gcc-4.8/gcc/config/sparc/sync.md | 32
-rw-r--r--  gcc-4.8/gcc/config/sparc/t-rtems | 4
-rw-r--r--  gcc-4.8/gcc/config/sparc/t-sparc | 2
-rw-r--r--  gcc-4.8/gcc/config/tilegx/sync.md | 11
-rw-r--r--  gcc-4.8/gcc/config/tilegx/tilegx-c.c | 3
-rw-r--r--  gcc-4.8/gcc/config/tilegx/tilegx.c | 37
-rw-r--r--  gcc-4.8/gcc/config/tilegx/tilegx.md | 6
-rw-r--r--  gcc-4.8/gcc/config/tilepro/tilepro-c.c | 5
-rw-r--r--  gcc-4.8/gcc/config/tilepro/tilepro.c | 6
-rw-r--r--  gcc-4.8/gcc/config/tilepro/tilepro.md | 6
104 files changed, 16860 insertions(+), 4722 deletions(-)
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
index 955da265a..ed73c15d7 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -217,8 +217,8 @@
BUILTIN_VSDQ_I_DI (BINOP, cmle)
BUILTIN_VSDQ_I_DI (BINOP, cmlt)
/* Implemented by aarch64_cm<cmp><mode>. */
- BUILTIN_VSDQ_I_DI (BINOP, cmhs)
- BUILTIN_VSDQ_I_DI (BINOP, cmhi)
+ BUILTIN_VSDQ_I_DI (BINOP, cmgeu)
+ BUILTIN_VSDQ_I_DI (BINOP, cmgtu)
BUILTIN_VSDQ_I_DI (BINOP, cmtst)
/* Implemented by aarch64_<fmaxmin><mode>. */
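The rename above tracks the RTL comparison codes (geu, gtu) rather than the AArch64 mnemonics (hs, hi), so the builtin names line up with the comparison-based patterns introduced below. As a lane-wise illustration (a sketch of the semantics, not GCC internals):

    /* Lane-wise semantics of cmgeu (CMHS, "higher or same") and
       cmgtu (CMHI, "higher"): all-ones on true, all-zeros on false.  */
    #include <stdint.h>

    static inline uint8_t cmgeu_lane (uint8_t a, uint8_t b)
    {
      return a >= b ? UINT8_MAX : 0;   /* unsigned >= */
    }

    static inline uint8_t cmgtu_lane (uint8_t a, uint8_t b)
    {
      return a > b ? UINT8_MAX : 0;    /* unsigned >  */
    }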
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
index 00f3c3121..481222cf5 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64-simd.md
@@ -21,7 +21,7 @@
; Main data types used by the instructions
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
(const_string "unknown"))
@@ -1548,12 +1548,12 @@
case LTU:
case GEU:
- emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
break;
case LEU:
case GTU:
- emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
break;
case NE:
@@ -3034,48 +3034,181 @@
)
-;; cm(eq|ge|le|lt|gt)
+;; cm(eq|ge|gt|lt|le)
+;; Note, we have constraints for Dz and Z as different expanders
+;; have different ideas of what should be passed to this pattern.
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
- (unspec:<V_cmp_result>
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
- (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
- VCMP_S))]
+ (neg:<V_cmp_result>
+ (COMPARISONS:<V_cmp_result>
+ (match_operand:VDQ 1 "register_operand" "w,w")
+ (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
+ )))]
"TARGET_SIMD"
"@
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
+ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+ cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
[(set_attr "simd_type" "simd_cmp")
(set_attr "simd_mode" "<MODE>")]
)
-;; cm(hs|hi|tst)
+(define_insn_and_split "aarch64_cm<optab>di"
+ [(set (match_operand:DI 0 "register_operand" "=w,w,r")
+ (neg:DI
+ (COMPARISONS:DI
+ (match_operand:DI 1 "register_operand" "w,w,r")
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
+ )))]
+ "TARGET_SIMD"
+ "@
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+ cm<optab>\t%d0, %d1, #0
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (neg:DI
+ (COMPARISONS:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; cm(hs|hi)
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
- (unspec:<V_cmp_result>
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
- VCMP_U))]
+ (neg:<V_cmp_result>
+ (UCOMPARISONS:<V_cmp_result>
+ (match_operand:VDQ 1 "register_operand" "w")
+ (match_operand:VDQ 2 "register_operand" "w")
+ )))]
"TARGET_SIMD"
- "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
[(set_attr "simd_type" "simd_cmp")
(set_attr "simd_mode" "<MODE>")]
)
-;; fcm(eq|ge|le|lt|gt)
+(define_insn_and_split "aarch64_cm<optab>di"
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
+ (neg:DI
+ (UCOMPARISONS:DI
+ (match_operand:DI 1 "register_operand" "w,r")
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
+ )))]
+ "TARGET_SIMD"
+ "@
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (neg:DI
+ (UCOMPARISONS:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; cmtst
+
+(define_insn "aarch64_cmtst<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+ (neg:<V_cmp_result>
+ (ne:<V_cmp_result>
+ (and:VDQ
+ (match_operand:VDQ 1 "register_operand" "w")
+ (match_operand:VDQ 2 "register_operand" "w"))
+ (vec_duplicate:<V_cmp_result> (const_int 0)))))]
+ "TARGET_SIMD"
+ "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "aarch64_cmtstdi"
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
+ (neg:DI
+ (ne:DI
+ (and:DI
+ (match_operand:DI 1 "register_operand" "w,r")
+ (match_operand:DI 2 "register_operand" "w,r"))
+ (const_int 0))))]
+ "TARGET_SIMD"
+ "@
+ cmtst\t%d0, %d1, %d2
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:DI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (neg:DI
+ (ne:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; fcm(eq|ge|gt|le|lt)
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
- (unspec:<V_cmp_result>
- [(match_operand:VDQF 1 "register_operand" "w,w")
- (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
- VCMP_S))]
+ (neg:<V_cmp_result>
+ (COMPARISONS:<V_cmp_result>
+ (match_operand:VALLF 1 "register_operand" "w,w")
+ (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ )))]
"TARGET_SIMD"
"@
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
+ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+ fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
[(set_attr "simd_type" "simd_fcmp")
(set_attr "simd_mode" "<MODE>")]
)
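The switch from the VCMP_S/VCMP_U unspecs to explicit (neg (COMPARISONS ...)) RTL works because a true AdvSIMD compare sets every bit of the lane: the all-ones pattern is the two's-complement value -1, i.e. the negation of the boolean result. A minimal per-lane sketch in C (illustrative, not the compiler's code):

    #include <stdint.h>

    /* -(a >= b) is 0xFFFFFFFF when the compare holds and 0 otherwise,
       which is exactly what CMGE produces per lane.  */
    static inline int32_t cmge_lane (int32_t a, int32_t b)
    {
      return -(int32_t) (a >= b);
    }

Expressing the compares this way lets combine and the DI splitters in this file reason about them as ordinary comparisons instead of opaque unspecs.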
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.c b/gcc-4.8/gcc/config/aarch64/aarch64.c
index 4d5166618..0d4b26515 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.c
@@ -4568,9 +4568,11 @@ aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
}
static int
-aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
- reg_class_t from, reg_class_t to)
+aarch64_register_move_cost (enum machine_mode mode,
+ reg_class_t from_i, reg_class_t to_i)
{
+ enum reg_class from = (enum reg_class) from_i;
+ enum reg_class to = (enum reg_class) to_i;
const struct cpu_regmove_cost *regmove_cost
= aarch64_tune_params->regmove_cost;
@@ -4586,8 +4588,7 @@ aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
secondary reload. A general register is used as a scratch to move
the upper DI value and the lower DI value is moved directly,
hence the cost is the sum of three moves. */
-
- if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
+ if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
return regmove_cost->FP2FP;
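The hunk above also repairs an outright bug: the old test applied GET_MODE_SIZE to the reg_class_t arguments rather than to the mode being moved. A hedged sketch of the corrected logic, with illustrative cost fields (the real values come from aarch64_tune_params):

    struct cpu_regmove_cost { int GP2GP, GP2FP, FP2GP, FP2FP; };

    /* Without SIMD, a 128-bit FP-to-FP move needs a GP scratch for
       the upper half, so it costs three component moves.  */
    static int
    fp2fp_cost (int mode_size, int target_simd,
                const struct cpu_regmove_cost *c)
    {
      if (!target_simd && mode_size == 128)   /* as tested in the patch */
        return c->GP2FP + c->FP2GP + c->FP2FP;
      return c->FP2FP;
    }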
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.h b/gcc-4.8/gcc/config/aarch64/aarch64.h
index c3efd2a88..19ac5ebc4 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.h
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.h
@@ -73,9 +73,9 @@
#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN)
/* AdvSIMD is supported in the default configuration, unless disabled by
- -mgeneral-regs-only. */
-#define TARGET_SIMD !TARGET_GENERAL_REGS_ONLY
-#define TARGET_FLOAT !TARGET_GENERAL_REGS_ONLY
+ -mgeneral-regs-only or the +nosimd extension. */
+#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD)
+#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP)
#define UNITS_PER_WORD 8
diff --git a/gcc-4.8/gcc/config/aarch64/aarch64.md b/gcc-4.8/gcc/config/aarch64/aarch64.md
index 04a5e01f9..c5cfedb8f 100644
--- a/gcc-4.8/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.8/gcc/config/aarch64/aarch64.md
@@ -2211,7 +2211,7 @@
(set_attr "mode" "SI")]
)
-(define_insn "*cstore<mode>_neg"
+(define_insn "cstore<mode>_neg"
[(set (match_operand:ALLI 0 "register_operand" "=r")
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
[(match_operand 2 "cc_register" "") (const_int 0)])))]
diff --git a/gcc-4.8/gcc/config/aarch64/arm_neon.h b/gcc-4.8/gcc/config/aarch64/arm_neon.h
index b083eb4e6..73c7e7d63 100644
--- a/gcc-4.8/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.8/gcc/config/aarch64/arm_neon.h
@@ -19551,28 +19551,28 @@ vcge_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
(int8x8_t) __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
(int16x4_t) __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
(int32x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+ return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
(int64x1_t) __b);
}
@@ -19603,28 +19603,28 @@ vcgeq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
(int8x16_t) __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
(int16x8_t) __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
(int64x2_t) __b);
}
@@ -19637,7 +19637,7 @@ vcged_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
+ return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a,
(int64x1_t) __b);
}
@@ -19676,28 +19676,28 @@ vcgt_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
(int8x8_t) __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
(int16x4_t) __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
(int32x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+ return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
(int64x1_t) __b);
}
@@ -19728,28 +19728,28 @@ vcgtq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
(int8x16_t) __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
(int16x8_t) __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
(int64x2_t) __b);
}
@@ -19762,7 +19762,7 @@ vcgtd_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
+ return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a,
(int64x1_t) __b);
}
@@ -19801,28 +19801,28 @@ vcle_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
(int8x8_t) __a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
(int16x4_t) __a);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
(int32x2_t) __a);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
+ return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __b,
(int64x1_t) __a);
}
@@ -19853,28 +19853,28 @@ vcleq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
(int8x16_t) __a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
(int16x8_t) __a);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
(int32x4_t) __a);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
(int64x2_t) __a);
}
@@ -19919,28 +19919,28 @@ vclt_s64 (int64x1_t __a, int64x1_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
(int8x8_t) __a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
(int16x4_t) __a);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
(int32x2_t) __a);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
+ return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __b,
(int64x1_t) __a);
}
@@ -19971,28 +19971,28 @@ vcltq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
(int8x16_t) __a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
(int16x8_t) __a);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
(int32x4_t) __a);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
(int64x2_t) __a);
}
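All of the vcle/vclt changes follow the same pattern: AArch64 has no unsigned <= or < compare instruction, so the intrinsics call the >=/> builtin with the arguments swapped. A small usage sketch (assuming a target where arm_neon.h is available):

    #include <arm_neon.h>

    /* vcle_u8 (a, b) is implemented as cmgeu (b, a), i.e. b >= a,
       and should assemble to a single CMHS with swapped operands.  */
    uint8x8_t lanes_le (uint8x8_t a, uint8x8_t b)
    {
      return vcle_u8 (a, b);
    }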
diff --git a/gcc-4.8/gcc/config/aarch64/iterators.md b/gcc-4.8/gcc/config/aarch64/iterators.md
index ce81ac5ce..d19b26a64 100644
--- a/gcc-4.8/gcc/config/aarch64/iterators.md
+++ b/gcc-4.8/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
+;; All Float modes.
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
+
;; Vector Float modes with 2 elements.
(define_mode_iterator V2F [V2SF V2DF])
@@ -213,13 +216,6 @@
UNSPEC_URSHL ; Used in aarch64-simd.md.
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
- UNSPEC_CMEQ ; Used in aarch64-simd.md.
- UNSPEC_CMLE ; Used in aarch64-simd.md.
- UNSPEC_CMLT ; Used in aarch64-simd.md.
- UNSPEC_CMGE ; Used in aarch64-simd.md.
- UNSPEC_CMGT ; Used in aarch64-simd.md.
- UNSPEC_CMHS ; Used in aarch64-simd.md.
- UNSPEC_CMHI ; Used in aarch64-simd.md.
UNSPEC_SSLI ; Used in aarch64-simd.md.
UNSPEC_USLI ; Used in aarch64-simd.md.
UNSPEC_SSRI ; Used in aarch64-simd.md.
@@ -227,7 +223,6 @@
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
- UNSPEC_CMTST ; Used in aarch64-simd.md.
UNSPEC_FMAX ; Used in aarch64-simd.md.
UNSPEC_FMIN ; Used in aarch64-simd.md.
UNSPEC_BSL ; Used in aarch64-simd.md.
@@ -251,6 +246,7 @@
;; For scalar usage of vector/FP registers
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
+ (SF "s") (DF "d")
(V8QI "") (V16QI "")
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
@@ -305,7 +301,8 @@
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
- (TI "")])
+ (TI "") (SF "")
+ (DF "")])
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
@@ -444,7 +441,8 @@
(V2SI "V2SI") (V4SI "V4SI")
(DI "DI") (V2DI "V2DI")
(V2SF "V2SI") (V4SF "V4SI")
- (V2DF "V2DI")])
+ (V2DF "V2DI") (DF "DI")
+ (SF "SI")])
;; Lower case mode of results of comparison operations.
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
@@ -452,7 +450,8 @@
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
(V2SF "v2si") (V4SF "v4si")
- (V2DF "v2di")])
+ (V2DF "v2di") (DF "di")
+ (SF "si")])
;; Vm for lane instructions is restricted to FP_LO_REGS.
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
@@ -543,6 +542,12 @@
;; Code iterator for signed variants of vector saturating binary ops.
(define_code_iterator SBINQOPS [ss_plus ss_minus])
+;; Comparison operators for <F>CM.
+(define_code_iterator COMPARISONS [lt le eq ge gt])
+
+;; Unsigned comparison operators.
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
+
;; -------------------------------------------------------------------
;; Code Attributes
;; -------------------------------------------------------------------
@@ -571,7 +576,28 @@
(eq "eq")
(ne "ne")
(lt "lt")
- (ge "ge")])
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "ltu")
+ (leu "leu")
+ (geu "geu")
+ (gtu "gtu")])
+
+;; For comparison operators we use the FCM* and CM* instructions.
+;; As there are no CMLE or CMLT instructions which act on 3 vector
+;; operands, we must use CMGE or CMGT and swap the order of the
+;; source operands.
+
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
+ (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
+(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
+ (ltu "2") (leu "2") (geu "1") (gtu "1")])
+(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
+ (ltu "1") (leu "1") (geu "2") (gtu "2")])
+
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
+ (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
;; Optab prefix for sign/zero-extending operations
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")
@@ -680,11 +706,6 @@
UNSPEC_SQSHRN UNSPEC_UQSHRN
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
- UNSPEC_CMLE UNSPEC_CMLT])
-
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
-
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
UNSPEC_TRN1 UNSPEC_TRN2
UNSPEC_UZP1 UNSPEC_UZP2])
@@ -768,12 +789,6 @@
(UNSPEC_RADDHN2 "add")
(UNSPEC_RSUBHN2 "sub")])
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
- (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
- (UNSPEC_CMEQ "eq")
- (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
- (UNSPEC_CMTST "tst")])
-
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
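Reading the three attribute tables together: a code with no direct instruction (lt, le, ltu, leu) maps through n_optab to the reversed mnemonic, and cmp_1/cmp_2 control which source operand is printed first. An illustrative C table mirroring that mapping (not a GCC API):

    /* cmlt a, b  ==>  cmgt with operands 2,1;  geu  ==>  cmhs 1,2; etc. */
    struct cm_map { const char *code, *mnemonic; int op1, op2; };

    static const struct cm_map cm_table[] = {
      { "lt",  "gt", 2, 1 }, { "le",  "ge", 2, 1 },
      { "eq",  "eq", 1, 2 }, { "ge",  "ge", 1, 2 }, { "gt",  "gt", 1, 2 },
      { "ltu", "hi", 2, 1 }, { "leu", "hs", 2, 1 },
      { "geu", "hs", 1, 2 }, { "gtu", "hi", 1, 2 },
    };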
diff --git a/gcc-4.8/gcc/config/aarch64/predicates.md b/gcc-4.8/gcc/config/aarch64/predicates.md
index 8f80b2028..8514e8f8f 100644
--- a/gcc-4.8/gcc/config/aarch64/predicates.md
+++ b/gcc-4.8/gcc/config/aarch64/predicates.md
@@ -31,6 +31,11 @@
(ior (match_operand 0 "register_operand")
(match_test "op == const0_rtx"))))
+(define_predicate "aarch64_reg_or_fp_zero"
+ (and (match_code "reg,subreg,const_double")
+ (ior (match_operand 0 "register_operand")
+ (match_test "aarch64_float_const_zero_rtx_p (op)"))))
+
(define_predicate "aarch64_reg_zero_or_m1_or_1"
(and (match_code "reg,subreg,const_int")
(ior (match_operand 0 "register_operand")
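The new aarch64_reg_or_fp_zero predicate is what lets the floating-point compare patterns earlier in this series accept a literal +0.0 and emit the immediate form of FCM*. A hedged source-level example (the zero operand need not occupy a register):

    #include <arm_neon.h>

    /* Comparing against literal zero can use the #0.0 immediate form,
       e.g. "fcmgt v0.4s, v1.4s, #0.0", rather than a zero register.  */
    uint32x4_t positive_lanes (float32x4_t x)
    {
      return vcgtq_f32 (x, vdupq_n_f32 (0.0f));
    }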
diff --git a/gcc-4.8/gcc/config/arm/arm-ldmstm.ml b/gcc-4.8/gcc/config/arm/arm-ldmstm.ml
index 2bc9702be..0f637d3fe 100644
--- a/gcc-4.8/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc-4.8/gcc/config/arm/arm-ldmstm.ml
@@ -67,10 +67,13 @@ let destreg nregs first op_type thumb =
Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")")
(nregs + 1) (inout_constr op_type) (constr thumb)
+let reg_predicate thumb =
+ if thumb then "low_register_operand" else "arm_hard_general_register_operand"
+
let write_ldm_set thumb nregs offset opnr first =
let indent = " " in
Printf.printf "%s" (if first then " [" else indent);
- Printf.printf "(set (match_operand:SI %d \"arm_hard_register_operand\" \"\")\n" opnr;
+ Printf.printf "(set (match_operand:SI %d \"%s\" \"\")\n" opnr (reg_predicate thumb);
Printf.printf "%s (mem:SI " indent;
begin if offset != 0 then Printf.printf "(plus:SI " end;
Printf.printf "%s" (destreg nregs first IN thumb);
@@ -84,7 +87,7 @@ let write_stm_set thumb nregs offset opnr first =
begin if offset != 0 then Printf.printf "(plus:SI " end;
Printf.printf "%s" (destreg nregs first IN thumb);
begin if offset != 0 then Printf.printf " (const_int %d))" offset end;
- Printf.printf ")\n%s (match_operand:SI %d \"arm_hard_register_operand\" \"\"))" indent opnr
+ Printf.printf ")\n%s (match_operand:SI %d \"%s\" \"\"))" indent opnr (reg_predicate thumb)
let write_ldm_peep_set extra_indent nregs opnr first =
let indent = " " ^ extra_indent in
diff --git a/gcc-4.8/gcc/config/arm/arm.c b/gcc-4.8/gcc/config/arm/arm.c
index e8d83e0f3..14c79ec99 100644
--- a/gcc-4.8/gcc/config/arm/arm.c
+++ b/gcc-4.8/gcc/config/arm/arm.c
@@ -230,7 +230,6 @@ static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
-static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
@@ -5446,7 +5445,8 @@ require_pic_register (void)
if (!crtl->uses_pic_offset_table)
{
gcc_assert (can_create_pseudo_p ());
- if (arm_pic_register != INVALID_REGNUM)
+ if (arm_pic_register != INVALID_REGNUM
+ && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
{
if (!cfun->machine->pic_reg)
cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
@@ -5472,6 +5472,11 @@ require_pic_register (void)
crtl->uses_pic_offset_table = 1;
start_sequence ();
+ if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
+ && arm_pic_register > LAST_LO_REGNUM)
+ emit_move_insn (cfun->machine->pic_reg,
+ gen_rtx_REG (Pmode, arm_pic_register));
+ else
arm_load_pic_register (0UL);
seq = get_insns ();
@@ -5730,6 +5735,14 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
}
+ else if (arm_pic_register != INVALID_REGNUM
+ && arm_pic_register > LAST_LO_REGNUM
+ && REGNO (pic_reg) <= LAST_LO_REGNUM)
+ {
+ emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
+ emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
+ emit_use (gen_rtx_REG (Pmode, arm_pic_register));
+ }
else
emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
}
@@ -6595,6 +6608,32 @@ legitimize_tls_address (rtx x, rtx reg)
rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
+ if (arm_tls_referenced_p (x))
+ {
+ rtx addend = NULL;
+
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ addend = XEXP (XEXP (x, 0), 1);
+ x = XEXP (XEXP (x, 0), 0);
+ }
+
+ if (GET_CODE (x) != SYMBOL_REF)
+ return x;
+
+ gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
+
+ x = legitimize_tls_address (x, NULL_RTX);
+
+ if (addend)
+ {
+ x = gen_rtx_PLUS (SImode, x, addend);
+ orig_x = x;
+ }
+ else
+ return x;
+ }
+
if (!TARGET_ARM)
{
/* TODO: legitimize_address for Thumb2. */
@@ -6603,9 +6642,6 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
return thumb_legitimize_address (x, orig_x, mode);
}
- if (arm_tls_symbol_p (x))
- return legitimize_tls_address (x, NULL_RTX);
-
if (GET_CODE (x) == PLUS)
{
rtx xop0 = XEXP (x, 0);
@@ -6717,9 +6753,6 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
- if (arm_tls_symbol_p (x))
- return legitimize_tls_address (x, NULL_RTX);
-
if (GET_CODE (x) == PLUS
&& CONST_INT_P (XEXP (x, 1))
&& (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
@@ -7010,20 +7043,6 @@ thumb_legitimize_reload_address (rtx *x_p,
/* Test for various thread-local symbols. */
-/* Return TRUE if X is a thread-local symbol. */
-
-static bool
-arm_tls_symbol_p (rtx x)
-{
- if (! TARGET_HAVE_TLS)
- return false;
-
- if (GET_CODE (x) != SYMBOL_REF)
- return false;
-
- return SYMBOL_REF_TLS_MODEL (x) != 0;
-}
-
/* Helper for arm_tls_referenced_p. */
static int
@@ -21305,7 +21324,11 @@ arm_expand_neon_args (rtx target, int icode, int have_retval,
type_mode);
}
- op[argc] = expand_normal (arg[argc]);
+ /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
+ is returned.  */
+ op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
+ (thisarg == NEON_ARG_MEMORY
+ ? EXPAND_MEMORY : EXPAND_NORMAL));
switch (thisarg)
{
@@ -21324,6 +21347,9 @@ arm_expand_neon_args (rtx target, int icode, int have_retval,
break;
case NEON_ARG_MEMORY:
+ /* Check if expand failed. */
+ if (op[argc] == const0_rtx)
+ return 0;
gcc_assert (MEM_P (op[argc]));
PUT_MODE (op[argc], mode[argc]);
/* ??? arm_neon.h uses the same built-in functions for signed
@@ -23611,7 +23637,7 @@ arm_expand_epilogue_apcs_frame (bool really_return)
if (crtl->calls_eh_return)
emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx,
- GEN_INT (ARM_EH_STACKADJ_REGNUM)));
+ gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
if (IS_STACKALIGN (func_type))
/* Restore the original stack pointer. Before prologue, the stack was
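The NEON_ARG_MEMORY change above is easiest to see through a load intrinsic: the pointer argument must expand to a real MEM so the insn's memory constraint can match, and EXPAND_MEMORY guarantees that (with the const0_rtx failure case now checked explicitly). A usage sketch under that assumption:

    #include <arm_neon.h>

    /* vld1_s8 goes through a NEON_ARG_MEMORY operand; its address
       argument is expanded with EXPAND_MEMORY so a MEM_P comes back.  */
    int8x8_t load8 (const int8_t *p)
    {
      return vld1_s8 (p);
    }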
diff --git a/gcc-4.8/gcc/config/arm/arm.h b/gcc-4.8/gcc/config/arm/arm.h
index 478a3d7c8..b7cf352ba 100644
--- a/gcc-4.8/gcc/config/arm/arm.h
+++ b/gcc-4.8/gcc/config/arm/arm.h
@@ -1213,9 +1213,13 @@ enum reg_class
VFPv2.
In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
VFP registers in little-endian order. We can't describe that accurately to
- GCC, so avoid taking subregs of such values. */
+ GCC, so avoid taking subregs of such values.
+ The only exception is going from a 128-bit to a 64-bit type. In that case
+ the data layout happens to be consistent for big-endian, so we explicitly allow
+ that case. */
#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
(TARGET_VFP && TARGET_BIG_END \
+ && !(GET_MODE_SIZE (FROM) == 16 && GET_MODE_SIZE (TO) == 8) \
&& (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \
|| GET_MODE_SIZE (TO) > UNITS_PER_WORD) \
&& reg_classes_intersect_p (VFP_REGS, (CLASS)))
@@ -2140,14 +2144,9 @@ extern int making_const_table;
#undef ASM_OUTPUT_BEFORE_CASE_LABEL
#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) /* Empty. */
-/* Make sure subsequent insns are aligned after a TBB. */
-#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \
- do \
- { \
- if (GET_MODE (PATTERN (JUMPTABLE)) == QImode) \
- ASM_OUTPUT_ALIGN (FILE, 1); \
- } \
- while (0)
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
+ (GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \
+ ? 1 : 0)
#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
do \
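The CANNOT_CHANGE_MODE_CLASS relaxation above corresponds to the everyday operation of taking one half of a quad register, whose layout happens to be consistent even on big-endian VFP. For example (a sketch, assuming NEON is enabled):

    #include <arm_neon.h>

    /* A 128-bit to 64-bit subreg that the big-endian check now allows:
       extracting the low half of a quad register.  */
    int32x2_t low_half (int32x4_t q)
    {
      return vget_low_s32 (q);
    }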
diff --git a/gcc-4.8/gcc/config/arm/arm.md b/gcc-4.8/gcc/config/arm/arm.md
index aea02fbf4..cdb761a43 100644
--- a/gcc-4.8/gcc/config/arm/arm.md
+++ b/gcc-4.8/gcc/config/arm/arm.md
@@ -11036,7 +11036,7 @@
[(set (match_operand:SI 1 "s_register_operand" "+rk")
(plus:SI (match_dup 1)
(match_operand:SI 2 "const_int_operand" "I")))
- (set (match_operand:DF 3 "arm_hard_register_operand" "")
+ (set (match_operand:DF 3 "vfp_hard_register_operand" "")
(mem:DF (match_dup 1)))])]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"*
@@ -11270,7 +11270,9 @@
(match_operand:SI 1 "" "")
(match_operand:SI 2 "" ""))]
"TARGET_32BIT && arm_arch5e"
- "pld\\t%a0")
+ "pld\\t%a0"
+ [(set_attr "type" "load1")]
+)
;; General predication pattern
diff --git a/gcc-4.8/gcc/config/arm/iterators.md b/gcc-4.8/gcc/config/arm/iterators.md
index b3ad42b37..df82c061f 100644
--- a/gcc-4.8/gcc/config/arm/iterators.md
+++ b/gcc-4.8/gcc/config/arm/iterators.md
@@ -493,6 +493,10 @@
(UNSPEC_VRINTA "no") (UNSPEC_VRINTM "no")
(UNSPEC_VRINTR "yes") (UNSPEC_VRINTX "yes")])
+(define_int_attr vrint_conds [(UNSPEC_VRINTZ "nocond") (UNSPEC_VRINTP "unconditional")
+ (UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional")
+ (UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")])
+
(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
diff --git a/gcc-4.8/gcc/config/arm/ldmstm.md b/gcc-4.8/gcc/config/arm/ldmstm.md
index 8ebdfc817..ed206b00c 100644
--- a/gcc-4.8/gcc/config/arm/ldmstm.md
+++ b/gcc-4.8/gcc/config/arm/ldmstm.md
@@ -23,15 +23,15 @@
(define_insn "*ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_operand:SI 5 "s_register_operand" "rk")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
@@ -41,15 +41,15 @@
(define_insn "*thumb_ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_operand:SI 5 "s_register_operand" "l")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
@@ -60,15 +60,15 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int 16)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 5)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
@@ -80,15 +80,15 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 5 "s_register_operand" "+&l")
(plus:SI (match_dup 5) (const_int 16)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_dup 5)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
@@ -98,13 +98,13 @@
(define_insn "*stm4_ia"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -115,13 +115,13 @@
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int 16)))
(set (mem:SI (match_dup 5))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -132,29 +132,29 @@
[(set (match_operand:SI 5 "s_register_operand" "+&l")
(plus:SI (match_dup 5) (const_int 16)))
(set (mem:SI (match_dup 5))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "low_register_operand" ""))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
"stm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")])
(define_insn "*ldm4_ib"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 16))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
@@ -166,16 +166,16 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int 16)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 12))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int 16))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 5"
@@ -186,13 +186,13 @@
(define_insn "*stm4_ib"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"stm%(ib%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -203,13 +203,13 @@
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int 16)))
(set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 5"
"stm%(ib%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -217,16 +217,16 @@
(define_insn "*ldm4_da"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
(const_int -12))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -4))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 5)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"ldm%(da%)\t%5, {%1, %2, %3, %4}"
@@ -237,16 +237,16 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int -16)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -12))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -4))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 5)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 5"
"ldm%(da%)\t%5!, {%1, %2, %3, %4}"
@@ -256,13 +256,13 @@
(define_insn "*stm4_da"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 5))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"stm%(da%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -273,13 +273,13 @@
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int -16)))
(set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 5))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 5"
"stm%(da%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -287,16 +287,16 @@
(define_insn "*ldm4_db"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
(const_int -16))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -12))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
@@ -308,16 +308,16 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int -16)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -16))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -12))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -8))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (set (match_operand:SI 4 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 5)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
@@ -328,13 +328,13 @@
(define_insn "*stm4_db"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -345,13 +345,13 @@
[(set (match_operand:SI 5 "s_register_operand" "+&rk")
(plus:SI (match_dup 5) (const_int -16)))
(set (mem:SI (plus:SI (match_dup 5) (const_int -16)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 4 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(db%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
@@ -466,12 +466,12 @@
(define_insn "*ldm3_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_operand:SI 4 "s_register_operand" "rk")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
@@ -481,12 +481,12 @@
(define_insn "*thumb_ldm3_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_operand:SI 4 "s_register_operand" "l")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
@@ -497,12 +497,12 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int 12)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 4)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
@@ -514,12 +514,12 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 4 "s_register_operand" "+&l")
(plus:SI (match_dup 4) (const_int 12)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_dup 4)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
@@ -529,11 +529,11 @@
(define_insn "*stm3_ia"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -544,11 +544,11 @@
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int 12)))
(set (mem:SI (match_dup 4))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -559,24 +559,24 @@
[(set (match_operand:SI 4 "s_register_operand" "+&l")
(plus:SI (match_dup 4) (const_int 12)))
(set (mem:SI (match_dup 4))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "low_register_operand" ""))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")])
(define_insn "*ldm3_ib"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 12))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
@@ -588,13 +588,13 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int 12)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int 12))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
@@ -605,11 +605,11 @@
(define_insn "*stm3_ib"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"stm%(ib%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -620,11 +620,11 @@
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int 12)))
(set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"stm%(ib%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -632,13 +632,13 @@
(define_insn "*ldm3_da"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
(const_int -8))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 4)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"ldm%(da%)\t%4, {%1, %2, %3}"
@@ -649,13 +649,13 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int -12)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -8))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -4))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 4)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"ldm%(da%)\t%4!, {%1, %2, %3}"
@@ -665,11 +665,11 @@
(define_insn "*stm3_da"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 4))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"stm%(da%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -680,11 +680,11 @@
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int -12)))
(set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 4))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 4"
"stm%(da%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -692,13 +692,13 @@
(define_insn "*ldm3_db"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
(const_int -12))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
@@ -710,13 +710,13 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int -12)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -12))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -8))))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (set (match_operand:SI 3 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 4)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
@@ -727,11 +727,11 @@
(define_insn "*stm3_db"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -742,11 +742,11 @@
[(set (match_operand:SI 4 "s_register_operand" "+&rk")
(plus:SI (match_dup 4) (const_int -12)))
(set (mem:SI (plus:SI (match_dup 4) (const_int -12)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 3 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
@@ -847,9 +847,9 @@
(define_insn "*ldm2_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_operand:SI 3 "s_register_operand" "rk")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
@@ -859,9 +859,9 @@
(define_insn "*thumb_ldm2_ia"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_operand:SI 3 "s_register_operand" "l")))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2"
@@ -872,9 +872,9 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int 8)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 3)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
@@ -886,9 +886,9 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 3 "s_register_operand" "+&l")
(plus:SI (match_dup 3) (const_int 8)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "low_register_operand" "")
(mem:SI (match_dup 3)))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "low_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
@@ -898,9 +898,9 @@
(define_insn "*stm2_ia"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
@@ -911,9 +911,9 @@
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int 8)))
(set (mem:SI (match_dup 3))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
@@ -924,19 +924,19 @@
[(set (match_operand:SI 3 "s_register_operand" "+&l")
(plus:SI (match_dup 3) (const_int 8)))
(set (mem:SI (match_dup 3))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "low_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "low_register_operand" ""))])]
"TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")])
(define_insn "*ldm2_ib"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 8))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
@@ -948,10 +948,10 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int 8)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int 8))))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
@@ -962,9 +962,9 @@
(define_insn "*stm2_ib"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(ib%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
@@ -975,9 +975,9 @@
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int 8)))
(set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"stm%(ib%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
@@ -985,10 +985,10 @@
(define_insn "*ldm2_da"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
(const_int -4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 3)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"ldm%(da%)\t%3, {%1, %2}"
@@ -999,10 +999,10 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int -8)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -4))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (match_dup 3)))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"ldm%(da%)\t%3!, {%1, %2}"
@@ -1012,9 +1012,9 @@
(define_insn "*stm2_da"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 3))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 2"
"stm%(da%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
@@ -1025,9 +1025,9 @@
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int -8)))
(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (match_dup 3))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_ARM && XVECLEN (operands[0], 0) == 3"
"stm%(da%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
@@ -1035,10 +1035,10 @@
(define_insn "*ldm2_db"
[(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ [(set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
(const_int -8))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
@@ -1050,10 +1050,10 @@
[(match_parallel 0 "load_multiple_operation"
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int -8)))
- (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (set (match_operand:SI 1 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -8))))
- (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (set (match_operand:SI 2 "arm_hard_general_register_operand" "")
(mem:SI (plus:SI (match_dup 3)
(const_int -4))))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
@@ -1064,9 +1064,9 @@
(define_insn "*stm2_db"
[(match_parallel 0 "store_multiple_operation"
[(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
@@ -1077,9 +1077,9 @@
[(set (match_operand:SI 3 "s_register_operand" "+&rk")
(plus:SI (match_dup 3) (const_int -8)))
(set (mem:SI (plus:SI (match_dup 3) (const_int -8)))
- (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (match_operand:SI 1 "arm_hard_general_register_operand" ""))
(set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
- (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ (match_operand:SI 2 "arm_hard_general_register_operand" ""))])]
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
diff --git a/gcc-4.8/gcc/config/arm/predicates.md b/gcc-4.8/gcc/config/arm/predicates.md
index f493d8284..238f9fd7e 100644
--- a/gcc-4.8/gcc/config/arm/predicates.md
+++ b/gcc-4.8/gcc/config/arm/predicates.md
@@ -31,11 +31,11 @@
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
})
-;; Any hard register.
-(define_predicate "arm_hard_register_operand"
+;; Any general register.
+(define_predicate "arm_hard_general_register_operand"
(match_code "reg")
{
- return REGNO (op) < FIRST_PSEUDO_REGISTER;
+ return REGNO (op) <= LAST_ARM_REGNUM;
})
;; A low register.
@@ -76,6 +76,12 @@
&& REGNO_REG_CLASS (REGNO (op)) == VFP_REGS)));
})
+(define_predicate "vfp_hard_register_operand"
+ (match_code "reg")
+{
+ return (IS_VFP_REGNUM (REGNO (op)));
+})
+
(define_predicate "zero_operand"
(and (match_code "const_int,const_double,const_vector")
(match_test "op == CONST0_RTX (mode)")))
diff --git a/gcc-4.8/gcc/config/arm/vfp.md b/gcc-4.8/gcc/config/arm/vfp.md
index 10e271ae5..13e151f5a 100644
--- a/gcc-4.8/gcc/config/arm/vfp.md
+++ b/gcc-4.8/gcc/config/arm/vfp.md
@@ -1264,6 +1264,7 @@
"TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
"vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
[(set_attr "predicable" "<vrint_predicable>")
+ (set_attr "conds" "<vrint_conds>")
(set_attr "type" "f_rint<vfp_type>")]
)
@@ -1280,7 +1281,8 @@
(match_operand:SDF 2 "register_operand" "<F_constraint>")))]
"TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
"vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "f_minmax<vfp_type>")]
+ [(set_attr "type" "f_minmax<vfp_type>")
+ (set_attr "conds" "unconditional")]
)
(define_insn "smin<mode>3"
@@ -1289,7 +1291,8 @@
(match_operand:SDF 2 "register_operand" "<F_constraint>")))]
"TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
"vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "f_minmax<vfp_type>")]
+ [(set_attr "type" "f_minmax<vfp_type>")
+ (set_attr "conds" "unconditional")]
)
;; Unimplemented insns:
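
The three vfp.md hunks attach a "conds" attribute to the ARMv8 vrint*, vmaxnm and vminnm patterns so the backend never tries to predicate them inside an IT block; these instructions have no conditional encodings. A hedged example of source that can reach them (assumes -mfpu=fp-armv8 plus fast-math-style options):

    double
    fast_max (double a, double b)
    {
      /* May be selected as vmaxnm.f64 d0, d0, d1 -- an instruction
         that must remain unconditional.  */
      return a > b ? a : b;
    }
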
diff --git a/gcc-4.8/gcc/config/avr/avr.c b/gcc-4.8/gcc/config/avr/avr.c
index 74872aa6c..86c46e9c4 100644
--- a/gcc-4.8/gcc/config/avr/avr.c
+++ b/gcc-4.8/gcc/config/avr/avr.c
@@ -587,10 +587,14 @@ avr_set_current_function (tree decl)
const char *name;
name = DECL_ASSEMBLER_NAME_SET_P (decl)
- /* Remove the leading '*' added in set_user_assembler_name. */
- ? 1 + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))
+ ? IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))
: IDENTIFIER_POINTER (DECL_NAME (decl));
+ /* Skip a leading '*' that might still prefix the assembler name,
+ e.g. in non-LTO runs. */
+
+ name = default_strip_name_encoding (name);
+
/* Silently ignore 'signal' if 'interrupt' is present. AVR-LibC started
using this when it switched from SIGNAL and INTERRUPT to ISR. */
@@ -2339,6 +2343,12 @@ avr_notice_update_cc (rtx body ATTRIBUTE_UNUSED, rtx insn)
}
break;
+ case CC_SET_VZN:
+ /* Insns like INC, DEC and NEG set Z, N and V. We currently don't make use
+ of this combination, cf. also PR61055. */
+ CC_STATUS_INIT;
+ break;
+
case CC_SET_CZN:
/* Insn sets the Z,N,C flags of CC to recog_operand[0].
The V flag may or may not be known but that's ok because
@@ -3977,7 +3987,7 @@ avr_out_store_psi (rtx insn, rtx *op, int *plen)
"std Y+61,%A1" CR_TAB
"std Y+62,%B1" CR_TAB
"std Y+63,%C1" CR_TAB
- "sbiw r28,%o0-60", op, plen, -5);
+ "sbiw r28,%o0-61", op, plen, -5);
return avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB
"sbci r29,hi8(-%o0)" CR_TAB
@@ -6274,7 +6284,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
if (REG_P (xop[2]))
{
- *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_SET_N;
+ *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_CLOBBER;
for (i = 0; i < n_bytes; i++)
{
@@ -6383,7 +6393,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
op, plen, 1);
if (n_bytes == 2 && PLUS == code)
- *pcc = CC_SET_ZN;
+ *pcc = CC_SET_CZN;
}
i++;
@@ -6406,6 +6416,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
{
avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0",
op, plen, 1);
+ *pcc = CC_CLOBBER;
break;
}
@@ -6806,8 +6817,8 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label)
/* Work out the shortest sequence. */
- avr_out_plus_1 (op, &len_minus, MINUS, &cc_plus, code_sat, sign, out_label);
- avr_out_plus_1 (op, &len_plus, PLUS, &cc_minus, code_sat, sign, out_label);
+ avr_out_plus_1 (op, &len_minus, MINUS, &cc_minus, code_sat, sign, out_label);
+ avr_out_plus_1 (op, &len_plus, PLUS, &cc_plus, code_sat, sign, out_label);
if (plen)
{
diff --git a/gcc-4.8/gcc/config/avr/avr.md b/gcc-4.8/gcc/config/avr/avr.md
index f2681233a..ae2e7651d 100644
--- a/gcc-4.8/gcc/config/avr/avr.md
+++ b/gcc-4.8/gcc/config/avr/avr.md
@@ -89,7 +89,7 @@
(include "constraints.md")
;; Condition code settings.
-(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber,
+(define_attr "cc" "none,set_czn,set_zn,set_vzn,set_n,compare,clobber,
plus,ldi"
(const_string "none"))
@@ -1097,7 +1097,7 @@
inc %0\;inc %0
dec %0\;dec %0"
[(set_attr "length" "1,1,1,1,2,2")
- (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")])
+ (set_attr "cc" "set_czn,set_czn,set_vzn,set_vzn,set_vzn,set_vzn")])
;; "addhi3"
;; "addhq3" "adduhq3"
@@ -1368,7 +1368,7 @@
dec %0\;dec %0
inc %0\;inc %0"
[(set_attr "length" "1,1,1,1,2,2")
- (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")])
+ (set_attr "cc" "set_czn,set_czn,set_vzn,set_vzn,set_vzn,set_vzn")])
;; "subhi3"
;; "subhq3" "subuhq3"
@@ -3991,7 +3991,7 @@
""
"neg %0"
[(set_attr "length" "1")
- (set_attr "cc" "set_zn")])
+ (set_attr "cc" "set_vzn")])
(define_insn "*negqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
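
The AVR hunks introduce a CC_SET_VZN / "set_vzn" condition-code class for INC, DEC and NEG, which set Z, N and V but leave the carry flag untouched; tagging them "set_zn" let later passes assume flag state these instructions do not provide (cf. PR61055). The same change also fixes the swapped cc_plus/cc_minus results in avr_out_plus. A small sketch of the instruction behaviour being modelled:

    unsigned char
    pred (unsigned char x)
    {
      /* May emit "dec r24": Z, N and V are updated, C is preserved,
         so a following carry-based branch must not reuse these flags.  */
      return x - 1;
    }
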
diff --git a/gcc-4.8/gcc/config/cris/cris.md b/gcc-4.8/gcc/config/cris/cris.md
index 8a7f0bfdd..b3e9f0494 100644
--- a/gcc-4.8/gcc/config/cris/cris.md
+++ b/gcc-4.8/gcc/config/cris/cris.md
@@ -758,7 +758,7 @@
(match_operand:SI 1 "const_int_operand" ""))
(match_operand:SI 2 "register_operand" ""))])
(match_operand 3 "register_operand" ""))
- (set (match_operand:SI 4 "register_operand" "")
+ (set (match_operand:SI 4 "cris_nonsp_register_operand" "")
(plus:SI (mult:SI (match_dup 0)
(match_dup 1))
(match_dup 2)))])]
@@ -859,7 +859,7 @@
(match_operand:SI 0 "cris_bdap_operand" "")
(match_operand:SI 1 "cris_bdap_operand" ""))])
(match_operand 2 "register_operand" ""))
- (set (match_operand:SI 3 "register_operand" "")
+ (set (match_operand:SI 3 "cris_nonsp_register_operand" "")
(plus:SI (match_dup 0) (match_dup 1)))])]
"reload_completed && reg_overlap_mentioned_p (operands[3], operands[2])"
[(set (match_dup 4) (match_dup 2))
@@ -3960,7 +3960,7 @@
;; up.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
4 "cris_operand_extend_operator"
[(match_operand 1 "register_operand" "")
@@ -3990,7 +3990,7 @@
;; Call this op-extend-split-rx=rz
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
4 "cris_plus_or_bound_operator"
[(match_operand 1 "register_operand" "")
@@ -4018,7 +4018,7 @@
;; Call this op-extend-split-swapped
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
4 "cris_plus_or_bound_operator"
[(match_operator
@@ -4044,7 +4044,7 @@
;; bound. Call this op-extend-split-swapped-rx=rz.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
4 "cris_plus_or_bound_operator"
[(match_operator
@@ -4075,7 +4075,7 @@
;; Call this op-extend.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
3 "cris_orthogonal_operator"
[(match_operand 1 "register_operand" "")
@@ -4099,7 +4099,7 @@
;; Call this op-split-rx=rz
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
3 "cris_commutative_orth_op"
[(match_operand 2 "memory_operand" "")
@@ -4123,7 +4123,7 @@
;; Call this op-split-swapped.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
3 "cris_commutative_orth_op"
[(match_operand 1 "register_operand" "")
@@ -4146,7 +4146,7 @@
;; Call this op-split-swapped-rx=rz.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
3 "cris_orthogonal_operator"
[(match_operand 2 "memory_operand" "")
@@ -4555,10 +4555,11 @@
;; We're not allowed to generate copies of registers with different mode
;; until after reload; copying pseudos upsets reload. CVS as of
;; 2001-08-24, unwind-dw2-fde.c, _Unwind_Find_FDE ICE in
-;; cselib_invalidate_regno.
+;; cselib_invalidate_regno. Also, don't do this for the stack-pointer,
+;; as we don't want it set temporarily to an invalid value.
(define_split ; indir_to_reg_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operand 1 "indirect_operand" ""))]
"reload_completed
&& REG_P (operands[0])
@@ -4574,7 +4575,7 @@
;; As the above, but MOVS and MOVU.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "cris_nonsp_register_operand" "")
(match_operator
4 "cris_extend_operator"
[(match_operand 1 "indirect_operand" "")]))]
diff --git a/gcc-4.8/gcc/config/cris/predicates.md b/gcc-4.8/gcc/config/cris/predicates.md
index 040482ba9..2acd02f8a 100644
--- a/gcc-4.8/gcc/config/cris/predicates.md
+++ b/gcc-4.8/gcc/config/cris/predicates.md
@@ -76,6 +76,10 @@
(match_test "cris_simple_address_operand (XEXP (op, 0),
Pmode)"))))
+(define_predicate "cris_nonsp_register_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "op != stack_pointer_rtx")))
+
;; The caller needs to use :SI.
(define_predicate "cris_bdap_sign_extend_operand"
; Disabled until <URL:http://gcc.gnu.org/ml/gcc-patches/2005-10/msg01376.html>
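
The CRIS changes route every affected define_split through the new cris_nonsp_register_operand so that no split can leave the stack pointer holding a transient, invalid value; an interrupt arriving between the two halves of a split would otherwise push state through a garbage SP. A C restatement of the predicate, for illustration only:

    static bool
    cris_nonsp_reg_p (const_rtx op)
    {
      /* Any register except the stack pointer.  */
      return REG_P (op) && op != stack_pointer_rtx;
    }
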
diff --git a/gcc-4.8/gcc/config/darwin.c b/gcc-4.8/gcc/config/darwin.c
index e07fa4c83..05646f781 100644
--- a/gcc-4.8/gcc/config/darwin.c
+++ b/gcc-4.8/gcc/config/darwin.c
@@ -1329,6 +1329,9 @@ is_objc_metadata (tree decl)
return NULL_TREE;
}
+static int classes_seen;
+static int objc_metadata_seen;
+
/* Return the section required for Objective C ABI 2 metadata. */
static section *
darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
@@ -1338,12 +1341,9 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
p = IDENTIFIER_POINTER (ident);
- /* If we are in LTO, then we don't know the state of flag_next_runtime
- or flag_objc_abi when the code was generated. We set these from the
- meta-data - which is needed to deal with const string constructors. */
+ gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2);
- flag_next_runtime = 1;
- flag_objc_abi = 2;
+ objc_metadata_seen = 1;
if (base == data_section)
base = darwin_sections[objc2_metadata_section];
@@ -1366,7 +1366,10 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
else if (!strncmp (p, "V2_NLCL", 7))
return darwin_sections[objc2_nonlazy_class_section];
else if (!strncmp (p, "V2_CLAB", 7))
+ {
+ classes_seen = 1;
return darwin_sections[objc2_classlist_section];
+ }
else if (!strncmp (p, "V2_SRFS", 7))
return darwin_sections[objc2_selector_refs_section];
else if (!strncmp (p, "V2_NLCA", 7))
@@ -1401,12 +1404,9 @@ darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE);
p = IDENTIFIER_POINTER (ident);
- /* If we are in LTO, then we don't know the state of flag_next_runtime
- or flag_objc_abi when the code was generated. We set these from the
- meta-data - which is needed to deal with const string constructors. */
- flag_next_runtime = 1;
- if (!global_options_set.x_flag_objc_abi)
- flag_objc_abi = 1;
+ gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2);
+
+ objc_metadata_seen = 1;
/* String sections first, cos there are lots of strings. */
if (!strncmp (p, "V1_STRG", 7))
@@ -1419,7 +1419,10 @@ darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base)
return darwin_sections[objc_meth_var_types_section];
else if (!strncmp (p, "V1_CLAS", 7))
+ {
+ classes_seen = 1;
return darwin_sections[objc_class_section];
+ }
else if (!strncmp (p, "V1_META", 7))
return darwin_sections[objc_meta_class_section];
else if (!strncmp (p, "V1_CATG", 7))
@@ -1603,8 +1606,6 @@ machopic_select_section (tree decl,
if (TREE_CODE (name) == TYPE_DECL)
name = DECL_NAME (name);
- /* FIXME: This is unsatisfactory for LTO, since it relies on other
- metadata determining the source FE. */
if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_ObjCString"))
{
if (flag_next_runtime)
@@ -2845,6 +2846,33 @@ darwin_file_end (void)
finalize_ctors ();
if (!vec_safe_is_empty (dtors))
finalize_dtors ();
+
+ /* If we are expecting to output NeXT ObjC meta-data (and we actually see
+ some), then we output the fix-and-continue marker (Image Info).
+ This applies to Objective C, Objective C++ and LTO with either language
+ as part of the input. */
+ if (flag_next_runtime && objc_metadata_seen)
+ {
+ unsigned int flags = 0;
+ if (flag_objc_abi >= 2)
+ {
+ flags = 16;
+ output_section_asm_op
+ (darwin_sections[objc2_image_info_section]->unnamed.data);
+ }
+ else
+ output_section_asm_op
+ (darwin_sections[objc_image_info_section]->unnamed.data);
+
+ ASM_OUTPUT_ALIGN (asm_out_file, 2);
+ fputs ("L_OBJC_ImageInfo:\n", asm_out_file);
+
+ flags |= (flag_replace_objc_classes && classes_seen) ? 1 : 0;
+ flags |= flag_objc_gc ? 2 : 0;
+
+ fprintf (asm_out_file, "\t.long\t0\n\t.long\t%u\n", flags);
+ }
+
machopic_finish (asm_out_file);
if (strcmp (lang_hooks.name, "GNU C++") == 0)
{
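
darwin_file_end now emits the L_OBJC_ImageInfo record whenever NeXT-runtime metadata was actually seen, instead of retroactively flipping flag_next_runtime/flag_objc_abi from section names under LTO; the new gcc_checking_asserts make that assumption explicit. The record the fprintf writes, sketched as a struct with the values read off the hunk above:

    struct objc_image_info
    {
      unsigned version;   /* always 0 here */
      unsigned flags;     /* 16 when flag_objc_abi >= 2; |1 for
                             -freplace-objc-classes with classes seen;
                             |2 for -fobjc-gc */
    };
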
diff --git a/gcc-4.8/gcc/config/i386/driver-i386.c b/gcc-4.8/gcc/config/i386/driver-i386.c
index 763758495..148fbc227 100644
--- a/gcc-4.8/gcc/config/i386/driver-i386.c
+++ b/gcc-4.8/gcc/config/i386/driver-i386.c
@@ -126,6 +126,18 @@ decode_caches_intel (unsigned reg, bool xeon_mp,
case 0x0c:
level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
break;
+ case 0x0d:
+ level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x0e:
+ level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
+ break;
+ case 0x21:
+ level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x24:
+ level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
+ break;
case 0x2c:
level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
break;
@@ -162,6 +174,9 @@ decode_caches_intel (unsigned reg, bool xeon_mp,
case 0x45:
level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
break;
+ case 0x48:
+ level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
+ break;
case 0x49:
if (xeon_mp)
break;
@@ -203,6 +218,9 @@ decode_caches_intel (unsigned reg, bool xeon_mp,
case 0x7f:
level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
break;
+ case 0x80:
+ level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
+ break;
case 0x82:
level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
break;
@@ -470,6 +488,28 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_xsaveopt = eax & bit_XSAVEOPT;
}
+ /* Check cpuid level of extended features. */
+ __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+ if (ext_level > 0x80000000)
+ {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ has_lahf_lm = ecx & bit_LAHF_LM;
+ has_sse4a = ecx & bit_SSE4a;
+ has_abm = ecx & bit_ABM;
+ has_lwp = ecx & bit_LWP;
+ has_fma4 = ecx & bit_FMA4;
+ has_xop = ecx & bit_XOP;
+ has_tbm = ecx & bit_TBM;
+ has_lzcnt = ecx & bit_LZCNT;
+ has_prfchw = ecx & bit_PRFCHW;
+
+ has_longmode = edx & bit_LM;
+ has_3dnowp = edx & bit_3DNOWP;
+ has_3dnow = edx & bit_3DNOW;
+ }
+
/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
#define XCR_XFEATURE_ENABLED_MASK 0x0
#define XSTATE_FP 0x1
@@ -488,33 +528,12 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_avx2 = 0;
has_fma = 0;
has_fma4 = 0;
+ has_f16c = 0;
has_xop = 0;
has_xsave = 0;
has_xsaveopt = 0;
}
- /* Check cpuid level of extended features. */
- __cpuid (0x80000000, ext_level, ebx, ecx, edx);
-
- if (ext_level > 0x80000000)
- {
- __cpuid (0x80000001, eax, ebx, ecx, edx);
-
- has_lahf_lm = ecx & bit_LAHF_LM;
- has_sse4a = ecx & bit_SSE4a;
- has_abm = ecx & bit_ABM;
- has_lwp = ecx & bit_LWP;
- has_fma4 = ecx & bit_FMA4;
- has_xop = ecx & bit_XOP;
- has_tbm = ecx & bit_TBM;
- has_lzcnt = ecx & bit_LZCNT;
- has_prfchw = ecx & bit_PRFCHW;
-
- has_longmode = edx & bit_LM;
- has_3dnowp = edx & bit_3DNOWP;
- has_3dnow = edx & bit_3DNOW;
- }
-
if (!arch)
{
if (vendor == signature_AMD_ebx
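
Reordering matters here: the OSXSAVE fallback zeroes has_fma4 and has_xop (and now has_f16c) when the OS does not manage YMM state, and with the old layout the 0x80000001 probe ran afterwards and set has_fma4/has_xop right back. A minimal sketch of the reordered probe, using the __cpuid macro from cpuid.h:

    #include <cpuid.h>

    static void
    probe_ext_features (void)
    {
      unsigned int eax, ebx, ecx, edx, ext_level;

      /* Highest supported extended leaf first...  */
      __cpuid (0x80000000, ext_level, ebx, ecx, edx);
      if (ext_level > 0x80000000)
        /* ...then the AMD feature bits (FMA4, XOP, ...), so the later
           xgetbv test can still veto the AVX-dependent ones.  */
        __cpuid (0x80000001, eax, ebx, ecx, edx);
    }
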
diff --git a/gcc-4.8/gcc/config/i386/f16cintrin.h b/gcc-4.8/gcc/config/i386/f16cintrin.h
index 88903c162..4a29fccff 100644
--- a/gcc-4.8/gcc/config/i386/f16cintrin.h
+++ b/gcc-4.8/gcc/config/i386/f16cintrin.h
@@ -35,7 +35,7 @@
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss (unsigned short __S)
{
- __v8hi __H = __extension__ (__v8hi){ __S, 0, 0, 0, 0, 0, 0, 0 };
+ __v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
__v4sf __A = __builtin_ia32_vcvtph2ps (__H);
return __builtin_ia32_vec_ext_v4sf (__A, 0);
}
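
The f16cintrin.h change only adds an explicit cast: __v8hi elements are signed short, and initializing one from an unsigned short is a narrowing conversion that C++11 brace initialization rejects. Usage is unchanged; a sketch (requires -mf16c):

    #include <immintrin.h>

    float
    half_to_float (unsigned short h)
    {
      return _cvtsh_ss (h);   /* converts one IEEE half to float */
    }
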
diff --git a/gcc-4.8/gcc/config/i386/i386.c b/gcc-4.8/gcc/config/i386/i386.c
index 98dc2e41e..0569828f3 100644
--- a/gcc-4.8/gcc/config/i386/i386.c
+++ b/gcc-4.8/gcc/config/i386/i386.c
@@ -1378,7 +1378,7 @@ struct processor_costs nocona_cost = {
8, /* MMX or SSE register to integer */
8, /* size of l1 cache. */
1024, /* size of l2 cache. */
- 128, /* size of prefetch block */
+ 64, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
@@ -1894,10 +1894,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
+ m_COREI7 | m_HASWELL | m_AMDFAM10 | m_BDVER | m_BTVER,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_COREI7 | m_BDVER,
+ m_COREI7 | m_HASWELL | m_BDVER,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
m_BDVER ,
@@ -2409,6 +2409,7 @@ static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
+ const char *const name; /* processor name */
const struct processor_costs *cost; /* Processor costs */
const int align_loop; /* Default alignments. */
const int align_loop_max_skip;
@@ -2417,66 +2418,31 @@ struct ptt
const int align_func;
};
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
- {&i386_cost, 4, 3, 4, 3, 4},
- {&i486_cost, 16, 15, 16, 15, 16},
- {&pentium_cost, 16, 7, 16, 7, 16},
- {&pentiumpro_cost, 16, 15, 16, 10, 16},
- {&geode_cost, 0, 0, 0, 0, 0},
- {&k6_cost, 32, 7, 32, 7, 32},
- {&athlon_cost, 16, 7, 16, 7, 16},
- {&pentium4_cost, 0, 0, 0, 0, 0},
- {&k8_cost, 16, 7, 16, 7, 16},
- {&nocona_cost, 0, 0, 0, 0, 0},
- /* Core 2 */
- {&core_cost, 16, 10, 16, 10, 16},
- /* Core i7 */
- {&core_cost, 16, 10, 16, 10, 16},
- /* Core avx2 */
- {&core_cost, 16, 10, 16, 10, 16},
- {&generic32_cost, 16, 7, 16, 7, 16},
- {&generic64_cost, 16, 10, 16, 10, 16},
- {&amdfam10_cost, 32, 24, 32, 7, 32},
- {&bdver1_cost, 16, 10, 16, 7, 11},
- {&bdver2_cost, 16, 10, 16, 7, 11},
- {&bdver3_cost, 16, 10, 16, 7, 11},
- {&btver1_cost, 16, 10, 16, 7, 11},
- {&btver2_cost, 16, 10, 16, 7, 11},
- {&atom_cost, 16, 15, 16, 7, 16}
-};
-
-static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
-{
- "generic",
- "i386",
- "i486",
- "pentium",
- "pentium-mmx",
- "pentiumpro",
- "pentium2",
- "pentium3",
- "pentium4",
- "pentium-m",
- "prescott",
- "nocona",
- "core2",
- "corei7",
- "core-avx2",
- "atom",
- "geode",
- "k6",
- "k6-2",
- "k6-3",
- "athlon",
- "athlon-4",
- "k8",
- "amdfam10",
- "bdver1",
- "bdver2",
- "bdver3",
- "btver1",
- "btver2"
+ {"generic", &generic32_cost, 16, 7, 16, 7, 16},
+ {"generic", &generic64_cost, 16, 10, 16, 10, 16},
+ {"i386", &i386_cost, 4, 3, 4, 3, 4},
+ {"i486", &i486_cost, 16, 15, 16, 15, 16},
+ {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
+ {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
+ {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
+ {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
+ {"core2", &core_cost, 16, 10, 16, 10, 16},
+ {"corei7", &core_cost, 16, 10, 16, 10, 16},
+ {"core-avx2", &core_cost, 16, 10, 16, 10, 16},
+ {"atom", &atom_cost, 16, 15, 16, 7, 16},
+ {"geode", &geode_cost, 0, 0, 0, 0, 0},
+ {"k6", &k6_cost, 32, 7, 32, 7, 32},
+ {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
+ {"k8", &k8_cost, 16, 7, 16, 7, 16},
+ {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
+ {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
+ {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
+ {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
+ {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
+ {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
};
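
Folding the name into processor_target_table retires the separate cpu_names[] array, whose order had to track a different enum; the new comment pins the single remaining invariant (sync with enum processor_type in i386.h). The lookup pattern this enables, mirroring the use further down in ix86_option_override_internal:

    /* Map the built-in default CPU straight to its -mtune= name.  */
    const char *
    default_tune_name (void)
    {
      return processor_target_table[TARGET_CPU_DEFAULT].name;
    }
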
static bool
@@ -2921,7 +2887,7 @@ ix86_option_override_internal (bool main_args_p)
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
- | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
+ | PTA_FMA | PTA_MOVBE | PTA_HLE | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
@@ -2984,7 +2950,7 @@ ix86_option_override_internal (bool main_args_p)
{"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT},
@@ -3126,7 +3092,8 @@ ix86_option_override_internal (bool main_args_p)
ix86_tune_string = ix86_arch_string;
if (!ix86_tune_string)
{
- ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
+ ix86_tune_string
+ = processor_target_table[TARGET_CPU_DEFAULT].name;
ix86_tune_defaulted = 1;
}
@@ -4092,19 +4059,15 @@ ix86_function_specific_print (FILE *file, int indent,
= ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
NULL, NULL, ptr->x_ix86_fpmath, false);
+ gcc_assert (ptr->arch < PROCESSOR_max);
fprintf (file, "%*sarch = %d (%s)\n",
indent, "",
- ptr->arch,
- ((ptr->arch < TARGET_CPU_DEFAULT_max)
- ? cpu_names[ptr->arch]
- : "<unknown>"));
+ ptr->arch, processor_target_table[ptr->arch].name);
+ gcc_assert (ptr->tune < PROCESSOR_max);
fprintf (file, "%*stune = %d (%s)\n",
indent, "",
- ptr->tune,
- ((ptr->tune < TARGET_CPU_DEFAULT_max)
- ? cpu_names[ptr->tune]
- : "<unknown>"));
+ ptr->tune, processor_target_table[ptr->tune].name);
fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
@@ -5227,7 +5190,12 @@ ix86_function_regparm (const_tree type, const_tree decl)
/* Use register calling convention for local functions when possible. */
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
- && optimize
+ /* Caller and callee must agree on the calling convention, so
+ checking just the global `optimize' here would mean that with
+ __attribute__((optimize (...))) the caller could use the regparm
+ convention while the callee does not, or vice versa. Instead look
+ at whether the callee itself is optimized. */
+ && opt_for_fn (decl, optimize)
&& !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
@@ -5423,6 +5391,17 @@ ix86_legitimate_combined_insn (rtx insn)
bool win;
int j;
+ /* For pre-AVX disallow unaligned loads/stores where the
+ instructions don't support it. */
+ if (!TARGET_AVX
+ && VECTOR_MODE_P (GET_MODE (op))
+ && misaligned_operand (op, GET_MODE (op)))
+ {
+ int min_align = get_attr_ssememalign (insn);
+ if (min_align == 0)
+ return false;
+ }
+
/* A unary operator may be accepted by the predicate, but it
is irrelevant for matching constraints. */
if (UNARY_P (op))
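
The new check makes ix86_legitimate_combined_insn refuse to combine a misaligned vector memory operand into a pre-AVX SSE instruction whose pattern declares no tolerance for it (a zero "ssememalign" attribute); most non-AVX SSE forms fault on unaligned 16-byte operands. A sketch of the kind of access involved:

    typedef int v4si __attribute__ ((vector_size (16)));

    v4si
    load_misaligned (const char *p)
    {
      v4si v;
      /* Deliberately unaligned source: must stay a movups-style access
         and never be folded into an alignment-requiring instruction.  */
      __builtin_memcpy (&v, p + 1, sizeof v);
      return v;
    }
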
@@ -5761,9 +5740,9 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
cum->nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
- cum->warn_avx = 0;
- cum->warn_sse = 0;
- cum->warn_mmx = 0;
+ cum->warn_avx = false;
+ cum->warn_sse = false;
+ cum->warn_mmx = false;
return;
}
@@ -5804,10 +5783,14 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
The middle-end can't deal with vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
- NULL. */
+ NULL.
+
+ If IN_RETURN is true, warn ABI change if the vector mode isn't
+ available for function return value. */
static enum machine_mode
-type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
+type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
+ bool in_return)
{
enum machine_mode mode = TYPE_MODE (type);
@@ -5833,32 +5816,60 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
if (size == 32 && !TARGET_AVX)
{
static bool warnedavx;
+ static bool warnedavx_ret;
- if (cum
- && !warnedavx
- && cum->warn_avx)
+ if (cum && cum->warn_avx && !warnedavx)
{
+ if (warning (OPT_Wpsabi, "AVX vector argument "
+ "without AVX enabled changes the ABI"))
warnedavx = true;
- warning (0, "AVX vector argument without AVX "
- "enabled changes the ABI");
}
+ else if (in_return && !warnedavx_ret)
+ {
+ if (warning (OPT_Wpsabi, "AVX vector return "
+ "without AVX enabled changes the ABI"))
+ warnedavx_ret = true;
+ }
+
return TYPE_MODE (type);
}
- else if ((size == 8 || size == 16) && !TARGET_SSE)
+ else if (((size == 8 && TARGET_64BIT) || size == 16)
+ && !TARGET_SSE)
{
static bool warnedsse;
+ static bool warnedsse_ret;
- if (cum
- && !warnedsse
- && cum->warn_sse)
+ if (cum && cum->warn_sse && !warnedsse)
{
+ if (warning (OPT_Wpsabi, "SSE vector argument "
+ "without SSE enabled changes the ABI"))
warnedsse = true;
- warning (0, "SSE vector argument without SSE "
- "enabled changes the ABI");
}
- return mode;
+ else if (!TARGET_64BIT && in_return && !warnedsse_ret)
+ {
+ if (warning (OPT_Wpsabi, "SSE vector return "
+ "without SSE enabled changes the ABI"))
+ warnedsse_ret = true;
+ }
+ }
+ else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
+ {
+ static bool warnedmmx;
+ static bool warnedmmx_ret;
+
+ if (cum && cum->warn_mmx && !warnedmmx)
+ {
+ if (warning (OPT_Wpsabi, "MMX vector argument "
+ "without MMX enabled changes the ABI"))
+ warnedmmx = true;
+ }
+ else if (in_return && !warnedmmx_ret)
+ {
+ if (warning (OPT_Wpsabi, "MMX vector return "
+ "without MMX enabled changes the ABI"))
+ warnedmmx_ret = true;
+ }
}
- else
return mode;
}
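
These type_natural_mode hunks centralize the one-shot ABI warnings that are deleted from function_arg_32 and ix86_struct_value_rtx further down, emit them under -Wpsabi, and add separate return-value variants; the SSE warning on 8-byte vectors is now 64-bit only, with MMX covering the 32-bit case. An example that triggers the new path when built with -m32 and no -msse:

    typedef int v4si __attribute__ ((vector_size (16)));

    v4si
    pass_through (v4si x)
    {
      /* warning: SSE vector return without SSE enabled changes the ABI */
      return x;
    }
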
@@ -6204,25 +6215,28 @@ classify_argument (enum machine_mode mode, const_tree type,
case CHImode:
case CQImode:
{
- int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
+ int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
+
+ /* Analyze last 128 bits only. */
+ size = (size - 1) & 0x7f;
- if (size <= 32)
+ if (size < 32)
{
classes[0] = X86_64_INTEGERSI_CLASS;
return 1;
}
- else if (size <= 64)
+ else if (size < 64)
{
classes[0] = X86_64_INTEGER_CLASS;
return 1;
}
- else if (size <= 64+32)
+ else if (size < 64+32)
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
- else if (size <= 64+64)
+ else if (size < 64+64)
{
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
@@ -6489,7 +6503,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
if (n == 2
&& regclass[0] == X86_64_INTEGER_CLASS
&& regclass[1] == X86_64_INTEGER_CLASS
- && (mode == CDImode || mode == TImode || mode == TFmode)
+ && (mode == CDImode || mode == TImode)
&& intreg[0] + 1 == intreg[1])
return gen_rtx_REG (mode, intreg[0]);
@@ -6739,7 +6753,7 @@ ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
if (type)
- mode = type_natural_mode (type, NULL);
+ mode = type_natural_mode (type, NULL, false);
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
@@ -6767,8 +6781,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
enum machine_mode orig_mode, const_tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
- static bool warnedsse, warnedmmx;
-
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
return constm1_rtx;
@@ -6825,12 +6837,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V2DFmode:
if (!type || !AGGREGATE_TYPE_P (type))
{
- if (!TARGET_SSE && !warnedsse && cum->warn_sse)
- {
- warnedsse = true;
- warning (0, "SSE vector argument without SSE enabled "
- "changes the ABI");
- }
if (cum->sse_nregs)
return gen_reg_or_parallel (mode, orig_mode,
cum->sse_regno + FIRST_SSE_REG);
@@ -6863,12 +6869,6 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
- if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
- {
- warnedmmx = true;
- warning (0, "MMX vector argument without MMX enabled "
- "changes the ABI");
- }
if (cum->mmx_nregs)
return gen_reg_or_parallel (mode, orig_mode,
cum->mmx_regno + FIRST_MMX_REG);
@@ -6991,7 +6991,7 @@ ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
/* To simplify the code below, represent vector types with a vector mode
even if MMX/SSE are not active. */
if (type && TREE_CODE (type) == VECTOR_TYPE)
- mode = type_natural_mode (type, cum);
+ mode = type_natural_mode (type, cum, false);
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
@@ -7249,9 +7249,15 @@ ix86_function_value_regno_p (const unsigned int regno)
switch (regno)
{
case AX_REG:
+ case DX_REG:
return true;
+ case DI_REG:
+ case SI_REG:
+ return TARGET_64BIT && ix86_abi != MS_ABI;
- case FIRST_FLOAT_REG:
+ /* Complex values are returned in %st(0)/%st(1) pair. */
+ case ST0_REG:
+ case ST1_REG:
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
@@ -7259,10 +7265,12 @@ ix86_function_value_regno_p (const unsigned int regno)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
- case FIRST_SSE_REG:
+ /* Complex values are returned in %xmm0/%xmm1 pair. */
+ case XMM0_REG:
+ case XMM1_REG:
return TARGET_SSE;
- case FIRST_MMX_REG:
+ case MM0_REG:
if (TARGET_MACHO || TARGET_64BIT)
return false;
return TARGET_MMX;
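
ix86_function_value_regno_p now spells out the full register pairs used for complex returns (%st(0)/%st(1) and %xmm0/%xmm1) and admits %edx plus, on 64-bit non-MS targets, %esi/%edi. A hedged example of a value that occupies such a pair under the x86-64 SysV ABI:

    #include <complex.h>

    double _Complex
    scale (double _Complex z)
    {
      /* Real part returned in %xmm0, imaginary part in %xmm1.  */
      return 2.0 * z;
    }
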
@@ -7434,7 +7442,7 @@ ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
enum machine_mode mode, orig_mode;
orig_mode = TYPE_MODE (valtype);
- mode = type_natural_mode (valtype, NULL);
+ mode = type_natural_mode (valtype, NULL, true);
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
@@ -7549,7 +7557,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
#ifdef SUBTARGET_RETURN_IN_MEMORY
return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
- const enum machine_mode mode = type_natural_mode (type, NULL);
+ const enum machine_mode mode = type_natural_mode (type, NULL, true);
if (TARGET_64BIT)
{
@@ -7563,52 +7571,6 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
#endif
}
-/* When returning SSE vector types, we have a choice of either
- (1) being abi incompatible with a -march switch, or
- (2) generating an error.
- Given no good solution, I think the safest thing is one warning.
- The user won't be able to use -Werror, but....
-
- Choose the STRUCT_VALUE_RTX hook because that's (at present) only
- called in response to actually generating a caller or callee that
- uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
- via aggregate_value_p for general type probing from tree-ssa. */
-
-static rtx
-ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
-{
- static bool warnedsse, warnedmmx;
-
- if (!TARGET_64BIT && type)
- {
- /* Look at the return type of the function, not the function type. */
- enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
-
- if (!TARGET_SSE && !warnedsse)
- {
- if (mode == TImode
- || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- {
- warnedsse = true;
- warning (0, "SSE vector return without SSE enabled "
- "changes the ABI");
- }
- }
-
- if (!TARGET_MMX && !warnedmmx)
- {
- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
- {
- warnedmmx = true;
- warning (0, "MMX vector return without MMX enabled "
- "changes the ABI");
- }
- }
- }
-
- return NULL;
-}
-
/* Create the va_list data type. */
@@ -8033,7 +7995,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
size = int_size_in_bytes (type);
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- nat_mode = type_natural_mode (type, NULL);
+ nat_mode = type_natural_mode (type, NULL, false);
switch (nat_mode)
{
case V8SFmode:
@@ -10574,18 +10536,20 @@ ix86_expand_prologue (void)
}
m->fs.sp_offset += allocate;
+ /* Use stack_pointer_rtx for relative addressing so that code
+ works for realigned stack, too. */
if (r10_live && eax_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
gen_frame_mem (word_mode, t));
- t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+ t = plus_constant (Pmode, t, UNITS_PER_WORD);
emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
gen_frame_mem (word_mode, t));
}
else if (eax_live || r10_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
emit_move_insn (gen_rtx_REG (word_mode,
(eax_live ? AX_REG : R10_REG)),
gen_frame_mem (word_mode, t));
@@ -11178,8 +11142,9 @@ ix86_expand_epilogue (int style)
m->fs.cfa_offset -= UNITS_PER_WORD;
m->fs.sp_offset -= UNITS_PER_WORD;
- add_reg_note (insn, REG_CFA_ADJUST_CFA,
- copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
add_reg_note (insn, REG_CFA_REGISTER,
gen_rtx_SET (VOIDmode, ecx, pc_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
@@ -11575,30 +11540,6 @@ ix86_live_on_entry (bitmap regs)
}
}
-/* Determine if op is suitable SUBREG RTX for address. */
-
-static bool
-ix86_address_subreg_operand (rtx op)
-{
- enum machine_mode mode;
-
- if (!REG_P (op))
- return false;
-
- mode = GET_MODE (op);
-
- if (GET_MODE_CLASS (mode) != MODE_INT)
- return false;
-
- /* Don't allow SUBREGs that span more than a word. It can lead to spill
- failures when the register is one word out of a two word structure. */
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- return false;
-
- /* Allow only SUBREGs of non-eliminable hard registers. */
- return register_no_elim_operand (op, mode);
-}
-
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
@@ -11655,7 +11596,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
base = addr;
else if (GET_CODE (addr) == SUBREG)
{
- if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ if (REG_P (SUBREG_REG (addr)))
base = addr;
else
return 0;
@@ -11719,7 +11660,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
break;
case SUBREG:
- if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ if (!REG_P (SUBREG_REG (op)))
return 0;
/* FALLTHRU */
@@ -11764,19 +11705,6 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
scale = 1 << scale;
retval = -1;
}
- else if (CONST_INT_P (addr))
- {
- if (!x86_64_immediate_operand (addr, VOIDmode))
- return 0;
-
- /* Constant addresses are sign extended to 64bit, we have to
- prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
- if (TARGET_X32
- && val_signbit_known_set_p (SImode, INTVAL (addr)))
- return 0;
-
- disp = addr;
- }
else
disp = addr; /* displacement */
@@ -11785,18 +11713,12 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
if (REG_P (index))
;
else if (GET_CODE (index) == SUBREG
- && ix86_address_subreg_operand (SUBREG_REG (index)))
+ && REG_P (SUBREG_REG (index)))
;
else
return 0;
}
-/* Address override works only on the (%reg) part of %fs:(%reg). */
- if (seg != SEG_DEFAULT
- && ((base && GET_MODE (base) != word_mode)
- || (index && GET_MODE (index) != word_mode)))
- return 0;
-
/* Extract the integral value of scale. */
if (scale_rtx)
{
@@ -12272,6 +12194,45 @@ ix86_legitimize_reload_address (rtx x,
return false;
}
+/* Determine if op is a suitable RTX for an address register.
+   Return the naked register if a register or a register subreg is
+   found, otherwise return NULL_RTX. */
+
+static rtx
+ix86_validate_address_register (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+
+ /* Only SImode or DImode registers can form the address. */
+ if (mode != SImode && mode != DImode)
+ return NULL_RTX;
+
+ if (REG_P (op))
+ return op;
+ else if (GET_CODE (op) == SUBREG)
+ {
+ rtx reg = SUBREG_REG (op);
+
+ if (!REG_P (reg))
+ return NULL_RTX;
+
+ mode = GET_MODE (reg);
+
+ /* Don't allow SUBREGs that span more than a word. It can
+ lead to spill failures when the register is one word out
+ of a two word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return NULL_RTX;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ if (register_no_elim_operand (reg, mode))
+ return reg;
+ }
+
+ /* Op is not a register. */
+ return NULL_RTX;
+}
+
/* Recognizes RTL expressions that are valid memory addresses for an
instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address.
@@ -12287,6 +12248,7 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
struct ix86_address parts;
rtx base, index, disp;
HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
if (ix86_decompose_address (addr, &parts) <= 0)
/* Decomposition failed. */
@@ -12296,21 +12258,14 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
index = parts.index;
disp = parts.disp;
scale = parts.scale;
+ seg = parts.seg;
/* Validate base register. */
if (base)
{
- rtx reg;
+ rtx reg = ix86_validate_address_register (base);
- if (REG_P (base))
- reg = base;
- else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
- reg = SUBREG_REG (base);
- else
- /* Base is not a register. */
- return false;
-
- if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
@@ -12322,17 +12277,9 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
/* Validate index register. */
if (index)
{
- rtx reg;
-
- if (REG_P (index))
- reg = index;
- else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
- reg = SUBREG_REG (index);
- else
- /* Index is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (index);
- if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
@@ -12346,6 +12293,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
&& GET_MODE (base) != GET_MODE (index))
return false;
+ /* Address override works only on the (%reg) part of %fs:(%reg). */
+ if (seg != SEG_DEFAULT
+ && ((base && GET_MODE (base) != word_mode)
+ || (index && GET_MODE (index) != word_mode)))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
@@ -12467,6 +12420,12 @@ ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
&& !x86_64_immediate_operand (disp, VOIDmode))
/* Displacement is out of range. */
return false;
+ /* In x32 mode, constant addresses are sign-extended to 64 bits, so
+ we have to prevent addresses from 0x80000000 to 0xffffffff. */
+ else if (TARGET_X32 && !(index || base)
+ && CONST_INT_P (disp)
+ && val_signbit_known_set_p (SImode, INTVAL (disp)))
+ return false;
}
/* Everything looks valid. */
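
A hypothetical x32 sketch of the displacement rule above: a constant address with bit 31 set would be sign-extended by 64-bit addressing to 0xffffffff80000000, so it is rejected as a bare displacement and has to be materialized in a register first. The pointer value below is an assumption for illustration only.

volatile unsigned *const mmio = (volatile unsigned *) 0x80000000u;

unsigned read_mmio (void)
{
  return *mmio;                /* with -mx32, the address goes via a register */
}
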
@@ -13831,8 +13790,6 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
Those same assemblers have the same but opposite lossage on cmov. */
if (mode == CCmode)
suffix = fp ? "nbe" : "a";
- else if (mode == CCCmode)
- suffix = "b";
else
gcc_unreachable ();
break;
@@ -13854,8 +13811,12 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
}
break;
case LTU:
- gcc_assert (mode == CCmode || mode == CCCmode);
+ if (mode == CCmode)
suffix = "b";
+ else if (mode == CCCmode)
+ suffix = "c";
+ else
+ gcc_unreachable ();
break;
case GE:
switch (mode)
@@ -13875,20 +13836,20 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
}
break;
case GEU:
- /* ??? As above. */
- gcc_assert (mode == CCmode || mode == CCCmode);
+ if (mode == CCmode)
suffix = fp ? "nb" : "ae";
+ else if (mode == CCCmode)
+ suffix = "nc";
+ else
+ gcc_unreachable ();
break;
case LE:
gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
suffix = "le";
break;
case LEU:
- /* ??? As above. */
if (mode == CCmode)
suffix = "be";
- else if (mode == CCCmode)
- suffix = fp ? "nb" : "ae";
else
gcc_unreachable ();
break;
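
With this rewrite CCCmode stands for the carry flag alone, so LTU/GEU print the "c"/"nc" suffixes instead of "b"/"ae". A hedged sketch of where such a comparison arises: an unsigned overflow check compiles to an add followed by a carry-flag test.

unsigned add_sat (unsigned a, unsigned b)
{
  unsigned s = a + b;
  return s < a ? ~0u : s;      /* "s < a" is the LTU carry test (jc / setc) */
}
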
@@ -15354,7 +15315,7 @@ ix86_avx_u128_mode_needed (rtx insn)
rtx arg = XEXP (XEXP (link, 0), 0);
if (ix86_check_avx256_register (&arg, NULL))
- return AVX_U128_ANY;
+ return AVX_U128_DIRTY;
}
}
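
A user-level illustration (an assumed testcase, not from the patch): a call that passes a 256-bit vector argument now makes the upper-128 state DIRTY rather than ANY, so the vzeroupper pass will not clear the upper halves between the argument setup and the call.

#include <immintrin.h>

extern __m256 callee (__m256 v);   /* hypothetical external function */

__m256 caller (__m256 v)
{
  return callee (v);           /* argument in %ymm0: mode needed here is DIRTY */
}
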
@@ -15474,8 +15435,8 @@ ix86_avx_u128_mode_after (int mode, rtx insn)
{
bool avx_reg256_found = false;
note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
- if (!avx_reg256_found)
- return AVX_U128_CLEAN;
+
+ return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
}
/* Otherwise, return current mode. Remember that if insn
@@ -17364,8 +17325,18 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- /* Check it is correct to split here. */
- if (!ix86_ok_to_clobber_flags(insn))
+ /* The "at least two components" test below might not catch simple
+ move or zero extension insns if parts.base is non-NULL and parts.disp
+ is const0_rtx as the only components in the address, e.g. if the
+ register is %rbp or %r13. As this test is much cheaper and moves or
+ zero extensions are the common case, do this check first. */
+ if (REG_P (operands[1])
+ || (SImode_address_operand (operands[1], VOIDmode)
+ && REG_P (XEXP (operands[1], 0))))
+ return false;
+
+ /* Check if it is OK to split here. */
+ if (!ix86_ok_to_clobber_flags (insn))
return false;
ok = ix86_decompose_address (operands[1], &parts);
@@ -18500,11 +18471,6 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
return CCmode;
case GTU: /* CF=0 & ZF=0 */
case LEU: /* CF=1 | ZF=1 */
- /* Detect overflow checks. They need just the carry flag. */
- if (GET_CODE (op0) == MINUS
- && rtx_equal_p (op1, XEXP (op0, 0)))
- return CCCmode;
- else
return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
@@ -20514,7 +20480,7 @@ ix86_expand_vec_perm (rtx operands[])
return;
case V8SFmode:
- mask = gen_lowpart (V8SFmode, mask);
+ mask = gen_lowpart (V8SImode, mask);
if (one_operand_shuffle)
emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
else
@@ -21754,6 +21720,21 @@ counter_mode (rtx count_exp)
return SImode;
}
+/* Copy the address to a Pmode register. This is used for x32 to
+ truncate a DImode TLS address to an SImode register. */
+
+static rtx
+ix86_copy_addr_to_reg (rtx addr)
+{
+ if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
+ return copy_addr_to_reg (addr);
+ else
+ {
+ gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
+ return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
+ }
+}
+
/* When SRCPTR is non-NULL, output simple loop to move memory
pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
@@ -22742,8 +22723,8 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
- destreg = copy_addr_to_reg (XEXP (dst, 0));
- srcreg = copy_addr_to_reg (XEXP (src, 0));
+ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
+ srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
switch (alg)
{
case libcall:
@@ -23133,7 +23114,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
gcc_assert (alg != no_stringop);
if (!count)
count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
- destreg = copy_addr_to_reg (XEXP (dst, 0));
+ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
switch (alg)
{
case libcall:
@@ -25002,7 +24983,8 @@ ix86_constant_alignment (tree exp, int align)
int
ix86_data_alignment (tree type, int align)
{
- int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+ int max_align
+ = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
if (AGGREGATE_TYPE_P (type)
&& TYPE_SIZE (type)
@@ -27998,8 +27980,8 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
@@ -31264,6 +31246,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
tree arg;
rtx pat, op;
unsigned int i, nargs, arg_adjust, memory;
+ bool aligned_mem = false;
struct
{
rtx op;
@@ -31309,6 +31292,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
nargs = 1;
klass = load;
memory = 0;
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_movntdqa:
+ case CODE_FOR_avx2_movntdqa:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV4DI_V4DI:
@@ -31326,6 +31318,26 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
+ switch (icode)
+ {
+ /* These builtins and instructions require the memory
+ to be properly aligned. */
+ case CODE_FOR_avx_movntv4di:
+ case CODE_FOR_sse2_movntv2di:
+ case CODE_FOR_avx_movntv8sf:
+ case CODE_FOR_sse_movntv4sf:
+ case CODE_FOR_sse4a_vmmovntv4sf:
+ case CODE_FOR_avx_movntv4df:
+ case CODE_FOR_sse2_movntv2df:
+ case CODE_FOR_sse4a_vmmovntv2df:
+ case CODE_FOR_sse2_movntidi:
+ case CODE_FOR_sse_movntq:
+ case CODE_FOR_sse2_movntisi:
+ aligned_mem = true;
+ break;
+ default:
+ break;
+ }
break;
case V4SF_FTYPE_V4SF_PCV2SF:
case V2DF_FTYPE_V2DF_PCDOUBLE:
@@ -31382,6 +31394,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
target = gen_rtx_MEM (tmode, op);
+ /* target at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from its GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
+ align = GET_MODE_ALIGNMENT (tmode);
+ if (MEM_ALIGN (target) < align)
+ set_mem_align (target, align);
}
else
target = force_reg (tmode, op);
@@ -31427,8 +31450,17 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
/* This must be the memory operand. */
op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
op = gen_rtx_MEM (mode, op);
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
+ /* op at this point has just BITS_PER_UNIT MEM_ALIGN
+ on it. Try to improve it using get_pointer_alignment,
+ and if the special builtin is one that requires strict
+ mode alignment, also from its GET_MODE_ALIGNMENT.
+ Failure to do so could lead to ix86_legitimate_combined_insn
+ rejecting all changes to such insns. */
+ unsigned int align = get_pointer_alignment (arg);
+ if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
+ align = GET_MODE_ALIGNMENT (mode);
+ if (MEM_ALIGN (op) < align)
+ set_mem_align (op, align);
}
else
{
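
For illustration, one of the icodes marked aligned_mem above backs _mm_stream_ps (CODE_FOR_sse_movntv4sf); recording the mode alignment on the MEM keeps ix86_legitimate_combined_insn from rejecting combines into such insns. A minimal sketch, assuming a correctly aligned destination:

#include <xmmintrin.h>

void stream_store (float *dst, __m128 v)   /* dst assumed 16-byte aligned */
{
  _mm_stream_ps (dst, v);      /* non-temporal store; the ISA requires alignment */
}
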
@@ -32041,7 +32073,8 @@ rdrand_step:
else
op2 = gen_rtx_SUBREG (SImode, op0, 0);
- if (target == 0)
+ if (target == 0
+ || !register_operand (target, SImode))
target = gen_reg_rtx (SImode);
pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
@@ -32083,7 +32116,8 @@ rdseed_step:
const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
- if (target == 0)
+ if (target == 0
+ || !register_operand (target, SImode))
target = gen_reg_rtx (SImode);
emit_insn (gen_zero_extendqisi2 (target, op2));
@@ -32222,7 +32256,9 @@ addcarryx:
mode4 = insn_data[icode].operand[5].mode;
if (target == NULL_RTX
- || GET_MODE (target) != insn_data[icode].operand[0].mode)
+ || GET_MODE (target) != insn_data[icode].operand[0].mode
+ || !insn_data[icode].operand[0].predicate (target,
+ GET_MODE (target)))
subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
else
subtarget = target;
@@ -33893,10 +33929,10 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. OImode move is available only when AVX is
- enabled. */
- return ((TARGET_AVX && mode == OImode)
- || VALID_AVX256_REG_MODE (mode)
+ are available. OImode and AVX modes are available only when
+ AVX is enabled. */
+ return ((TARGET_AVX
+ && VALID_AVX256_REG_OR_OI_MODE (mode))
|| VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)
@@ -35034,7 +35070,7 @@ x86_output_mi_thunk (FILE *file,
{
tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
tmp = gen_rtx_CONST (Pmode, tmp);
- fnaddr = gen_rtx_MEM (Pmode, tmp);
+ fnaddr = gen_const_mem (Pmode, tmp);
}
}
else
@@ -35054,8 +35090,9 @@ x86_output_mi_thunk (FILE *file,
output_set_got (tmp, NULL_RTX);
fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
- fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
- fnaddr = gen_rtx_MEM (Pmode, fnaddr);
+ fnaddr = gen_rtx_CONST (Pmode, fnaddr);
+ fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
+ fnaddr = gen_const_mem (Pmode, fnaddr);
}
}
@@ -35246,7 +35283,10 @@ ix86_avoid_jump_mispredicts (void)
The smallest offset in the page INSN can start is the case where START
ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
- */
+
+ Don't consider asm goto as a jump; while it can contain a jump, it doesn't
+ have to, since control transfer to label(s) can be performed through other
+ means, and we also estimate the minimum length of all asm stmts as 0. */
for (insn = start; insn; insn = NEXT_INSN (insn))
{
int min_size;
@@ -35274,6 +35314,7 @@ ix86_avoid_jump_mispredicts (void)
{
start = NEXT_INSN (start);
if ((JUMP_P (start)
+ && asm_noperands (PATTERN (start)) < 0
&& GET_CODE (PATTERN (start)) != ADDR_VEC
&& GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|| CALL_P (start))
@@ -35292,6 +35333,7 @@ ix86_avoid_jump_mispredicts (void)
fprintf (dump_file, "Insn %i estimated to %i bytes\n",
INSN_UID (insn), min_size);
if ((JUMP_P (insn)
+ && asm_noperands (PATTERN (insn)) < 0
&& GET_CODE (PATTERN (insn)) != ADDR_VEC
&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
|| CALL_P (insn))
@@ -35303,6 +35345,7 @@ ix86_avoid_jump_mispredicts (void)
{
start = NEXT_INSN (start);
if ((JUMP_P (start)
+ && asm_noperands (PATTERN (start)) < 0
&& GET_CODE (PATTERN (start)) != ADDR_VEC
&& GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|| CALL_P (start))
@@ -39385,6 +39428,8 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
else
dfinal.perm[i] = e;
}
+
+ if (!d->testing_p)
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
@@ -39820,6 +39865,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
return false;
gcc_assert (!d->one_operand_p);
+ if (d->testing_p)
+ return true;
+
nelt = d->nelt;
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -40019,6 +40067,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
switch (d->vmode)
{
case V4DFmode:
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V4DFmode);
t2 = gen_reg_rtx (V4DFmode);
@@ -40038,6 +40088,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
int mask = odd ? 0xdd : 0x88;
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
t3 = gen_reg_rtx (V8SFmode);
@@ -40079,6 +40131,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
/* We need 2*log2(N)-1 operations to achieve odd/even
with interleave. */
t1 = gen_reg_rtx (V8HImode);
@@ -40100,6 +40154,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_pshufb2 (d);
else
{
+ if (d->testing_p)
+ break;
t1 = gen_reg_rtx (V16QImode);
t2 = gen_reg_rtx (V16QImode);
t3 = gen_reg_rtx (V16QImode);
@@ -40132,6 +40188,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V4DImode);
t2 = gen_reg_rtx (V4DImode);
@@ -40158,6 +40217,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return expand_vec_perm_even_odd_1 (&d_copy, odd);
}
+ if (d->testing_p)
+ break;
+
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
@@ -40250,6 +40312,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
case V16QImode:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
+ if (d->testing_p)
+ return true;
do
{
rtx dest;
@@ -42487,8 +42551,6 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
-#undef TARGET_STRUCT_VALUE_RTX
-#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
diff --git a/gcc-4.8/gcc/config/i386/i386.h b/gcc-4.8/gcc/config/i386/i386.h
index f68aa93e0..3810be048 100644
--- a/gcc-4.8/gcc/config/i386/i386.h
+++ b/gcc-4.8/gcc/config/i386/i386.h
@@ -197,10 +197,10 @@ extern const struct processor_costs ix86_size_cost;
/* Macros used in the machine description to test the flags. */
-/* configure can arrange to make this 2, to force a 486. */
+/* configure can arrange to change it. */
#ifndef TARGET_CPU_DEFAULT
-#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic
+#define TARGET_CPU_DEFAULT PROCESSOR_GENERIC32
#endif
#ifndef TARGET_FPMATH_DEFAULT
@@ -594,43 +594,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
/* Target Pragmas. */
#define REGISTER_TARGET_PRAGMAS() ix86_register_pragmas ()
-enum target_cpu_default
-{
- TARGET_CPU_DEFAULT_generic = 0,
-
- TARGET_CPU_DEFAULT_i386,
- TARGET_CPU_DEFAULT_i486,
- TARGET_CPU_DEFAULT_pentium,
- TARGET_CPU_DEFAULT_pentium_mmx,
- TARGET_CPU_DEFAULT_pentiumpro,
- TARGET_CPU_DEFAULT_pentium2,
- TARGET_CPU_DEFAULT_pentium3,
- TARGET_CPU_DEFAULT_pentium4,
- TARGET_CPU_DEFAULT_pentium_m,
- TARGET_CPU_DEFAULT_prescott,
- TARGET_CPU_DEFAULT_nocona,
- TARGET_CPU_DEFAULT_core2,
- TARGET_CPU_DEFAULT_corei7,
- TARGET_CPU_DEFAULT_haswell,
- TARGET_CPU_DEFAULT_atom,
-
- TARGET_CPU_DEFAULT_geode,
- TARGET_CPU_DEFAULT_k6,
- TARGET_CPU_DEFAULT_k6_2,
- TARGET_CPU_DEFAULT_k6_3,
- TARGET_CPU_DEFAULT_athlon,
- TARGET_CPU_DEFAULT_athlon_sse,
- TARGET_CPU_DEFAULT_k8,
- TARGET_CPU_DEFAULT_amdfam10,
- TARGET_CPU_DEFAULT_bdver1,
- TARGET_CPU_DEFAULT_bdver2,
- TARGET_CPU_DEFAULT_bdver3,
- TARGET_CPU_DEFAULT_btver1,
- TARGET_CPU_DEFAULT_btver2,
-
- TARGET_CPU_DEFAULT_max
-};
-
#ifndef CC1_SPEC
#define CC1_SPEC "%(cc1_cpu) "
#endif
@@ -2095,32 +2058,33 @@ do { \
with x86-64 medium memory model */
#define DEFAULT_LARGE_SECTION_THRESHOLD 65536
-/* Which processor to tune code generation for. */
+/* Which processor to tune code generation for. These must be in sync
+ with processor_target_table in i386.c. */
enum processor_type
{
- PROCESSOR_I386 = 0, /* 80386 */
+ PROCESSOR_GENERIC32 = 0,
+ PROCESSOR_GENERIC64,
+ PROCESSOR_I386, /* 80386 */
PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
PROCESSOR_PENTIUM,
PROCESSOR_PENTIUMPRO,
- PROCESSOR_GEODE,
- PROCESSOR_K6,
- PROCESSOR_ATHLON,
PROCESSOR_PENTIUM4,
- PROCESSOR_K8,
PROCESSOR_NOCONA,
PROCESSOR_CORE2,
PROCESSOR_COREI7,
PROCESSOR_HASWELL,
- PROCESSOR_GENERIC32,
- PROCESSOR_GENERIC64,
+ PROCESSOR_ATOM,
+ PROCESSOR_GEODE,
+ PROCESSOR_K6,
+ PROCESSOR_ATHLON,
+ PROCESSOR_K8,
PROCESSOR_AMDFAM10,
PROCESSOR_BDVER1,
PROCESSOR_BDVER2,
PROCESSOR_BDVER3,
PROCESSOR_BTVER1,
PROCESSOR_BTVER2,
- PROCESSOR_ATOM,
PROCESSOR_max
};
diff --git a/gcc-4.8/gcc/config/i386/i386.md b/gcc-4.8/gcc/config/i386/i386.md
index 576a19cf5..3b5bf884d 100644
--- a/gcc-4.8/gcc/config/i386/i386.md
+++ b/gcc-4.8/gcc/config/i386/i386.md
@@ -363,6 +363,13 @@
(const_string "unknown")]
(const_string "integer")))
+;; The minimum required alignment of vector mode memory operands of the SSE
+;; (non-VEX/EVEX) instruction in bits, if it is different from
+;; GET_MODE_ALIGNMENT of the operand, otherwise 0. If an instruction has
+;; multiple alternatives, this should be the conservative maximum of those minimum
+;; required alignments.
+(define_attr "ssememalign" "" (const_int 0))
+
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
@@ -1570,7 +1577,7 @@
split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
operands[1] = gen_lowpart (DImode, operands[2]);
- operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
GEN_INT (4)));
})
@@ -1587,7 +1594,7 @@
split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);
operands[1] = gen_lowpart (DImode, operands[2]);
- operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
GEN_INT (4)));
})
@@ -2715,7 +2722,20 @@
"reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
(set (mem:SF (reg:P SP_REG)) (match_dup 1))]
- "operands[2] = GEN_INT (-GET_MODE_SIZE (<P:MODE>mode));")
+{
+ rtx op = XEXP (operands[0], 0);
+ if (GET_CODE (op) == PRE_DEC)
+ {
+ gcc_assert (!TARGET_64BIT);
+ op = GEN_INT (-4);
+ }
+ else
+ {
+ op = XEXP (XEXP (op, 1), 1);
+ gcc_assert (CONST_INT_P (op));
+ }
+ operands[2] = op;
+})
(define_split
[(set (match_operand:SF 0 "push_operand")
@@ -5443,6 +5463,12 @@
mode = SImode;
ix86_split_lea_for_addr (curr_insn, operands, mode);
+
+ /* Zero-extend return register to DImode for zero-extended addresses. */
+ if (mode != <MODE>mode)
+ emit_insn (gen_zero_extendsidi2
+ (operands[0], gen_lowpart (mode, operands[0])));
+
DONE;
}
[(set_attr "type" "lea")
@@ -6589,7 +6615,7 @@
(set_attr "use_carry" "1")
(set_attr "mode" "<MODE>")])
-;; Overflow setting add and subtract instructions
+;; Overflow setting add instructions
(define_insn "*add<mode>3_cconly_overflow"
[(set (reg:CCC FLAGS_REG)
@@ -6604,43 +6630,31 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*sub<mode>3_cconly_overflow"
- [(set (reg:CCC FLAGS_REG)
- (compare:CCC
- (minus:SWI
- (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
- (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
- (match_dup 0)))]
- ""
- "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
- [(set_attr "type" "icmp")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*<plusminus_insn><mode>3_cc_overflow"
+(define_insn "*add<mode>3_cc_overflow"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
- (plusminus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (plus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
(match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
(match_dup 1)))
(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
- (plusminus:SWI (match_dup 1) (match_dup 2)))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ (plus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
-(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
+(define_insn "*addsi3_zext_cc_overflow"
[(set (reg:CCC FLAGS_REG)
(compare:CCC
- (plusminus:SI
- (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+ (plus:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0")
(match_operand:SI 2 "x86_64_general_operand" "rme"))
(match_dup 1)))
(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
- "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
- "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %k0|%k0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
@@ -8012,7 +8026,18 @@
(const_int 0)))
(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
(and:DI (match_dup 1) (match_dup 2)))]
- "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ "TARGET_64BIT
+ && ix86_match_ccmode
+ (insn,
+ /* If we are going to emit andl instead of andq, and the operands[2]
+ constant might have the SImode sign bit set, make sure the sign
+ flag isn't tested, because the instruction will set the sign flag
+ based on bit 31 rather than bit 63. If it isn't CONST_INT,
+ conservatively assume it might have bit 31 set. */
+ (satisfies_constraint_Z (operands[2])
+ && (!CONST_INT_P (operands[2])
+ || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
+ ? CCZmode : CCNOmode)
&& ix86_binary_operator_ok (AND, DImode, operands)"
"@
and{l}\t{%k2, %k0|%k0, %k2}
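
A hedged sketch of the hazard the new condition guards against: 0x80000000 satisfies constraint Z (a zero-extended 32-bit constant), so the AND may be emitted as "andl"; SF then reflects bit 31 of the result rather than bit 63, and only the zero flag may be relied on (hence CCZmode).

int bit31_set (long x)
{
  return (x & 0x80000000L) != 0;   /* a ZF-only test, safe with andl */
}
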
@@ -17569,7 +17594,13 @@
(define_insn "trap"
[(trap_if (const_int 1) (const_int 6))]
""
- { return ASM_SHORT "0x0b0f"; }
+{
+#ifdef HAVE_AS_IX86_UD2
+ return "ud2";
+#else
+ return ASM_SHORT "0x0b0f";
+#endif
+}
[(set_attr "length" "2")])
(define_expand "prefetch"
diff --git a/gcc-4.8/gcc/config/i386/sse.md b/gcc-4.8/gcc/config/i386/sse.md
index 05cf7f9a0..8b44e1d9e 100644
--- a/gcc-4.8/gcc/config/i386/sse.md
+++ b/gcc-4.8/gcc/config/i386/sse.md
@@ -605,6 +605,7 @@
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
@@ -634,6 +635,7 @@
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
@@ -663,6 +665,7 @@
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
@@ -696,6 +699,7 @@
}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
@@ -721,6 +725,7 @@
"%vlddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
+ (set_attr "ssememalign" "8")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
@@ -1001,6 +1006,7 @@
vrcpss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
+ (set_attr "ssememalign" "32")
(set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "orig,vex")
@@ -1089,6 +1095,7 @@
vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
+ (set_attr "ssememalign" "32")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
@@ -2844,6 +2851,7 @@
"%vcvtdq2pd\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
+ (set_attr "ssememalign" "64")
(set_attr "mode" "V2DF")])
(define_insn "avx_cvtpd2dq256"
@@ -3572,6 +3580,7 @@
%vmovhps\t{%2, %0|%0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -3617,6 +3626,7 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -3941,6 +3951,7 @@
%vmovhlps\t{%1, %d0|%d0, %1}
%vmovlps\t{%H1, %d0|%d0, %H1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2SF,V4SF,V2SF")])
@@ -3980,6 +3991,7 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
@@ -4033,6 +4045,7 @@
%vmovlps\t{%2, %0|%0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "length_immediate" "1,1,*,*,*")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -4643,6 +4656,7 @@
%vmovhpd\t{%1, %0|%0, %1}"
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
@@ -4744,6 +4758,7 @@
%vmovlpd\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,*,*,1,*,1")
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
@@ -4982,6 +4997,7 @@
movhlps\t{%1, %0|%0, %1}
movlps\t{%H1, %0|%0, %H1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "mode" "V2SF,V4SF,V2SF")])
;; Avoid combining registers from different units in a single alternative,
@@ -5077,6 +5093,7 @@
#"
[(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
(set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "1,*,*,*,*,*,*")
(set_attr "prefix" "orig,vex,orig,vex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
@@ -5145,6 +5162,7 @@
(const_string "imov")
]
(const_string "ssemov")))
+ (set_attr "ssememalign" "64")
(set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
(set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
@@ -5189,6 +5207,7 @@
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
+ (set_attr "ssememalign" "64")
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
@@ -8736,6 +8755,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -8766,6 +8786,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -8791,6 +8812,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -8818,6 +8840,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "16")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -8845,6 +8868,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -8868,6 +8892,7 @@
"TARGET_SSE4_1"
"%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
[(set_attr "type" "ssemov")
+ (set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
@@ -9151,6 +9176,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
@@ -9213,6 +9239,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "load")
(set_attr "mode" "TI")])
@@ -9240,6 +9267,7 @@
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_vex")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "btver2_decode" "vector")
(set_attr "memory" "none,load")
@@ -9267,6 +9295,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "btver2_decode" "vector")
@@ -9293,6 +9322,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
(set_attr "btver2_decode" "vector,vector,vector,vector")
@@ -9346,6 +9376,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
@@ -9399,6 +9430,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "load")
(set_attr "mode" "TI")])
@@ -9421,6 +9453,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
@@ -9445,6 +9478,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
@@ -9469,6 +9503,7 @@
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "ssememalign" "8")
(set_attr "length_immediate" "1")
(set_attr "memory" "none,load,none,load")
(set_attr "prefix" "maybe_vex")
@@ -10213,21 +10248,18 @@
[(set_attr "type" "ssecvt1")
(set_attr "mode" "<MODE>")])
-;; scalar insns
(define_expand "xop_vmfrcz<mode>2"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(unspec:VF_128
[(match_operand:VF_128 1 "nonimmediate_operand")]
UNSPEC_FRCZ)
- (match_dup 3)
+ (match_dup 2)
(const_int 1)))]
"TARGET_XOP"
-{
- operands[3] = CONST0_RTX (<MODE>mode);
-})
+ "operands[2] = CONST0_RTX (<MODE>mode);")
-(define_insn "*xop_vmfrcz_<mode>"
+(define_insn "*xop_vmfrcz<mode>2"
[(set (match_operand:VF_128 0 "register_operand" "=x")
(vec_merge:VF_128
(unspec:VF_128
diff --git a/gcc-4.8/gcc/config/i386/t-rtems b/gcc-4.8/gcc/config/i386/t-rtems
index 6161ec100..fef4c22e9 100644
--- a/gcc-4.8/gcc/config/i386/t-rtems
+++ b/gcc-4.8/gcc/config/i386/t-rtems
@@ -17,11 +17,10 @@
# <http://www.gnu.org/licenses/>.
#
-MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro \
-msoft-float
+MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro msoft-float
MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float
-MULTILIB_MATCHES = msoft-float=mno-m80387
-MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?mathlon
+MULTILIB_MATCHES = msoft-float=mno-80387
+MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon
MULTILIB_EXCEPTIONS = \
mtune=pentium/*msoft-float* \
mtune=pentiumpro/*msoft-float*
diff --git a/gcc-4.8/gcc/config/i386/winnt.c b/gcc-4.8/gcc/config/i386/winnt.c
index 7e7c1555a..1e49a461a 100644
--- a/gcc-4.8/gcc/config/i386/winnt.c
+++ b/gcc-4.8/gcc/config/i386/winnt.c
@@ -547,8 +547,9 @@ i386_pe_asm_named_section (const char *name, unsigned int flags,
sets 'discard' characteristic, rather than telling linker
to warn of size or content mismatch, so do the same. */
bool discard = (flags & SECTION_CODE)
- || lookup_attribute ("selectany",
- DECL_ATTRIBUTES (decl));
+ || (TREE_CODE (decl) != IDENTIFIER_NODE
+ && lookup_attribute ("selectany",
+ DECL_ATTRIBUTES (decl)));
fprintf (asm_out_file, "\t.linkonce %s\n",
(discard ? "discard" : "same_size"));
}
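
A hypothetical PE-target example of the case handled above: a "selectany" variable lands in a comdat section emitted with ".linkonce discard"; the new TREE_CODE check matters when the section is named by a bare identifier that carries no decl at all.

__attribute__((selectany)) int shared_counter = 0;   /* MinGW/Cygwin targets only */
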
diff --git a/gcc-4.8/gcc/config/i386/xopintrin.h b/gcc-4.8/gcc/config/i386/xopintrin.h
index 66b0f0de5..d2a99a167 100644
--- a/gcc-4.8/gcc/config/i386/xopintrin.h
+++ b/gcc-4.8/gcc/config/i386/xopintrin.h
@@ -745,13 +745,17 @@ _mm_frcz_pd (__m128d __A)
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_ss (__m128 __A, __m128 __B)
{
- return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
+ return (__m128) __builtin_ia32_movss ((__v4sf)__A,
+ (__v4sf)
+ __builtin_ia32_vfrczss ((__v4sf)__B));
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_frcz_sd (__m128d __A, __m128d __B)
{
- return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
+ return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
+ (__v2df)
+ __builtin_ia32_vfrczsd ((__v2df)__B));
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
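
Usage sketch (XOP hardware, compile with -mxop): after the fix, lane 0 of the result is the fractional part of __B's low element while lanes 1-3 are taken from __A, matching the unary vfrczss merged through __builtin_ia32_movss.

#include <x86intrin.h>

__m128 frac_low (__m128 a, __m128 b)
{
  return _mm_frcz_ss (a, b);   /* lane 0: b[0] - trunc(b[0]); lanes 1-3: from a */
}
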
diff --git a/gcc-4.8/gcc/config/m68k/m68k.c b/gcc-4.8/gcc/config/m68k/m68k.c
index 5e3236fa8..a662bed81 100644
--- a/gcc-4.8/gcc/config/m68k/m68k.c
+++ b/gcc-4.8/gcc/config/m68k/m68k.c
@@ -3325,12 +3325,12 @@ handle_move_double (rtx operands[2],
latehalf[1] = adjust_address (operands[1], SImode, 0);
}
- /* If insn is effectively movd N(sp),-(sp) then we will do the
- high word first. We should use the adjusted operand 1 (which is N+4(sp))
- for the low word as well, to compensate for the first decrement of sp. */
+ /* If insn is effectively movd N(REG),-(REG) then we will do the high
+ word first. We should use the adjusted operand 1 (which is N+4(REG))
+ for the low word as well, to compensate for the first decrement of
+ REG. */
if (optype0 == PUSHOP
- && REGNO (XEXP (XEXP (operands[0], 0), 0)) == STACK_POINTER_REGNUM
- && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ && reg_overlap_mentioned_p (XEXP (XEXP (operands[0], 0), 0), operands[1]))
operands[1] = middlehalf[1] = latehalf[1];
/* For (set (reg:DI N) (mem:DI ... (reg:SI N) ...)),
diff --git a/gcc-4.8/gcc/config/microblaze/microblaze.c b/gcc-4.8/gcc/config/microblaze/microblaze.c
index c121c2bae..914246bd5 100644
--- a/gcc-4.8/gcc/config/microblaze/microblaze.c
+++ b/gcc-4.8/gcc/config/microblaze/microblaze.c
@@ -1609,21 +1609,28 @@ static int
microblaze_version_to_int (const char *version)
{
const char *p, *v;
- const char *tmpl = "vX.YY.Z";
+ const char *tmpl = "vXX.YY.Z";
int iver = 0;
p = version;
v = tmpl;
- while (*v)
+ while (*p)
{
if (*v == 'X')
{ /* Looking for major */
+ if (*p == '.')
+ {
+ v++;
+ }
+ else
+ {
if (!(*p >= '0' && *p <= '9'))
return -1;
iver += (int) (*p - '0');
iver *= 10;
}
+ }
else if (*v == 'Y')
{ /* Looking for minor */
if (!(*p >= '0' && *p <= '9'))
@@ -3064,6 +3071,73 @@ expand_pic_symbol_ref (enum machine_mode mode ATTRIBUTE_UNUSED, rtx op)
return result;
}
+static void
+microblaze_asm_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ rtx this_rtx, insn, funexp;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+
+ /* Mark the end of the (empty) prologue. */
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Find the "this" pointer. If the function returns a structure,
+ the structure return pointer is in MB_ABI_FIRST_ARG_REGNUM. */
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ this_rtx = gen_rtx_REG (Pmode, (MB_ABI_FIRST_ARG_REGNUM + 1));
+ else
+ this_rtx = gen_rtx_REG (Pmode, MB_ABI_FIRST_ARG_REGNUM);
+
+ /* Apply the constant offset, if required. */
+ if (delta)
+ emit_insn (gen_addsi3 (this_rtx, this_rtx, GEN_INT (delta)));
+
+ /* Apply the offset from the vtable, if required. */
+ if (vcall_offset)
+ {
+ rtx vcall_offset_rtx = GEN_INT (vcall_offset);
+ rtx temp1 = gen_rtx_REG (Pmode, MB_ABI_TEMP1_REGNUM);
+
+ emit_move_insn (temp1, gen_rtx_MEM (Pmode, this_rtx));
+
+ rtx loc = gen_rtx_PLUS (Pmode, temp1, vcall_offset_rtx);
+ emit_move_insn (temp1, gen_rtx_MEM (Pmode, loc));
+
+ emit_insn (gen_addsi3 (this_rtx, this_rtx, temp1));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (!TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+
+ funexp = XEXP (DECL_RTL (function), 0);
+ rtx temp2 = gen_rtx_REG (Pmode, MB_ABI_TEMP2_REGNUM);
+
+ if (flag_pic)
+ emit_move_insn (temp2, expand_pic_symbol_ref (Pmode, funexp));
+ else
+ emit_move_insn (temp2, funexp);
+
+ emit_insn (gen_indirect_jump (temp2));
+
+ /* Run just enough of rest_of_compilation. This sequence was
+ "borrowed" from rs6000.c. */
+ insn = get_insns ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
+
bool
microblaze_expand_move (enum machine_mode mode, rtx operands[])
{
@@ -3234,65 +3308,45 @@ microblaze_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
emit_move_insn (mem, fnaddr);
}
-/* Emit instruction to perform compare.
- cmp is (compare_op op0 op1). */
-static rtx
-microblaze_emit_compare (enum machine_mode mode, rtx cmp, enum rtx_code *cmp_code)
+/* Generate conditional branch -- first, generate test condition,
+ second, generate correct branch instruction. */
+
+void
+microblaze_expand_conditional_branch (enum machine_mode mode, rtx operands[])
{
- rtx cmp_op0 = XEXP (cmp, 0);
- rtx cmp_op1 = XEXP (cmp, 1);
+ enum rtx_code code = GET_CODE (operands[0]);
+ rtx cmp_op0 = operands[1];
+ rtx cmp_op1 = operands[2];
+ rtx label1 = operands[3];
rtx comp_reg = gen_reg_rtx (SImode);
- enum rtx_code code = *cmp_code;
+ rtx condition;
gcc_assert ((GET_CODE (cmp_op0) == REG) || (GET_CODE (cmp_op0) == SUBREG));
/* If comparing against zero, just test source reg. */
if (cmp_op1 == const0_rtx)
- return cmp_op0;
+ {
+ comp_reg = cmp_op0;
+ condition = gen_rtx_fmt_ee (signed_condition (code), SImode, comp_reg, const0_rtx);
+ emit_jump_insn (gen_condjump (condition, label1));
+ }
- if (code == EQ || code == NE)
+ else if (code == EQ || code == NE)
{
/* Use xor for equal/not-equal comparison. */
emit_insn (gen_xorsi3 (comp_reg, cmp_op0, cmp_op1));
+ condition = gen_rtx_fmt_ee (signed_condition (code), SImode, comp_reg, const0_rtx);
+ emit_jump_insn (gen_condjump (condition, label1));
}
- else if (code == GT || code == GTU || code == LE || code == LEU)
- {
- /* MicroBlaze compare is not symmetrical. */
- /* Swap argument order. */
- cmp_op1 = force_reg (mode, cmp_op1);
- if (code == GT || code == LE)
- emit_insn (gen_signed_compare (comp_reg, cmp_op0, cmp_op1));
else
- emit_insn (gen_unsigned_compare (comp_reg, cmp_op0, cmp_op1));
- /* Translate test condition. */
- *cmp_code = swap_condition (code);
- }
- else /* if (code == GE || code == GEU || code == LT || code == LTU) */
{
+ /* Generate compare and branch in a single instruction. */
cmp_op1 = force_reg (mode, cmp_op1);
- if (code == GE || code == LT)
- emit_insn (gen_signed_compare (comp_reg, cmp_op1, cmp_op0));
- else
- emit_insn (gen_unsigned_compare (comp_reg, cmp_op1, cmp_op0));
+ condition = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ emit_jump_insn (gen_branch_compare(condition, cmp_op0, cmp_op1, label1));
}
-
- return comp_reg;
}
-/* Generate conditional branch -- first, generate test condition,
- second, generate correct branch instruction. */
-
-void
-microblaze_expand_conditional_branch (enum machine_mode mode, rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[0]);
- rtx comp;
- rtx condition;
-
- comp = microblaze_emit_compare (mode, operands[0], &code);
- condition = gen_rtx_fmt_ee (signed_condition (code), SImode, comp, const0_rtx);
- emit_jump_insn (gen_condjump (condition, operands[3]));
-}
void
microblaze_expand_conditional_branch_sf (rtx operands[])
@@ -3501,6 +3555,12 @@ microblaze_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD microblaze_secondary_reload
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK microblaze_asm_output_mi_thunk
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST microblaze_adjust_cost
diff --git a/gcc-4.8/gcc/config/microblaze/microblaze.h b/gcc-4.8/gcc/config/microblaze/microblaze.h
index bc4d9a128..34c87c980 100644
--- a/gcc-4.8/gcc/config/microblaze/microblaze.h
+++ b/gcc-4.8/gcc/config/microblaze/microblaze.h
@@ -213,6 +213,12 @@ extern enum pipeline_type microblaze_pipe;
#define STRICT_ALIGNMENT 1
#define PCC_BITFIELD_TYPE_MATTERS 1
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
((TREE_CODE (EXP) == STRING_CST || TREE_CODE (EXP) == CONSTRUCTOR) \
&& (ALIGN) < BITS_PER_WORD \
@@ -407,9 +413,6 @@ extern enum reg_class microblaze_regno_to_class[];
|| GET_MODE (X) == VOIDmode) \
? (GR_REGS) : (CLASS))))
-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
- (GET_MODE_CLASS (MODE) == MODE_INT)
-
/* Stack layout; function entry, exit and calling. */
#define STACK_GROWS_DOWNWARD
diff --git a/gcc-4.8/gcc/config/microblaze/microblaze.md b/gcc-4.8/gcc/config/microblaze/microblaze.md
index 3618cad52..f1913b3c1 100644
--- a/gcc-4.8/gcc/config/microblaze/microblaze.md
+++ b/gcc-4.8/gcc/config/microblaze/microblaze.md
@@ -74,7 +74,7 @@
;; bshift Shift operations
(define_attr "type"
- "unknown,branch,jump,call,load,store,move,arith,darith,imul,idiv,icmp,multi,nop,no_delay_arith,no_delay_load,no_delay_store,no_delay_imul,no_delay_move,bshift,fadd,frsub,fmul,fdiv,fcmp,fsl,fsqrt,fcvt"
+ "unknown,branch,jump,call,load,store,move,arith,darith,imul,idiv,icmp,multi,nop,no_delay_arith,no_delay_load,no_delay_store,no_delay_imul,no_delay_move,bshift,fadd,frsub,fmul,fdiv,fcmp,fsl,fsqrt,fcvt,trap"
(const_string "unknown"))
;; Main data type used by the insn
@@ -365,7 +365,8 @@
[(set (match_operand:HI 0 "register_operand" "=r")
(bswap:HI (match_operand:HI 1 "register_operand" "r")))]
"TARGET_REORDER"
- "swaph %0, %1"
+ "swapb %0, %1
+ swaph %0, %0"
)
;;----------------------------------------------------------------
@@ -1118,6 +1119,18 @@
}
)
+;; Load and store reversed
+(define_insn "movsi4_rev"
+ [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Q")
+ (bswap:SI (match_operand:SF 1 "reg_or_mem_operand" "Q,r")))]
+ "TARGET_REORDER"
+ "@
+ lwr\t%0,%y1,r0
+ swr\t%1,%y0,r0"
+ [(set_attr "type" "load,store")
+ (set_attr "mode" "SI")
+ (set_attr "length" "4,4")])
+
;; 32-bit floating point moves
(define_expand "movsf"
@@ -1472,7 +1485,7 @@
(set_attr "length" "124")]
)
-(define_insn "*ashlri_reg"
+(define_insn "*ashrsi_reg"
[(set (match_operand:SI 0 "register_operand" "=&d")
(ashiftrt:SI (match_operand:SI 1 "register_operand" "d")
(match_operand:SI 2 "register_operand" "d")))]
@@ -1561,7 +1574,7 @@
(set_attr "length" "124")]
)
-(define_insn "*lshlri_reg"
+(define_insn "*lshrsi_reg"
[(set (match_operand:SI 0 "register_operand" "=&d")
(lshiftrt:SI (match_operand:SI 1 "register_operand" "d")
(match_operand:SI 2 "register_operand" "d")))]
@@ -1622,34 +1635,12 @@
(set_attr "length" "4")]
)
-(define_insn "signed_compare"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (unspec
- [(match_operand:SI 1 "register_operand" "d")
- (match_operand:SI 2 "register_operand" "d")] UNSPEC_CMP))]
- ""
- "cmp\t%0,%1,%2"
- [(set_attr "type" "arith")
- (set_attr "mode" "SI")
- (set_attr "length" "4")])
-
-(define_insn "unsigned_compare"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (unspec
- [(match_operand:SI 1 "register_operand" "d")
- (match_operand:SI 2 "register_operand" "d")] UNSPEC_CMPU))]
- ""
- "cmpu\t%0,%1,%2"
- [(set_attr "type" "arith")
- (set_attr "mode" "SI")
- (set_attr "length" "4")])
-
;;----------------------------------------------------------------
;; Setting a register from an floating point comparison.
;;----------------------------------------------------------------
(define_insn "cstoresf4"
[(set (match_operand:SI 0 "register_operand" "=r")
- (match_operator 1 "comparison_operator"
+ (match_operator:SI 1 "ordered_comparison_operator"
[(match_operand:SF 2 "register_operand" "r")
(match_operand:SF 3 "register_operand" "r")]))]
"TARGET_HARD_FLOAT"
@@ -1678,7 +1669,7 @@
(define_expand "cbranchsf4"
[(set (pc)
- (if_then_else (match_operator 0 "comparison_operator"
+ (if_then_else (match_operator 0 "ordered_comparison_operator"
[(match_operand:SF 1 "register_operand")
(match_operand:SF 2 "register_operand")])
(label_ref (match_operand 3 ""))
@@ -1717,6 +1708,47 @@
(set_attr "length" "4")]
)
+(define_insn "branch_compare"
+ [(set (pc)
+ (if_then_else (match_operator:SI 0 "cmp_op"
+ [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")
+ ])
+ (label_ref (match_operand 3))
+ (pc)))
+ (clobber (reg:SI R_TMP))]
+ ""
+ {
+ operands[4] = gen_rtx_REG (SImode, MB_ABI_ASM_TEMP_REGNUM);
+ enum rtx_code code = GET_CODE (operands[0]);
+
+ if (code == GT || code == LE)
+ {
+ output_asm_insn ("cmp\tr18,%z1,%z2", operands);
+ code = swap_condition (code);
+ }
+ else if (code == GTU || code == LEU)
+ {
+ output_asm_insn ("cmpu\tr18,%z1,%z2", operands);
+ code = swap_condition (code);
+ }
+ else if (code == GE || code == LT)
+ {
+ output_asm_insn ("cmp\tr18,%z2,%z1", operands);
+ }
+ else if (code == GEU || code == LTU)
+ {
+ output_asm_insn ("cmpu\tr18,%z2,%z1", operands);
+ }
+
+ operands[0] = gen_rtx_fmt_ee (signed_condition (code), SImode, operands[4], const0_rtx);
+ return "b%C0i%?\tr18,%3";
+ }
+ [(set_attr "type" "branch")
+ (set_attr "mode" "none")
+ (set_attr "length" "12")]
+)
+
;;----------------------------------------------------------------
;; Unconditional branches
;;----------------------------------------------------------------
@@ -2201,6 +2233,14 @@
(set_attr "mode" "none")
(set_attr "length" "4")])
+;; Trap instruction pattern for __builtin_trap. Same as the glibc ABORT_INSTRUCTION.
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "brki\tr0,-1"
+ [(set_attr "type" "trap")]
+)
+
;; The insn to set GOT. The hardcoded number "8" accounts for $pc difference
;; between "mfs" and "addik" instructions.
(define_insn "set_got"
diff --git a/gcc-4.8/gcc/config/microblaze/predicates.md b/gcc-4.8/gcc/config/microblaze/predicates.md
index 5fd1bd403..905d46a9d 100644
--- a/gcc-4.8/gcc/config/microblaze/predicates.md
+++ b/gcc-4.8/gcc/config/microblaze/predicates.md
@@ -85,6 +85,10 @@
(ior (match_operand 0 "const_0_operand")
(match_operand 0 "register_operand")))
+(define_predicate "reg_or_mem_operand"
+ (ior (match_operand 0 "memory_operand")
+ (match_operand 0 "register_operand")))
+
;; Return if the operand is either the PC or a label_ref.
(define_special_predicate "pc_or_label_operand"
(ior (match_code "pc,label_ref")
@@ -119,3 +123,7 @@
;; Test for valid PIC call operand
(define_predicate "call_insn_plt_operand"
(match_test "PLT_ADDR_P (op)"))
+
+;; Return if the code of this rtx pattern is a comparison.
+(define_predicate "cmp_op"
+ (match_code "gt,ge,gtu,geu,lt,le,ltu,leu"))
diff --git a/gcc-4.8/gcc/config/mips/driver-native.c b/gcc-4.8/gcc/config/mips/driver-native.c
index 8b7ed5b55..758efa9a4 100644
--- a/gcc-4.8/gcc/config/mips/driver-native.c
+++ b/gcc-4.8/gcc/config/mips/driver-native.c
@@ -58,11 +58,17 @@ host_detect_local_cpu (int argc, const char **argv)
if (strncmp (buf, "cpu model", sizeof ("cpu model") - 1) == 0)
{
if (strstr (buf, "Godson2 V0.2") != NULL
- || strstr (buf, "Loongson-2 V0.2") != NULL)
+ || strstr (buf, "Loongson-2 V0.2") != NULL
+ || strstr (buf, "Loongson-2E") != NULL)
cpu = "loongson2e";
else if (strstr (buf, "Godson2 V0.3") != NULL
- || strstr (buf, "Loongson-2 V0.3") != NULL)
+ || strstr (buf, "Loongson-2 V0.3") != NULL
+ || strstr (buf, "Loongson-2F") != NULL)
cpu = "loongson2f";
+ else if (strstr (buf, "Godson3 V0.5") != NULL
+ || strstr (buf, "Loongson-3 V0.5") != NULL
+ || strstr (buf, "Loongson-3A") != NULL)
+ cpu = "loongson3a";
else if (strstr (buf, "SiByte SB1") != NULL)
cpu = "sb1";
else if (strstr (buf, "R5000") != NULL)
diff --git a/gcc-4.8/gcc/config/mips/mips.c b/gcc-4.8/gcc/config/mips/mips.c
index 252e82848..41c5d6b6b 100644
--- a/gcc-4.8/gcc/config/mips/mips.c
+++ b/gcc-4.8/gcc/config/mips/mips.c
@@ -3560,17 +3560,6 @@ mips_set_reg_reg_cost (enum machine_mode mode)
}
}
-/* Return the cost of an operand X that can be truncated for free.
- SPEED says whether we're optimizing for size or speed. */
-
-static int
-mips_truncated_op_cost (rtx x, bool speed)
-{
- if (GET_CODE (x) == TRUNCATE)
- x = XEXP (x, 0);
- return set_src_cost (x, speed);
-}
-
/* Implement TARGET_RTX_COSTS. */
static bool
@@ -3951,13 +3940,12 @@ mips_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
case ZERO_EXTEND:
if (outer_code == SET
&& ISA_HAS_BADDU
+ && (GET_CODE (XEXP (x, 0)) == TRUNCATE
+ || GET_CODE (XEXP (x, 0)) == SUBREG)
&& GET_MODE (XEXP (x, 0)) == QImode
- && GET_CODE (XEXP (x, 0)) == PLUS)
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
{
- rtx plus = XEXP (x, 0);
- *total = (COSTS_N_INSNS (1)
- + mips_truncated_op_cost (XEXP (plus, 0), speed)
- + mips_truncated_op_cost (XEXP (plus, 1), speed));
+ *total = set_src_cost (XEXP (XEXP (x, 0), 0), speed);
return true;
}
*total = mips_zero_extend_cost (mode, XEXP (x, 0));
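In other words (a hedged reading): the RTL that combine now produces for a byte add is a zero_extend of a QImode TRUNCATE or SUBREG wrapped around a full-width PLUS — see the matching mips.md changes below — so the whole inner PLUS is costed once with set_src_cost instead of pricing each truncated operand separately.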
@@ -8057,7 +8045,7 @@ mips_print_operand (FILE *file, rtx op, int letter)
case 't':
{
int truth = (code == NE) == (letter == 'T');
- fputc ("zfnt"[truth * 2 + (GET_MODE (op) == CCmode)], file);
+ fputc ("zfnt"[truth * 2 + ST_REG_P (REGNO (XEXP (op, 0)))], file);
}
break;
diff --git a/gcc-4.8/gcc/config/mips/mips.h b/gcc-4.8/gcc/config/mips/mips.h
index 819571c1a..183d96d0a 100644
--- a/gcc-4.8/gcc/config/mips/mips.h
+++ b/gcc-4.8/gcc/config/mips/mips.h
@@ -946,6 +946,11 @@ struct mips_cpu_info {
|| TARGET_SMARTMIPS) \
&& !TARGET_MIPS16)
+/* ISA has the WSBH (word swap bytes within halfwords) instruction.
+ 64-bit targets also provide DSBH and DSHD. */
+#define ISA_HAS_WSBH ((ISA_MIPS32R2 || ISA_MIPS64R2) \
+ && !TARGET_MIPS16)
+
/* ISA has data prefetch instructions. This controls use of 'pref'. */
#define ISA_HAS_PREFETCH ((ISA_MIPS4 \
|| TARGET_LOONGSON_2EF \
diff --git a/gcc-4.8/gcc/config/mips/mips.md b/gcc-4.8/gcc/config/mips/mips.md
index ddc4e7c3a..f1ff03a4d 100644
--- a/gcc-4.8/gcc/config/mips/mips.md
+++ b/gcc-4.8/gcc/config/mips/mips.md
@@ -73,6 +73,11 @@
UNSPEC_STORE_LEFT
UNSPEC_STORE_RIGHT
+ ;; Integer operations that are too cumbersome to describe directly.
+ UNSPEC_WSBH
+ UNSPEC_DSBH
+ UNSPEC_DSHD
+
;; Floating-point moves.
UNSPEC_LOAD_LOW
UNSPEC_LOAD_HIGH
@@ -1292,20 +1297,32 @@
;; Combiner patterns for unsigned byte-add.
-(define_insn "*baddu_si"
+(define_insn "*baddu_si_eb"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI
+ (subreg:QI
+ (plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")) 3)))]
+ "ISA_HAS_BADDU && BYTES_BIG_ENDIAN"
+ "baddu\\t%0,%1,%2"
+ [(set_attr "alu_type" "add")])
+
+(define_insn "*baddu_si_el"
[(set (match_operand:SI 0 "register_operand" "=d")
(zero_extend:SI
- (plus:QI (match_operand:QI 1 "register_operand" "d")
- (match_operand:QI 2 "register_operand" "d"))))]
- "ISA_HAS_BADDU"
+ (subreg:QI
+ (plus:SI (match_operand:SI 1 "register_operand" "d")
+ (match_operand:SI 2 "register_operand" "d")) 0)))]
+ "ISA_HAS_BADDU && !BYTES_BIG_ENDIAN"
"baddu\\t%0,%1,%2"
[(set_attr "alu_type" "add")])
(define_insn "*baddu_di<mode>"
[(set (match_operand:GPR 0 "register_operand" "=d")
(zero_extend:GPR
- (plus:QI (truncate:QI (match_operand:DI 1 "register_operand" "d"))
- (truncate:QI (match_operand:DI 2 "register_operand" "d")))))]
+ (truncate:QI
+ (plus:DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))))]
"ISA_HAS_BADDU && TARGET_64BIT"
"baddu\\t%0,%1,%2"
[(set_attr "alu_type" "add")])
@@ -5365,6 +5382,56 @@
}
[(set_attr "type" "shift")
(set_attr "mode" "<MODE>")])
+
+(define_insn "bswaphi2"
+ [(set (match_operand:HI 0 "register_operand" "=d")
+ (bswap:HI (match_operand:HI 1 "register_operand" "d")))]
+ "ISA_HAS_WSBH"
+ "wsbh\t%0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn_and_split "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (bswap:SI (match_operand:SI 1 "register_operand" "d")))]
+ "ISA_HAS_WSBH && ISA_HAS_ROR"
+ "#"
+ ""
+ [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH))
+ (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))]
+ ""
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (bswap:DI (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_64BIT && ISA_HAS_WSBH"
+ "#"
+ ""
+ [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_DSBH))
+ (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_DSHD))]
+ ""
+ [(set_attr "length" "8")])
+
+(define_insn "wsbh"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "d")] UNSPEC_WSBH))]
+ "ISA_HAS_WSBH"
+ "wsbh\t%0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn "dsbh"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSBH))]
+ "TARGET_64BIT && ISA_HAS_WSBH"
+ "dsbh\t%0,%1"
+ [(set_attr "type" "shift")])
+
+(define_insn "dshd"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "d")] UNSPEC_DSHD))]
+ "TARGET_64BIT && ISA_HAS_WSBH"
+ "dshd\t%0,%1"
+ [(set_attr "type" "shift")])
;;
;; ....................
diff --git a/gcc-4.8/gcc/config/pa/pa.c b/gcc-4.8/gcc/config/pa/pa.c
index 87ef8fbbc..760385834 100644
--- a/gcc-4.8/gcc/config/pa/pa.c
+++ b/gcc-4.8/gcc/config/pa/pa.c
@@ -912,9 +912,12 @@ static rtx
legitimize_tls_address (rtx addr)
{
rtx ret, insn, tmp, t1, t2, tp;
- enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
- switch (model)
+ /* Currently, we can't handle anything but a SYMBOL_REF. */
+ if (GET_CODE (addr) != SYMBOL_REF)
+ return addr;
+
+ switch (SYMBOL_REF_TLS_MODEL (addr))
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tmp = gen_reg_rtx (Pmode);
@@ -1035,7 +1038,7 @@ hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
&& !REG_POINTER (XEXP (x, 1)))
return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
- if (PA_SYMBOL_REF_TLS_P (x))
+ if (pa_tls_referenced_p (x))
return legitimize_tls_address (x);
else if (flag_pic)
return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
@@ -1916,9 +1919,10 @@ pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
not consider them legitimate constants. Loop optimizations can
call the emit_move_xxx with one as a source. */
if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
- || function_label_operand (operand1, VOIDmode)
|| (GET_CODE (operand1) == HIGH
- && symbolic_operand (XEXP (operand1, 0), mode)))
+ && symbolic_operand (XEXP (operand1, 0), mode))
+ || function_label_operand (operand1, VOIDmode)
+ || pa_tls_referenced_p (operand1))
{
int ishighonly = 0;
@@ -2625,14 +2629,14 @@ pa_output_move_double (rtx *operands)
if (optype0 == REGOP)
latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
else if (optype0 == OFFSOP)
- latehalf[0] = adjust_address (operands[0], SImode, 4);
+ latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
else
latehalf[0] = operands[0];
if (optype1 == REGOP)
latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
else if (optype1 == OFFSOP)
- latehalf[1] = adjust_address (operands[1], SImode, 4);
+ latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
else if (optype1 == CNSTOP)
split_double (operands[1], &operands[1], &latehalf[1]);
else
@@ -7539,7 +7543,7 @@ pa_attr_length_millicode_call (rtx insn)
if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
return 8;
- if (TARGET_LONG_ABS_CALL && !flag_pic)
+ if (!flag_pic)
return 12;
return 24;
@@ -8104,7 +8108,8 @@ pa_attr_length_indirect_call (rtx insn)
return 12;
if (TARGET_FAST_INDIRECT_CALLS
- || (!TARGET_PORTABLE_RUNTIME
+ || (!TARGET_LONG_CALLS
+ && !TARGET_PORTABLE_RUNTIME
&& ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
|| distance < MAX_PCREL17F_OFFSET)))
return 8;
@@ -10397,7 +10402,7 @@ pa_legitimate_constant_p (enum machine_mode mode, rtx x)
/* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
legitimate constants. The other variants can't be handled by
the move patterns after reload starts. */
- if (PA_SYMBOL_REF_TLS_P (x))
+ if (pa_tls_referenced_p (x))
return false;
if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
@@ -10522,13 +10527,13 @@ pa_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
/* When INT14_OK_STRICT is false, a secondary reload is needed
to adjust the displacement of SImode and DImode floating point
- instructions. So, we return false when STRICT is true. We
+   instructions, but this may fail when the register also needs
+ reloading. So, we return false when STRICT is true. We
also reject long displacements for float mode addresses since
the majority of accesses will use floating point instructions
that don't support 14-bit offsets. */
if (!INT14_OK_STRICT
- && reload_in_progress
- && strict
+ && (strict || !(reload_in_progress || reload_completed))
&& mode != QImode
&& mode != HImode)
return false;
@@ -10588,8 +10593,7 @@ pa_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
return true;
if (!INT14_OK_STRICT
- && reload_in_progress
- && strict
+ && (strict || !(reload_in_progress || reload_completed))
&& mode != QImode
&& mode != HImode)
return false;
diff --git a/gcc-4.8/gcc/config/pa/pa.h b/gcc-4.8/gcc/config/pa/pa.h
index 620768fb0..16fa8c54b 100644
--- a/gcc-4.8/gcc/config/pa/pa.h
+++ b/gcc-4.8/gcc/config/pa/pa.h
@@ -784,9 +784,9 @@ extern int may_call_alloca;
#define MAX_REGS_PER_ADDRESS 2
-/* Non-TLS symbolic references. */
-#define PA_SYMBOL_REF_TLS_P(RTX) \
- (GET_CODE (RTX) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (RTX) != 0)
+/* TLS symbolic reference. */
+#define PA_SYMBOL_REF_TLS_P(X) \
+ (GET_CODE (X) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (X) != 0)
/* Recognize any constant value that is a valid address except
for symbolic addresses. We get better CSE by rejecting them
@@ -796,7 +796,8 @@ extern int may_call_alloca;
#define CONSTANT_ADDRESS_P(X) \
((GET_CODE (X) == LABEL_REF \
|| (GET_CODE (X) == SYMBOL_REF && !SYMBOL_REF_TLS_MODEL (X)) \
- || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST \
+ || GET_CODE (X) == CONST_INT \
+ || (GET_CODE (X) == CONST && !pa_tls_referenced_p (X)) \
|| GET_CODE (X) == HIGH) \
&& (reload_in_progress || reload_completed \
|| ! pa_symbolic_expression_p (X)))
diff --git a/gcc-4.8/gcc/config/rs6000/altivec.h b/gcc-4.8/gcc/config/rs6000/altivec.h
index fd6d07f50..f101751f6 100644
--- a/gcc-4.8/gcc/config/rs6000/altivec.h
+++ b/gcc-4.8/gcc/config/rs6000/altivec.h
@@ -319,6 +319,56 @@
#define vec_sqrt __builtin_vec_sqrt
#define vec_vsx_ld __builtin_vec_vsx_ld
#define vec_vsx_st __builtin_vec_vsx_st
+
+/* Note: xxsldwi and xxpermdi were added as __builtin_vsx_<xxx> functions
+   instead of __builtin_vec_<xxx>.  */
+#define vec_xxsldwi __builtin_vsx_xxsldwi
+#define vec_xxpermdi __builtin_vsx_xxpermdi
+#endif
+
+#ifdef _ARCH_PWR8
+/* Vector built-in functions added in ISA 2.07.  */
+#define vec_eqv __builtin_vec_eqv
+#define vec_nand __builtin_vec_nand
+#define vec_orc __builtin_vec_orc
+#define vec_vaddcuq __builtin_vec_vaddcuq
+#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vadduqm __builtin_vec_vadduqm
+#define vec_vbpermq __builtin_vec_vbpermq
+#define vec_vclz __builtin_vec_vclz
+#define vec_vclzb __builtin_vec_vclzb
+#define vec_vclzd __builtin_vec_vclzd
+#define vec_vclzh __builtin_vec_vclzh
+#define vec_vclzw __builtin_vec_vclzw
+#define vec_vaddecuq __builtin_vec_vaddecuq
+#define vec_vaddeuqm __builtin_vec_vaddeuqm
+#define vec_vsubecuq __builtin_vec_vsubecuq
+#define vec_vsubeuqm __builtin_vec_vsubeuqm
+#define vec_vgbbd __builtin_vec_vgbbd
+#define vec_vmaxsd __builtin_vec_vmaxsd
+#define vec_vmaxud __builtin_vec_vmaxud
+#define vec_vminsd __builtin_vec_vminsd
+#define vec_vminud __builtin_vec_vminud
+#define vec_vmrgew __builtin_vec_vmrgew
+#define vec_vmrgow __builtin_vec_vmrgow
+#define vec_vpksdss __builtin_vec_vpksdss
+#define vec_vpksdus __builtin_vec_vpksdus
+#define vec_vpkudum __builtin_vec_vpkudum
+#define vec_vpkudus __builtin_vec_vpkudus
+#define vec_vpopcnt __builtin_vec_vpopcnt
+#define vec_vpopcntb __builtin_vec_vpopcntb
+#define vec_vpopcntd __builtin_vec_vpopcntd
+#define vec_vpopcnth __builtin_vec_vpopcnth
+#define vec_vpopcntw __builtin_vec_vpopcntw
+#define vec_vrld __builtin_vec_vrld
+#define vec_vsld __builtin_vec_vsld
+#define vec_vsrad __builtin_vec_vsrad
+#define vec_vsrd __builtin_vec_vsrd
+#define vec_vsubcuq __builtin_vec_vsubcuq
+#define vec_vsubudm __builtin_vec_vsubudm
+#define vec_vsubuqm __builtin_vec_vsubuqm
+#define vec_vupkhsw __builtin_vec_vupkhsw
+#define vec_vupklsw __builtin_vec_vupklsw
#endif
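A minimal usage sketch for one of the new mappings (assumes a compiler configured for ISA 2.07, e.g. -mcpu=power8; this just exercises the #define above):

#include <altivec.h>

/* vec_eqv is the bitwise equivalence ~(a ^ b) introduced by ISA 2.07.  */
vector unsigned int
eqv_example (vector unsigned int a, vector unsigned int b)
{
  return vec_eqv (a, b);
}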
/* Predicates.
diff --git a/gcc-4.8/gcc/config/rs6000/altivec.md b/gcc-4.8/gcc/config/rs6000/altivec.md
index 1b0b5c3fb..f13b5ceb3 100644
--- a/gcc-4.8/gcc/config/rs6000/altivec.md
+++ b/gcc-4.8/gcc/config/rs6000/altivec.md
@@ -41,15 +41,12 @@
UNSPEC_VMULOSB
UNSPEC_VMULOUH
UNSPEC_VMULOSH
- UNSPEC_VPKUHUM
- UNSPEC_VPKUWUM
UNSPEC_VPKPX
- UNSPEC_VPKSHSS
- UNSPEC_VPKSWSS
- UNSPEC_VPKUHUS
- UNSPEC_VPKSHUS
- UNSPEC_VPKUWUS
- UNSPEC_VPKSWUS
+ UNSPEC_VPACK_SIGN_SIGN_SAT
+ UNSPEC_VPACK_SIGN_UNS_SAT
+ UNSPEC_VPACK_UNS_UNS_SAT
+ UNSPEC_VPACK_UNS_UNS_MOD
+ UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
UNSPEC_VSLV4SI
UNSPEC_VSLO
UNSPEC_VSR
@@ -71,12 +68,12 @@
UNSPEC_VLOGEFP
UNSPEC_VEXPTEFP
UNSPEC_VLSDOI
- UNSPEC_VUPKHSB
+ UNSPEC_VUNPACK_HI_SIGN
+ UNSPEC_VUNPACK_LO_SIGN
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT
UNSPEC_VUPKHPX
- UNSPEC_VUPKHSH
- UNSPEC_VUPKLSB
UNSPEC_VUPKLPX
- UNSPEC_VUPKLSH
UNSPEC_DST
UNSPEC_DSTT
UNSPEC_DSTST
@@ -134,6 +131,21 @@
UNSPEC_VUPKLS_V4SF
UNSPEC_VUPKHU_V4SF
UNSPEC_VUPKLU_V4SF
+ UNSPEC_VGBBD
+ UNSPEC_VMRGH_DIRECT
+ UNSPEC_VMRGL_DIRECT
+ UNSPEC_VSPLT_DIRECT
+ UNSPEC_VSUMSWS_DIRECT
+ UNSPEC_VADDCUQ
+ UNSPEC_VADDEUQM
+ UNSPEC_VADDECUQ
+ UNSPEC_VSUBCUQ
+ UNSPEC_VSUBEUQM
+ UNSPEC_VSUBECUQ
+ UNSPEC_VBPERMQ
+ UNSPEC_BCDADD
+ UNSPEC_BCDSUB
+ UNSPEC_BCD_OVERFLOW
])
(define_c_enum "unspecv"
@@ -146,6 +158,8 @@
;; Vec int modes
(define_mode_iterator VI [V4SI V8HI V16QI])
+;; Like VI, but add ISA 2.07 integer vector ops
+(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
;; Short vec int modes
(define_mode_iterator VIshort [V8HI V16QI])
;; Vec float modes
@@ -154,13 +168,24 @@
(define_mode_iterator V [V4SI V8HI V16QI V4SF])
;; Vec modes for move/logical/permute ops, include vector types for move not
;; otherwise handled by altivec (v2df, v2di, ti)
-(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI])
+(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI V1TI TI])
;; Like VM, except don't do TImode
-(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
-
-(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
-(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")])
+(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI V1TI])
+
+(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
+(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
+(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
+ (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
+ (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
+ (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")
+ (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")])
+
+;; Vector pack/unpack
+(define_mode_iterator VP [V2DI V4SI V8HI])
+(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")])
+(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
+(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
;; Vector move instructions.
(define_insn "*altivec_mov<mode>"
@@ -378,10 +403,10 @@
;; add
(define_insn "add<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (plus:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (plus:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vaddu<VI_char>m %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -398,7 +423,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VADDCUW))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vaddcuw %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -408,7 +433,7 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VADDU))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "<VI_unit>"
"vaddu<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -418,16 +443,16 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VADDS))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vadds<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
;; sub
(define_insn "sub<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (minus:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (minus:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsubu<VI_char>m %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -444,7 +469,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VSUBCUW))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vsubcuw %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -454,7 +479,7 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VSUBU))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubu<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -464,7 +489,7 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VSUBS))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubs<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -483,7 +508,7 @@
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VAVGS))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vavgs<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -492,31 +517,31 @@
(unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
(match_operand:V4SF 2 "register_operand" "v")]
UNSPEC_VCMPBFP))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vcmpbfp %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_eq<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (eq:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpequ<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_gt<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (gt:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpgts<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_gtu<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpgtu<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
@@ -642,7 +667,7 @@
convert_move (small_swap, swap, 0);
low_product = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_umult_odd_v8hi (low_product, one, two));
+ emit_insn (gen_altivec_vmulouh (low_product, one, two));
high_product = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
@@ -666,13 +691,22 @@
rtx high = gen_reg_rtx (V4SImode);
rtx low = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2]));
- emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2]));
-
- emit_insn (gen_altivec_vmrghw (high, even, odd));
- emit_insn (gen_altivec_vmrglw (low, even, odd));
-
- emit_insn (gen_altivec_vpkuwum (operands[0], high, low));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (high, even, odd));
+ emit_insn (gen_altivec_vmrglw_direct (low, even, odd));
+ emit_insn (gen_altivec_vpkuwum_direct (operands[0], high, low));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulosh (even, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulesh (odd, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (high, odd, even));
+ emit_insn (gen_altivec_vmrglw_direct (low, odd, even));
+ emit_insn (gen_altivec_vpkuwum_direct (operands[0], low, high));
+ }
DONE;
}")
@@ -744,18 +778,18 @@
;; max
(define_insn "umax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (umax:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmaxu<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "smax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (smax:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmaxs<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -768,18 +802,18 @@
[(set_attr "type" "veccmp")])
(define_insn "umin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (umin:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vminu<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "smin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (smin:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmins<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -823,7 +857,39 @@
"vmladduhm %0,%1,%2,%3"
[(set_attr "type" "veccomplex")])
-(define_insn "altivec_vmrghb"
+(define_expand "altivec_vmrghb"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+ GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+ GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+ GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
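In element terms: for a = {a0,...,a15} and b = {b0,...,b15} in big-endian element order, merge-high produces {a0,b0,a1,b1,...,a7,b7}; the LE-with--maltivec=be arm builds the same selection out of the swapped concatenation, which is why its indices start at 8 and the operands are exchanged. A hedged sketch of the builtin that reaches this expander:

#include <altivec.h>

vector unsigned char
mergeh_example (vector unsigned char a, vector unsigned char b)
{
  return vec_mergeh (a, b);   /* vmrghb on BE; vmrglb with swapped
                                 operands on true LE, per the insn below */
}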
+
+(define_insn "*altivec_vmrghb_internal"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_select:V16QI
(vec_concat:V32QI
@@ -838,10 +904,52 @@
(const_int 6) (const_int 22)
(const_int 7) (const_int 23)])))]
"TARGET_ALTIVEC"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrghb %0,%1,%2";
+ else
+ return "vmrglb %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghb_direct"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMRGH_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrghb %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vmrghh"
+(define_expand "altivec_vmrghh"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+ GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vmrghh_internal"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
(vec_concat:V16HI
@@ -852,10 +960,50 @@
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
"TARGET_ALTIVEC"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrghh %0,%1,%2";
+ else
+ return "vmrglh %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghh_direct"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMRGH_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrghh %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vmrghw"
+(define_expand "altivec_vmrghw"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vmrghw_internal"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_select:V4SI
(vec_concat:V8SI
@@ -864,6 +1012,20 @@
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrghw %0,%1,%2";
+ else
+ return "vmrglw %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrghw_direct"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMRGH_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrghw %0,%1,%2"
[(set_attr "type" "vecperm")])
@@ -876,10 +1038,47 @@
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"VECTOR_MEM_ALTIVEC_P (V4SFmode)"
- "vmrghw %0,%1,%2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrghw %0,%1,%2";
+ else
+ return "vmrglw %0,%2,%1";
+}
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vmrglb"
+(define_expand "altivec_vmrglb"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (16, GEN_INT (0), GEN_INT (16), GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18), GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20), GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22), GEN_INT (7), GEN_INT (23));
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (16, GEN_INT (8), GEN_INT (24), GEN_INT (9), GEN_INT (25),
+ GEN_INT (10), GEN_INT (26), GEN_INT (11), GEN_INT (27),
+ GEN_INT (12), GEN_INT (28), GEN_INT (13), GEN_INT (29),
+ GEN_INT (14), GEN_INT (30), GEN_INT (15), GEN_INT (31));
+ x = gen_rtx_VEC_CONCAT (V32QImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V16QImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vmrglb_internal"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_select:V16QI
(vec_concat:V32QI
@@ -894,10 +1093,52 @@
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
"TARGET_ALTIVEC"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrglb %0,%1,%2";
+ else
+ return "vmrghb %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglb_direct"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VMRGL_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrglb %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vmrglh"
+(define_expand "altivec_vmrglh"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (8, GEN_INT (0), GEN_INT (8), GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10), GEN_INT (3), GEN_INT (11));
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (8, GEN_INT (4), GEN_INT (12), GEN_INT (5), GEN_INT (13),
+ GEN_INT (6), GEN_INT (14), GEN_INT (7), GEN_INT (15));
+ x = gen_rtx_VEC_CONCAT (V16HImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V8HImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vmrglh_internal"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
(vec_concat:V16HI
@@ -908,10 +1149,50 @@
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
"TARGET_ALTIVEC"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrglh %0,%1,%2";
+ else
+ return "vmrghh %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglh_direct"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMRGL_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrglh %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vmrglw"
+(define_expand "altivec_vmrglw"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (4, GEN_INT (0), GEN_INT (4), GEN_INT (1), GEN_INT (5));
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (4, GEN_INT (2), GEN_INT (6), GEN_INT (3), GEN_INT (7));
+ x = gen_rtx_VEC_CONCAT (V8SImode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (V4SImode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vmrglw_internal"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_select:V4SI
(vec_concat:V8SI
@@ -920,6 +1201,20 @@
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"VECTOR_MEM_ALTIVEC_P (V4SImode)"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrglw %0,%1,%2";
+ else
+ return "vmrghw %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vmrglw_direct"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMRGL_DIRECT))]
+ "TARGET_ALTIVEC"
"vmrglw %0,%1,%2"
[(set_attr "type" "vecperm")])
@@ -932,10 +1227,154 @@
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"VECTOR_MEM_ALTIVEC_P (V4SFmode)"
- "vmrglw %0,%1,%2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrglw %0,%1,%2";
+ else
+ return "vmrghw %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+;; Power8 vector merge even/odd
+(define_insn "p8_vmrgew"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_P8_VECTOR"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrgew %0,%1,%2";
+ else
+ return "vmrgow %0,%2,%1";
+}
[(set_attr "type" "vecperm")])
-(define_insn "vec_widen_umult_even_v16qi"
+(define_insn "p8_vmrgow"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_P8_VECTOR"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "vmrgow %0,%1,%2";
+ else
+ return "vmrgew %0,%2,%1";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_expand "vec_widen_umult_even_v16qi"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_even_v16qi"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_even_v8hi"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_even_v8hi"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_odd_v16qi"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v16qi"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:V16QI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_odd_v8hi"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v8hi"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:V8HI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2]));
+ DONE;
+})
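The systematic even/odd swap above follows from element numbering: the vmule*/vmulo* instructions select elements by their big-endian position, while the vec_widen_*_even/odd standard names count elements in the target's own order. Under little endian, GCC's even elements occupy the positions the hardware numbers as odd, so each "even" expander emits the vmulo* instruction and vice versa; with -maltivec=be (VECTOR_ELT_ORDER_BIG) the big-endian pairing is kept.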
+
+(define_insn "altivec_vmuleub"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
(match_operand:V16QI 2 "register_operand" "v")]
@@ -944,43 +1383,25 @@
"vmuleub %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "vec_widen_smult_even_v16qi"
+(define_insn "altivec_vmuloub"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
(match_operand:V16QI 2 "register_operand" "v")]
- UNSPEC_VMULESB))]
- "TARGET_ALTIVEC"
- "vmulesb %0,%1,%2"
- [(set_attr "type" "veccomplex")])
-
-(define_insn "vec_widen_umult_even_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VMULEUH))]
- "TARGET_ALTIVEC"
- "vmuleuh %0,%1,%2"
- [(set_attr "type" "veccomplex")])
-
-(define_insn "vec_widen_smult_even_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VMULESH))]
+ UNSPEC_VMULOUB))]
"TARGET_ALTIVEC"
- "vmulesh %0,%1,%2"
+ "vmuloub %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "vec_widen_umult_odd_v16qi"
+(define_insn "altivec_vmulesb"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
(match_operand:V16QI 2 "register_operand" "v")]
- UNSPEC_VMULOUB))]
+ UNSPEC_VMULESB))]
"TARGET_ALTIVEC"
- "vmuloub %0,%1,%2"
+ "vmulesb %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "vec_widen_smult_odd_v16qi"
+(define_insn "altivec_vmulosb"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
(match_operand:V16QI 2 "register_operand" "v")]
@@ -989,7 +1410,16 @@
"vmulosb %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "vec_widen_umult_odd_v8hi"
+(define_insn "altivec_vmuleuh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")]
+ UNSPEC_VMULEUH))]
+ "TARGET_ALTIVEC"
+ "vmuleuh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouh"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
(match_operand:V8HI 2 "register_operand" "v")]
@@ -998,158 +1428,121 @@
"vmulouh %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "vec_widen_smult_odd_v8hi"
+(define_insn "altivec_vmulesh"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
(match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VMULOSH))]
+ UNSPEC_VMULESH))]
"TARGET_ALTIVEC"
- "vmulosh %0,%1,%2"
+ "vmulesh %0,%1,%2"
[(set_attr "type" "veccomplex")])
-
-;; logical ops. Have the logical ops follow the memory ops in
-;; terms of whether to prefer VSX or Altivec
-
-(define_insn "*altivec_and<mode>3"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (and:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vand %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*altivec_ior<mode>3"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (ior:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vor %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*altivec_xor<mode>3"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (xor:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vxor %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*altivec_one_cmpl<mode>2"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (not:VM (match_operand:VM 1 "register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vnor %0,%1,%1"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*altivec_nor<mode>3"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v"))))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vnor %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*altivec_andc<mode>3"
- [(set (match_operand:VM 0 "register_operand" "=v")
- (and:VM (not:VM (match_operand:VM 2 "register_operand" "v"))
- (match_operand:VM 1 "register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vandc %0,%1,%2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vpkuhum"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
+(define_insn "altivec_vmulosh"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
(match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUM))]
+ UNSPEC_VMULOSH))]
"TARGET_ALTIVEC"
- "vpkuhum %0,%1,%2"
- [(set_attr "type" "vecperm")])
+ "vmulosh %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
-(define_insn "altivec_vpkuwum"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUM))]
- "TARGET_ALTIVEC"
- "vpkuwum %0,%1,%2"
- [(set_attr "type" "vecperm")])
+;; Vector pack/unpack
(define_insn "altivec_vpkpx"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VPKPX))]
"TARGET_ALTIVEC"
- "vpkpx %0,%1,%2"
+ "*
+ {
+ if (VECTOR_ELT_ORDER_BIG)
+ return \"vpkpx %0,%1,%2\";
+ else
+ return \"vpkpx %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkshss"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKSHSS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkshss %0,%1,%2"
+(define_insn "altivec_vpks<VI_char>ss"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_SIGN_SIGN_SAT))]
+ "<VI_unit>"
+ "*
+ {
+ if (VECTOR_ELT_ORDER_BIG)
+ return \"vpks<VI_char>ss %0,%1,%2\";
+ else
+ return \"vpks<VI_char>ss %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkswss"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKSWSS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkswss %0,%1,%2"
+(define_insn "altivec_vpks<VI_char>us"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_SIGN_UNS_SAT))]
+ "<VI_unit>"
+ "*
+ {
+ if (VECTOR_ELT_ORDER_BIG)
+ return \"vpks<VI_char>us %0,%1,%2\";
+ else
+ return \"vpks<VI_char>us %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkuhus"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkuhus %0,%1,%2"
+(define_insn "altivec_vpku<VI_char>us"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_SAT))]
+ "<VI_unit>"
+ "*
+ {
+ if (VECTOR_ELT_ORDER_BIG)
+ return \"vpku<VI_char>us %0,%1,%2\";
+ else
+ return \"vpku<VI_char>us %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkshus"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKSHUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkshus %0,%1,%2"
+(define_insn "altivec_vpku<VI_char>um"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_MOD))]
+ "<VI_unit>"
+ "*
+ {
+ if (VECTOR_ELT_ORDER_BIG)
+ return \"vpku<VI_char>um %0,%1,%2\";
+ else
+ return \"vpku<VI_char>um %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkuwus"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkuwus %0,%1,%2"
- [(set_attr "type" "vecperm")])
-
-(define_insn "altivec_vpkswus"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKSWUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkswus %0,%1,%2"
+(define_insn "altivec_vpku<VI_char>um_direct"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_MOD_DIRECT))]
+ "<VI_unit>"
+ "*
+ {
+ if (BYTES_BIG_ENDIAN)
+ return \"vpku<VI_char>um %0,%1,%2\";
+ else
+ return \"vpku<VI_char>um %0,%2,%1\";
+ }"
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vrl<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (rotate:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vrl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1172,26 +1565,26 @@
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vsl<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (ashift:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (ashift:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "*altivec_vsr<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsr<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "*altivec_vsra<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsra<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1233,64 +1626,242 @@
"vsum4s<VI_char>s %0,%1,%2"
[(set_attr "type" "veccomplex")])
+;; FIXME: For the following two patterns, the scratch should only be
+;; allocated for !VECTOR_ELT_ORDER_BIG, and the instructions should
+;; be emitted separately.
(define_insn "altivec_vsum2sws"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VSUM2SWS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+ (clobber (match_scratch:V4SI 3 "=v"))]
"TARGET_ALTIVEC"
- "vsum2sws %0,%1,%2"
- [(set_attr "type" "veccomplex")])
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vsum2sws %0,%1,%2";
+ else
+ return "vsldoi %3,%2,%2,12\n\tvsum2sws %3,%1,%3\n\tvsldoi %0,%3,%3,4";
+}
+ [(set_attr "type" "veccomplex")
+ (set (attr "length")
+ (if_then_else
+ (match_test "VECTOR_ELT_ORDER_BIG")
+ (const_string "4")
+ (const_string "12")))])
(define_insn "altivec_vsumsws"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VSUMSWS))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))
+ (clobber (match_scratch:V4SI 3 "=v"))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vsumsws %0,%1,%2";
+ else
+ return "vspltw %3,%2,0\n\tvsumsws %3,%1,%3\n\tvsldoi %0,%3,%3,12";
+}
+ [(set_attr "type" "veccomplex")
+ (set (attr "length")
+ (if_then_else
+ (match_test "(VECTOR_ELT_ORDER_BIG)")
+ (const_string "4")
+ (const_string "12")))])
+
+(define_insn "altivec_vsumsws_direct"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VSUMSWS_DIRECT))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"TARGET_ALTIVEC"
"vsumsws %0,%1,%2"
[(set_attr "type" "veccomplex")])
-(define_insn "altivec_vspltb"
+(define_expand "altivec_vspltb"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. We have to reflect
+ the actual selected index for the splat in the RTL. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ operands[2] = GEN_INT (15 - INTVAL (operands[2]));
+
+ v = gen_rtvec (1, operands[2]);
+ x = gen_rtx_VEC_SELECT (QImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
+ x = gen_rtx_VEC_DUPLICATE (V16QImode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vspltb_internal"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_duplicate:V16QI
(vec_select:QI (match_operand:V16QI 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
"TARGET_ALTIVEC"
+{
+ /* For true LE, this adjusts the selected index. For LE with
+ -maltivec=be, this reverses what was done in the define_expand
+ because the instruction already has big-endian bias. */
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (15 - INTVAL (operands[2]));
+
+ return "vspltb %0,%1,%2";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vspltb_direct"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSPLT_DIRECT))]
+ "TARGET_ALTIVEC"
"vspltb %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vsplth"
+(define_expand "altivec_vsplth"
+ [(use (match_operand:V8HI 0 "register_operand" ""))
+ (use (match_operand:V8HI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. We have to reflect
+ the actual selected index for the splat in the RTL. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ operands[2] = GEN_INT (7 - INTVAL (operands[2]));
+
+ v = gen_rtvec (1, operands[2]);
+ x = gen_rtx_VEC_SELECT (HImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
+ x = gen_rtx_VEC_DUPLICATE (V8HImode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vsplth_internal"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_duplicate:V8HI
(vec_select:HI (match_operand:V8HI 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "")]))))]
"TARGET_ALTIVEC"
+{
+ /* For true LE, this adjusts the selected index. For LE with
+ -maltivec=be, this reverses what was done in the define_expand
+ because the instruction already has big-endian bias. */
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (7 - INTVAL (operands[2]));
+
+ return "vsplth %0,%1,%2";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vsplth_direct"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSPLT_DIRECT))]
+ "TARGET_ALTIVEC"
"vsplth %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vspltw"
+(define_expand "altivec_vspltw"
+ [(use (match_operand:V4SI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. We have to reflect
+ the actual selected index for the splat in the RTL. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ v = gen_rtvec (1, operands[2]);
+ x = gen_rtx_VEC_SELECT (SImode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
+ x = gen_rtx_VEC_DUPLICATE (V4SImode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vspltw_internal"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_duplicate:V4SI
(vec_select:SI (match_operand:V4SI 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
"TARGET_ALTIVEC"
+{
+ /* For true LE, this adjusts the selected index. For LE with
+ -maltivec=be, this reverses what was done in the define_expand
+ because the instruction already has big-endian bias. */
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ return "vspltw %0,%1,%2";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vspltw_direct"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSPLT_DIRECT))]
+ "TARGET_ALTIVEC"
"vspltw %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vspltsf"
+(define_expand "altivec_vspltsf"
+ [(use (match_operand:V4SF 0 "register_operand" ""))
+ (use (match_operand:V4SF 1 "register_operand" ""))
+ (use (match_operand:QI 2 "u5bit_cint_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. We have to reflect
+ the actual selected index for the splat in the RTL. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ v = gen_rtvec (1, operands[2]);
+ x = gen_rtx_VEC_SELECT (SFmode, operands[1], gen_rtx_PARALLEL (VOIDmode, v));
+ x = gen_rtx_VEC_DUPLICATE (V4SFmode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*altivec_vspltsf_internal"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_duplicate:V4SF
(vec_select:SF (match_operand:V4SF 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
"VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
- "vspltw %0,%1,%2"
+{
+ /* For true LE, this adjusts the selected index. For LE with
+ -maltivec=be, this reverses what was done in the define_expand
+ because the instruction already has big-endian bias. */
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ return "vspltw %0,%1,%2";
+}
[(set_attr "type" "vecperm")])
(define_insn "altivec_vspltis<VI_char>"
@@ -1308,7 +1879,22 @@
"vrfiz %0,%1"
[(set_attr "type" "vecfloat")])
-(define_insn "altivec_vperm_<mode>"
+(define_expand "altivec_vperm_<mode>"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM))]
+ "TARGET_ALTIVEC"
+{
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_vec_perm_le (operands);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_vperm_<mode>_internal"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
@@ -1318,7 +1904,22 @@
"vperm %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
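
The little-endian branch of these vperm expanders defers to altivec_expand_vec_perm_le. A plausible scalar model of why such a rewrite can work at all (complement the 5-bit selector bytes and swap the two inputs, so the big-endian-biased VPERM picks the same bytes) is sketched below; vperm_hw, rev16, and the test values are illustrative, not the helper's actual code:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void
rev16 (const uint8_t *in, uint8_t *out)
{
  for (int i = 0; i < 16; i++)
    out[i] = in[15 - i];
}

/* VPERM as the hardware defines it: byte k of the result is byte sel[k]
   of the 32-byte concatenation {a, b}, all in BE numbering.  */
static void
vperm_hw (const uint8_t *a, const uint8_t *b, const uint8_t *sel,
          uint8_t *out)
{
  for (int k = 0; k < 16; k++)
    {
      unsigned s = sel[k] & 31;
      out[k] = s < 16 ? a[s] : b[s - 16];
    }
}

int
main (void)
{
  uint8_t a[16], b[16], sel[16], want[16];
  for (int i = 0; i < 16; i++)
    {
      a[i] = i;
      b[i] = 100 + i;
      sel[i] = (i * 7 + 3) & 31;                          /* arbitrary control */
      want[i] = sel[i] < 16 ? a[sel[i]] : b[sel[i] - 16]; /* LE semantics */
    }

  /* The LE strategy: byte-reverse everything as the register sees it,
     complement the selector, swap the inputs, run the BE instruction.  */
  uint8_t ra[16], rb[16], rsel[16], nsel[16], hw[16], got[16];
  rev16 (a, ra);
  rev16 (b, rb);
  rev16 (sel, rsel);
  for (int k = 0; k < 16; k++)
    nsel[k] = 31 - (rsel[k] & 31);
  vperm_hw (rb, ra, nsel, hw);
  rev16 (hw, got);

  assert (memcmp (got, want, 16) == 0);
  return 0;
}
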
-(define_insn "altivec_vperm_<mode>_uns"
+(define_expand "altivec_vperm_<mode>_uns"
+ [(set (match_operand:VM 0 "register_operand" "=v")
+ (unspec:VM [(match_operand:VM 1 "register_operand" "v")
+ (match_operand:VM 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VPERM_UNS))]
+ "TARGET_ALTIVEC"
+{
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_vec_perm_le (operands);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_vperm_<mode>_uns_internal"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
@@ -1335,7 +1936,12 @@
(match_operand:V16QI 3 "register_operand" "")]
UNSPEC_VPERM))]
"TARGET_ALTIVEC"
- "")
+{
+  if (!BYTES_BIG_ENDIAN)
+    {
+      altivec_expand_vec_perm_le (operands);
+      DONE;
+    }
+})
(define_expand "vec_perm_constv16qi"
[(match_operand:V16QI 0 "register_operand" "")
@@ -1476,89 +2082,109 @@
"vsldoi %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupkhsb"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKHSB))]
- "TARGET_ALTIVEC"
- "vupkhsb %0,%1"
+(define_insn "altivec_vupkhs<VU_char>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN))]
+ "<VI_unit>"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vupkhs<VU_char> %0,%1";
+ else
+ return "vupkls<VU_char> %0,%1";
+}
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupkhpx"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHPX))]
- "TARGET_ALTIVEC"
- "vupkhpx %0,%1"
+(define_insn "*altivec_vupkhs<VU_char>_direct"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
+ "<VI_unit>"
+ "vupkhs<VU_char> %0,%1"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupkhsh"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHSH))]
- "TARGET_ALTIVEC"
- "vupkhsh %0,%1"
+(define_insn "altivec_vupkls<VU_char>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN))]
+ "<VI_unit>"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vupkls<VU_char> %0,%1";
+ else
+ return "vupkhs<VU_char> %0,%1";
+}
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupklsb"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKLSB))]
- "TARGET_ALTIVEC"
- "vupklsb %0,%1"
+(define_insn "*altivec_vupkls<VU_char>_direct"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT))]
+ "<VI_unit>"
+ "vupkls<VU_char> %0,%1"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupklpx"
+(define_insn "altivec_vupkhpx"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLPX))]
+ UNSPEC_VUPKHPX))]
"TARGET_ALTIVEC"
- "vupklpx %0,%1"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vupkhpx %0,%1";
+ else
+ return "vupklpx %0,%1";
+}
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupklsh"
+(define_insn "altivec_vupklpx"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLSH))]
+ UNSPEC_VUPKLPX))]
"TARGET_ALTIVEC"
- "vupklsh %0,%1"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ return "vupklpx %0,%1";
+ else
+ return "vupkhpx %0,%1";
+}
[(set_attr "type" "vecperm")])
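
The mnemonic swap in the unpack templates above follows from element numbering alone: once the element order is reversed, the hardware's "high" half is the program's "low" half. A scalar sketch, simplified to the V16QI to V8HI case (names made up for illustration):

#include <assert.h>
#include <stdint.h>

/* Widen the "high" half of 16 signed bytes to 8 halfwords.  The hardware's
   vupkhsb always widens register bytes 0..7; when the element order is
   little-endian those are the program's elements 8..15, so GCC prints the
   vupkls<x> mnemonic instead to widen the program's high half.  */
static void
unpack_high (const int8_t in[16], int16_t out[8], int elt_order_big)
{
  for (int i = 0; i < 8; i++)
    out[i] = elt_order_big ? in[i] : in[8 + i];
}

int
main (void)
{
  int8_t v[16];
  int16_t be[8], le[8];
  for (int i = 0; i < 16; i++)
    v[i] = i - 8;                       /* include negative values */
  unpack_high (v, be, 1);
  unpack_high (v, le, 0);
  assert (be[0] == -8 && le[0] == 0);   /* opposite halves, sign-extended */
  return 0;
}
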
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*altivec_vcmpequ<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (eq:VI (match_dup 1)
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (eq:VI2 (match_dup 1)
(match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "<VI_unit>"
"vcmpequ<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_vcmpgts<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (gt:VI (match_dup 1)
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (gt:VI2 (match_dup 1)
(match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "<VI_unit>"
"vcmpgts<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_vcmpgtu<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (gtu:VI (match_dup 1)
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (gtu:VI2 (match_dup 1)
(match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "<VI_unit>"
"vcmpgtu<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
@@ -1710,7 +2336,21 @@
;; Parallel some of the LVE* and STV*'s with unspecs because some have
;; identical RTL but different instructions, and GCC gets confused.
-(define_insn "altivec_lve<VI_char>x"
+(define_expand "altivec_lve<VI_char>x"
+ [(parallel
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (match_operand:VI 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVE)])]
+ "TARGET_ALTIVEC"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVE);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_lve<VI_char>x_internal"
[(parallel
[(set (match_operand:VI 0 "register_operand" "=v")
(match_operand:VI 1 "memory_operand" "Z"))
@@ -1728,16 +2368,44 @@
"lvewx %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvxl"
+(define_expand "altivec_lvxl_<mode>"
[(parallel
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (match_operand:V4SI 1 "memory_operand" "Z"))
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (match_operand:VM2 1 "memory_operand" "Z"))
(unspec [(const_int 0)] UNSPEC_SET_VSCR)])]
"TARGET_ALTIVEC"
- "lvxl %0,%y1"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_SET_VSCR);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_lvxl_<mode>_internal"
+ [(parallel
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (match_operand:VM2 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_SET_VSCR)])]
+ "TARGET_ALTIVEC"
+ "lvx %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvx_<mode>"
+(define_expand "altivec_lvx_<mode>"
+ [(parallel
+ [(set (match_operand:VM2 0 "register_operand" "=v")
+ (match_operand:VM2 1 "memory_operand" "Z"))
+ (unspec [(const_int 0)] UNSPEC_LVX)])]
+ "TARGET_ALTIVEC"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVX);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_lvx_<mode>_internal"
[(parallel
[(set (match_operand:VM2 0 "register_operand" "=v")
(match_operand:VM2 1 "memory_operand" "Z"))
@@ -1746,7 +2414,21 @@
"lvx %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_stvx_<mode>"
+(define_expand "altivec_stvx_<mode>"
+ [(parallel
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
+ (match_operand:VM2 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVX)])]
+ "TARGET_ALTIVEC"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVX);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_stvx_<mode>_internal"
[(parallel
[(set (match_operand:VM2 0 "memory_operand" "=Z")
(match_operand:VM2 1 "register_operand" "v"))
@@ -1755,16 +2437,42 @@
"stvx %1,%y0"
[(set_attr "type" "vecstore")])
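
For LE with -maltivec=be, the load and store expanders above hand off to altivec_expand_lvx_be and altivec_expand_stvx_be. Conceptually the helper has to pair the raw access with an element reversal so the program observes big-endian element numbering; a plain-C model of that requirement (not the helper's actual implementation):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Load n_elts elements of elt_size bytes from mem and reverse their
   order, so a little-endian access yields big-endian element numbering.  */
static void
lvx_be_model (const void *mem, void *vreg, unsigned n_elts,
              unsigned elt_size)
{
  const uint8_t *src = (const uint8_t *) mem;
  uint8_t *dst = (uint8_t *) vreg;
  for (unsigned i = 0; i < n_elts; i++)
    memcpy (dst + i * elt_size, src + (n_elts - 1 - i) * elt_size, elt_size);
}

int
main (void)
{
  uint32_t mem[4] = { 10, 20, 30, 40 }, reg[4];
  lvx_be_model (mem, reg, 4, sizeof (uint32_t));
  assert (reg[0] == 40 && reg[3] == 10);  /* element order reversed */
  return 0;
}
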
-(define_insn "altivec_stvxl"
+(define_expand "altivec_stvxl_<mode>"
[(parallel
- [(set (match_operand:V4SI 0 "memory_operand" "=Z")
- (match_operand:V4SI 1 "register_operand" "v"))
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
+ (match_operand:VM2 1 "register_operand" "v"))
+ (unspec [(const_int 0)] UNSPEC_STVXL)])]
+ "TARGET_ALTIVEC"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVXL);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_stvxl_<mode>_internal"
+ [(parallel
+ [(set (match_operand:VM2 0 "memory_operand" "=Z")
+ (match_operand:VM2 1 "register_operand" "v"))
(unspec [(const_int 0)] UNSPEC_STVXL)])]
"TARGET_ALTIVEC"
"stvxl %1,%y0"
[(set_attr "type" "vecstore")])
-(define_insn "altivec_stve<VI_char>x"
+(define_expand "altivec_stve<VI_char>x"
+ [(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z")
+ (unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))]
+ "TARGET_ALTIVEC"
+{
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ altivec_expand_stvex_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVE);
+ DONE;
+ }
+})
+
+(define_insn "*altivec_stve<VI_char>x_internal"
[(set (match_operand:<VI_scalar> 0 "memory_operand" "=Z")
(unspec:<VI_scalar> [(match_operand:VI 1 "register_operand" "v")] UNSPEC_STVE))]
"TARGET_ALTIVEC"
@@ -1779,20 +2487,28 @@
[(set_attr "type" "vecstore")])
;; Generate
-;; vspltis? SCRATCH0,0
+;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
;; vsubu?m SCRATCH2,SCRATCH1,%1
;; vmaxs? %0,%1,SCRATCH2"
(define_expand "abs<mode>2"
- [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
- (set (match_dup 3)
- (minus:VI (match_dup 2)
- (match_operand:VI 1 "register_operand" "v")))
- (set (match_operand:VI 0 "register_operand" "=v")
- (smax:VI (match_dup 1) (match_dup 3)))]
- "TARGET_ALTIVEC"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4)
+ (minus:VI2 (match_dup 2)
+ (match_operand:VI2 1 "register_operand" "v")))
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (smax:VI2 (match_dup 1) (match_dup 4)))]
+ "<VI_unit>"
{
- operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
- operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ /* Create an all 0 constant. */
+ for (i = 0; i < n_elt; ++i)
+ RTVEC_ELT (v, i) = const0_rtx;
+
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
+ operands[4] = gen_reg_rtx (<MODE>mode);
})
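
The three-instruction sequence described in the comment above computes abs as max(x, 0 - x); a scalar rendition of the same steps (note it inherits the instruction's wrap-around at the most negative value):

#include <stdio.h>

static int
vec_abs_model (int x)
{
  int zero = 0;              /* xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 */
  int neg = zero - x;        /* vsubu?m SCRATCH2,SCRATCH1,%1 */
  return x > neg ? x : neg;  /* vmaxs? %0,%1,SCRATCH2 */
}

int
main (void)
{
  printf ("%d %d\n", vec_abs_model (-7), vec_abs_model (7));  /* 7 7 */
  return 0;
}
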
;; Generate
@@ -1844,7 +2560,7 @@
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
- emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero));
+ emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero));
DONE;
})
@@ -1860,7 +2576,7 @@
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
- emit_insn (gen_altivec_vsumsws (dest, vtmp1, vzero));
+ emit_insn (gen_altivec_vsumsws_direct (dest, vtmp1, vzero));
DONE;
})
@@ -1950,49 +2666,19 @@
DONE;
}")
-(define_expand "vec_unpacks_hi_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKHSB))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
- DONE;
-}")
-
-(define_expand "vec_unpacks_hi_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHSH))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
- DONE;
-}")
-
-(define_expand "vec_unpacks_lo_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKLSB))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
- DONE;
-}")
+(define_expand "vec_unpacks_hi_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN_DIRECT))]
+ "<VI_unit>"
+ "")
-(define_expand "vec_unpacks_lo_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLSH))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
- DONE;
-}")
+(define_expand "vec_unpacks_lo_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN_DIRECT))]
+ "<VI_unit>"
+ "")
(define_insn "vperm_v8hiv4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -2025,25 +2711,26 @@
rtx vzero = gen_reg_rtx (V8HImode);
rtx mask = gen_reg_rtx (V16QImode);
rtvec v = rtvec_alloc (16);
+ bool be = BYTES_BIG_ENDIAN;
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 0);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 2);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 4);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 6);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
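
Each big-endian arm of these masks interleaves selector 16, a byte taken from the zero vector, with the source indices, which is exactly a zero-extending unpack; the little-endian arm is the mirrored equivalent. A short plain-C check of the BE mask shape (illustrative only):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint8_t src[16], zero[16] = { 0 }, mask[16], out[16];
  for (int i = 0; i < 16; i++)
    src[i] = 0x80 + i;
  for (int i = 0; i < 8; i++)
    {
      mask[2 * i] = 16;      /* high byte of the halfword: a zero */
      mask[2 * i + 1] = i;   /* low byte: source element i */
    }
  /* Apply the permute over the 32-byte concatenation {src, zero}.  */
  for (int i = 0; i < 16; i++)
    out[i] = mask[i] < 16 ? src[mask[i]] : zero[mask[i] - 16];
  /* Each halfword is 0x00, src[i]: a zero-extended byte.  */
  for (int i = 0; i < 8; i++)
    assert (out[2 * i] == 0 && out[2 * i + 1] == 0x80 + i);
  return 0;
}
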
@@ -2060,25 +2747,26 @@
rtx vzero = gen_reg_rtx (V4SImode);
rtx mask = gen_reg_rtx (V16QImode);
rtvec v = rtvec_alloc (16);
+ bool be = BYTES_BIG_ENDIAN;
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 0);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 1);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 2);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 3);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 4);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 5);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 6);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 7);
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
@@ -2095,25 +2783,26 @@
rtx vzero = gen_reg_rtx (V8HImode);
rtx mask = gen_reg_rtx (V16QImode);
rtvec v = rtvec_alloc (16);
+ bool be = BYTES_BIG_ENDIAN;
emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 8);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 10);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 12);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 14);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
@@ -2130,25 +2819,26 @@
rtx vzero = gen_reg_rtx (V4SImode);
rtx mask = gen_reg_rtx (V16QImode);
rtvec v = rtvec_alloc (16);
+ bool be = BYTES_BIG_ENDIAN;
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 8);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 9);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 12);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 13);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 16);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 17);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 14);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 15);
+ RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
+ RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14);
+ RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17);
+ RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
+ RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
+ RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12);
+ RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17);
+ RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
+ RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
+ RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10);
+ RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17);
+ RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
+ RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
+ RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8);
+ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
+ RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
@@ -2166,9 +2856,18 @@
rtx ve = gen_reg_rtx (V8HImode);
rtx vo = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2183,9 +2882,18 @@
rtx ve = gen_reg_rtx (V8HImode);
rtx vo = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2200,9 +2908,18 @@
rtx ve = gen_reg_rtx (V8HImode);
rtx vo = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghh_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2217,9 +2934,18 @@
rtx ve = gen_reg_rtx (V8HImode);
rtx vo = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglh_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2234,9 +2960,18 @@
rtx ve = gen_reg_rtx (V4SImode);
rtx vo = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2251,9 +2986,18 @@
rtx ve = gen_reg_rtx (V4SImode);
rtx vo = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2268,9 +3012,18 @@
rtx ve = gen_reg_rtx (V4SImode);
rtx vo = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrghw_direct (operands[0], vo, ve));
+ }
DONE;
}")
@@ -2285,35 +3038,28 @@
rtx ve = gen_reg_rtx (V4SImode);
rtx vo = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
- emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
- emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
- DONE;
-}")
-
-(define_expand "vec_pack_trunc_v8hi"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUM))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
+ if (BYTES_BIG_ENDIAN)
+ {
+ emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], ve, vo));
+ }
+ else
+ {
+ emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2]));
+ emit_insn (gen_altivec_vmrglw_direct (operands[0], vo, ve));
+ }
DONE;
}")
-(define_expand "vec_pack_trunc_v4si"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUM))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
- DONE;
-}")
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_MOD))]
+ "<VI_unit>"
+ "")
(define_expand "altivec_negv4sf2"
[(use (match_operand:V4SF 0 "register_operand" ""))
@@ -2460,3 +3206,243 @@
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
DONE;
}")
+
+
+;; Power8 vector instructions encoded as Altivec instructions
+
+;; Vector count leading zeros
+(define_insn "*p8v_clz<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vclz<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector population count
+(define_insn "*p8v_popcount<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vpopcnt<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector Gather Bits by Bytes by Doubleword
+(define_insn "p8v_vgbbd"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VGBBD))]
+ "TARGET_P8_VECTOR"
+ "vgbbd %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+
+;; 128-bit binary integer arithmetic
+;; We have a special container type (V1TImode) to allow operations using the
+;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without
+;; having to worry about the register allocator deciding GPRs are better.
+
+(define_insn "altivec_vadduqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (plus:V1TI (match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")))]
+ "TARGET_VADDUQM"
+ "vadduqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddcuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")]
+ UNSPEC_VADDCUQ))]
+ "TARGET_VADDUQM"
+ "vaddcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (minus:V1TI (match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")))]
+ "TARGET_VADDUQM"
+ "vsubuqm %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubcuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")]
+ UNSPEC_VSUBCUQ))]
+ "TARGET_VADDUQM"
+ "vsubcuq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddeuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VADDEUQM))]
+ "TARGET_VADDUQM"
+ "vaddeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vaddecuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VADDECUQ))]
+ "TARGET_VADDUQM"
+ "vaddecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubeuqm"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VSUBEUQM))]
+ "TARGET_VADDUQM"
+ "vsubeuqm %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "altivec_vsubecuq"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:V1TI 3 "register_operand" "v")]
+ UNSPEC_VSUBECUQ))]
+ "TARGET_VADDUQM"
+ "vsubecuq %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
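
The carry-generating (vaddcuq/vsubcuq) and carry-consuming (vaddeuqm/vaddecuq and the subtract forms) variants exist so quadword adds can be chained into wider arithmetic. A plain-C model of the add-side semantics, using GCC's unsigned __int128 and assuming the ISA's convention that only the low bit of the carry operand matters:

#include <assert.h>

typedef unsigned __int128 u128;

static u128 adduqm (u128 a, u128 b) { return a + b; }          /* vadduqm */
static u128 addcuq (u128 a, u128 b) { return a + b < a; }      /* carry out */
static u128 addeuqm (u128 a, u128 b, u128 c)                   /* carry in */
{ return a + b + (c & 1); }
static u128 addecuq (u128 a, u128 b, u128 c)
{
  u128 s = a + b;
  return (u128) (s < a) | (u128) (s + (c & 1) < s);  /* extended carry out */
}

int
main (void)
{
  /* 256-bit add: low limb with vadduqm/vaddcuq, high limb with the
     extended forms, exactly how a multi-limb add would chain.  */
  u128 alo = ~(u128) 0, ahi = 1, blo = 1, bhi = 2;
  u128 lo = adduqm (alo, blo);
  u128 c = addcuq (alo, blo);
  u128 hi = addeuqm (ahi, bhi, c);
  assert (lo == 0 && c == 1 && hi == 4);
  assert (addecuq (~(u128) 0, 0, 1) == 1);  /* carry via the carry-in */
  return 0;
}
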
+
+;; We use V2DI as the output type to simplify converting the permute
+;; bits into an integer
+(define_insn "altivec_vbpermq"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")]
+ UNSPEC_VBPERMQ))]
+ "TARGET_P8_VECTOR"
+ "vbpermq %0,%1,%2"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Decimal Integer operations
+(define_int_iterator UNSPEC_BCD_ADD_SUB [UNSPEC_BCDADD UNSPEC_BCDSUB])
+
+(define_int_attr bcd_add_sub [(UNSPEC_BCDADD "add")
+ (UNSPEC_BCDSUB "sub")])
+
+(define_code_iterator BCD_TEST [eq lt gt unordered])
+
+(define_insn "bcd<bcd_add_sub>"
+ [(set (match_operand:V1TI 0 "register_operand" "")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "")
+ (match_operand:V1TI 2 "register_operand" "")
+ (match_operand:QI 3 "const_0_to_1_operand" "")]
+ UNSPEC_BCD_ADD_SUB))
+ (clobber (reg:CCFP 74))]
+ "TARGET_P8_VECTOR"
+ "bcd<bcd_add_sub>. %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Use a floating point type (V2DFmode) for the compare to set CR6 so that we
+;; can use the unordered test for BCD nans and add/subtracts that overflow. An
+;; UNORDERED test on an integer type (like V1TImode) is not defined. The type
+;; probably should be one that can go in the VMX (Altivec) registers, so we
+;; can't use DDmode or DFmode.
+(define_insn "*bcd<bcd_add_sub>_test"
+ [(set (reg:CCFP 74)
+ (compare:CCFP
+ (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:QI 3 "const_0_to_1_operand" "i")]
+ UNSPEC_BCD_ADD_SUB)
+ (match_operand:V2DF 4 "zero_constant" "j")))
+ (clobber (match_scratch:V1TI 0 "=v"))]
+ "TARGET_P8_VECTOR"
+ "bcd<bcd_add_sub>. %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*bcd<bcd_add_sub>_test2"
+ [(set (match_operand:V1TI 0 "register_operand" "=v")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v")
+ (match_operand:V1TI 2 "register_operand" "v")
+ (match_operand:QI 3 "const_0_to_1_operand" "i")]
+ UNSPEC_BCD_ADD_SUB))
+ (set (reg:CCFP 74)
+ (compare:CCFP
+ (unspec:V2DF [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_BCD_ADD_SUB)
+ (match_operand:V2DF 4 "zero_constant" "j")))]
+ "TARGET_P8_VECTOR"
+ "bcd<bcd_add_sub>. %0,%1,%2,%3"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+(define_expand "bcd<bcd_add_sub>_<code>"
+ [(parallel [(set (reg:CCFP 74)
+ (compare:CCFP
+ (unspec:V2DF [(match_operand:V1TI 1 "register_operand" "")
+ (match_operand:V1TI 2 "register_operand" "")
+ (match_operand:QI 3 "const_0_to_1_operand" "")]
+ UNSPEC_BCD_ADD_SUB)
+ (match_dup 4)))
+ (clobber (match_scratch:V1TI 5 ""))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (BCD_TEST:SI (reg:CCFP 74)
+ (const_int 0)))]
+ "TARGET_P8_VECTOR"
+{
+ operands[4] = CONST0_RTX (V2DFmode);
+})
+
+;; Peephole2 pattern to combine a bcdadd/bcdsub that calculates the value and
+;; the bcdadd/bcdsub that tests the value. The combiner won't work since
+;; CR6 is a hard coded register. Unfortunately, all of the Altivec predicate
+;; support is hard coded to use the fixed register CR6 instead of creating
+;; a register class for CR6.
+
+(define_peephole2
+ [(parallel [(set (match_operand:V1TI 0 "register_operand" "")
+ (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "")
+ (match_operand:V1TI 2 "register_operand" "")
+ (match_operand:QI 3 "const_0_to_1_operand" "")]
+ UNSPEC_BCD_ADD_SUB))
+ (clobber (reg:CCFP 74))])
+ (parallel [(set (reg:CCFP 74)
+ (compare:CCFP
+ (unspec:V2DF [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_BCD_ADD_SUB)
+ (match_operand:V2DF 4 "zero_constant" "")))
+ (clobber (match_operand:V1TI 5 "register_operand" ""))])]
+ "TARGET_P8_VECTOR"
+ [(parallel [(set (match_dup 0)
+ (unspec:V1TI [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_BCD_ADD_SUB))
+ (set (reg:CCFP 74)
+ (compare:CCFP
+ (unspec:V2DF [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_BCD_ADD_SUB)
+ (match_dup 4)))])])
diff --git a/gcc-4.8/gcc/config/rs6000/constraints.md b/gcc-4.8/gcc/config/rs6000/constraints.md
index ccb61edce..4467b9e3d 100644
--- a/gcc-4.8/gcc/config/rs6000/constraints.md
+++ b/gcc-4.8/gcc/config/rs6000/constraints.md
@@ -52,21 +52,61 @@
"@internal")
;; Use w as a prefix to add VSX modes
-;; vector double (V2DF)
+;; any VSX register
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
+ "Any VSX register if the -mvsx option was used or NO_REGS.")
+
(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
- "@internal")
+ "VSX vector register to hold vector double data or NO_REGS.")
-;; vector float (V4SF)
(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
- "@internal")
+ "VSX vector register to hold vector float data or NO_REGS.")
+
+(define_register_constraint "wg" "rs6000_constraints[RS6000_CONSTRAINT_wg]"
+ "If -mmfpgpr was used, a floating point register or NO_REGS.")
+
+(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
+ "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]"
+ "VSX register if direct move instructions are enabled, or NO_REGS.")
+
+;; NO_REGS register constraint, used to merge mov{sd,sf}, since movsd can use
+;; a direct move to move between the register sets and movsf can't.
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode.
+(define_register_constraint "wn" "NO_REGS" "No register (NO_REGS).")
+
+(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
+ "General purpose register if 64-bit instructions are enabled or NO_REGS.")
-;; scalar double (DF)
(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
- "@internal")
+ "VSX vector register to hold scalar double values or NO_REGS.")
-;; any VSX register
-(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
- "@internal")
+(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]"
+ "VSX vector register to hold 128 bit integer or NO_REGS.")
+
+(define_register_constraint "wu" "rs6000_constraints[RS6000_CONSTRAINT_wu]"
+ "Altivec register to use for float/32-bit int loads/stores or NO_REGS.")
+
+(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
+ "Altivec register to use for double loads/stores or NO_REGS.")
+
+(define_register_constraint "ww" "rs6000_constraints[RS6000_CONSTRAINT_ww]"
+ "FP or VSX register to perform float operations under -mvsx or NO_REGS.")
+
+(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
+ "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wy" "rs6000_constraints[RS6000_CONSTRAINT_wy]"
+ "VSX vector register to hold scalar float values or NO_REGS.")
+
+(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
+ "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
+
+;; lq/stq validate the address for the load/store quad instructions
+(define_memory_constraint "wQ"
+ "Memory operand suitable for the load/store quad instructions"
+ (match_operand 0 "quad_memory_operand"))
;; Altivec style load/store that ignores the bottom bits of the address
(define_memory_constraint "wZ"
diff --git a/gcc-4.8/gcc/config/rs6000/crypto.md b/gcc-4.8/gcc/config/rs6000/crypto.md
new file mode 100644
index 000000000..9f7e4a1b2
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/crypto.md
@@ -0,0 +1,101 @@
+;; Cryptographic instructions added in ISA 2.07
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec"
+ [UNSPEC_VCIPHER
+ UNSPEC_VNCIPHER
+ UNSPEC_VCIPHERLAST
+ UNSPEC_VNCIPHERLAST
+ UNSPEC_VSBOX
+ UNSPEC_VSHASIGMA
+ UNSPEC_VPERMXOR
+ UNSPEC_VPMSUM])
+
+;; Iterator for VPMSUM/VPERMXOR
+(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI])
+
+(define_mode_attr CR_char [(V16QI "b")
+ (V8HI "h")
+ (V4SI "w")
+ (V2DI "d")])
+
+;; Iterator for VSHASIGMAD/VSHASIGMAW
+(define_mode_iterator CR_hash [V4SI V2DI])
+
+;; Iterator for the other crypto functions
+(define_int_iterator CR_code [UNSPEC_VCIPHER
+ UNSPEC_VNCIPHER
+ UNSPEC_VCIPHERLAST
+ UNSPEC_VNCIPHERLAST])
+
+(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher")
+ (UNSPEC_VNCIPHER "vncipher")
+ (UNSPEC_VCIPHERLAST "vcipherlast")
+ (UNSPEC_VNCIPHERLAST "vncipherlast")])
+
+;; 2 operand crypto instructions
+(define_insn "crypto_<CR_insn>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
+ (match_operand:V2DI 2 "register_operand" "v")]
+ CR_code))]
+ "TARGET_CRYPTO"
+ "<CR_insn> %0,%1,%2"
+ [(set_attr "type" "crypto")])
+
+(define_insn "crypto_vpmsum<CR_char>"
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
+ (match_operand:CR_mode 2 "register_operand" "v")]
+ UNSPEC_VPMSUM))]
+ "TARGET_CRYPTO"
+ "vpmsum<CR_char> %0,%1,%2"
+ [(set_attr "type" "crypto")])
+
+;; 3 operand crypto instructions
+(define_insn "crypto_vpermxor_<mode>"
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
+ (match_operand:CR_mode 2 "register_operand" "v")
+ (match_operand:CR_mode 3 "register_operand" "v")]
+ UNSPEC_VPERMXOR))]
+ "TARGET_CRYPTO"
+ "vpermxor %0,%1,%2,%3"
+ [(set_attr "type" "crypto")])
+
+;; 1 operand crypto instruction
+(define_insn "crypto_vsbox"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")]
+ UNSPEC_VSBOX))]
+ "TARGET_CRYPTO"
+ "vsbox %0,%1"
+ [(set_attr "type" "crypto")])
+
+;; Hash crypto instructions
+(define_insn "crypto_vshasigma<CR_char>"
+ [(set (match_operand:CR_hash 0 "register_operand" "=v")
+ (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_1_operand" "n")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_VSHASIGMA))]
+ "TARGET_CRYPTO"
+ "vshasigma<CR_char> %0,%1,%2,%3"
+ [(set_attr "type" "crypto")])
diff --git a/gcc-4.8/gcc/config/rs6000/darwin.h b/gcc-4.8/gcc/config/rs6000/darwin.h
index 9d92d2af4..0cf2f4c34 100644
--- a/gcc-4.8/gcc/config/rs6000/darwin.h
+++ b/gcc-4.8/gcc/config/rs6000/darwin.h
@@ -205,7 +205,8 @@ extern int darwin_emit_branch_islands;
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \
"vrsave", "vscr", \
"spe_acc", "spefscr", \
- "sfp" \
+ "sfp", \
+ "tfhar", "tfiar", "texasr" \
}
/* This outputs NAME to FILE. */
diff --git a/gcc-4.8/gcc/config/rs6000/dfp.md b/gcc-4.8/gcc/config/rs6000/dfp.md
index 040bae490..a3c09390a 100644
--- a/gcc-4.8/gcc/config/rs6000/dfp.md
+++ b/gcc-4.8/gcc/config/rs6000/dfp.md
@@ -29,77 +29,6 @@
])
-(define_expand "movsd"
- [(set (match_operand:SD 0 "nonimmediate_operand" "")
- (match_operand:SD 1 "any_operand" ""))]
- "TARGET_HARD_FLOAT && TARGET_FPRS"
- "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
-
-(define_split
- [(set (match_operand:SD 0 "gpc_reg_operand" "")
- (match_operand:SD 1 "const_double_operand" ""))]
- "reload_completed
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
- [(set (match_dup 2) (match_dup 3))]
- "
-{
- long l;
- REAL_VALUE_TYPE rv;
-
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
-
- if (! TARGET_POWERPC64)
- operands[2] = operand_subword (operands[0], 0, 0, SDmode);
- else
- operands[2] = gen_lowpart (SImode, operands[0]);
-
- operands[3] = gen_int_mode (l, SImode);
-}")
-
-(define_insn "movsd_hardfloat"
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r")
- (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))]
- "(gpc_reg_operand (operands[0], SDmode)
- || gpc_reg_operand (operands[1], SDmode))
- && (TARGET_HARD_FLOAT && TARGET_FPRS)"
- "@
- mr %0,%1
- lwz%U1%X1 %0,%1
- stw%U0%X0 %1,%0
- fmr %0,%1
- mt%0 %1
- mf%1 %0
- nop
- #
- #"
- [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,8")])
-
-(define_insn "movsd_softfloat"
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h")
- (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))]
- "(gpc_reg_operand (operands[0], SDmode)
- || gpc_reg_operand (operands[1], SDmode))
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
- "@
- mr %0,%1
- mt%0 %1
- mf%1 %0
- lwz%U1%X1 %0,%1
- stw%U0%X0 %1,%0
- li %0,%1
- lis %0,%v1
- la %0,%a1
- #
- #
- nop"
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")])
-
(define_insn "movsd_store"
[(set (match_operand:DD 0 "nonimmediate_operand" "=m")
(unspec:DD [(match_operand:SD 1 "input_operand" "d")]
@@ -108,7 +37,14 @@
|| gpc_reg_operand (operands[1], SDmode))
&& TARGET_HARD_FLOAT && TARGET_FPRS"
"stfd%U0%X0 %1,%0"
- [(set_attr "type" "fpstore")
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore"))))
(set_attr "length" "4")])
(define_insn "movsd_load"
@@ -119,7 +55,14 @@
|| gpc_reg_operand (operands[1], DDmode))
&& TARGET_HARD_FLOAT && TARGET_FPRS"
"lfd%U1%X1 %0,%1"
- [(set_attr "type" "fpload")
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload"))))
(set_attr "length" "4")])
;; Hardware support for decimal floating point operations.
@@ -182,211 +125,6 @@
"fnabs %0,%1"
[(set_attr "type" "fp")])
-(define_expand "movdd"
- [(set (match_operand:DD 0 "nonimmediate_operand" "")
- (match_operand:DD 1 "any_operand" ""))]
- ""
- "{ rs6000_emit_move (operands[0], operands[1], DDmode); DONE; }")
-
-(define_split
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
- (match_operand:DD 1 "const_int_operand" ""))]
- "! TARGET_POWERPC64 && reload_completed
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
- [(set (match_dup 2) (match_dup 4))
- (set (match_dup 3) (match_dup 1))]
- "
-{
- int endian = (WORDS_BIG_ENDIAN == 0);
- HOST_WIDE_INT value = INTVAL (operands[1]);
-
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
-#if HOST_BITS_PER_WIDE_INT == 32
- operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
-#else
- operands[4] = GEN_INT (value >> 32);
- operands[1] = GEN_INT (((value & 0xffffffff) ^ 0x80000000) - 0x80000000);
-#endif
-}")
-
-(define_split
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
- (match_operand:DD 1 "const_double_operand" ""))]
- "! TARGET_POWERPC64 && reload_completed
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
- [(set (match_dup 2) (match_dup 4))
- (set (match_dup 3) (match_dup 5))]
- "
-{
- int endian = (WORDS_BIG_ENDIAN == 0);
- long l[2];
- REAL_VALUE_TYPE rv;
-
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
-
- operands[2] = operand_subword (operands[0], endian, 0, DDmode);
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DDmode);
- operands[4] = gen_int_mode (l[endian], SImode);
- operands[5] = gen_int_mode (l[1 - endian], SImode);
-}")
-
-(define_split
- [(set (match_operand:DD 0 "gpc_reg_operand" "")
- (match_operand:DD 1 "const_double_operand" ""))]
- "TARGET_POWERPC64 && reload_completed
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
- [(set (match_dup 2) (match_dup 3))]
- "
-{
- int endian = (WORDS_BIG_ENDIAN == 0);
- long l[2];
- REAL_VALUE_TYPE rv;
-#if HOST_BITS_PER_WIDE_INT >= 64
- HOST_WIDE_INT val;
-#endif
-
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
-
- operands[2] = gen_lowpart (DImode, operands[0]);
- /* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
-#if HOST_BITS_PER_WIDE_INT >= 64
- val = ((HOST_WIDE_INT)(unsigned long)l[endian] << 32
- | ((HOST_WIDE_INT)(unsigned long)l[1 - endian]));
-
- operands[3] = gen_int_mode (val, DImode);
-#else
- operands[3] = immed_double_const (l[1 - endian], l[endian], DImode);
-#endif
-}")
-
-;; Don't have reload use general registers to load a constant. First,
-;; it might not work if the output operand is the equivalent of
-;; a non-offsettable memref, but also it is less efficient than loading
-;; the constant into an FP register, since it will probably be used there.
-;; The "??" is a kludge until we can figure out a more reasonable way
-;; of handling these non-offsettable values.
-(define_insn "*movdd_hardfloat32"
- [(set (match_operand:DD 0 "nonimmediate_operand" "=!r,??r,m,d,d,m,!r,!r,!r")
- (match_operand:DD 1 "input_operand" "r,m,r,d,m,d,G,H,F"))]
- "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS
- && (gpc_reg_operand (operands[0], DDmode)
- || gpc_reg_operand (operands[1], DDmode))"
- "*
-{
- switch (which_alternative)
- {
- default:
- gcc_unreachable ();
- case 0:
- case 1:
- case 2:
- return \"#\";
- case 3:
- return \"fmr %0,%1\";
- case 4:
- return \"lfd%U1%X1 %0,%1\";
- case 5:
- return \"stfd%U0%X0 %1,%0\";
- case 6:
- case 7:
- case 8:
- return \"#\";
- }
-}"
- [(set_attr "type" "two,load,store,fp,fpload,fpstore,*,*,*")
- (set_attr "length" "8,16,16,4,4,4,8,12,16")])
-
-(define_insn "*movdd_softfloat32"
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,r,m,r,r,r")
- (match_operand:DD 1 "input_operand" "r,m,r,G,H,F"))]
- "! TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
- && (gpc_reg_operand (operands[0], DDmode)
- || gpc_reg_operand (operands[1], DDmode))"
- "#"
- [(set_attr "type" "two,load,store,*,*,*")
- (set_attr "length" "8,8,8,8,12,16")])
-
-; ld/std require word-aligned displacements -> 'Y' constraint.
-; List Y->r and r->Y before r->r for reload.
-(define_insn "*movdd_hardfloat64_mfpgpr"
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r,r,d")
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F,d,r"))]
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && (gpc_reg_operand (operands[0], DDmode)
- || gpc_reg_operand (operands[1], DDmode))"
- "@
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- fmr %0,%1
- lfd%U1%X1 %0,%1
- stfd%U0%X0 %1,%0
- mt%0 %1
- mf%1 %0
- nop
- #
- #
- #
- mftgpr %0,%1
- mffgpr %0,%1"
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
-
-; ld/std require word-aligned displacements -> 'Y' constraint.
-; List Y->r and r->Y before r->r for reload.
-(define_insn "*movdd_hardfloat64"
- [(set (match_operand:DD 0 "nonimmediate_operand" "=Y,r,!r,d,d,m,*c*l,!r,*h,!r,!r,!r")
- (match_operand:DD 1 "input_operand" "r,Y,r,d,m,d,r,h,0,G,H,F"))]
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && (gpc_reg_operand (operands[0], DDmode)
- || gpc_reg_operand (operands[1], DDmode))"
- "@
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- fmr %0,%1
- lfd%U1%X1 %0,%1
- stfd%U0%X0 %1,%0
- mt%0 %1
- mf%1 %0
- nop
- #
- #
- #"
- [(set_attr "type" "store,load,*,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,12,16")])
-
-(define_insn "*movdd_softfloat64"
- [(set (match_operand:DD 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
- (match_operand:DD 1 "input_operand" "Y,r,r,r,h,G,H,F,0"))]
- "TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
- && (gpc_reg_operand (operands[0], DDmode)
- || gpc_reg_operand (operands[1], DDmode))"
- "@
- ld%U1%X1 %0,%1
- std%U0%X0 %1,%0
- mr %0,%1
- mt%0 %1
- mf%1 %0
- #
- #
- #
- nop"
- [(set_attr "type" "load,store,*,mtjmpr,mfjmpr,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,8,12,16,4")])
-
(define_expand "negtd2"
[(set (match_operand:TD 0 "gpc_reg_operand" "")
(neg:TD (match_operand:TD 1 "gpc_reg_operand" "")))]
@@ -410,39 +148,24 @@
"")
(define_insn "*abstd2_fpr"
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
- (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d")))]
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
+ (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d")))]
"TARGET_HARD_FLOAT && TARGET_FPRS"
- "fabs %0,%1"
- [(set_attr "type" "fp")])
+ "@
+ fabs %0,%1
+ fabs %0,%1\;fmr %L0,%L1"
+ [(set_attr "type" "fp")
+ (set_attr "length" "4,8")])
(define_insn "*nabstd2_fpr"
- [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
- (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS"
- "fnabs %0,%1"
- [(set_attr "type" "fp")])
-
-(define_expand "movtd"
- [(set (match_operand:TD 0 "general_operand" "")
- (match_operand:TD 1 "any_operand" ""))]
+ [(set (match_operand:TD 0 "gpc_reg_operand" "=d,d")
+ (neg:TD (abs:TD (match_operand:TD 1 "gpc_reg_operand" "0,d"))))]
"TARGET_HARD_FLOAT && TARGET_FPRS"
- "{ rs6000_emit_move (operands[0], operands[1], TDmode); DONE; }")
-
-; It's important to list the Y->r and r->Y moves before r->r because
-; otherwise reload, given m->r, will try to pick r->r and reload it,
-; which doesn't make progress.
-(define_insn_and_split "*movtd_internal"
- [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
- (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
- "TARGET_HARD_FLOAT && TARGET_FPRS
- && (gpc_reg_operand (operands[0], TDmode)
- || gpc_reg_operand (operands[1], TDmode))"
- "#"
- "&& reload_completed"
- [(pc)]
-{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
- [(set_attr "length" "8,8,8,20,20,16")])
+ "@
+ fnabs %0,%1
+ fnabs %0,%1\;fmr %L0,%L1"
+ [(set_attr "type" "fp")
+ (set_attr "length" "4,8")])
;; Hardware support for decimal floating point operations.
@@ -599,3 +322,72 @@
"TARGET_DFP"
"dctfixq %0,%1"
[(set_attr "type" "fp")])
+
+
+;; Decimal builtin support
+
+(define_c_enum "unspec"
+ [UNSPEC_DDEDPD
+ UNSPEC_DENBCD
+ UNSPEC_DXEX
+ UNSPEC_DIEX
+ UNSPEC_DSCLI
+ UNSPEC_DSCRI])
+
+(define_mode_iterator D64_D128 [DD TD])
+
+(define_mode_attr dfp_suffix [(DD "")
+ (TD "q")])
+
+(define_insn "dfp_ddedpd_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:QI 1 "const_0_to_3_operand" "i")
+ (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+ UNSPEC_DDEDPD))]
+ "TARGET_DFP"
+ "ddedpd<dfp_suffix> %1,%0,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "dfp_denbcd_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:QI 1 "const_0_to_1_operand" "i")
+ (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+ UNSPEC_DENBCD))]
+ "TARGET_DFP"
+ "denbcd<dfp_suffix> %1,%0,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "dfp_dxex_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")]
+ UNSPEC_DXEX))]
+ "TARGET_DFP"
+ "dxex<dfp_suffix> %0,%1"
+ [(set_attr "type" "fp")])
+
+(define_insn "dfp_diex_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+ (match_operand:D64_D128 2 "gpc_reg_operand" "d")]
+ UNSPEC_DIEX))]
+ "TARGET_DFP"
+ "diex<dfp_suffix> %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "dfp_dscli_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_DSCLI))]
+ "TARGET_DFP"
+ "dscli<dfp_suffix> %0,%1,%2"
+ [(set_attr "type" "fp")])
+
+(define_insn "dfp_dscri_<mode>"
+ [(set (match_operand:D64_D128 0 "gpc_reg_operand" "=d")
+ (unspec:D64_D128 [(match_operand:D64_D128 1 "gpc_reg_operand" "d")
+ (match_operand:QI 2 "immediate_operand" "i")]
+ UNSPEC_DSCRI))]
+ "TARGET_DFP"
+ "dscri<dfp_suffix> %0,%1,%2"
+ [(set_attr "type" "fp")])
diff --git a/gcc-4.8/gcc/config/rs6000/htm.md b/gcc-4.8/gcc/config/rs6000/htm.md
new file mode 100644
index 000000000..e8ec91aef
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/htm.md
@@ -0,0 +1,366 @@
+;; Hardware Transactional Memory (HTM) patterns.
+;; Copyright (C) 2013 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <bergner@vnet.ibm.com>.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [(TFHAR_SPR 128)
+ (TFIAR_SPR 129)
+ (TEXASR_SPR 130)
+ (TEXASRU_SPR 131)
+ (MAX_HTM_OPERANDS 4)
+ ])
+
+;;
+;; UNSPEC_VOLATILE usage
+;;
+
+(define_c_enum "unspecv"
+ [UNSPECV_HTM_TABORT
+ UNSPECV_HTM_TABORTDC
+ UNSPECV_HTM_TABORTDCI
+ UNSPECV_HTM_TABORTWC
+ UNSPECV_HTM_TABORTWCI
+ UNSPECV_HTM_TBEGIN
+ UNSPECV_HTM_TCHECK
+ UNSPECV_HTM_TEND
+ UNSPECV_HTM_TRECHKPT
+ UNSPECV_HTM_TRECLAIM
+ UNSPECV_HTM_TSR
+ UNSPECV_HTM_MFSPR
+ UNSPECV_HTM_MTSPR
+ ])
+
+
+(define_expand "tabort"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand:SI 1 "int_reg_operand" "")]
+ UNSPECV_HTM_TABORT))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tabort_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand:SI 0 "int_reg_operand" "r")]
+ UNSPECV_HTM_TABORT))]
+ "TARGET_HTM"
+ "tabort. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tabortdc"
+ [(set (match_dup 4)
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TABORTDC))
+ (set (match_dup 5)
+ (eq:SI (match_dup 4)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 5)))]
+ "TARGET_HTM"
+{
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[5] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tabortdc_internal"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TABORTDC))]
+ "TARGET_HTM"
+ "tabortdc. %0,%1,%2"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tabortdci"
+ [(set (match_dup 4)
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand 3 "s5bit_cint_operand" "n")]
+ UNSPECV_HTM_TABORTDCI))
+ (set (match_dup 5)
+ (eq:SI (match_dup 4)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 5)))]
+ "TARGET_HTM"
+{
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[5] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tabortdci_internal"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand 2 "s5bit_cint_operand" "n")]
+ UNSPECV_HTM_TABORTDCI))]
+ "TARGET_HTM"
+ "tabortdci. %0,%1,%2"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tabortwc"
+ [(set (match_dup 4)
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand:SI 3 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TABORTWC))
+ (set (match_dup 5)
+ (eq:SI (match_dup 4)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 5)))]
+ "TARGET_HTM"
+{
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[5] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tabortwc_internal"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TABORTWC))]
+ "TARGET_HTM"
+ "tabortwc. %0,%1,%2"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tabortwci"
+ [(set (match_dup 4)
+ (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n")
+ (match_operand:SI 2 "gpc_reg_operand" "r")
+ (match_operand 3 "s5bit_cint_operand" "n")]
+ UNSPECV_HTM_TABORTWCI))
+ (set (match_dup 5)
+ (eq:SI (match_dup 4)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 5)))]
+ "TARGET_HTM"
+{
+ operands[4] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[5] = gen_reg_rtx (SImode);
+})
+
+(define_expand "ttest"
+ [(set (match_dup 1)
+ (unspec_volatile:CC [(const_int 0)
+ (reg:SI 0)
+ (const_int 0)]
+ UNSPECV_HTM_TABORTWCI))
+ (set (subreg:CC (match_dup 2) 0) (match_dup 1))
+ (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24)))
+ (parallel [(set (match_operand:SI 0 "int_reg_operand" "")
+ (and:SI (match_dup 3) (const_int 15)))
+ (clobber (scratch:CC))])]
+ "TARGET_HTM"
+{
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[2] = gen_reg_rtx (SImode);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tabortwci_internal"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n")
+ (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand 2 "s5bit_cint_operand" "n")]
+ UNSPECV_HTM_TABORTWCI))]
+ "TARGET_HTM"
+ "tabortwci. %0,%1,%2"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tbegin"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TBEGIN))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tbegin_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TBEGIN))]
+ "TARGET_HTM"
+ "tbegin. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tcheck"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand 1 "u3bit_cint_operand" "n")]
+ UNSPECV_HTM_TCHECK))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tcheck_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "u3bit_cint_operand" "n")]
+ UNSPECV_HTM_TCHECK))]
+ "TARGET_HTM"
+ "tcheck. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tend"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TEND))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tend_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TEND))]
+ "TARGET_HTM"
+ "tend. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "trechkpt"
+ [(set (match_dup 1)
+ (unspec_volatile:CC [(const_int 0)]
+ UNSPECV_HTM_TRECHKPT))
+ (set (match_dup 2)
+ (eq:SI (match_dup 1)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 2)))]
+ "TARGET_HTM"
+{
+ operands[1] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[2] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*trechkpt_internal"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)]
+ UNSPECV_HTM_TRECHKPT))]
+ "TARGET_HTM"
+ "trechkpt."
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "treclaim"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand:SI 1 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TRECLAIM))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*treclaim_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")]
+ UNSPECV_HTM_TRECLAIM))]
+ "TARGET_HTM"
+ "treclaim. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_expand "tsr"
+ [(set (match_dup 2)
+ (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TSR))
+ (set (match_dup 3)
+ (eq:SI (match_dup 2)
+ (const_int 0)))
+ (set (match_operand:SI 0 "int_reg_operand" "")
+ (minus:SI (const_int 1) (match_dup 3)))]
+ "TARGET_HTM"
+{
+ operands[2] = gen_rtx_REG (CCmode, CR0_REGNO);
+ operands[3] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*tsr_internal"
+ [(set (match_operand:CC 1 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")]
+ UNSPECV_HTM_TSR))]
+ "TARGET_HTM"
+ "tsr. %0"
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_insn "htm_mfspr_<mode>"
+ [(set (match_operand:P 0 "gpc_reg_operand" "=r")
+ (unspec_volatile:P [(match_operand 1 "u10bit_cint_operand" "n")
+ (match_operand:P 2 "htm_spr_reg_operand" "")]
+ UNSPECV_HTM_MFSPR))]
+ "TARGET_HTM"
+ "mfspr %0,%1";
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
+
+(define_insn "htm_mtspr_<mode>"
+ [(set (match_operand:P 2 "htm_spr_reg_operand" "")
+ (unspec_volatile:P [(match_operand:P 0 "gpc_reg_operand" "r")
+ (match_operand 1 "u10bit_cint_operand" "n")]
+ UNSPECV_HTM_MTSPR))]
+ "TARGET_HTM"
+ "mtspr %1,%0";
+ [(set_attr "type" "htm")
+ (set_attr "length" "4")])
diff --git a/gcc-4.8/gcc/config/rs6000/htmintrin.h b/gcc-4.8/gcc/config/rs6000/htmintrin.h
new file mode 100644
index 000000000..212cc92f8
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/htmintrin.h
@@ -0,0 +1,131 @@
+/* Hardware Transactional Memory (HTM) intrinsics.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __HTM__
+# error "HTM instruction set not enabled"
+#endif /* __HTM__ */
+
+#ifndef _HTMINTRIN_H
+#define _HTMINTRIN_H
+
+#include <stdint.h>
+
+typedef uint64_t texasr_t;
+typedef uint32_t texasru_t;
+typedef uint32_t texasrl_t;
+typedef uintptr_t tfiar_t;
+typedef uintptr_t tfhar_t;
+
+#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)
+#define _HTM_NONTRANSACTIONAL 0x0
+#define _HTM_SUSPENDED 0x1
+#define _HTM_TRANSACTIONAL 0x2
+
+/* The following macros use the IBM bit numbering for BITNUM
+ as used in the ISA documentation. */
+
+#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
+ (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))
+#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \
+ (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))
+
+#define _TEXASR_FAILURE_CODE(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)
+#define _TEXASRU_FAILURE_CODE(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)
+
+#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)
+#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)
+
+#define _TEXASR_DISALLOWED(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)
+#define _TEXASRU_DISALLOWED(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)
+
+#define _TEXASR_NESTING_OVERFLOW(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)
+#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)
+
+#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)
+#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)
+
+#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)
+#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)
+
+#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)
+#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)
+
+#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)
+#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)
+
+#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)
+#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)
+
+#define _TEXASR_IMPLEMENTATION_SPECIFIC(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)
+#define _TEXASRU_IMPLEMENTATION_SPECIFIC(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)
+
+#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)
+#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)
+
+#define _TEXASR_ABORT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)
+#define _TEXASRU_ABORT(TEXASRU) \
+ _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)
+
+
+#define _TEXASR_SUSPENDED(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)
+
+#define _TEXASR_PRIVILEGE(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)
+
+#define _TEXASR_FAILURE_SUMMARY(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)
+
+#define _TEXASR_TFIAR_EXACT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)
+
+#define _TEXASR_ROT(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)
+
+#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \
+ _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)
+
+#endif /* _HTMINTRIN_H */
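The extraction macros above use IBM (big-endian) bit numbering, so _TEXASR_EXTRACT_BITS(t, 7, 8) pulls bits 0-7 of the 64-bit register. A short sketch of decoding an abort cause with them, assuming the TEXASR value was captured after a failed transaction (e.g. by the __TM_begin wrapper in htmxlintrin.h below):

/* Sketch: classify a transaction failure from a captured TEXASR.
   Assumes <htmintrin.h> as defined above is in scope.  */
#include <stdio.h>

static void report_failure (texasr_t texasr)
{
  if (_TEXASR_FAILURE_PERSISTENT (texasr))
    printf ("persistent failure; retrying is pointless\n");
  else if (_TEXASR_FOOTPRINT_OVERFLOW (texasr))
    printf ("transaction footprint exceeded\n");
  else
    printf ("failure code 0x%x\n",
            (unsigned) _TEXASR_FAILURE_CODE (texasr));
}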
diff --git a/gcc-4.8/gcc/config/rs6000/htmxlintrin.h b/gcc-4.8/gcc/config/rs6000/htmxlintrin.h
new file mode 100644
index 000000000..3908229d5
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/htmxlintrin.h
@@ -0,0 +1,213 @@
+/* XL compiler Hardware Transactional Memory (HTM) execution intrinsics.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Peter Bergner <bergner@vnet.ibm.com>.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __HTM__
+# error "HTM instruction set not enabled"
+#endif /* __HTM__ */
+
+#ifndef _HTMXLINTRIN_H
+#define _HTMXLINTRIN_H
+
+#include <stdint.h>
+#include <htmintrin.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define _TEXASR_PTR(TM_BUF) \
+ ((texasr_t *)((TM_BUF)+0))
+#define _TEXASRU_PTR(TM_BUF) \
+ ((texasru_t *)((TM_BUF)+0))
+#define _TEXASRL_PTR(TM_BUF) \
+ ((texasrl_t *)((TM_BUF)+4))
+#define _TFIAR_PTR(TM_BUF) \
+ ((tfiar_t *)((TM_BUF)+8))
+
+typedef char TM_buff_type[16];
+
+/* Compatibility macro with s390. This macro can be used to determine
+ whether a transaction was successfully started from the __TM_begin()
+ and __TM_simple_begin() intrinsic functions below. */
+#define _HTM_TBEGIN_STARTED 1
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_simple_begin (void)
+{
+ if (__builtin_expect (__builtin_tbegin (0), 1))
+ return _HTM_TBEGIN_STARTED;
+ return 0;
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_begin (void* const TM_buff)
+{
+ *_TEXASRL_PTR (TM_buff) = 0;
+ if (__builtin_expect (__builtin_tbegin (0), 1))
+ return _HTM_TBEGIN_STARTED;
+#ifdef __powerpc64__
+ *_TEXASR_PTR (TM_buff) = __builtin_get_texasr ();
+#else
+ *_TEXASRU_PTR (TM_buff) = __builtin_get_texasru ();
+ *_TEXASRL_PTR (TM_buff) = __builtin_get_texasr ();
+#endif
+ *_TFIAR_PTR (TM_buff) = __builtin_get_tfiar ();
+ return 0;
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_end (void)
+{
+ if (__builtin_expect (__builtin_tend (0), 1))
+ return 1;
+ return 0;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_abort (void)
+{
+ __builtin_tabort (0);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_named_abort (unsigned char const code)
+{
+ __builtin_tabort (code);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_resume (void)
+{
+ __builtin_tresume ();
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_suspend (void)
+{
+ __builtin_tsuspend ();
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_user_abort (void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ return _TEXASRU_ABORT (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_named_user_abort (void* const TM_buff, unsigned char *code)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+
+ *code = _TEXASRU_FAILURE_CODE (texasru);
+ return _TEXASRU_ABORT (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_illegal (void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ return _TEXASRU_DISALLOWED (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_footprint_exceeded (void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_nesting_depth (void* const TM_buff)
+{
+ texasrl_t texasrl;
+
+ if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)
+ {
+ texasrl = *_TEXASRL_PTR (TM_buff);
+ if (!_TEXASR_FAILURE_SUMMARY (texasrl))
+ texasrl = 0;
+ }
+ else
+ texasrl = (texasrl_t) __builtin_get_texasr ();
+
+ return _TEXASR_TRANSACTION_LEVEL (texasrl);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_nested_too_deep(void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ return _TEXASRU_NESTING_OVERFLOW (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_conflict(void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ /* Return TEXASR bits 11 (Self-Induced Conflict) through
+ 14 (Translation Invalidation Conflict). */
+ return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_failure_persistent(void* const TM_buff)
+{
+ texasru_t texasru = *_TEXASRU_PTR (TM_buff);
+ return _TEXASRU_FAILURE_PERSISTENT (texasru);
+}
+
+extern __inline long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_failure_address(void* const TM_buff)
+{
+ return *_TFIAR_PTR (TM_buff);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__TM_failure_code(void* const TM_buff)
+{
+ return *_TEXASR_PTR (TM_buff);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTMXLINTRIN_H */
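Taken together, the wrappers above give the XL-compiler begin/end protocol: __TM_begin clears the saved TEXASR low word, starts a transaction, and on failure snapshots TEXASR and TFIAR into the caller's buffer for the query intrinsics. A minimal retry loop in that style (a sketch, not part of the header):

/* Sketch: retry while the failure is transient, per the
   __TM_is_failure_persistent query above.  */
#include <htmxlintrin.h>

int atomic_update (long *p)
{
  TM_buff_type buf;
  int retries = 3;

  while (retries-- > 0)
    {
      if (__TM_begin (buf) == _HTM_TBEGIN_STARTED)
        {
          (*p)++;
          __TM_end ();
          return 1;
        }
      if (__TM_is_failure_persistent (buf))
        break;                     /* retrying cannot help */
    }
  return 0;                        /* caller must take a fallback path */
}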
diff --git a/gcc-4.8/gcc/config/rs6000/linux64.h b/gcc-4.8/gcc/config/rs6000/linux64.h
index 3f280581f..fe860adf2 100644
--- a/gcc-4.8/gcc/config/rs6000/linux64.h
+++ b/gcc-4.8/gcc/config/rs6000/linux64.h
@@ -25,9 +25,6 @@
#ifndef RS6000_BI_ARCH
-#undef DEFAULT_ABI
-#define DEFAULT_ABI ABI_AIX
-
#undef TARGET_64BIT
#define TARGET_64BIT 1
@@ -74,7 +71,11 @@ extern int dot_symbols;
#undef PROCESSOR_DEFAULT
#define PROCESSOR_DEFAULT PROCESSOR_POWER7
#undef PROCESSOR_DEFAULT64
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
+#define PROCESSOR_DEFAULT64 PROCESSOR_POWER8
+#else
#define PROCESSOR_DEFAULT64 PROCESSOR_POWER7
+#endif
/* We don't need to generate entries in .fixup, except when
-mrelocatable or -mrelocatable-lib is given. */
@@ -88,6 +89,12 @@ extern int dot_symbols;
#define INVALID_64BIT "-m%s not supported in this configuration"
#define INVALID_32BIT INVALID_64BIT
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
+#define ELFv2_ABI_CHECK (rs6000_elf_abi != 1)
+#else
+#define ELFv2_ABI_CHECK (rs6000_elf_abi == 2)
+#endif
+
#undef SUBSUBTARGET_OVERRIDE_OPTIONS
#define SUBSUBTARGET_OVERRIDE_OPTIONS \
do \
@@ -102,6 +109,12 @@ extern int dot_symbols;
error (INVALID_64BIT, "call"); \
} \
dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \
+ if (ELFv2_ABI_CHECK) \
+ { \
+ rs6000_current_abi = ABI_ELFv2; \
+ if (dot_symbols) \
+ error ("-mcall-aixdesc incompatible with -mabi=elfv2"); \
+ } \
if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \
{ \
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
@@ -136,7 +149,10 @@ extern int dot_symbols;
SET_CMODEL (CMODEL_MEDIUM); \
if (rs6000_current_cmodel != CMODEL_SMALL) \
{ \
- TARGET_NO_FP_IN_TOC = 0; \
+ if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \
+ TARGET_NO_FP_IN_TOC \
+ = rs6000_current_cmodel == CMODEL_MEDIUM; \
+ if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \
TARGET_NO_SUM_IN_TOC = 0; \
} \
} \
@@ -351,7 +367,11 @@ extern int dot_symbols;
#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
-#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld64.so.1"
+#ifdef LINUX64_DEFAULT_ABI_ELFv2
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}"
+#else
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}"
+#endif
#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
#if DEFAULT_LIBC == LIBC_UCLIBC
diff --git a/gcc-4.8/gcc/config/rs6000/option-defaults.h b/gcc-4.8/gcc/config/rs6000/option-defaults.h
index 2d7e36a61..0d7ba1ea3 100644
--- a/gcc-4.8/gcc/config/rs6000/option-defaults.h
+++ b/gcc-4.8/gcc/config/rs6000/option-defaults.h
@@ -54,6 +54,7 @@
--with-float is ignored if -mhard-float or -msoft-float are
specified. */
#define OPTION_DEFAULT_SPECS \
+ {"abi", "%{!mabi=elfv*:-mabi=%(VALUE)}" }, \
{"tune", "%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}" }, \
{"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
{"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:-mtune=%(VALUE)}}}" }, \
diff --git a/gcc-4.8/gcc/config/rs6000/power8.md b/gcc-4.8/gcc/config/rs6000/power8.md
new file mode 100644
index 000000000..83bf71974
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/power8.md
@@ -0,0 +1,373 @@
+;; Scheduling description for IBM POWER8 processor.
+;; Copyright (C) 2013 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power8fxu,power8lsu,power8vsu,power8misc")
+
+(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu")
+(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu")
+(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu")
+(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu")
+(define_cpu_unit "bpu_power8,cru_power8" "power8misc")
+(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\
+ du5_power8,du6_power8" "power8misc")
+
+
+; Dispatch group reservations
+(define_reservation "DU_any_power8"
+ "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\
+ du5_power8")
+
+; 2-way Cracked instructions go in slots 0-1
+; (can also have a second in slots 3-4 if insns are adjacent)
+(define_reservation "DU_cracked_power8"
+ "du0_power8+du1_power8")
+
+; Insns that are first in group
+(define_reservation "DU_first_power8"
+ "du0_power8")
+
+; Insns that are first and last in group
+(define_reservation "DU_both_power8"
+ "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\
+ du5_power8+du6_power8")
+
+; Dispatch slots are allocated in order conforming to program order.
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\
+ du5_power8,du6_power8")
+(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\
+ du6_power8")
+(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8")
+(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8")
+(absence_set "du4_power8" "du5_power8,du6_power8")
+(absence_set "du5_power8" "du6_power8")
+
+
+; Execution unit reservations
+(define_reservation "FXU_power8"
+ "fxu0_power8|fxu1_power8")
+
+(define_reservation "LU_power8"
+ "lu0_power8|lu1_power8")
+
+(define_reservation "LSU_power8"
+ "lsu0_power8|lsu1_power8")
+
+(define_reservation "LU_or_LSU_power8"
+ "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8")
+
+(define_reservation "VSU_power8"
+ "vsu0_power8|vsu1_power8")
+
+
+; LS Unit
+(define_insn_reservation "power8-load" 3
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LU_or_LSU_power8")
+
+(define_insn_reservation "power8-load-update" 3
+ (and (eq_attr "type" "load_u,load_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_or_LSU_power8+FXU_power8")
+
+(define_insn_reservation "power8-load-ext" 3
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_or_LSU_power8,FXU_power8")
+
+(define_insn_reservation "power8-load-ext-update" 3
+ (and (eq_attr "type" "load_ext_u,load_ext_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8")
+
+(define_insn_reservation "power8-fpload" 5
+ (and (eq_attr "type" "fpload,vecload")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LU_power8")
+
+(define_insn_reservation "power8-fpload-update" 5
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_power8+FXU_power8")
+
+(define_insn_reservation "power8-store" 5 ; store-forwarding latency
+ (and (eq_attr "type" "store,store_u")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-store-update-indexed" 5
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-fpstore" 5
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-fpstore-update" 5
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-vecstore" 5
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-larx" 3
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LU_or_LSU_power8")
+
+(define_insn_reservation "power8-stcx" 10
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-sync" 1
+ (and (eq_attr "type" "sync,isync")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LSU_power8")
+
+
+; FX Unit
+(define_insn_reservation "power8-1cyc" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,exts,isel")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 2 "power8-1cyc"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+; "power8-load,power8-load-update,power8-load-ext,\
+; power8-load-ext-update,power8-fpload,power8-fpload-update,\
+; power8-store,power8-store-update,power8-store-update-indexed,\
+; power8-fpstore,power8-fpstore-update,power8-vecstore,\
+; power8-larx,power8-stcx")
+
+(define_insn_reservation "power8-2cyc" 2
+ (and (eq_attr "type" "cntlz,popcnt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+(define_insn_reservation "power8-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8")
+
+(define_insn_reservation "power8-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8")
+
+; cmp - Normal compare insns
+(define_insn_reservation "power8-cmp" 2
+ (and (eq_attr "type" "cmp")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; fast_compare : add./and./nor./etc
+(define_insn_reservation "power8-fast-compare" 2
+ (and (eq_attr "type" "fast_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; compare : rldicl./exts./etc
+; delayed_compare : rlwinm./slwi./etc
+; var_delayed_compare : rlwnm./slw./etc
+(define_insn_reservation "power8-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,FXU_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 3 "power8-fast-compare,power8-compare"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+
+; 5 cycle CR latency
+(define_bypass 5 "power8-fast-compare,power8-compare"
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
+
+(define_insn_reservation "power8-mul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+(define_insn_reservation "power8-mul-compare" 4
+ (and (eq_attr "type" "imul_compare,lmul_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 5 "power8-mul,power8-mul-compare"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+
+; 7 cycle CR latency
+(define_bypass 7 "power8-mul,power8-mul-compare"
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
+
+; FXU divides are not pipelined
+(define_insn_reservation "power8-idiv" 37
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,fxu0_power8*37|fxu1_power8*37")
+
+(define_insn_reservation "power8-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,fxu0_power8*68|fxu1_power8*68")
+
+(define_insn_reservation "power8-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,FXU_power8")
+
+; Should differentiate between 1 cr field and > 1 since mtocrf is not microcoded
+(define_insn_reservation "power8-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,FXU_power8")
+
+
+; CR Unit
+(define_insn_reservation "power8-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8+FXU_power8")
+
+(define_insn_reservation "power8-crlogical" 3
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8")
+
+(define_insn_reservation "power8-mfcr" 5
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,cru_power8")
+
+(define_insn_reservation "power8-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8")
+
+
+; BR Unit
+; Branches take dispatch slot 7, but reserve any remaining prior slots to
+; prevent other insns from grabbing them once this is assigned.
+(define_insn_reservation "power8-branch" 3
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power8"))
+ "(du6_power8\
+ |du5_power8+du6_power8\
+ |du4_power8+du5_power8+du6_power8\
+ |du3_power8+du4_power8+du5_power8+du6_power8\
+ |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
+ |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
+ |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\
+ du6_power8),bpu_power8")
+
+; Branch updating LR/CTR feeding mf[lr|ctr]
+(define_bypass 4 "power8-branch" "power8-mfjmpr")
+
+
+; VS Unit (includes FP/VSX/VMX/DFP/Crypto)
+(define_insn_reservation "power8-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+; Additional 3 cycles for any CR result
+(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch")
+
+(define_insn_reservation "power8-fpcompare" 8
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-sdiv" 27
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-sqrt" 32
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-dsqrt" 44
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecsimple" 2
+ (and (eq_attr "type" "vecperm,vecsimple,veccmp")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecnormal" 6
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_bypass 7 "power8-vecnormal"
+ "power8-vecsimple,power8-veccomplex,power8-fpstore*,\
+ power8-vecstore")
+
+(define_insn_reservation "power8-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecfdiv" 25
+ (and (eq_attr "type" "vecfdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecdiv" 31
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-mffgpr" 5
+ (and (eq_attr "type" "mffgpr")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-mftgpr" 6
+ (and (eq_attr "type" "mftgpr")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-crypto" 7
+ (and (eq_attr "type" "crypto")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
diff --git a/gcc-4.8/gcc/config/rs6000/ppc-asm.h b/gcc-4.8/gcc/config/rs6000/ppc-asm.h
index db490b6c9..8108efd07 100644
--- a/gcc-4.8/gcc/config/rs6000/ppc-asm.h
+++ b/gcc-4.8/gcc/config/rs6000/ppc-asm.h
@@ -256,7 +256,30 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
* the real function with one or two leading periods respectively.
*/
-#if defined (__powerpc64__)
+#if defined(__powerpc64__) && _CALL_ELF == 2
+
+/* Defining "toc" above breaks @toc in assembler code. */
+#undef toc
+
+#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name)
+#define JUMP_TARGET(name) FUNC_NAME(name)
+#define FUNC_START(name) \
+ .type FUNC_NAME(name),@function; \
+ .globl FUNC_NAME(name); \
+FUNC_NAME(name): \
+0: addis 2,12,(.TOC.-0b)@ha; \
+ addi 2,2,(.TOC.-0b)@l; \
+ .localentry FUNC_NAME(name),.-FUNC_NAME(name)
+
+#define HIDDEN_FUNC(name) \
+ FUNC_START(name) \
+ .hidden FUNC_NAME(name);
+
+#define FUNC_END(name) \
+ .size FUNC_NAME(name),.-FUNC_NAME(name)
+
+#elif defined (__powerpc64__)
+
#define FUNC_NAME(name) GLUE(.,name)
#define JUMP_TARGET(name) FUNC_NAME(name)
#define FUNC_START(name) \
diff --git a/gcc-4.8/gcc/config/rs6000/predicates.md b/gcc-4.8/gcc/config/rs6000/predicates.md
index 07944db10..0659e066e 100644
--- a/gcc-4.8/gcc/config/rs6000/predicates.md
+++ b/gcc-4.8/gcc/config/rs6000/predicates.md
@@ -124,6 +124,11 @@
(and (match_code "const_int")
(match_test "INTVAL (op) >= -16 && INTVAL (op) <= 15")))
+;; Return 1 if op is an unsigned 3-bit constant integer.
+(define_predicate "u3bit_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 7")))
+
;; Return 1 if op is a unsigned 5-bit constant integer.
(define_predicate "u5bit_cint_operand"
(and (match_code "const_int")
@@ -135,6 +140,11 @@
(and (match_code "const_int")
(match_test "INTVAL (op) >= -128 && INTVAL (op) <= 127")))
+;; Return 1 if op is an unsigned 10-bit constant integer.
+(define_predicate "u10bit_cint_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1023")))
+
;; Return 1 if op is a constant integer that can fit in a D field.
(define_predicate "short_cint_operand"
(and (match_code "const_int")
@@ -161,11 +171,21 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 1)")))
+;; Match op = 0..3.
+(define_predicate "const_0_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+
;; Match op = 2 or op = 3.
(define_predicate "const_2_to_3_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+;; Match op = 0..15.
+(define_predicate "const_0_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
;; Return 1 if op is a register that is not special.
(define_predicate "gpc_reg_operand"
(match_operand 0 "register_operand")
@@ -182,9 +202,95 @@
if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
return 1;
+ if (TARGET_VSX && VSX_REGNO_P (REGNO (op)))
+ return 1;
+
return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op));
})
+;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't
+;; allow floating point or vector registers.
+(define_predicate "int_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ return 1;
+
+ return INT_REGNO_P (REGNO (op));
+})
+
+;; Like int_reg_operand, but only return true for base registers
+(define_predicate "base_reg_operand"
+ (match_operand 0 "int_reg_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ return (REGNO (op) != FIRST_GPR_REGNO);
+})
+
+;; Return 1 if op is a HTM specific SPR register.
+(define_predicate "htm_spr_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ if (!TARGET_HTM)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ switch (REGNO (op))
+ {
+ case TFHAR_REGNO:
+ case TFIAR_REGNO:
+ case TEXASR_REGNO:
+ return 1;
+ default:
+ break;
+ }
+
+ /* Unknown SPR. */
+ return 0;
+})
+
+;; Return 1 if op is a general purpose register that is an even register
+;; which is suitable for a load/store quad operation.
+(define_predicate "quad_int_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ HOST_WIDE_INT r;
+
+ if (!TARGET_QUAD_MEMORY && !TARGET_QUAD_MEMORY_ATOMIC)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ r = REGNO (op);
+ if (r >= FIRST_PSEUDO_REGISTER)
+ return 1;
+
+ return (INT_REGNO_P (r) && ((r & 1) == 0));
+})
+
;; Return 1 if op is a register that is a condition register field.
(define_predicate "cc_reg_operand"
(match_operand 0 "register_operand")
@@ -315,6 +421,11 @@
&& CONST_DOUBLE_HIGH (op) == 0")
(match_operand 0 "gpc_reg_operand"))))
+;; Like reg_or_logical_cint_operand, but allow vsx registers
+(define_predicate "vsx_reg_or_cint_operand"
+ (ior (match_operand 0 "vsx_register_operand")
+ (match_operand 0 "reg_or_logical_cint_operand")))
+
;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
;; with no more than one instruction per word.
(define_predicate "easy_fp_constant"
@@ -333,6 +444,11 @@
&& mode != DImode)
return 1;
+ /* The constant 0.0 is easy under VSX. */
+ if ((mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode)
+ && VECTOR_UNIT_VSX_P (DFmode) && op == CONST0_RTX (mode))
+ return 1;
+
if (DECIMAL_FLOAT_MODE_P (mode))
return 0;
@@ -521,6 +637,55 @@
(and (match_operand 0 "memory_operand")
(match_test "offsettable_nonstrict_memref_p (op)")))
+;; Return 1 if the operand is suitable for load/store quad memory.
+;; This predicate only checks for non-atomic loads/stores (not lqarx/stqcx).
+(define_predicate "quad_memory_operand"
+ (match_code "mem")
+{
+ rtx addr, op0, op1;
+ int ret;
+
+ if (!TARGET_QUAD_MEMORY && !TARGET_SYNC_TI)
+ ret = 0;
+
+ else if (!memory_operand (op, mode))
+ ret = 0;
+
+ else if (GET_MODE_SIZE (GET_MODE (op)) != 16)
+ ret = 0;
+
+ else if (MEM_ALIGN (op) < 128)
+ ret = 0;
+
+ else
+ {
+ addr = XEXP (op, 0);
+ if (int_reg_operand (addr, Pmode))
+ ret = 1;
+
+ else if (GET_CODE (addr) != PLUS)
+ ret = 0;
+
+ else
+ {
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ ret = (int_reg_operand (op0, Pmode)
+ && GET_CODE (op1) == CONST_INT
+ && IN_RANGE (INTVAL (op1), -32768, 32767)
+ && (INTVAL (op1) & 15) == 0);
+ }
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false");
+ debug_rtx (op);
+ }
+
+ return ret;
+})
+
;; Return 1 if the operand is an indexed or indirect memory operand.
(define_predicate "indexed_or_indirect_operand"
(match_code "mem")
@@ -535,6 +700,19 @@
return indexed_or_indirect_address (op, mode);
})
+;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
+;; moves are supported.
+(define_predicate "reg_or_indexed_operand"
+ (match_code "mem,reg")
+{
+ if (MEM_P (op))
+ return indexed_or_indirect_operand (op, mode);
+ else if (TARGET_DIRECT_MOVE)
+ return register_operand (op, mode);
+ return 0;
+})
+
;; Return 1 if the operand is an indexed or indirect memory operand with an
;; AND -16 in it, used to recognize when we need to switch to Altivec loads
;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
@@ -560,6 +738,28 @@
&& REG_P (XEXP (op, 1)))")
(match_operand 0 "address_operand")))
+;; Return 1 if the operand is an index-form address.
+(define_special_predicate "indexed_address"
+ (match_test "(GET_CODE (op) == PLUS
+ && REG_P (XEXP (op, 0))
+ && REG_P (XEXP (op, 1)))"))
+
+;; Return 1 if the operand is a MEM with an update-form address. This may
+;; also include update-indexed form.
+(define_special_predicate "update_address_mem"
+ (match_test "(MEM_P (op)
+ && (GET_CODE (XEXP (op, 0)) == PRE_INC
+ || GET_CODE (XEXP (op, 0)) == PRE_DEC
+ || GET_CODE (XEXP (op, 0)) == PRE_MODIFY))"))
+
+;; Return 1 if the operand is a MEM with an update-indexed-form address. Note
+;; that PRE_INC/PRE_DEC will always be non-indexed (i.e. non X-form) since the
+;; increment is based on the mode size and will therefore always be a const.
+(define_special_predicate "update_indexed_address_mem"
+ (match_test "(MEM_P (op)
+ && GET_CODE (XEXP (op, 0)) == PRE_MODIFY
+ && indexed_address (XEXP (XEXP (op, 0), 1), mode))"))
+
;; Used for the destination of the fix_truncdfsi2 expander.
;; If stfiwx will be used, the result goes to memory; otherwise,
;; we're going to emit a store and a load of a subreg, so the dest is a
@@ -883,7 +1083,8 @@
(and (match_code "symbol_ref")
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
&& ((SYMBOL_REF_LOCAL_P (op)
- && (DEFAULT_ABI != ABI_AIX
+ && ((DEFAULT_ABI != ABI_AIX
+ && DEFAULT_ABI != ABI_ELFv2)
|| !SYMBOL_REF_EXTERNAL_P (op)))
|| (op == XEXP (DECL_RTL (current_function_decl),
0)))")))
@@ -1364,6 +1565,26 @@
return 1;
})
+;; Return 1 if OP is valid for crsave insn, known to be a PARALLEL.
+(define_predicate "crsave_operation"
+ (match_code "parallel")
+{
+ int count = XVECLEN (op, 0);
+ int i;
+
+ for (i = 1; i < count; i++)
+ {
+ rtx exp = XVECEXP (op, 0, i);
+
+ if (GET_CODE (exp) != USE
+ || GET_CODE (XEXP (exp, 0)) != REG
+ || GET_MODE (XEXP (exp, 0)) != CCmode
+ || ! CR_REGNO_P (REGNO (XEXP (exp, 0))))
+ return 0;
+ }
+ return 1;
+})
+
;; Return 1 if OP is valid for lmw insn, known to be a PARALLEL.
(define_predicate "lmw_operation"
(match_code "parallel")
@@ -1534,3 +1755,99 @@
return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL;
})
+
+;; Match the first insn (addis) in fusing the combination of addis and loads to
+;; GPR registers on power8.
+(define_predicate "fusion_gpr_addis"
+ (match_code "const_int,high,plus")
+{
+ HOST_WIDE_INT value;
+ rtx int_const;
+
+ if (GET_CODE (op) == HIGH)
+ return 1;
+
+ if (CONST_INT_P (op))
+ int_const = op;
+
+ else if (GET_CODE (op) == PLUS
+ && base_reg_operand (XEXP (op, 0), Pmode)
+ && CONST_INT_P (XEXP (op, 1)))
+ int_const = XEXP (op, 1);
+
+ else
+ return 0;
+
+ /* Power8 currently will only do the fusion if the top 11 bits of the addis
+ value are all 1's or 0's. */
+ value = INTVAL (int_const);
+ if ((value & (HOST_WIDE_INT)0xffff) != 0)
+ return 0;
+
+ if ((value & (HOST_WIDE_INT)0xffff0000) == 0)
+ return 0;
+
+ return (IN_RANGE (value >> 16, -32, 31));
+})
+
+;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis
+;; and loads to GPR registers on power8.
+(define_predicate "fusion_gpr_mem_load"
+ (match_code "mem,sign_extend,zero_extend")
+{
+ rtx addr;
+
+ /* Handle sign/zero extend. */
+ if (GET_CODE (op) == ZERO_EXTEND
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND))
+ {
+ op = XEXP (op, 0);
+ mode = GET_MODE (op);
+ }
+
+ if (!MEM_P (op))
+ return 0;
+
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ break;
+
+ case DImode:
+ if (!TARGET_POWERPC64)
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ addr = XEXP (op, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx base = XEXP (addr, 0);
+ rtx offset = XEXP (addr, 1);
+
+ return (base_reg_operand (base, GET_MODE (base))
+ && satisfies_constraint_I (offset));
+ }
+
+ else if (GET_CODE (addr) == LO_SUM)
+ {
+ rtx base = XEXP (addr, 0);
+ rtx offset = XEXP (addr, 1);
+
+ if (!base_reg_operand (base, GET_MODE (base)))
+ return 0;
+
+ else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
+ return small_toc_ref (offset, GET_MODE (offset));
+
+ else if (TARGET_ELF && !TARGET_POWERPC64)
+ return CONSTANT_P (offset);
+ }
+
+ return 0;
+})
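fusion_gpr_addis accepts a constant only when its low 16 bits are zero and the 16-bit addis immediate (value >> 16) has its top 11 bits all zeros or all ones, i.e. lies in [-32, 31]. A standalone restatement of that filter (a sketch mirroring the predicate above, not shared code):

/* Sketch: the constant test from fusion_gpr_addis, restated as
   plain C for readability.  */
#include <stdbool.h>
#include <stdint.h>

static bool fusion_addis_const_ok (int64_t value)
{
  if ((value & 0xffff) != 0)         /* low half must be zero (pure addis) */
    return false;
  if ((value & 0xffff0000LL) == 0)   /* nothing for addis to contribute */
    return false;
  int64_t hi = value >> 16;          /* the addis immediate */
  return hi >= -32 && hi <= 31;      /* top 11 bits all ones or all zeros */
}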
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.8/gcc/config/rs6000/rs6000-builtin.def
index a545fe3e4..45446290b 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-builtin.def
@@ -30,7 +30,8 @@
RS6000_BUILTIN_A -- ABS builtins
RS6000_BUILTIN_D -- DST builtins
RS6000_BUILTIN_E -- SPE EVSEL builtins.
- RS6000_BUILTIN_P -- Altivec and VSX predicate builtins
+ RS6000_BUILTIN_H -- HTM builtins
+ RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins
RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins
RS6000_BUILTIN_S -- SPE predicate builtins
RS6000_BUILTIN_X -- special builtins
@@ -66,6 +67,10 @@
#error "RS6000_BUILTIN_E is not defined."
#endif
+#ifndef RS6000_BUILTIN_H
+ #error "RS6000_BUILTIN_H is not defined."
+#endif
+
#ifndef RS6000_BUILTIN_P
#error "RS6000_BUILTIN_P is not defined."
#endif
@@ -301,6 +306,174 @@
| RS6000_BTC_SPECIAL), \
CODE_FOR_nothing) /* ICODE */
+/* ISA 2.07 (power8) vector convenience macros. */
+/* For the instructions that are encoded as altivec instructions use
+ __builtin_altivec_ as the builtin name. */
+#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_AV_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_PREDICATE), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+/* For the instructions encoded as VSX instructions use __builtin_vsx_ as the
+ builtin name. */
+#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_vsx_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_OVERLOAD_1(ENUM, NAME) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_P8V_OVERLOAD_2(ENUM, NAME) \
+ RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_P8V_OVERLOAD_3(ENUM, NAME) \
+ RS6000_BUILTIN_3 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+/* Crypto convenience macros. */
+#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+/* HTM convenience macros. */
+#define BU_HTM_0(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ RS6000_BTC_ ## ATTR, /* ATTR */ \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_HTM_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_HTM_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_HTM_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_HTM_SPR0(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_SPR), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_HTM_SPR1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HTM, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY \
+ | RS6000_BTC_SPR \
+ | RS6000_BTC_VOID), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
/* SPE convenience macros. */
#define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \
@@ -397,6 +570,75 @@
MASK, /* MASK */ \
(ATTR | RS6000_BTC_SPECIAL), /* ATTR */ \
CODE_FOR_nothing) /* ICODE */
+
+
+/* Builtins for decimal floating point (DFP) instructions. */
+#define BU_DFP_MISC_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_DFP, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_DFP_MISC_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_DFP, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+
+/* Miscellaneous builtins for instructions added in ISA 2.06. These
+ instructions don't require either the DFP or VSX options, just the basic ISA
+ 2.06 (popcntd) enablement since they operate on general purpose
+ registers. */
+#define BU_P7_MISC_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_POPCNTD, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P7_MISC_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_POPCNTD, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+
+/* Miscellaneous builtins for instructions added in ISA 2.07. These
+ instructions do require the ISA 2.07 vector support, but they aren't vector
+ instructions. */
+#define BU_P8V_MISC_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+/* Miscellaneous builtins. */
+#define BU_MISC_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HARD_FLOAT, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_MISC_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_" NAME, /* NAME */ \
+ RS6000_BTM_HARD_FLOAT, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
#endif
/* Ensure 0 is not a legitimate index. */
@@ -414,12 +656,14 @@ BU_ALTIVEC_3 (VMSUMSHM, "vmsumshm", CONST, altivec_vmsumshm)
BU_ALTIVEC_3 (VMSUMUHS, "vmsumuhs", SAT, altivec_vmsumuhs)
BU_ALTIVEC_3 (VMSUMSHS, "vmsumshs", SAT, altivec_vmsumshs)
BU_ALTIVEC_3 (VNMSUBFP, "vnmsubfp", FP, nfmsv4sf4)
+BU_ALTIVEC_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti)
BU_ALTIVEC_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df)
BU_ALTIVEC_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di)
BU_ALTIVEC_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf)
BU_ALTIVEC_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si)
BU_ALTIVEC_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi)
BU_ALTIVEC_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi_uns)
+BU_ALTIVEC_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns)
BU_ALTIVEC_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns)
BU_ALTIVEC_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns)
BU_ALTIVEC_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns)
@@ -430,10 +674,12 @@ BU_ALTIVEC_3 (VSEL_8HI, "vsel_8hi", CONST, vector_select_v8hi)
BU_ALTIVEC_3 (VSEL_16QI, "vsel_16qi", CONST, vector_select_v16qi)
BU_ALTIVEC_3 (VSEL_2DF, "vsel_2df", CONST, vector_select_v2df)
BU_ALTIVEC_3 (VSEL_2DI, "vsel_2di", CONST, vector_select_v2di)
+BU_ALTIVEC_3 (VSEL_1TI, "vsel_1ti", CONST, vector_select_v1ti)
BU_ALTIVEC_3 (VSEL_4SI_UNS, "vsel_4si_uns", CONST, vector_select_v4si_uns)
BU_ALTIVEC_3 (VSEL_8HI_UNS, "vsel_8hi_uns", CONST, vector_select_v8hi_uns)
BU_ALTIVEC_3 (VSEL_16QI_UNS, "vsel_16qi_uns", CONST, vector_select_v16qi_uns)
BU_ALTIVEC_3 (VSEL_2DI_UNS, "vsel_2di_uns", CONST, vector_select_v2di_uns)
+BU_ALTIVEC_3 (VSEL_1TI_UNS, "vsel_1ti_uns", CONST, vector_select_v1ti_uns)
BU_ALTIVEC_3 (VSLDOI_16QI, "vsldoi_16qi", CONST, altivec_vsldoi_v16qi)
BU_ALTIVEC_3 (VSLDOI_8HI, "vsldoi_8hi", CONST, altivec_vsldoi_v8hi)
BU_ALTIVEC_3 (VSLDOI_4SI, "vsldoi_4si", CONST, altivec_vsldoi_v4si)
@@ -626,6 +872,8 @@ BU_ALTIVEC_X (ST_INTERNAL_2df, "st_internal_4sf", MEM)
BU_ALTIVEC_X (LD_INTERNAL_2df, "ld_internal_2df", MEM)
BU_ALTIVEC_X (ST_INTERNAL_2di, "st_internal_2di", MEM)
BU_ALTIVEC_X (LD_INTERNAL_2di, "ld_internal_2di", MEM)
+BU_ALTIVEC_X (ST_INTERNAL_1ti, "st_internal_1ti", MEM)
+BU_ALTIVEC_X (LD_INTERNAL_1ti, "ld_internal_1ti", MEM)
BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC)
BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC)
BU_ALTIVEC_X (DSSALL, "dssall", MISC)
@@ -636,8 +884,26 @@ BU_ALTIVEC_X (LVEBX, "lvebx", MEM)
BU_ALTIVEC_X (LVEHX, "lvehx", MEM)
BU_ALTIVEC_X (LVEWX, "lvewx", MEM)
BU_ALTIVEC_X (LVXL, "lvxl", MEM)
+BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", MEM)
+BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", MEM)
+BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf", MEM)
+BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si", MEM)
+BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", MEM)
+BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM)
BU_ALTIVEC_X (LVX, "lvx", MEM)
+BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", MEM)
+BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", MEM)
+BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", MEM)
+BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", MEM)
+BU_ALTIVEC_X (LVX_V8HI, "lvx_v8hi", MEM)
+BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi", MEM)
BU_ALTIVEC_X (STVX, "stvx", MEM)
+BU_ALTIVEC_X (STVX_V2DF, "stvx_v2df", MEM)
+BU_ALTIVEC_X (STVX_V2DI, "stvx_v2di", MEM)
+BU_ALTIVEC_X (STVX_V4SF, "stvx_v4sf", MEM)
+BU_ALTIVEC_X (STVX_V4SI, "stvx_v4si", MEM)
+BU_ALTIVEC_X (STVX_V8HI, "stvx_v8hi", MEM)
+BU_ALTIVEC_X (STVX_V16QI, "stvx_v16qi", MEM)
BU_ALTIVEC_C (LVLX, "lvlx", MEM)
BU_ALTIVEC_C (LVLXL, "lvlxl", MEM)
BU_ALTIVEC_C (LVRX, "lvrx", MEM)
@@ -646,6 +912,12 @@ BU_ALTIVEC_X (STVEBX, "stvebx", MEM)
BU_ALTIVEC_X (STVEHX, "stvehx", MEM)
BU_ALTIVEC_X (STVEWX, "stvewx", MEM)
BU_ALTIVEC_X (STVXL, "stvxl", MEM)
+BU_ALTIVEC_X (STVXL_V2DF, "stvxl_v2df", MEM)
+BU_ALTIVEC_X (STVXL_V2DI, "stvxl_v2di", MEM)
+BU_ALTIVEC_X (STVXL_V4SF, "stvxl_v4sf", MEM)
+BU_ALTIVEC_X (STVXL_V4SI, "stvxl_v4si", MEM)
+BU_ALTIVEC_X (STVXL_V8HI, "stvxl_v8hi", MEM)
+BU_ALTIVEC_X (STVXL_V16QI, "stvxl_v16qi", MEM)
BU_ALTIVEC_C (STVLX, "stvlx", MEM)
BU_ALTIVEC_C (STVLXL, "stvlxl", MEM)
BU_ALTIVEC_C (STVRX, "stvrx", MEM)
@@ -904,34 +1176,40 @@ BU_VSX_3 (XVMSUBDP, "xvmsubdp", CONST, fmsv2df4)
BU_VSX_3 (XVNMADDDP, "xvnmadddp", CONST, nfmav2df4)
BU_VSX_3 (XVNMSUBDP, "xvnmsubdp", CONST, nfmsv2df4)
+BU_VSX_3 (XXSEL_1TI, "xxsel_1ti", CONST, vector_select_v1ti)
BU_VSX_3 (XXSEL_2DI, "xxsel_2di", CONST, vector_select_v2di)
BU_VSX_3 (XXSEL_2DF, "xxsel_2df", CONST, vector_select_v2df)
BU_VSX_3 (XXSEL_4SF, "xxsel_4sf", CONST, vector_select_v4sf)
BU_VSX_3 (XXSEL_4SI, "xxsel_4si", CONST, vector_select_v4si)
BU_VSX_3 (XXSEL_8HI, "xxsel_8hi", CONST, vector_select_v8hi)
BU_VSX_3 (XXSEL_16QI, "xxsel_16qi", CONST, vector_select_v16qi)
+BU_VSX_3 (XXSEL_1TI_UNS, "xxsel_1ti_uns", CONST, vector_select_v1ti_uns)
BU_VSX_3 (XXSEL_2DI_UNS, "xxsel_2di_uns", CONST, vector_select_v2di_uns)
BU_VSX_3 (XXSEL_4SI_UNS, "xxsel_4si_uns", CONST, vector_select_v4si_uns)
BU_VSX_3 (XXSEL_8HI_UNS, "xxsel_8hi_uns", CONST, vector_select_v8hi_uns)
BU_VSX_3 (XXSEL_16QI_UNS, "xxsel_16qi_uns", CONST, vector_select_v16qi_uns)
+BU_VSX_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti)
BU_VSX_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di)
BU_VSX_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df)
BU_VSX_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf)
BU_VSX_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si)
BU_VSX_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi)
BU_VSX_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi)
+BU_VSX_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns)
BU_VSX_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns)
BU_VSX_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns)
BU_VSX_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns)
BU_VSX_3 (VPERM_16QI_UNS, "vperm_16qi_uns", CONST, altivec_vperm_v16qi_uns)
+BU_VSX_3 (XXPERMDI_1TI, "xxpermdi_1ti", CONST, vsx_xxpermdi_v1ti)
BU_VSX_3 (XXPERMDI_2DF, "xxpermdi_2df", CONST, vsx_xxpermdi_v2df)
BU_VSX_3 (XXPERMDI_2DI, "xxpermdi_2di", CONST, vsx_xxpermdi_v2di)
BU_VSX_3 (XXPERMDI_4SF, "xxpermdi_4sf", CONST, vsx_xxpermdi_v4sf)
BU_VSX_3 (XXPERMDI_4SI, "xxpermdi_4si", CONST, vsx_xxpermdi_v4si)
BU_VSX_3 (XXPERMDI_8HI, "xxpermdi_8hi", CONST, vsx_xxpermdi_v8hi)
BU_VSX_3 (XXPERMDI_16QI, "xxpermdi_16qi", CONST, vsx_xxpermdi_v16qi)
+BU_VSX_3 (SET_1TI, "set_1ti", CONST, vsx_set_v1ti)
BU_VSX_3 (SET_2DF, "set_2df", CONST, vsx_set_v2df)
BU_VSX_3 (SET_2DI, "set_2di", CONST, vsx_set_v2di)
BU_VSX_3 (XXSLDWI_2DI, "xxsldwi_2di", CONST, vsx_xxsldwi_v2di)
@@ -1012,7 +1290,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg)
BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2)
BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp)
-BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp)
+BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp)
BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp)
BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp)
BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe)
@@ -1052,9 +1330,9 @@ BU_VSX_1 (XVRSPIZ, "xvrspiz", CONST, vsx_btruncv4sf2)
BU_VSX_1 (XSRDPI, "xsrdpi", CONST, vsx_xsrdpi)
BU_VSX_1 (XSRDPIC, "xsrdpic", CONST, vsx_xsrdpic)
-BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, vsx_floordf2)
-BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, vsx_ceildf2)
-BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, vsx_btruncdf2)
+BU_VSX_1 (XSRDPIM, "xsrdpim", CONST, floordf2)
+BU_VSX_1 (XSRDPIP, "xsrdpip", CONST, ceildf2)
+BU_VSX_1 (XSRDPIZ, "xsrdpiz", CONST, btruncdf2)
/* VSX predicate functions. */
BU_VSX_P (XVCMPEQSP_P, "xvcmpeqsp_p", CONST, vector_eq_v4sf_p)
@@ -1066,6 +1344,7 @@ BU_VSX_P (XVCMPGTDP_P, "xvcmpgtdp_p", CONST, vector_gt_v2df_p)
/* VSX builtins that are handled as special cases. */
BU_VSX_X (LXSDX, "lxsdx", MEM)
+BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", MEM)
BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", MEM)
BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", MEM)
BU_VSX_X (LXVDSX, "lxvdsx", MEM)
@@ -1074,6 +1353,7 @@ BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM)
BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM)
BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM)
BU_VSX_X (STXSDX, "stxsdx", MEM)
+BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM)
BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM)
BU_VSX_X (STXVD2X_V2DI, "stxvd2x_v2di", MEM)
BU_VSX_X (STXVW4X_V4SF, "stxvw4x_v4sf", MEM)
@@ -1104,10 +1384,13 @@ BU_VSX_X (XSNMADDMDP, "xsnmaddmdp", FP)
BU_VSX_X (XSNMSUBADP, "xsnmsubadp", FP)
BU_VSX_X (XSNMSUBMDP, "xsnmsubmdp", FP)
BU_VSX_X (XSSUBDP, "xssubdp", FP)
+BU_VSX_X (VEC_INIT_V1TI, "vec_init_v1ti", CONST)
BU_VSX_X (VEC_INIT_V2DF, "vec_init_v2df", CONST)
BU_VSX_X (VEC_INIT_V2DI, "vec_init_v2di", CONST)
+BU_VSX_X (VEC_SET_V1TI, "vec_set_v1ti", CONST)
BU_VSX_X (VEC_SET_V2DF, "vec_set_v2df", CONST)
BU_VSX_X (VEC_SET_V2DI, "vec_set_v2di", CONST)
+BU_VSX_X (VEC_EXT_V1TI, "vec_ext_v1ti", CONST)
BU_VSX_X (VEC_EXT_V2DF, "vec_ext_v2df", CONST)
BU_VSX_X (VEC_EXT_V2DI, "vec_ext_v2di", CONST)
@@ -1132,6 +1415,249 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw")
BU_VSX_OVERLOAD_X (LD, "ld")
BU_VSX_OVERLOAD_X (ST, "st")
+/* 1 argument VSX instructions added in ISA 2.07. */
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
+
+/* 1 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
+BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
+BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
+
+/* 2 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VADDCUQ, "vaddcuq", CONST, altivec_vaddcuq)
+BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
+BU_P8V_AV_2 (VADDUQM, "vadduqm", CONST, altivec_vadduqm)
+BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
+BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
+BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
+BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
+BU_P8V_AV_2 (VBPERMQ, "vbpermq", CONST, altivec_vbpermq)
+BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
+BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
+BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
+BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus)
+BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
+BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
+BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
+BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
+BU_P8V_AV_2 (VSUBCUQ, "vsubcuq", CONST, altivec_vsubcuq)
+BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+BU_P8V_AV_2 (VSUBUQM, "vsubuqm", CONST, altivec_vsubuqm)
+
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
+BU_P8V_AV_2 (EQV_V1TI, "eqv_v1ti", CONST, eqvv1ti3)
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
+
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
+BU_P8V_AV_2 (NAND_V1TI, "nand_v1ti", CONST, nandv1ti3)
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
+
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
+BU_P8V_AV_2 (ORC_V1TI, "orc_v1ti", CONST, orcv1ti3)
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+
+/* 3 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_3 (VADDEUQM, "vaddeuqm", CONST, altivec_vaddeuqm)
+BU_P8V_AV_3 (VADDECUQ, "vaddecuq", CONST, altivec_vaddecuq)
+BU_P8V_AV_3 (VSUBEUQM, "vsubeuqm", CONST, altivec_vsubeuqm)
+BU_P8V_AV_3 (VSUBECUQ, "vsubecuq", CONST, altivec_vsubecuq)
+
+/* Vector comparison instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
+BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
+BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di)
+
+/* Vector comparison predicate instructions added in ISA 2.07. */
+BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p)
+BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p)
+BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
+
+/* ISA 2.07 vector overloaded 1 argument functions. */
+BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
+BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
+
+/* ISA 2.07 vector overloaded 2 argument functions. */
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
+BU_P8V_OVERLOAD_2 (NAND, "nand")
+BU_P8V_OVERLOAD_2 (ORC, "orc")
+BU_P8V_OVERLOAD_2 (VADDCUQ, "vaddcuq")
+BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
+BU_P8V_OVERLOAD_2 (VADDUQM, "vadduqm")
+BU_P8V_OVERLOAD_2 (VBPERMQ, "vbpermq")
+BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
+BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
+BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
+BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
+BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
+BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
+BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
+BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
+BU_P8V_OVERLOAD_2 (VRLD, "vrld")
+BU_P8V_OVERLOAD_2 (VSLD, "vsld")
+BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
+BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
+BU_P8V_OVERLOAD_2 (VSUBCUQ, "vsubcuq")
+BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
+BU_P8V_OVERLOAD_2 (VSUBUQM, "vsubuqm")
+
+/* ISA 2.07 vector overloaded 3 argument functions. */
+BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq")
+BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm")
+BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq")
+BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm")
+
+
+/* 2 argument extended divide functions added in ISA 2.06. */
+BU_P7_MISC_2 (DIVWE, "divwe", CONST, dive_si)
+BU_P7_MISC_2 (DIVWEO, "divweo", CONST, diveo_si)
+BU_P7_MISC_2 (DIVWEU, "divweu", CONST, diveu_si)
+BU_P7_MISC_2 (DIVWEUO, "divweuo", CONST, diveuo_si)
+BU_P7_MISC_2 (DIVDE, "divde", CONST, dive_di)
+BU_P7_MISC_2 (DIVDEO, "divdeo", CONST, diveo_di)
+BU_P7_MISC_2 (DIVDEU, "divdeu", CONST, diveu_di)
+BU_P7_MISC_2 (DIVDEUO, "divdeuo", CONST, diveuo_di)
+
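A minimal sketch of the extended-divide builtins declared above in use;
__builtin_divde computes (dividend << 64) / divisor and, like the other
doubleword variants, assumes a 64-bit environment with -mcpu=power7 or
-mpopcntd:

    /* High-doubleword divide via the ISA 2.06 divde instruction.  */
    long long
    div_extended (long long hi, long long divisor)
    {
      return __builtin_divde (hi, divisor);
    }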
+/* 1 argument DFP (decimal floating point) functions added in ISA 2.05. */
+BU_DFP_MISC_1 (DXEX, "dxex", CONST, dfp_dxex_dd)
+BU_DFP_MISC_1 (DXEXQ, "dxexq", CONST, dfp_dxex_td)
+
+/* 2 argument DFP (decimal floating point) functions added in ISA 2.05. */
+BU_DFP_MISC_2 (DDEDPD, "ddedpd", CONST, dfp_ddedpd_dd)
+BU_DFP_MISC_2 (DDEDPDQ, "ddedpdq", CONST, dfp_ddedpd_td)
+BU_DFP_MISC_2 (DENBCD, "denbcd", CONST, dfp_denbcd_dd)
+BU_DFP_MISC_2 (DENBCDQ, "denbcdq", CONST, dfp_denbcd_td)
+BU_DFP_MISC_2 (DIEX, "diex", CONST, dfp_diex_dd)
+BU_DFP_MISC_2 (DIEXQ, "diexq", CONST, dfp_diex_td)
+BU_DFP_MISC_2 (DSCLI, "dscli", CONST, dfp_dscli_dd)
+BU_DFP_MISC_2 (DSCLIQ, "dscliq", CONST, dfp_dscli_td)
+BU_DFP_MISC_2 (DSCRI, "dscri", CONST, dfp_dscri_dd)
+BU_DFP_MISC_2 (DSCRIQ, "dscriq", CONST, dfp_dscri_td)
+
+/* 1 argument BCD functions added in ISA 2.06. */
+BU_P7_MISC_1 (CDTBCD, "cdtbcd", CONST, cdtbcd)
+BU_P7_MISC_1 (CBCDTD, "cbcdtd", CONST, cbcdtd)
+
+/* 2 argument BCD functions added in ISA 2.06. */
+BU_P7_MISC_2 (ADDG6S, "addg6s", CONST, addg6s)
+
+/* 3 argument BCD functions added in ISA 2.07. */
+BU_P8V_MISC_3 (BCDADD, "bcdadd", CONST, bcdadd)
+BU_P8V_MISC_3 (BCDADD_LT, "bcdadd_lt", CONST, bcdadd_lt)
+BU_P8V_MISC_3 (BCDADD_EQ, "bcdadd_eq", CONST, bcdadd_eq)
+BU_P8V_MISC_3 (BCDADD_GT, "bcdadd_gt", CONST, bcdadd_gt)
+BU_P8V_MISC_3 (BCDADD_OV, "bcdadd_ov", CONST, bcdadd_unordered)
+BU_P8V_MISC_3 (BCDSUB, "bcdsub", CONST, bcdsub)
+BU_P8V_MISC_3 (BCDSUB_LT, "bcdsub_lt", CONST, bcdsub_lt)
+BU_P8V_MISC_3 (BCDSUB_EQ, "bcdsub_eq", CONST, bcdsub_eq)
+BU_P8V_MISC_3 (BCDSUB_GT, "bcdsub_gt", CONST, bcdsub_gt)
+BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered)
+
+/* 2 argument functions to pack/unpack 128-bit floating point types. */
+BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd)
+BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd)
+
+BU_MISC_2 (PACK_TF, "pack_longdouble", CONST, packtf)
+BU_MISC_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf)
+BU_MISC_1 (UNPACK_TF_0, "longdouble_dw0", CONST, unpacktf_0)
+BU_MISC_1 (UNPACK_TF_1, "longdouble_dw1", CONST, unpacktf_1)
+
+BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti)
+BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti)
+
+
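A hedged sketch of the vector-int128 pack/unpack pair above in use (signatures
as GCC documents them; the selector argument of the unpack builtin picks
doubleword 0 or 1). Compile with -mcpu=power8 in a 64-bit environment:

    /* Split a 128-bit vector integer into doublewords and rebuild it
       with the halves swapped.  */
    vector __int128_t
    swap_halves (vector __int128_t v)
    {
      unsigned long long dw0 = __builtin_unpack_vector_int128 (v, 0);
      unsigned long long dw1 = __builtin_unpack_vector_int128 (v, 1);
      return __builtin_pack_vector_int128 (dw1, dw0);
    }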
+/* 1 argument crypto functions. */
+BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox)
+
+/* 2 argument crypto functions. */
+BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher)
+BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast)
+BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher)
+BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast)
+BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
+BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
+BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
+BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
+
+/* 3 argument crypto functions. */
+BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di)
+BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si)
+BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi)
+BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi)
+BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw)
+BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad)
+
+/* 2 argument crypto overloaded functions. */
+BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum")
+
+/* 3 argument crypto overloaded functions. */
+BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor")
+BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma")
+
+
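A sketch of the crypto builtins above in use, assuming -mcrypto and the types
GCC documents for these functions; vcipher performs one AES encryption round
(SubBytes, ShiftRows, MixColumns, then the round-key XOR):

    #include <altivec.h>

    /* One AES encryption round on a 128-bit state.  */
    vector unsigned long long
    aes_round (vector unsigned long long state, vector unsigned long long rkey)
    {
      return __builtin_crypto_vcipher (state, rkey);
    }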
+/* HTM functions. */
+BU_HTM_1 (TABORT, "tabort", MISC, tabort)
+BU_HTM_3 (TABORTDC, "tabortdc", MISC, tabortdc)
+BU_HTM_3 (TABORTDCI, "tabortdci", MISC, tabortdci)
+BU_HTM_3 (TABORTWC, "tabortwc", MISC, tabortwc)
+BU_HTM_3 (TABORTWCI, "tabortwci", MISC, tabortwci)
+BU_HTM_1 (TBEGIN, "tbegin", MISC, tbegin)
+BU_HTM_1 (TCHECK, "tcheck", MISC, tcheck)
+BU_HTM_1 (TEND, "tend", MISC, tend)
+BU_HTM_0 (TENDALL, "tendall", MISC, tend)
+BU_HTM_0 (TRECHKPT, "trechkpt", MISC, trechkpt)
+BU_HTM_1 (TRECLAIM, "treclaim", MISC, treclaim)
+BU_HTM_0 (TRESUME, "tresume", MISC, tsr)
+BU_HTM_0 (TSUSPEND, "tsuspend", MISC, tsr)
+BU_HTM_1 (TSR, "tsr", MISC, tsr)
+BU_HTM_0 (TTEST, "ttest", MISC, ttest)
+
+BU_HTM_SPR0 (GET_TFHAR, "get_tfhar", MISC, nothing)
+BU_HTM_SPR1 (SET_TFHAR, "set_tfhar", MISC, nothing)
+BU_HTM_SPR0 (GET_TFIAR, "get_tfiar", MISC, nothing)
+BU_HTM_SPR1 (SET_TFIAR, "set_tfiar", MISC, nothing)
+BU_HTM_SPR0 (GET_TEXASR, "get_texasr", MISC, nothing)
+BU_HTM_SPR1 (SET_TEXASR, "set_texasr", MISC, nothing)
+BU_HTM_SPR0 (GET_TEXASRU, "get_texasru", MISC, nothing)
+BU_HTM_SPR1 (SET_TEXASRU, "set_texasru", MISC, nothing)
+
+
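A minimal transaction sketch using the HTM builtins above (requires -mhtm);
__builtin_tbegin returns nonzero when the transaction starts, and on a failure
or abort control resumes in the else branch:

    if (__builtin_tbegin (0))
      {
        /* Transactional code: speculative until __builtin_tend.  */
        __builtin_tend (0);
      }
    else
      {
        /* Transaction failed or aborted: take a fallback path.  */
      }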
/* 3 argument paired floating point builtins. */
BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4)
BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4)
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-c.c b/gcc-4.8/gcc/config/rs6000/rs6000-c.c
index a4f66ba8f..9cb6387b9 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-c.c
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-c.c
@@ -90,6 +90,8 @@ static GTY(()) tree pixel_keyword;
static GTY(()) tree __bool_keyword;
static GTY(()) tree bool_keyword;
static GTY(()) tree _Bool_keyword;
+static GTY(()) tree __int128_type;
+static GTY(()) tree __uint128_type;
/* Preserved across calls. */
static tree expand_bool_pixel;
@@ -122,9 +124,10 @@ altivec_categorize_keyword (const cpp_token *tok)
static void
init_vector_keywords (void)
{
- /* Keywords without two leading underscores are context-sensitive,
- and hence implemented as conditional macros, controlled by the
- rs6000_macro_to_expand() function below. */
+ /* Keywords without two leading underscores are context-sensitive, and hence
+ implemented as conditional macros, controlled by the
+ rs6000_macro_to_expand() function below. If we have ISA 2.07 64-bit
+ support, record the __int128_t and __uint128_t types. */
__vector_keyword = get_identifier ("__vector");
C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL;
@@ -146,6 +149,12 @@ init_vector_keywords (void)
_Bool_keyword = get_identifier ("_Bool");
C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL;
+
+ if (TARGET_VADDUQM)
+ {
+ __int128_type = get_identifier ("__int128_t");
+ __uint128_type = get_identifier ("__uint128_t");
+ }
}
/* Called to decide whether a conditional macro should be expanded.
@@ -221,7 +230,8 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
|| rid_code == RID_SHORT || rid_code == RID_SIGNED
|| rid_code == RID_INT || rid_code == RID_CHAR
|| rid_code == RID_FLOAT
- || (rid_code == RID_DOUBLE && TARGET_VSX))
+ || (rid_code == RID_DOUBLE && TARGET_VSX)
+ || (rid_code == RID_INT128 && TARGET_VADDUQM))
{
expand_this = C_CPP_HASHNODE (__vector_keyword);
/* If the next keyword is bool or pixel, it
@@ -248,6 +258,13 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok)
expand_bool_pixel = __bool_keyword;
}
}
+
+ /* Support vector __int128_t, but we don't need to worry about bool
+ or pixel on this type. */
+ else if (TARGET_VADDUQM
+ && (ident == C_CPP_HASHNODE (__int128_type)
+ || ident == C_CPP_HASHNODE (__uint128_type)))
+ expand_this = C_CPP_HASHNODE (__vector_keyword);
}
}
else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__pixel_keyword))
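With this change, power8 compilations can apply the context-sensitive vector
keyword to the 128-bit integer types, e.g. (a sketch; needs -mcpu=power8):

    vector __int128_t  vs;   /* one signed 128-bit element    */
    vector __uint128_t vu;   /* one unsigned 128-bit element  */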
@@ -315,6 +332,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X");
if ((flags & OPTION_MASK_POPCNTD) != 0)
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
+ if ((flags & OPTION_MASK_DIRECT_MOVE) != 0)
+ rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8");
if ((flags & OPTION_MASK_SOFT_FLOAT) != 0)
rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT");
if ((flags & OPTION_MASK_RECIP_PRECISION) != 0)
@@ -331,6 +350,16 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
}
if ((flags & OPTION_MASK_VSX) != 0)
rs6000_define_or_undefine_macro (define_p, "__VSX__");
+ if ((flags & OPTION_MASK_HTM) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__HTM__");
+ if ((flags & OPTION_MASK_P8_VECTOR) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY__");
+ if ((flags & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
+ if ((flags & OPTION_MASK_CRYPTO) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
/* options from the builtin masks. */
if ((bu_mask & RS6000_BTM_SPE) != 0)
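The new predefines give source code a way to select ISA 2.07 paths; a sketch:

    #ifdef _ARCH_PWR8
    /* power8 (ISA 2.07) instructions are available.  */
    #endif
    #if defined (__POWER8_VECTOR__) && defined (__CRYPTO__)
    /* The ISA 2.07 vector and crypto categories are both enabled.  */
    #endif
    #ifdef __HTM__
    /* The hardware transactional memory builtins are usable.  */
    #endif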
@@ -453,6 +482,10 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile)
case ABI_AIX:
builtin_define ("_CALL_AIXDESC");
builtin_define ("_CALL_AIX");
+ builtin_define ("_CALL_ELF=1");
+ break;
+ case ABI_ELFv2:
+ builtin_define ("_CALL_ELF=2");
break;
case ABI_DARWIN:
builtin_define ("_CALL_DARWIN");
@@ -465,6 +498,13 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile)
if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
builtin_define ("__NO_FPRS__");
+ /* Whether aggregates passed by value are aligned to a 16 byte boundary
+ if their alignment is 16 bytes or larger. */
+ if ((TARGET_MACHO && rs6000_darwin64_abi)
+ || DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
+ builtin_define ("__STRUCT_PARM_ALIGN__=16");
+
/* Generate defines for Xilinx FPU. */
if (rs6000_xilinx_fpu)
{
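A sketch of consuming the new predefine; it advertises that a by-value
aggregate whose alignment is 16 bytes or more is also passed on a 16-byte
boundary:

    struct quad
    {
      double d[2];
    } __attribute__ ((aligned (16)));

    #if defined (__STRUCT_PARM_ALIGN__) && __STRUCT_PARM_ALIGN__ >= 16
    /* Parameter slots holding struct quad are 16-byte aligned.  */
    #endif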
@@ -505,6 +545,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
@@ -577,12 +619,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
@@ -601,6 +651,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
@@ -651,10 +705,27 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM,
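With the overload entries above, the generic vec_add accepts the new 64-bit
and 128-bit element types; a sketch (compile with -mcpu=power8):

    #include <altivec.h>

    vector unsigned long long
    add_u64 (vector unsigned long long a, vector unsigned long long b)
    {
      return vec_add (a, b);    /* resolves to vaddudm */
    }

    vector __int128_t
    add_s128 (vector __int128_t a, vector __int128_t b)
    {
      return vec_add (a, b);    /* resolves to vadduqm */
    }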
@@ -937,6 +1008,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
@@ -975,6 +1050,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
@@ -1021,6 +1100,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
@@ -1045,54 +1128,54 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX,
+ { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
{ ALTIVEC_BUILTIN_VEC_LDE, ALTIVEC_BUILTIN_LVEBX,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
@@ -1130,55 +1213,55 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
{ ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SI,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V8HI,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V16QI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI, 0 },
- { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL,
+ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V2DI,
RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
@@ -1418,6 +1501,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
@@ -1604,6 +1699,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
@@ -1786,6 +1893,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
@@ -1812,6 +1925,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS,
RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS,
@@ -1824,6 +1941,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
@@ -1844,6 +1963,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
@@ -1868,6 +1991,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
@@ -2032,6 +2159,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
@@ -2056,6 +2187,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
@@ -2196,10 +2331,27 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM,
@@ -2730,63 +2882,63 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE },
{ ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI,
RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V2DI,
RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_bool_V2DI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
- { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX,
+ { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX_V8HI,
RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
{ ALTIVEC_BUILTIN_VEC_STE, ALTIVEC_BUILTIN_STVEBX,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
@@ -2858,64 +3010,64 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
{ ALTIVEC_BUILTIN_VEC_STVEBX, ALTIVEC_BUILTIN_STVEBX,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_void },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SF,
RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V4SI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V4SI,
RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V8HI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V16QI,
RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V8HI,
RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_unsigned_V2DI },
- { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL,
+ { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL_V2DI,
RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI,
~RS6000_BTI_bool_V2DI },
{ ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX,
@@ -3327,6 +3479,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
@@ -3372,11 +3538,509 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ /* Power8 vector overloaded functions. */
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
+ { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ,
+ RS6000_BTI_V2DI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_VBPERMQ, P8V_BUILTIN_VBPERMQ,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI },
+ { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI },
+
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI,
+ RS6000_BTI_unsigned_V1TI, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, 0, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
+
+ /* Crypto builtins. */
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+
{ (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
};
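
Each entry in the table above pairs an overloaded builtin with one
type-specific implementation: { overloaded code, specific code, return
type, up to three operand types }, where 0 marks an unused operand slot
and the all-zero record terminates the table. A minimal user-level
sketch of how one of the new Power8 overloads resolves (assuming the
vec_eqv wrapper this patch series adds to altivec.h; compile with
something like gcc -mcpu=power8 -maltivec):

    #include <altivec.h>
    #include <stdio.h>

    int
    main (void)
    {
      vector unsigned int a = { 1, 2, 3, 4 };
      vector unsigned int b = { 1, 0, 3, 0 };

      /* (unsigned V4SI, unsigned V4SI) matches the P8V_BUILTIN_EQV_V4SI
         entry above, so this lowers to a single xxleqv: c = ~(a ^ b).  */
      vector unsigned int c = vec_eqv (a, b);

      printf ("0x%08x\n", c[0]);   /* ~(1 ^ 1) == 0xffffffff */
      return 0;
    }
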
@@ -3560,6 +4224,10 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
unsigned_p = TYPE_UNSIGNED (type);
switch (TYPE_MODE (type))
{
+ case TImode:
+ type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+ size = 1;
+ break;
case DImode:
type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
size = 2;
@@ -3591,7 +4259,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
return build_constructor (type, vec);
}
- /* For now use pointer tricks to do the extaction, unless we are on VSX
+ /* For now use pointer tricks to do the extraction, unless we are on VSX
extracting a double from a constant offset. */
if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
{
@@ -3619,6 +4287,17 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
goto bad;
+ /* If we are targeting little-endian, but -maltivec=be has been
+ specified to override the element order, adjust the element
+ number accordingly. */
+ if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2)
+ {
+ unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1;
+ arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2),
+ build_int_cstu (TREE_TYPE (arg2), last_elem),
+ arg2);
+ }
+
/* If we can use the VSX xxpermdi instruction, use that for extract. */
mode = TYPE_MODE (arg1_type);
if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
@@ -3636,6 +4315,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
if (call)
return build_call_expr (call, 2, arg1, arg2);
}
+ else if (mode == V1TImode && VECTOR_MEM_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && TREE_INT_CST_LOW (arg2) == 0)
+ {
+ tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+ return build_call_expr (call, 2, arg1, arg2);
+ }
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
arg1_inner_type = TREE_TYPE (arg1_type);
@@ -3666,7 +4353,7 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
return stmt;
}
- /* For now use pointer tricks to do the insertation, unless we are on VSX
+ /* For now use pointer tricks to do the insertion, unless we are on VSX
 inserting a double to a constant offset. */
if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
{
@@ -3696,6 +4383,17 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
goto bad;
+ /* If we are targeting little-endian, but -maltivec=be has been
+ specified to override the element order, adjust the element
+ number accordingly. */
+ if (!BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 2)
+ {
+ unsigned int last_elem = TYPE_VECTOR_SUBPARTS (arg1_type) - 1;
+ arg2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (arg2),
+ build_int_cstu (TREE_TYPE (arg2), last_elem),
+ arg2);
+ }
+
/* If we can use the VSX xxpermdi instruction, use that for insert. */
mode = TYPE_MODE (arg1_type);
if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
@@ -3715,6 +4413,17 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
if (call)
return build_call_expr (call, 3, arg1, arg0, arg2);
}
+ else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+ && TREE_CODE (arg2) == INTEGER_CST
+ && TREE_INT_CST_HIGH (arg2) == 0
+ && TREE_INT_CST_LOW (arg2) == 0)
+ {
+ tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+
+ /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+ reversed. */
+ return build_call_expr (call, 3, arg1, arg0, arg2);
+ }
/* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
arg1_inner_type = TREE_TYPE (arg1_type);
@@ -3824,7 +4533,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
|| rs6000_builtin_type_compatible (types[1], desc->op2))
&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
- || rs6000_builtin_type_compatible (types[2], desc->op3)))
+ || rs6000_builtin_type_compatible (types[2], desc->op3))
+ && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
return altivec_build_resolved_builtin (args, n, desc);
bad:
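
The two -maltivec=be hunks above perform the same index flip for
vec_extract and vec_insert. A standalone sketch of the arithmetic (the
real code builds it as a MINUS_EXPR tree on arg2):

    /* Big-endian element number n, remapped to the little-endian lane
       order actually used by the hardware.  */
    static unsigned int
    flip_element (unsigned int n, unsigned int nunits)
    {
      return (nunits - 1) - n;   /* V4SI: 0->3, 1->2, 2->1, 3->0 */
    }
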
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-cpus.def b/gcc-4.8/gcc/config/rs6000/rs6000-cpus.def
index 3f17c6f23..089c98d72 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-cpus.def
@@ -28,7 +28,7 @@
ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel,
fre, fsqrt, etc. were no longer documented as optional. Group masks by
server and embedded. */
-#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \
+#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \
| OPTION_MASK_CMPB \
| OPTION_MASK_RECIP_PRECISION \
| OPTION_MASK_PPC_GFXOPT \
@@ -38,12 +38,24 @@
/* For ISA 2.06, don't add ISEL, since in general it isn't a win, but
altivec is a win so enable it. */
+ /* OPTION_MASK_VSX_TIMODE should be set, but disable it for now until
+ PR 58587 is fixed. */
#define ISA_2_6_MASKS_EMBEDDED (ISA_2_5_MASKS_EMBEDDED | OPTION_MASK_POPCNTD)
#define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \
| OPTION_MASK_POPCNTD \
| OPTION_MASK_ALTIVEC \
| OPTION_MASK_VSX)
+/* For now, don't provide an embedded version of ISA 2.07. */
+#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \
+ | OPTION_MASK_P8_FUSION \
+ | OPTION_MASK_P8_VECTOR \
+ | OPTION_MASK_CRYPTO \
+ | OPTION_MASK_DIRECT_MOVE \
+ | OPTION_MASK_HTM \
+ | OPTION_MASK_QUAD_MEMORY \
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC)
+
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
/* Deal with ports that do not have -mstrict-align. */
@@ -60,23 +72,30 @@
/* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */
#define POWERPC_MASKS (OPTION_MASK_ALTIVEC \
| OPTION_MASK_CMPB \
+ | OPTION_MASK_CRYPTO \
| OPTION_MASK_DFP \
+ | OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_DLMZB \
| OPTION_MASK_FPRND \
+ | OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
| OPTION_MASK_MFCRF \
| OPTION_MASK_MFPGPR \
| OPTION_MASK_MULHW \
| OPTION_MASK_NO_UPDATE \
+ | OPTION_MASK_P8_FUSION \
+ | OPTION_MASK_P8_VECTOR \
| OPTION_MASK_POPCNTB \
| OPTION_MASK_POPCNTD \
| OPTION_MASK_POWERPC64 \
| OPTION_MASK_PPC_GFXOPT \
| OPTION_MASK_PPC_GPOPT \
+ | OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_RECIP_PRECISION \
| OPTION_MASK_SOFT_FLOAT \
| OPTION_MASK_STRICT_ALIGN_OPTIONAL \
- | OPTION_MASK_VSX)
+ | OPTION_MASK_VSX \
+ | OPTION_MASK_VSX_TIMODE)
#endif
@@ -166,10 +185,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
| MASK_VSX | MASK_RECIP_PRECISION)
-RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
- POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
- | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
- | MASK_VSX | MASK_RECIP_PRECISION)
+RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64)
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-modes.def b/gcc-4.8/gcc/config/rs6000/rs6000-modes.def
index bc18f8a16..7062a22f2 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-modes.def
@@ -38,6 +38,12 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODE (INT, DI, 1);
+VECTOR_MODE (INT, TI, 1);
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
+
+/* Replacement for TImode that is only allowed in GPRs. We also use PTImode
+ for quad memory atomic operations to force getting an even/odd register
+ combination. */
+PARTIAL_INT_MODE (TI);
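
PARTIAL_INT_MODE (TI) creates PTImode: the same size as TImode, but kept
in GPRs so the quad-word memory instructions (lq/stq) can insist on an
even/odd pair. The test this patch later adds to
rs6000_hard_regno_mode_ok reduces to the following sketch (hypothetical
standalone helper, not the real function):

    static int
    ptimode_regno_ok_p (int regno)
    {
      return regno >= 0            /* a hard GPR number,            */
             && regno + 1 <= 31    /* whose pair stays in r0..r31,  */
             && (regno & 1) == 0;  /* starting on an even register. */
    }
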
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-opts.h b/gcc-4.8/gcc/config/rs6000/rs6000-opts.h
index fc843fd19..52fb58965 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-opts.h
@@ -59,7 +59,8 @@ enum processor_type
PROCESSOR_POWER7,
PROCESSOR_CELL,
PROCESSOR_PPCA2,
- PROCESSOR_TITAN
+ PROCESSOR_TITAN,
+ PROCESSOR_POWER8
};
/* FP processor type. */
@@ -100,7 +101,8 @@ enum group_termination
/* Enumeration to give which calling sequence to use. */
enum rs6000_abi {
ABI_NONE,
- ABI_AIX, /* IBM's AIX */
+ ABI_AIX, /* IBM's AIX, or Linux ELFv1 */
+ ABI_ELFv2, /* Linux ELFv2 ABI */
ABI_V4, /* System V.4/eabi */
ABI_DARWIN /* Apple's Darwin (OS X kernel) */
};
@@ -131,11 +133,14 @@ enum rs6000_cmodel {
CMODEL_LARGE
};
-/* Describe which vector unit to use for a given machine mode. */
+/* Describe which vector unit to use for a given machine mode. The
+ VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and
+ P8_VECTOR are contiguous. */
enum rs6000_vector {
VECTOR_NONE, /* Type is not a vector or not supported */
VECTOR_ALTIVEC, /* Use altivec for vector processing */
VECTOR_VSX, /* Use VSX for vector processing */
+ VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */
VECTOR_PAIRED, /* Use paired floating point for vectors */
VECTOR_SPE, /* Use SPE for vector processing */
VECTOR_OTHER /* Some other vector unit */
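
The contiguity requirement called out in the new comment is what lets
the VECTOR_MEM_*/VECTOR_UNIT_* macros test all three units with one
range comparison instead of three equality checks; an illustrative
(hypothetical) helper:

    static inline int
    altivec_vsx_or_p8_p (enum rs6000_vector v)
    {
      /* Valid only because VECTOR_ALTIVEC, VECTOR_VSX and
         VECTOR_P8_VECTOR are declared consecutively above.  */
      return v >= VECTOR_ALTIVEC && v <= VECTOR_P8_VECTOR;
    }
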
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000-protos.h b/gcc-4.8/gcc/config/rs6000/rs6000-protos.h
index d9bcf1a41..71d5d0a73 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc-4.8/gcc/config/rs6000/rs6000-protos.h
@@ -50,12 +50,19 @@ extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_128bit (rtx *);
+extern bool rs6000_move_128bit_ok_p (rtx []);
+extern bool rs6000_split_128bit_ok_p (rtx []);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, int);
extern bool altivec_expand_vec_perm_const (rtx op[4]);
+extern void altivec_expand_vec_perm_le (rtx op[4]);
extern bool rs6000_expand_vec_perm_const (rtx op[4]);
+extern void altivec_expand_lvx_be (rtx, rtx, enum machine_mode, unsigned);
+extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned);
+extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
extern void build_mask64_2_operands (rtx, rtx *);
@@ -70,6 +77,11 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
+extern bool direct_move_p (rtx, rtx);
+extern bool quad_load_store_p (rtx, rtx);
+extern bool fusion_gpr_load_p (rtx *, bool);
+extern void expand_fusion_gpr_load (rtx *);
+extern const char *emit_fusion_gpr_load (rtx *);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
@@ -116,6 +128,7 @@ extern rtx rs6000_longcall_ref (rtx);
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
@@ -135,9 +148,11 @@ extern rtx rs6000_address_for_fpconvert (rtx);
extern rtx rs6000_address_for_altivec (rtx);
extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
extern int rs6000_loop_align (rtx);
+extern void rs6000_split_logical (rtx [], enum rtx_code, bool, bool, bool, rtx);
#endif /* RTX_CODE */
#ifdef TREE_CODE
+extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align);
extern unsigned int rs6000_special_round_type_align (tree, unsigned int,
unsigned int);
extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int,
@@ -146,6 +161,7 @@ extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *);
extern rtx rs6000_libcall_value (enum machine_mode);
extern rtx rs6000_va_arg (tree, tree);
extern int function_ok_for_sibcall (tree);
+extern int rs6000_reg_parm_stack_space (tree);
extern void rs6000_elf_declare_function_name (FILE *, const char *, tree);
extern bool rs6000_elf_in_small_data_p (const_tree);
#ifdef ARGS_SIZE_RTX
@@ -170,7 +186,8 @@ extern unsigned int rs6000_dbx_register_number (unsigned int);
extern void rs6000_emit_epilogue (int);
extern void rs6000_emit_eh_reg_restore (rtx, rtx);
extern const char * output_isel (rtx *);
-extern void rs6000_call_indirect_aix (rtx, rtx, rtx);
+extern void rs6000_call_aix (rtx, rtx, rtx, rtx);
+extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx);
extern void rs6000_aix_asm_output_dwarf_table_ref (char *);
extern void get_ppc476_thunk_name (char name[32]);
extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins);
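
Several of the new prototypes (fusion_gpr_load_p, expand_fusion_gpr_load,
emit_fusion_gpr_load) support power8 instruction fusion, which wants the
addis that forms the high 16 bits of an address kept adjacent to the
dependent load. A rough shape of the fused pair (illustrative only, not
the emitted template):

    /* power8 can fuse this adjacent addis/load pair internally:
         addis rX, r2, sym@toc@ha    # high part of the TOC offset
         ld    rY, sym@toc@l(rX)     # dependent load, kept adjacent  */
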
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000.c b/gcc-4.8/gcc/config/rs6000/rs6000.c
index f8579fd98..9cea1f112 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000.c
+++ b/gcc-4.8/gcc/config/rs6000/rs6000.c
@@ -1,5 +1,5 @@
/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2013 Free Software Foundation, Inc.
+ Copyright (C) 1991-2014 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
@@ -56,6 +56,7 @@
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
+#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -96,6 +97,7 @@ typedef struct rs6000_stack {
int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
int varargs_save_offset; /* offset to save the varargs registers */
int ehrd_offset; /* offset to EH return data */
+ int ehcr_offset; /* offset to EH CR field data */
int reg_size; /* register size (4 or 8) */
HOST_WIDE_INT vars_size; /* variable save area size */
int parm_size; /* outgoing parameter size */
@@ -139,6 +141,8 @@ typedef struct GTY(()) machine_function
64-bits wide and is allocated early enough so that the offset
does not overflow the 16-bit load/store offset field. */
rtx sdmode_stack_slot;
+ /* Flag if r2 setup is needed with ELFv2 ABI. */
+ bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load. */
@@ -189,9 +193,6 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
/* Map register number to register class. */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
-/* Reload functions based on the type and the vector unit. */
-static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
-
static int dbg_cost_ctrl;
/* Built in types. */
@@ -289,6 +290,105 @@ static struct
don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
+/* Simplify register classes into simpler classifications. We assume
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
+ check for standard register classes (gpr/floating/altivec/vsx) and
+ floating/vector classes (float/altivec/vsx). */
+
+enum rs6000_reg_type {
+ NO_REG_TYPE,
+ PSEUDO_REG_TYPE,
+ GPR_REG_TYPE,
+ VSX_REG_TYPE,
+ ALTIVEC_REG_TYPE,
+ FPR_REG_TYPE,
+ SPR_REG_TYPE,
+ CR_REG_TYPE,
+ SPE_ACC_TYPE,
+ SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type. */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+ purpose, floating point, altivec, and VSX registers). */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
+
+
+/* Register classes we care about in secondary reload or in legitimate
+ address checks. We only need to worry about GPR, FPR, and Altivec registers
+ here, along with an ANY field that is the OR of the 3 register classes. */
+
+enum rs6000_reload_reg_type {
+ RELOAD_REG_GPR, /* General purpose registers. */
+ RELOAD_REG_FPR, /* Traditional floating point regs. */
+ RELOAD_REG_VMX, /* Altivec (VMX) registers. */
+ RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
+ N_RELOAD_REG
+};
+
+/* For setting up register classes, loop through the 3 register classes mapping
+ into real registers, and skip the ANY class, which is just an OR of the
+ bits. */
+#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
+#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
+
+/* Map reload register type to a register in the register class. */
+struct reload_reg_map_type {
+ const char *name; /* Register class name. */
+ int reg; /* Register in the register class. */
+};
+
+static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
+ { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
+ { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
+ { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
+ { "Any", -1 }, /* RELOAD_REG_ANY. */
+};
+
+/* Mask bits for each register class, indexed per mode. Historically the
+ compiler has been more restrictive about which types can do PRE_MODIFY
+ instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
+ two. */
+typedef unsigned char addr_mask_type;
+
+#define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
+#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
+#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
+#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
+#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
+#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
+
+/* Register type masks based on the type, of valid addressing modes. */
+struct rs6000_reg_addr {
+ enum insn_code reload_load; /* INSN to reload for loading. */
+ enum insn_code reload_store; /* INSN to reload for storing. */
+ enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
+ enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
+ enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
+ addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
+};
+
+static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
+
+/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
+static inline bool
+mode_supports_pre_incdec_p (enum machine_mode mode)
+{
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
+ != 0);
+}
+
+/* Helper function to say whether a mode supports PRE_MODIFY. */
+static inline bool
+mode_supports_pre_modify_p (enum machine_mode mode)
+{
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
+ != 0);
+}
+
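
The helpers above simply mask bits out of reg_addr[]. A standalone mimic
of the scheme (demo names; same bit values as the defines above) shows
the intended query pattern:

    #include <stdio.h>

    typedef unsigned char addr_mask_type;
    #define DEMO_REG_VALID      0x01
    #define DEMO_REG_PRE_INCDEC 0x10
    #define DEMO_REG_PRE_MODIFY 0x20

    int
    main (void)
    {
      /* The ANY slot is maintained as the OR of the GPR/FPR/VMX masks,
         so one test answers "does any register class allow this?".  */
      addr_mask_type gpr = DEMO_REG_VALID | DEMO_REG_PRE_INCDEC;
      addr_mask_type fpr = DEMO_REG_VALID;
      addr_mask_type any = gpr | fpr;

      printf ("%d\n", (any & DEMO_REG_PRE_INCDEC) != 0);  /* prints 1 */
      printf ("%d\n", (any & DEMO_REG_PRE_MODIFY) != 0);  /* prints 0 */
      return 0;
    }
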
/* Target cpu costs. */
@@ -828,6 +928,25 @@ struct processor_costs power7_cost = {
12, /* prefetch streams */
};
+/* Instruction costs on POWER8 processors. */
+static const
+struct processor_costs power8_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (3), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (14), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 12, /* prefetch streams */
+};
+
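
For scale: COSTS_N_INSNS (N) is ((N) * 4) in GCC's rtl.h, so the cost
entries above are in quarter-instruction units:

    /* From rtl.h:  #define COSTS_N_INSNS(N) ((N) * 4)
       e.g. the power8 divdi entry, COSTS_N_INSNS (35), is 140 units.  */
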
/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
@@ -855,6 +974,7 @@ struct processor_costs ppca2_cost = {
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -878,6 +998,9 @@ struct processor_costs ppca2_cost = {
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
{ NAME, ICODE, MASK, ATTR },
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
+ { NAME, ICODE, MASK, ATTR },
+
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
{ NAME, ICODE, MASK, ATTR },
@@ -908,6 +1031,7 @@ static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -948,6 +1072,7 @@ static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
+static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
@@ -1020,6 +1145,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *,
static void rs6000_print_builtin_options (FILE *, int, const char *,
HOST_WIDE_INT);
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+ enum rs6000_reg_type,
+ enum machine_mode,
+ secondary_reload_info *,
+ bool);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY(()) toc_hash_struct
@@ -1068,7 +1200,9 @@ char rs6000_reg_names[][8] =
/* SPE registers. */
"spe_acc", "spefscr",
/* Soft frame pointer. */
- "sfp"
+ "sfp",
+ /* HTM SPR registers. */
+ "tfhar", "tfiar", "texasr"
};
#ifdef TARGET_REGNAMES
@@ -1094,7 +1228,9 @@ static const char alt_reg_names[][8] =
/* SPE registers. */
"spe_acc", "spefscr",
/* Soft frame pointer. */
- "sfp"
+ "sfp",
+ /* HTM SPR registers. */
+ "tfhar", "tfiar", "texasr"
};
#endif
@@ -1316,6 +1452,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB rs6000_return_in_msb
+
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
@@ -1425,6 +1564,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate
@@ -1513,8 +1655,9 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
{
unsigned HOST_WIDE_INT reg_size;
+ /* TF/TD modes are special in that they always take 2 registers. */
if (FP_REGNO_P (regno))
- reg_size = (VECTOR_MEM_VSX_P (mode)
+ reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
? UNITS_PER_VSX_WORD
: UNITS_PER_FP_WORD);
@@ -1546,17 +1689,40 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
+ /* PTImode can only go in GPRs. Quad word memory operations require even/odd
+ register pairs, and PTImode is used wherever we need to deal with quad
+ word memory operations. Don't allow quad words in the argument or frame
+ pointer registers, just registers 0..31. */
+ if (mode == PTImode)
+ return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && ((regno & 1) == 0));
+
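/* Illustration (not part of the patch): under this test a PTImode value
   may live in the (r10, r11) pair, since r10 is even, while a candidate
   starting at the odd register r11 is rejected, as is anything touching
   the argument or frame pointer registers.  */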
/* VSX registers that overlap the FPR registers are larger than for non-VSX
implementations. Don't allow an item to be split between a FP register
- and an Altivec register. */
- if (VECTOR_MEM_VSX_P (mode))
+ and an Altivec register. Allow TImode in all VSX registers if the user
+ asked for it. */
+ if (TARGET_VSX && VSX_REGNO_P (regno)
+ && (VECTOR_MEM_VSX_P (mode)
+ || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
+ || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
+ || (TARGET_VSX_TIMODE && mode == TImode)
+ || (TARGET_VADDUQM && mode == V1TImode)))
{
if (FP_REGNO_P (regno))
return FP_REGNO_P (last_regno);
if (ALTIVEC_REGNO_P (regno))
+ {
+ if (mode == SFmode && !TARGET_UPPER_REGS_SF)
+ return 0;
+
+ if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
+ return 0;
+
return ALTIVEC_REGNO_P (last_regno);
}
+ }
/* The GPRs can hold any mode, but values bigger than one register
cannot go past R31. */
@@ -1564,8 +1730,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
return INT_REGNO_P (last_regno);
/* The float registers (except for VSX vector modes) can only hold floating
- modes and DImode. This excludes the 32-bit decimal float mode for
- now. */
+ modes and DImode. */
if (FP_REGNO_P (regno))
{
if (SCALAR_FLOAT_MODE_P (mode)
@@ -1593,15 +1758,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* AltiVec modes only go in AltiVec registers. */
if (ALTIVEC_REGNO_P (regno))
- return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
+ return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ || mode == V1TImode);
/* ...but GPRs can hold SIMD data on the SPE in one register. */
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
return 1;
- /* We cannot put TImode anywhere except general register and it must be able
- to fit within the register set. In the future, allow TImode in the
- Altivec or VSX registers. */
+ /* We cannot put non-VSX TImode or PTImode anywhere except a general
+ register, and it must be able to fit within the register set. */
return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
@@ -1674,10 +1839,77 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
comma = "";
}
+ len += fprintf (stderr, "%sreg-class = %s", comma,
+ reg_class_names[(int)rs6000_regno_regclass[r]]);
+ comma = ", ";
+
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ comma = "";
+ }
+
fprintf (stderr, "%sregno = %d\n", comma, r);
}
}
+static const char *
+rs6000_debug_vector_unit (enum rs6000_vector v)
+{
+ const char *ret;
+
+ switch (v)
+ {
+ case VECTOR_NONE: ret = "none"; break;
+ case VECTOR_ALTIVEC: ret = "altivec"; break;
+ case VECTOR_VSX: ret = "vsx"; break;
+ case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
+ case VECTOR_PAIRED: ret = "paired"; break;
+ case VECTOR_SPE: ret = "spe"; break;
+ case VECTOR_OTHER: ret = "other"; break;
+ default: ret = "unknown"; break;
+ }
+
+ return ret;
+}
+
+/* Print the address masks in a human readable fashion. */
+DEBUG_FUNCTION void
+rs6000_debug_print_mode (ssize_t m)
+{
+ ssize_t rc;
+
+ fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
+ for (rc = 0; rc < N_RELOAD_REG; rc++)
+ {
+ addr_mask_type mask = reg_addr[m].addr_mask[rc];
+ fprintf (stderr,
+ " %s: %c%c%c%c%c%c",
+ reload_reg_map[rc].name,
+ (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
+ (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
+ (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
+ (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
+ (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
+ (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
+ }
+
+ if (rs6000_vector_unit[m] != VECTOR_NONE
+ || rs6000_vector_mem[m] != VECTOR_NONE
+ || (reg_addr[m].reload_store != CODE_FOR_nothing)
+ || (reg_addr[m].reload_load != CODE_FOR_nothing))
+ {
+ fprintf (stderr,
+ " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
+ rs6000_debug_vector_unit (rs6000_vector_unit[m]),
+ rs6000_debug_vector_unit (rs6000_vector_mem[m]),
+ (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
+ }
+
+ fputs ("\n", stderr);
+}
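/* Sketch of the resulting output, assuming reload_reg_map names the
   classes "GPR", "FPR", etc. (those entries are defined elsewhere); the
   exact columns are target dependent:

     Mode: DI     GPR: v io++ FPR: v io++ VSX: v i     Reload=sl

   where v = valid, m = multiple registers, i = indexed (reg+reg),
   o = offset (reg+disp), and the two '+' columns are PRE_INC/PRE_DEC
   and PRE_MODIFY respectively.  */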
+
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
@@ -1690,6 +1922,7 @@ rs6000_debug_reg_global (void)
static const char *const tf[2] = { "false", "true" };
const char *nl = (const char *)0;
int m;
+ size_t m1, m2, v;
char costly_num[20];
char nop_num[20];
char flags_buffer[40];
@@ -1700,20 +1933,69 @@ rs6000_debug_reg_global (void)
const char *cmodel_str;
struct cl_target_option cl_opts;
- /* Map enum rs6000_vector to string. */
- static const char *rs6000_debug_vector_unit[] = {
- "none",
- "altivec",
- "vsx",
- "paired",
- "spe",
- "other"
+ /* Modes we want tieable information on. */
+ static const enum machine_mode print_tieable_modes[] = {
+ QImode,
+ HImode,
+ SImode,
+ DImode,
+ TImode,
+ PTImode,
+ SFmode,
+ DFmode,
+ TFmode,
+ SDmode,
+ DDmode,
+ TDmode,
+ V8QImode,
+ V4HImode,
+ V2SImode,
+ V16QImode,
+ V8HImode,
+ V4SImode,
+ V2DImode,
+ V1TImode,
+ V32QImode,
+ V16HImode,
+ V8SImode,
+ V4DImode,
+ V2TImode,
+ V2SFmode,
+ V4SFmode,
+ V2DFmode,
+ V8SFmode,
+ V4DFmode,
+ CCmode,
+ CCUNSmode,
+ CCEQmode,
+ };
+
+ /* Virtual regs we are interested in. */
+ const static struct {
+ int regno; /* register number. */
+ const char *name; /* register name. */
+ } virtual_regs[] = {
+ { STACK_POINTER_REGNUM, "stack pointer:" },
+ { TOC_REGNUM, "toc: " },
+ { STATIC_CHAIN_REGNUM, "static chain: " },
+ { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
+ { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
+ { ARG_POINTER_REGNUM, "arg pointer: " },
+ { FRAME_POINTER_REGNUM, "frame pointer:" },
+ { FIRST_PSEUDO_REGISTER, "first pseudo: " },
+ { FIRST_VIRTUAL_REGISTER, "first virtual:" },
+ { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
+ { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
+ { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
+ { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
+ { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
+ { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
+ { LAST_VIRTUAL_REGISTER, "last virtual: " },
};
- fprintf (stderr, "Register information: (last virtual reg = %d)\n",
- LAST_VIRTUAL_REGISTER);
- rs6000_debug_reg_print (0, 31, "gr");
- rs6000_debug_reg_print (32, 63, "fp");
+ fputs ("\nHard register information:\n", stderr);
+ rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
+ rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
LAST_ALTIVEC_REGNO,
"vs");
@@ -1726,6 +2008,10 @@ rs6000_debug_reg_global (void)
rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
+ fputs ("\nVirtual/stack/frame registers:\n", stderr);
+ for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
+ fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
+
fprintf (stderr,
"\n"
"d reg_class = %s\n"
@@ -1734,23 +2020,68 @@ rs6000_debug_reg_global (void)
"wa reg_class = %s\n"
"wd reg_class = %s\n"
"wf reg_class = %s\n"
- "ws reg_class = %s\n\n",
+ "wg reg_class = %s\n"
+ "wl reg_class = %s\n"
+ "wm reg_class = %s\n"
+ "wr reg_class = %s\n"
+ "ws reg_class = %s\n"
+ "wt reg_class = %s\n"
+ "wu reg_class = %s\n"
+ "wv reg_class = %s\n"
+ "ww reg_class = %s\n"
+ "wx reg_class = %s\n"
+ "wy reg_class = %s\n"
+ "wz reg_class = %s\n"
+ "\n",
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
- reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
-
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
+
+ nl = "\n";
for (m = 0; m < NUM_MACHINE_MODES; ++m)
- if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
+ rs6000_debug_print_mode (m);
+
+ fputs ("\n", stderr);
+
+ for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
+ {
+ enum machine_mode mode1 = print_tieable_modes[m1];
+ bool first_time = true;
+
+ nl = (const char *)0;
+ for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
+ {
+ enum machine_mode mode2 = print_tieable_modes[m2];
+ if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
{
+ if (first_time)
+ {
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
nl = "\n";
- fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n",
- GET_MODE_NAME (m),
- rs6000_debug_vector_unit[ rs6000_vector_unit[m] ],
- rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]);
+ first_time = false;
+ }
+
+ fprintf (stderr, " %s", GET_MODE_NAME (mode2));
+ }
+ }
+
+ if (!first_time)
+ fputs ("\n", stderr);
}
if (nl)
@@ -1913,6 +2244,7 @@ rs6000_debug_reg_global (void)
{
case ABI_NONE: abi_str = "none"; break;
case ABI_AIX: abi_str = "aix"; break;
+ case ABI_ELFv2: abi_str = "ELFv2"; break;
case ABI_V4: abi_str = "V4"; break;
case ABI_DARWIN: abi_str = "darwin"; break;
default: abi_str = "unknown"; break;
@@ -1932,9 +2264,34 @@ rs6000_debug_reg_global (void)
if (rs6000_float_gprs)
fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
+ fprintf (stderr, DEBUG_FMT_S, "fprs",
+ (TARGET_FPRS ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "single_float",
+ (TARGET_SINGLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "double_float",
+ (TARGET_DOUBLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "soft_float",
+ (TARGET_SOFT_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_single",
+ (TARGET_E500_SINGLE ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_double",
+ (TARGET_E500_DOUBLE ? "true" : "false"));
+
if (TARGET_LINK_STACK)
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
+ if (targetm.lra_p ())
+ fprintf (stderr, DEBUG_FMT_S, "lra", "true");
+
+ if (TARGET_P8_FUSION)
+ fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
+ (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
+
fprintf (stderr, DEBUG_FMT_S, "plt-format",
TARGET_SECURE_PLT ? "secure" : "bss");
fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -1954,11 +2311,106 @@ rs6000_debug_reg_global (void)
(int)RS6000_BUILTIN_COUNT);
}
+
+/* Update the addr mask bits in reg_addr to help secondary reload and
+ GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
+ use. */
+
+static void
+rs6000_setup_reg_addr_masks (void)
+{
+ ssize_t rc, reg, m, nregs;
+ addr_mask_type any_addr_mask, addr_mask;
+
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ /* SDmode is special in that we want to access it only via REG+REG
+ addressing on power7 and above, since we want to use the LFIWZX and
+ STFIWX instructions to load and store it. */
+ bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
+
+ any_addr_mask = 0;
+ for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
+ {
+ addr_mask = 0;
+ reg = reload_reg_map[rc].reg;
+
+ /* Can mode values go in the GPR/FPR/Altivec registers? */
+ if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
+ {
+ nregs = rs6000_hard_regno_nregs[m][reg];
+ addr_mask |= RELOAD_REG_VALID;
+
+ /* Indicate if the mode takes more than 1 physical register. If
+ it takes a single register, indicate it can do REG+REG
+ addressing. */
+ if (nregs > 1 || m == BLKmode)
+ addr_mask |= RELOAD_REG_MULTIPLE;
+ else
+ addr_mask |= RELOAD_REG_INDEXED;
+
+ /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
+ addressing. Restrict addressing on SPE for 64-bit types
+ because of the SUBREG hackery used to address 64-bit floats in
+ '32-bit' GPRs. To simplify secondary reload, don't allow
+ update forms on scalar floating point types that can go in the
+ upper registers. */
+
+ if (TARGET_UPDATE
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
+ && GET_MODE_SIZE (m) <= 8
+ && !VECTOR_MODE_P (m)
+ && !COMPLEX_MODE_P (m)
+ && !indexed_only_p
+ && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
+ && !(m == DFmode && TARGET_UPPER_REGS_DF)
+ && !(m == SFmode && TARGET_UPPER_REGS_SF))
+ {
+ addr_mask |= RELOAD_REG_PRE_INCDEC;
+
+ /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
+ we don't allow PRE_MODIFY for some multi-register
+ operations. */
+ switch (m)
+ {
+ default:
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+
+ case DImode:
+ if (TARGET_POWERPC64)
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+
+ case DFmode:
+ case DDmode:
+ if (TARGET_DF_INSN)
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+ }
+ }
+ }
+
+ /* GPR and FPR registers can do REG+OFFSET addressing, except
+ possibly for SDmode. */
+ if ((addr_mask != 0) && !indexed_only_p
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
+ addr_mask |= RELOAD_REG_OFFSET;
+
+ reg_addr[m].addr_mask[rc] = addr_mask;
+ any_addr_mask |= addr_mask;
+ }
+
+ reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
+ }
+}
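These masks are what the address predicates later consult. As a minimal sketch (assuming this is how the patch wires it up elsewhere), the mode_supports_pre_incdec_p test used by rs6000_legitimate_address_p further down reduces to one bit test:

static inline bool
mode_supports_pre_incdec_p (enum machine_mode mode)
{
  /* PRE_INC/PRE_DEC are allowed for MODE if any reload register class
     set RELOAD_REG_PRE_INCDEC in rs6000_setup_reg_addr_masks above.  */
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}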
+
+
/* Initialize the various global tables that are based on register size. */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
- int r, m, c;
+ ssize_t r, m, c;
int align64;
int align32;
@@ -1987,20 +2439,54 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
+ rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
+ rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
+ rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
- /* Precalculate vector information, this must be set up before the
- rs6000_hard_regno_nregs_internal below. */
- for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ /* Precalculate the mapping from register class to the simpler reload
+ register type. We don't need all of the register classes that are
+ combinations of different classes, just the simple ones that have
+ constraint letters. */
+ for (c = 0; c < N_REG_CLASSES; c++)
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
+ if (TARGET_VSX)
{
- rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE;
- rs6000_vector_reload[m][0] = CODE_FOR_nothing;
- rs6000_vector_reload[m][1] = CODE_FOR_nothing;
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
}
+ else
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+ }
+
+ /* Precalculate the valid memory formats as well as the vector information;
+ this must be set up before the rs6000_hard_regno_nregs_internal calls
+ below. */
+ gcc_assert ((int)VECTOR_NONE == 0);
+ memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
+ memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
+
+ gcc_assert ((int)CODE_FOR_nothing == 0);
+ memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
- for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++)
- rs6000_constraints[c] = NO_REGS;
+ gcc_assert ((int)NO_REGS == 0);
+ memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
/* The VSX hardware allows native alignment for vectors; this controls whether
the compiler believes it can use native alignment or must still use 128-bit
alignment. */
@@ -2062,13 +2548,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
- /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
- Altivec doesn't have 64-bit support. */
+ /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit.
+ Allow it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit
+ integer support. */
if (TARGET_VSX)
{
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
+ rs6000_vector_unit[V2DImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
rs6000_vector_align[V2DImode] = align64;
+
+ rs6000_vector_mem[V1TImode] = VECTOR_VSX;
+ rs6000_vector_unit[V1TImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
+ rs6000_vector_align[V1TImode] = 128;
}
/* DFmode, see if we want to use the VSX unit. */
@@ -2076,14 +2568,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
rs6000_vector_unit[DFmode] = VECTOR_VSX;
rs6000_vector_mem[DFmode]
- = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE);
+ = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
rs6000_vector_align[DFmode] = align64;
}
+ /* Allow TImode in VSX register and set the VSX memory macros. */
+ if (TARGET_VSX && TARGET_VSX_TIMODE)
+ {
+ rs6000_vector_mem[TImode] = VECTOR_VSX;
+ rs6000_vector_align[TImode] = align64;
+ }
+
/* TODO add SPE and paired floating point vector support. */
/* Register class constraints for the constraints that depend on compile
- switches. */
+ switches. When the VSX code was added, different constraints were added
+ based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
+ of the VSX registers are used. The register classes for scalar floating
+ point types are set based on whether we allow that type into the upper
+ (Altivec) registers. GCC has register classes to target the Altivec
+ registers for load/store operations, to select using a VSX memory
+ operation instead of the traditional floating point operation. The
+ constraints are:
+
+ d - Register class to use with traditional DFmode instructions.
+ f - Register class to use with traditional SFmode instructions.
+ v - Altivec register.
+ wa - Any VSX register.
+ wd - Preferred register class for V2DFmode.
+ wf - Preferred register class for V4SFmode.
+ wg - Float register for power6x move insns.
+ wl - Float register if we can do 32-bit signed int loads.
+ wm - VSX register for ISA 2.07 direct move operations.
+ wr - GPR if 64-bit mode is permitted.
+ ws - Register class to do ISA 2.06 DF operations.
+ wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
+ wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
+ wt - VSX register for TImode in VSX registers.
+ ww - Register class to do SF conversions in with VSX operations.
+ wx - Float register if we can do 32-bit int stores.
+ wy - Register class to do ISA 2.07 SF operations.
+ wz - Float register if we can do 32-bit unsigned int loads. */
+
if (TARGET_HARD_FLOAT && TARGET_FPRS)
rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
@@ -2092,63 +2618,163 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_VSX)
{
- /* At present, we just use VSX_REGS, but we have different constraints
- based on the use, in case we want to fine tune the default register
- class used. wa = any VSX register, wf = register class to use for
- V4SF, wd = register class to use for V2DF, and ws = register classs to
- use for DF scalars. */
rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY
- ? VSX_REGS
- : FLOAT_REGS);
+ rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
+
+ if (TARGET_VSX_TIMODE)
+ rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
+
+ if (TARGET_UPPER_REGS_DF)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
+ }
+ else
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
}
+ /* Add conditional constraints based on various options, to allow us to
+ collapse multiple insn patterns. */
if (TARGET_ALTIVEC)
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
- /* Set up the reload helper functions. */
+ if (TARGET_MFPGPR)
+ rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
+
+ if (TARGET_LFIWAX)
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+
+ if (TARGET_DIRECT_MOVE)
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+
+ if (TARGET_POWERPC64)
+ rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
+
+ if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
+ }
+ else if (TARGET_P8_VECTOR)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
+ }
+ else if (TARGET_VSX)
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
+
+ if (TARGET_STFIWX)
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
+
+ if (TARGET_LFIWZX)
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
+
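Because rs6000_constraints was memset to NO_REGS above, a non-NO_REGS entry doubles as an "is this constraint enabled" flag. A minimal sketch of such a query (the helper name and its int parameter are invented for illustration):

/* Hypothetical helper: true when constraint C was assigned a register
   class above; e.g. RS6000_CONSTRAINT_wt is enabled exactly when
   -mvsx-timode is in effect.  */
static inline bool
rs6000_constraint_enabled_p (int c)
{
  return rs6000_constraints[c] != NO_REGS;
}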
+ /* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
if (TARGET_64BIT)
{
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store;
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load;
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store;
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load;
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store;
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load;
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store;
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load;
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store;
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load;
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store;
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load;
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
{
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store;
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load;
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
+ }
+ if (TARGET_P8_VECTOR)
+ {
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
+ }
+ if (TARGET_VSX_TIMODE)
+ {
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
+ }
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (TARGET_POWERPC64)
+ {
+ reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
+ reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
+ reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
+ reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
+ reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
+ reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
+ reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
+ reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
+ reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
+ reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
+ reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
+ reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
+ reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
+ reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
+ }
+ else
+ {
+ reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
+ reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
+ reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
+ }
}
}
else
{
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store;
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load;
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store;
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load;
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store;
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load;
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store;
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load;
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store;
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load;
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store;
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load;
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
+ {
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
+ }
+ if (TARGET_P8_VECTOR)
+ {
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
+ }
+ if (TARGET_VSX_TIMODE)
{
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store;
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load;
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
}
}
}
@@ -2267,6 +2893,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
+ /* Update the addr mask bits in reg_addr to help secondary reload and
+ GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing to
+ use. */
+ rs6000_setup_reg_addr_masks ();
+
if (global_init_p || TARGET_DEBUG_TARGET)
{
if (TARGET_DEBUG_REG)
@@ -2378,7 +3009,12 @@ rs6000_builtin_mask_calculate (void)
| ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
| ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
| ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0));
+ | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
+ | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
+ | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
+ | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
+ | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
+ | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0));
}
/* Override command line options. Mostly we process the processor type and
@@ -2415,7 +3051,8 @@ rs6000_option_override_internal (bool global_init_p)
calculation works better for RTL loop invariant motion on targets
with enough (>= 32) registers. It is an expensive optimization.
So it is on only for peak performance. */
- if (optimize >= 3 && global_init_p)
+ if (optimize >= 3 && global_init_p
+ && !global_options_set.x_flag_ira_loop_pressure)
flag_ira_loop_pressure = 1;
/* Set the pointer size. */
@@ -2609,6 +3246,24 @@ rs6000_option_override_internal (bool global_init_p)
}
}
+ /* If little-endian, default to -mstrict-align on older processors.
+ Testing for htm matches power8 and later. */
+ if (!BYTES_BIG_ENDIAN
+ && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
+ rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
+
+ /* -maltivec={le,be} implies -maltivec. */
+ if (rs6000_altivec_element_order != 0)
+ rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
+
+ /* Disallow -maltivec=le in big endian mode for now. This is not
+ known to be useful for anyone. */
+ if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
+ {
+ warning (0, N_("-maltivec=le not allowed for big-endian targets"));
+ rs6000_altivec_element_order = 0;
+ }
+
/* Add some warnings for VSX. */
if (TARGET_VSX)
{
@@ -2619,15 +3274,13 @@ rs6000_option_override_internal (bool global_init_p)
if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
msg = N_("-mvsx requires hardware floating point");
else
+ {
rs6000_isa_flags &= ~ OPTION_MASK_VSX;
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
+ }
}
else if (TARGET_PAIRED_FLOAT)
msg = N_("-mvsx and -mpaired are incompatible");
- /* The hardware will allow VSX and little endian, but until we make sure
- things like vector select, etc. work don't allow VSX on little endian
- systems at this point. */
- else if (!BYTES_BIG_ENDIAN)
- msg = N_("-mvsx used with little endian code");
else if (TARGET_AVOID_XFORM > 0)
msg = N_("-mvsx needs indexed addressing");
else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
@@ -2647,9 +3300,24 @@ rs6000_option_override_internal (bool global_init_p)
}
}
+ /* If hard-float/altivec/vsx were explicitly turned off then don't allow
+ the -mcpu setting to enable options that conflict. */
+ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
+ && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
+ | OPTION_MASK_ALTIVEC
+ | OPTION_MASK_VSX)) != 0)
+ rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
+ | OPTION_MASK_DIRECT_MOVE)
+ & ~rs6000_isa_flags_explicit);
+
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
+
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
unless the user explicitly used the -mno-<option> to disable the code. */
- if (TARGET_VSX)
+ if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
+ rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
+ else if (TARGET_VSX)
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
else if (TARGET_POPCNTD)
rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
@@ -2664,6 +3332,99 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_ALTIVEC)
rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
+ if (TARGET_CRYPTO && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
+ error ("-mcrypto requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
+ }
+
+ if (TARGET_DIRECT_MOVE && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
+ error ("-mdirect-move requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
+ if (TARGET_VSX_TIMODE && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
+ error ("-mvsx-timode requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
+ }
+
+ if (TARGET_DFP && !TARGET_HARD_FLOAT)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
+ error ("-mhard-dfp requires -mhard-float");
+ rs6000_isa_flags &= ~OPTION_MASK_DFP;
+ }
+
+ /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
+ silently turn off quad memory mode. */
+ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+ }
+
+ /* Non-atomic quad memory loads/stores are disabled for little endian, since
+ the words are reversed; atomic operations can still be done by
+ swapping the words. */
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
+
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+ }
+
+ /* Assume if the user asked for normal quad memory instructions, they want
+ the atomic versions as well, unless they explicitly told us not to use quad
+ word atomic instructions. */
+ if (TARGET_QUAD_MEMORY
+ && !TARGET_QUAD_MEMORY_ATOMIC
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
+ /* Enable power8 fusion if we are tuning for power8, even if we aren't
+ generating power8 instructions. */
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
+ rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
+ & OPTION_MASK_P8_FUSION);
+
+ /* Power8 does not fuse sign extended loads with the ADDIS instruction. If
+ we are optimizing at high levels for speed, convert a sign extended load
+ into a zero extending load followed by an explicit sign extension. */
+ if (TARGET_P8_FUSION
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
+ && optimize_function_for_speed_p (cfun)
+ && optimize >= 3)
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
+
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
+
/* E500mc does "better" if we inline more aggressively. Respect the
user's opinion, though. */
if (rs6000_block_move_inline_limit == 0
@@ -2787,9 +3548,13 @@ rs6000_option_override_internal (bool global_init_p)
/* Place FP constants in the constant pool instead of TOC
if section anchors enabled. */
- if (flag_section_anchors)
+ if (flag_section_anchors
+ && !global_options_set.x_TARGET_NO_FP_IN_TOC)
TARGET_NO_FP_IN_TOC = 1;
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
+
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
@@ -2800,6 +3565,9 @@ rs6000_option_override_internal (bool global_init_p)
SUB3TARGET_OVERRIDE_OPTIONS;
#endif
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
+
/* For the E500 family of cores, reset the single/double FP flags to let us
check that they remain constant across attributes or pragmas. Also,
clear a possible request for string instructions, not supported and which
@@ -2849,16 +3617,19 @@ rs6000_option_override_internal (bool global_init_p)
&& rs6000_cpu != PROCESSOR_POWER5
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_POWER8
&& rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL
&& rs6000_cpu != PROCESSOR_PPC476);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER7);
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8);
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8
|| rs6000_cpu == PROCESSOR_PPCE500MC
|| rs6000_cpu == PROCESSOR_PPCE500MC64
|| rs6000_cpu == PROCESSOR_PPCE5500
@@ -2988,7 +3759,7 @@ rs6000_option_override_internal (bool global_init_p)
/* We should always be splitting complex arguments, but we can't break
Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
- if (DEFAULT_ABI != ABI_AIX)
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
targetm.calls.split_complex_arg = NULL;
}
@@ -3102,6 +3873,10 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_cost = &power7_cost;
break;
+ case PROCESSOR_POWER8:
+ rs6000_cost = &power8_cost;
+ break;
+
case PROCESSOR_PPCA2:
rs6000_cost = &ppca2_cost;
break;
@@ -3274,7 +4049,8 @@ rs6000_loop_align (rtx label)
&& (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7))
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8))
return 5;
else
return align_loops_log;
@@ -3493,6 +4269,8 @@ rs6000_preferred_simd_mode (enum machine_mode mode)
{
case SFmode:
return V4SFmode;
+ case TImode:
+ return V1TImode;
case DImode:
return V2DImode;
case SImode:
@@ -3813,6 +4591,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
switch (fn)
{
+ case BUILT_IN_CLZIMAX:
+ case BUILT_IN_CLZLL:
+ case BUILT_IN_CLZL:
+ case BUILT_IN_CLZ:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+ }
+ break;
case BUILT_IN_COPYSIGN:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -3828,6 +4622,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
break;
+ case BUILT_IN_POPCOUNTIMAX:
+ case BUILT_IN_POPCOUNTLL:
+ case BUILT_IN_POPCOUNTL:
+ case BUILT_IN_POPCOUNT:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+ }
+ break;
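      /* Illustration (not part of the patch): with -mcpu=power8 -O3, a loop
	 such as

	   for (i = 0; i < n; i++)
	     out[i] = __builtin_popcount (in[i]);

	 over 32-bit elements can now be vectorized using
	 P8V_BUILTIN_VPOPCNTW, one vpopcntw for each group of four.  */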
case BUILT_IN_SQRT:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -4043,7 +4853,11 @@ rs6000_file_start (void)
putc ('\n', file);
}
- if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
+ if (DEFAULT_ABI == ABI_ELFv2)
+ fprintf (file, "\t.abiversion 2\n");
+
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
+ || (TARGET_ELF && flag_pic == 2))
{
switch_to_section (toc_section);
switch_to_section (text_section);
@@ -4230,7 +5044,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
HOST_WIDE_INT splat_val;
HOST_WIDE_INT msb_val;
- if (mode == V2DImode || mode == V2DFmode)
+ if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
return false;
nunits = GET_MODE_NUNITS (mode);
@@ -4239,7 +5053,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
splat_val = val;
- msb_val = val > 0 ? 0 : -1;
+ msb_val = val >= 0 ? 0 : -1;
/* Construct the value to be splatted, if possible. If not, return 0. */
for (i = 2; i <= copies; i *= 2)
@@ -4274,15 +5088,16 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
/* Check if VAL is present in every STEP-th element, and the
other elements are filled with its most significant bit. */
- for (i = 0; i < nunits - 1; ++i)
+ for (i = 1; i < nunits; ++i)
{
HOST_WIDE_INT desired_val;
- if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0)
+ unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
+ if ((i & (step - 1)) == 0)
desired_val = val;
else
desired_val = msb_val;
- if (desired_val != const_vector_elt_as_int (op, i))
+ if (desired_val != const_vector_elt_as_int (op, elt))
return false;
}
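/* Worked example: for a V8HImode constant with step = 2, val = 5 and
   hence msb_val = 0, the loop accepts { 5, 0, 5, 0, 5, 0, 5, 0 } in
   little-endian element order; on big-endian the same pattern is
   checked through elt = nunits - 1 - i.  */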
@@ -4308,7 +5123,7 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
if (mode == V2DFmode)
return zero_constant (op, mode);
- if (mode == V2DImode)
+ else if (mode == V2DImode)
{
/* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
easy. */
@@ -4326,6 +5141,10 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
return false;
}
+ /* V1TImode is a special container for TImode. Ignore for now. */
+ else if (mode == V1TImode)
+ return false;
+
/* Start with a vspltisw. */
step = GET_MODE_NUNITS (mode) / 4;
copies = 1;
@@ -4407,7 +5226,7 @@ output_vec_const_move (rtx *operands)
if (zero_constant (vec, mode))
return "xxlxor %x0,%x0,%x0";
- if (mode == V2DImode
+ if ((mode == V2DImode || mode == V1TImode)
&& INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
&& INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
return "vspltisw %0,-1";
@@ -4698,9 +5517,12 @@ rs6000_expand_vector_init (rtx target, rtx vals)
{
rtx freg = gen_reg_rtx (V4SFmode);
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx cvt = ((TARGET_XSCVDPSPN)
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
+ emit_insn (cvt);
+ emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
}
else
{
@@ -4790,6 +5612,13 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
return;
}
+ /* Simplify setting single element vectors like V1TImode. */
+ if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
+ {
+ emit_move_insn (target, gen_lowpart (mode, val));
+ return;
+ }
+
/* Load single variable value. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
@@ -4811,10 +5640,29 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
XVECEXP (mask, 0, elt*width + i)
= GEN_INT (i + 0x10);
x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
+
+ if (BYTES_BIG_ENDIAN)
x = gen_rtx_UNSPEC (mode,
gen_rtvec (3, target, reg,
force_reg (V16QImode, x)),
UNSPEC_VPERM);
+ else
+ {
+ /* Invert selector. We prefer to generate VNAND on P8 so
+ that future fusion opportunities can kick in, but must
+ generate VNOR elsewhere. */
+ rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
+ rtx iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
+ UNSPEC_VPERM);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, target, x));
}
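/* Note on the selector inversion above: with both operands equal,
   IOR (NOT x, NOT x) and AND (NOT x, NOT x) each compute ~x; the IOR
   form maps to the vnand instruction (fusable on power8) while the AND
   form maps to vnor, which is why the choice hinges on
   TARGET_P8_VECTOR.  */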
@@ -4833,6 +5681,10 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
{
default:
break;
+ case V1TImode:
+ gcc_assert (elt == 0 && inner_mode == TImode);
+ emit_move_insn (target, gen_lowpart (TImode, vec));
+ break;
case V2DFmode:
emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
return;
@@ -4938,7 +5790,7 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
purpose. */
if (GET_CODE (op) == SUBREG
&& (mode == SImode || mode == DImode || mode == TImode
- || mode == DDmode || mode == TDmode)
+ || mode == DDmode || mode == TDmode || mode == PTImode)
&& REG_P (SUBREG_REG (op))
&& (GET_MODE (SUBREG_REG (op)) == DFmode
|| GET_MODE (SUBREG_REG (op)) == TFmode))
@@ -4951,6 +5803,7 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
&& REG_P (SUBREG_REG (op))
&& (GET_MODE (SUBREG_REG (op)) == DImode
|| GET_MODE (SUBREG_REG (op)) == TImode
+ || GET_MODE (SUBREG_REG (op)) == PTImode
|| GET_MODE (SUBREG_REG (op)) == DDmode
|| GET_MODE (SUBREG_REG (op)) == TDmode))
return true;
@@ -4966,6 +5819,48 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
return false;
}
+/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
+ selects whether the alignment is ABI mandated, optional, or
+ both ABI and optional alignment. */
+
+unsigned int
+rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
+{
+ if (how != align_opt)
+ {
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
+ || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
+ {
+ if (align < 64)
+ align = 64;
+ }
+ else if (align < 128)
+ align = 128;
+ }
+ else if (TARGET_E500_DOUBLE
+ && TREE_CODE (type) == REAL_TYPE
+ && TYPE_MODE (type) == DFmode)
+ {
+ if (align < 64)
+ align = 64;
+ }
+ }
+
+ if (how != align_abi)
+ {
+ if (TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_MODE (TREE_TYPE (type)) == QImode)
+ {
+ if (align < BITS_PER_WORD)
+ align = BITS_PER_WORD;
+ }
+ }
+
+ return align;
+}
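/* Illustration: for a generic vector type the ABI clause raises ALIGN
   to 128 bits whenever HOW permits ABI alignment, while for an array of
   QImode elements (e.g. char buf[64]) the optional clause raises ALIGN
   to BITS_PER_WORD only when HOW != align_abi.  */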
+
/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
@@ -5087,6 +5982,73 @@ gpr_or_gpr_p (rtx op0, rtx op1)
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
+/* Return true if this is a move direct operation between GPR registers and
+ floating point/VSX registers. */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+ int regno0, regno1;
+
+ if (!REG_P (op0) || !REG_P (op1))
+ return false;
+
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+ return false;
+
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+ return false;
+
+ if (INT_REGNO_P (regno0))
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+ else if (INT_REGNO_P (regno1))
+ {
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+ return true;
+
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+ return true;
+ }
+
+ return false;
+}
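A brief sketch of how the predicate behaves (register numbers follow the fixed rs6000 layout, GPRs 0-31 and FPRs 32-63; the snippet is illustrative only):

  rtx gpr = gen_rtx_REG (DImode, 5);       /* r5 */
  rtx fpr = gen_rtx_REG (DImode, 32 + 5);  /* f5, low half of vs5 */

  /* With -mdirect-move (ISA 2.07) both directions qualify; with only
     -mmfpgpr, just the FPR <-> GPR pairing is accepted.  */
  gcc_assert (direct_move_p (gpr, fpr) && direct_move_p (fpr, gpr));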
+
+/* Return true if this is a load or store quad operation. This function does
+ not handle the atomic quad memory instructions. */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+ bool ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = false;
+
+ else if (REG_P (op0) && MEM_P (op1))
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+ && quad_memory_operand (op1, GET_MODE (op1))
+ && !reg_overlap_mentioned_p (op0, op1));
+
+ else if (MEM_P (op0) && REG_P (op1))
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+ else
+ ret = false;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
+ ret ? "true" : "false");
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+ }
+
+ return ret;
+}
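/* Illustration: a quad load of the even/odd pair (r10, r11) from a
   16-byte quad_memory_operand address returns true here, while the
   same load into (r11, r12) fails quad_int_reg_operand, and a load
   whose address mentions r10 is rejected by the overlap check.  */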
+
/* Given an address, return a constant offset term if one exists. */
static rtx
@@ -5170,7 +6132,12 @@ reg_offset_addressing_ok_p (enum machine_mode mode)
case V4SImode:
case V2DFmode:
case V2DImode:
- /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
+ case V1TImode:
+ case TImode:
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
+ TImode is not a vector mode, if we want to use the VSX registers to
+ move it around, we need to restrict ourselves to reg+reg
+ addressing. */
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
return false;
break;
@@ -5184,6 +6151,13 @@ reg_offset_addressing_ok_p (enum machine_mode mode)
return false;
break;
+ case SDmode:
+ /* If we can do direct loads/stores of SDmode, restrict it to reg+reg
+ addressing for the LFIWZX and STFIWX instructions. */
+ if (TARGET_NO_SDMODE_STACK)
+ return false;
+ break;
+
default:
break;
}
@@ -5211,59 +6185,56 @@ virtual_stack_registers_memory_p (rtx op)
&& regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
-/* Return true if memory accesses to OP are known to never straddle
- a 32k boundary. */
+/* Return true if a MODE sized memory access to OP plus OFFSET
+ is known not to straddle a 32k boundary. */
static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
enum machine_mode mode)
{
tree decl, type;
- unsigned HOST_WIDE_INT dsize, dalign;
+ unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
if (GET_CODE (op) != SYMBOL_REF)
return false;
+ dsize = GET_MODE_SIZE (mode);
decl = SYMBOL_REF_DECL (op);
if (!decl)
{
- if (GET_MODE_SIZE (mode) == 0)
+ if (dsize == 0)
return false;
/* -fsection-anchors loses the original SYMBOL_REF_DECL when
replacing memory addresses with an anchor plus offset. We
could find the decl by rummaging around in the block->objects
VEC for the given offset but that seems like too much work. */
- dalign = 1;
+ dalign = BITS_PER_UNIT;
if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
&& SYMBOL_REF_ANCHOR_P (op)
&& SYMBOL_REF_BLOCK (op) != NULL)
{
struct object_block *block = SYMBOL_REF_BLOCK (op);
- HOST_WIDE_INT lsb, mask;
- /* Given the alignment of the block.. */
dalign = block->alignment;
- mask = dalign / BITS_PER_UNIT - 1;
-
- /* ..and the combined offset of the anchor and any offset
- to this block object.. */
offset += SYMBOL_REF_BLOCK_OFFSET (op);
- lsb = offset & -offset;
+ }
+ else if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ /* It would be nice to have get_pool_align().. */
+ enum machine_mode cmode = get_pool_mode (op);
- /* ..find how many bits of the alignment we know for the
- object. */
- mask &= lsb - 1;
- dalign = mask + 1;
+ dalign = GET_MODE_ALIGNMENT (cmode);
}
- return dalign >= GET_MODE_SIZE (mode);
}
-
- if (DECL_P (decl))
+ else if (DECL_P (decl))
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
- return true;
+ dalign = DECL_ALIGN (decl);
+ if (dsize == 0)
+ {
+ /* Allow BLKmode when the entire object is known to not
+ cross a 32k boundary. */
if (!DECL_SIZE_UNIT (decl))
return false;
@@ -5274,12 +6245,22 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
if (dsize > 32768)
return false;
- dalign = DECL_ALIGN_UNIT (decl);
- return dalign >= dsize;
+ return dalign / BITS_PER_UNIT >= dsize;
}
-
+ }
+ else
+ {
type = TREE_TYPE (decl);
+ dalign = TYPE_ALIGN (type);
+ if (CONSTANT_CLASS_P (decl))
+ dalign = CONSTANT_ALIGNMENT (decl, dalign);
+ else
+ dalign = DATA_ALIGNMENT (decl, dalign);
+
+ if (dsize == 0)
+ {
+ /* BLKmode, check the entire object. */
if (TREE_CODE (decl) == STRING_CST)
dsize = TREE_STRING_LENGTH (decl);
else if (TYPE_SIZE_UNIT (type)
@@ -5290,12 +6271,16 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
if (dsize > 32768)
return false;
- dalign = TYPE_ALIGN (type);
- if (CONSTANT_CLASS_P (decl))
- dalign = CONSTANT_ALIGNMENT (decl, dalign);
- else
- dalign = DATA_ALIGNMENT (decl, dalign);
- dalign /= BITS_PER_UNIT;
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
+ }
+
+ /* Find how many bits of the alignment we know for this access. */
+ mask = dalign / BITS_PER_UNIT - 1;
+ lsb = offset & -offset;
+ mask &= lsb - 1;
+ dalign = mask + 1;
+
return dalign >= dsize;
}
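/* Worked example: with dalign = 256 bits (32 bytes) and offset = 0x48,
   lsb = 0x48 & -0x48 = 0x8 and mask = (32 - 1) & (0x8 - 1) = 0x7, so
   dalign becomes 8: only 8-byte alignment is known at this access, and
   a 16-byte access (dsize = 16) is correctly rejected.  */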
@@ -5387,7 +6372,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
return false;
if (!reg_offset_addressing_ok_p (mode))
return virtual_stack_registers_memory_p (x);
- if (legitimate_constant_pool_address_p (x, mode, strict))
+ if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
return true;
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return false;
@@ -5416,7 +6401,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
/* If we are using VSX scalar loads, restrict ourselves to reg+reg
addressing. */
- if (mode == DFmode && VECTOR_MEM_VSX_P (DFmode))
+ if (VECTOR_MEM_VSX_P (mode))
return false;
if (!worst_case)
@@ -5428,12 +6413,14 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
break;
case TFmode:
- case TDmode:
- case TImode:
if (TARGET_E500_DOUBLE)
return (SPE_CONST_OFFSET_OK (offset)
&& SPE_CONST_OFFSET_OK (offset + 8));
+ /* fall through */
+ case TDmode:
+ case TImode:
+ case PTImode:
extra = 8;
if (!worst_case)
break;
@@ -5526,9 +6513,21 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
if (TARGET_ELF || TARGET_MACHO)
{
- if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
+ bool large_toc_ok;
+
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
return false;
- if (TARGET_TOC)
+ /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook usually
+ calls push_reload from the reload pass code. LEGITIMIZE_RELOAD_ADDRESS
+ recognizes some LO_SUM addresses as valid although this
+ function says the opposite. In most cases LRA can, through
+ various transformations, generate correct code for address reloads;
+ only some LO_SUM cases are beyond it. So we need to add code
+ analogous to that in rs6000_legitimize_reload_address for
+ LO_SUM here, saying that some addresses are still valid. */
+ large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+ && small_toc_ref (x, VOIDmode));
+ if (TARGET_TOC && ! large_toc_ok)
return false;
if (GET_MODE_NUNITS (mode) != 1)
return false;
@@ -5538,7 +6537,7 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
&& (mode == DFmode || mode == DDmode)))
return false;
- return CONSTANT_P (x);
+ return CONSTANT_P (x) || large_toc_ok;
}
return false;
@@ -5582,8 +6581,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
return force_reg (Pmode, XEXP (x, 0));
+ /* For TImode with load/store quad, restrict addresses to just a single
+ pointer, so it works with both GPRs and VSX registers. */
/* Make sure both operands are registers. */
- else if (GET_CODE (x) == PLUS)
+ else if (GET_CODE (x) == PLUS
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
return gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
force_reg (Pmode, XEXP (x, 1)));
@@ -5603,11 +6605,12 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
case TFmode:
case TDmode:
case TImode:
+ case PTImode:
/* As in legitimate_offset_address_p we do not assume
worst-case. The mode here is just a hint as to the registers
used. A TImode is usually in gprs, but may actually be in
fprs. Leave worst-case scenario for reload to handle via
- insn constraints. */
+ insn constraints. PTImode is always in GPRs. */
extra = 8;
break;
default:
@@ -6099,10 +7102,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
1, const0_rtx, Pmode);
r3 = gen_rtx_REG (Pmode, 3);
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ if (TARGET_64BIT)
insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ else
insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
+ }
else if (DEFAULT_ABI == ABI_V4)
insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
else
@@ -6121,10 +7127,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
1, const0_rtx, Pmode);
r3 = gen_rtx_REG (Pmode, 3);
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ if (TARGET_64BIT)
insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ else
insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
+ }
else if (DEFAULT_ABI == ABI_V4)
insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
else
@@ -6239,7 +7248,6 @@ use_toc_relative_ref (rtx sym)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
get_pool_mode (sym)))
|| (TARGET_CMODEL == CMODEL_MEDIUM
- && !CONSTANT_POOL_ADDRESS_P (sym)
&& SYMBOL_REF_LOCAL_P (sym)));
}
@@ -6338,7 +7346,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DDmode || mode == TDmode
|| mode == DImode))
- && VECTOR_MEM_NONE_P (mode))
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
{
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
@@ -6369,7 +7377,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
if (GET_CODE (x) == SYMBOL_REF
&& reg_offset_p
- && VECTOR_MEM_NONE_P (mode)
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
&& !SPE_VECTOR_MODE (mode)
#if TARGET_MACHO
&& DEFAULT_ABI == ABI_DARWIN
@@ -6395,6 +7403,8 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
mem is sufficiently aligned. */
&& mode != TFmode
&& mode != TDmode
+ && (mode != TImode || !TARGET_VSX_TIMODE)
+ && mode != PTImode
&& (mode != DImode || TARGET_POWERPC64)
&& ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
|| (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
@@ -6515,15 +7525,9 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
return 0;
if (legitimate_indirect_address_p (x, reg_ok_strict))
return 1;
- if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
- && !SPE_VECTOR_MODE (mode)
- && mode != TFmode
- && mode != TDmode
- /* Restrict addressing for DI because of our SUBREG hackery. */
- && !(TARGET_E500_DOUBLE
- && (mode == DFmode || mode == DDmode || mode == DImode))
- && TARGET_UPDATE
+ if (TARGET_UPDATE
+ && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
+ && mode_supports_pre_incdec_p (mode)
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
return 1;
if (virtual_stack_registers_memory_p (x))
@@ -6531,8 +7535,16 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (reg_offset_p && legitimate_small_data_p (mode, x))
return 1;
if (reg_offset_p
- && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+ && legitimate_constant_pool_address_p (x, mode,
+ reg_ok_strict || lra_in_progress))
return 1;
+ /* For TImode, if we have load/store quad and TImode in VSX registers, only
+ allow register indirect addresses. This will allow the values to go in
+ either GPRs or VSX registers without reloading. The vector types would
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
+ somewhat split, in that some uses are GPR based, and some VSX based. */
+ if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
+ return 0;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
&& reg_offset_p
@@ -6544,31 +7556,20 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
return 1;
if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
return 1;
- if (mode != TImode
- && mode != TFmode
+ if (mode != TFmode
&& mode != TDmode
&& ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
|| TARGET_POWERPC64
|| (mode != DFmode && mode != DDmode)
|| (TARGET_E500_DOUBLE && mode != DDmode))
&& (TARGET_POWERPC64 || mode != DImode)
+ && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
+ && mode != PTImode
&& !avoiding_indexed_address_p (mode)
&& legitimate_indexed_address_p (x, reg_ok_strict))
return 1;
- if (GET_CODE (x) == PRE_MODIFY
- && mode != TImode
- && mode != TFmode
- && mode != TDmode
- && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
- || TARGET_POWERPC64
- || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
- && (TARGET_POWERPC64 || mode != DImode)
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
- && !SPE_VECTOR_MODE (mode)
- /* Restrict addressing for DI because of our SUBREG hackery. */
- && !(TARGET_E500_DOUBLE
- && (mode == DFmode || mode == DDmode || mode == DImode))
- && TARGET_UPDATE
+ if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
+ && mode_supports_pre_modify_p (mode)
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
&& (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
reg_ok_strict, false)
@@ -6589,10 +7590,13 @@ rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
fprintf (stderr,
"\nrs6000_legitimate_address_p: return = %s, mode = %s, "
- "strict = %d, code = %s\n",
+ "strict = %d, reload = %s, code = %s\n",
ret ? "true" : "false",
GET_MODE_NAME (mode),
reg_ok_strict,
+ (reload_completed
+ ? "after"
+ : (reload_in_progress ? "progress" : "before")),
GET_RTX_NAME (GET_CODE (x)));
debug_rtx (x);
@@ -6758,7 +7762,7 @@ rs6000_conditional_register_usage (void)
/* The TOC register is not killed across calls in a way that is
visible to the compiler. */
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
call_really_used_regs[2] = 0;
if (DEFAULT_ABI == ABI_V4
@@ -6819,6 +7823,7 @@ rs6000_conditional_register_usage (void)
}
}
+
/* Try to output insns to set TARGET equal to the constant C if it can
be done in less than N insns. Do all computations in MODE.
Returns the place where the output has been placed if it can be
@@ -7006,7 +8011,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
}
/* Helper for the following. Get rid of [r+r] memory refs
- in cases where it won't work (TImode, TFmode, TDmode). */
+ in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
@@ -7031,6 +8036,131 @@ rs6000_eliminate_indexed_memrefs (rtx operands[2])
copy_addr_to_reg (XEXP (operands[1], 0)));
}
+/* Generate a vector of constants to permute MODE for a little-endian
+ storage operation by swapping the two halves of a vector. */
+static rtvec
+rs6000_const_vec (enum machine_mode mode)
+{
+ int i, subparts;
+ rtvec v;
+
+ switch (mode)
+ {
+ case V1TImode:
+ subparts = 1;
+ break;
+ case V2DFmode:
+ case V2DImode:
+ subparts = 2;
+ break;
+ case V4SFmode:
+ case V4SImode:
+ subparts = 4;
+ break;
+ case V8HImode:
+ subparts = 8;
+ break;
+ case V16QImode:
+ subparts = 16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ v = rtvec_alloc (subparts);
+
+ for (i = 0; i < subparts / 2; ++i)
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
+ for (i = subparts / 2; i < subparts; ++i)
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
+
+ return v;
+}
+
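A standalone sketch of the selector this function builds (the function name below is hypothetical): the constant vector lists the second half of the elements first, then the first half, e.g. {2, 3, 0, 1} for a four-element mode.

static void
half_swap_selector (int subparts, int *sel)
{
  int i;

  for (i = 0; i < subparts / 2; ++i)
    sel[i] = i + subparts / 2;   /* second half of the vector first */
  for (i = subparts / 2; i < subparts; ++i)
    sel[i] = i - subparts / 2;   /* then the first half */
}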
+/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
+ for a VSX load or store operation. */
+rtx
+rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
+{
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
+ return gen_rtx_VEC_SELECT (mode, source, par);
+}
+
+/* Emit a little-endian load from vector memory location SOURCE to VSX
+ register DEST in mode MODE. The load is done with two permuting
+ insns that represent an lxvd2x and an xxpermdi. */
+void
+rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
+{
+ rtx tmp, permute_mem, permute_reg;
+
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+ V1TImode). */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = gen_lowpart (V2DImode, dest);
+ source = adjust_address (source, V2DImode, 0);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ permute_mem = rs6000_gen_le_vsx_permute (source, mode);
+ permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
+}
+
+/* Emit a little-endian store to vector memory location DEST from VSX
+ register SOURCE in mode MODE. The store is done with two permuting
+ insns that represent an xxpermdi and an stxvd2x. */
+void
+rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
+{
+ rtx tmp, permute_src, permute_tmp;
+
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+ V1TImode). */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = adjust_address (dest, V2DImode, 0);
+ source = gen_lowpart (V2DImode, source);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ permute_src = rs6000_gen_le_vsx_permute (source, mode);
+ permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
+}
+
+/* Emit a sequence representing a little-endian VSX load or store,
+ moving data from SOURCE to DEST in mode MODE. This is done
+ separately from rs6000_emit_move to ensure it is called only
+ during expand. LE VSX loads and stores introduced later are
+ handled with a split. The expand-time RTL generation allows
+ us to optimize away redundant pairs of register-permutes. */
+void
+rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
+{
+ gcc_assert (!BYTES_BIG_ENDIAN
+ && VECTOR_MEM_VSX_P (mode)
+ && !gpr_or_gpr_p (dest, source)
+ && (MEM_P (source) ^ MEM_P (dest)));
+
+ if (MEM_P (source))
+ {
+ gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
+ rs6000_emit_le_vsx_load (dest, source, mode);
+ }
+ else
+ {
+ if (!REG_P (source))
+ source = force_reg (mode, source);
+ rs6000_emit_le_vsx_store (dest, source, mode);
+ }
+}
+
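A minimal model of the two-permute trick above, assuming two-element doubleword vectors: on little-endian, lxvd2x yields the doublewords swapped, and the xxpermdi-style permute swaps them back, so a permute pair that meets in a register cancels away. The store path is the mirror image. Types and names here are invented for illustration.

typedef struct { unsigned long long dw[2]; } v2di_model;

static v2di_model
swap_halves (v2di_model v)
{
  v2di_model r = { { v.dw[1], v.dw[0] } };
  return r;
}

static v2di_model
le_vsx_load_model (const v2di_model *mem)
{
  v2di_model tmp = swap_halves (*mem);  /* what lxvd2x yields on LE */
  return swap_halves (tmp);             /* the xxpermdi undoes the swap */
}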
/* Emit a move from SOURCE to DEST in mode MODE. */
void
rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
@@ -7149,8 +8279,71 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
cfun->machine->sdmode_stack_slot =
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
+
+ if (lra_in_progress
+ && mode == SDmode
+ && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+ && (REG_P (operands[1])
+ || (GET_CODE (operands[1]) == SUBREG
+ && REG_P (SUBREG_REG (operands[1])))))
+ {
+ int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+ ? SUBREG_REG (operands[1]) : operands[1]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[0]) != DDmode)
+ operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+ emit_insn (gen_movsd_store (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable ();
+ return;
+ }
+ if (lra_in_progress
+ && mode == SDmode
+ && (REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && REG_P (SUBREG_REG (operands[0]))))
+ && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+ {
+ int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+ ? SUBREG_REG (operands[0]) : operands[0]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[1]) != DDmode)
+ operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+ emit_insn (gen_movsd_load (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable ();
+ return;
+ }
+
if (reload_in_progress
&& mode == SDmode
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& MEM_P (operands[0])
&& rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
&& REG_P (operands[1]))
@@ -7163,7 +8356,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
}
else if (INT_REGNO_P (REGNO (operands[1])))
{
- rtx mem = adjust_address_nv (operands[0], mode, 4);
+ rtx mem = operands[0];
+ if (BYTES_BIG_ENDIAN)
+ mem = adjust_address_nv (mem, mode, 4);
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
emit_insn (gen_movsd_hardfloat (mem, operands[1]));
}
@@ -7175,6 +8370,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
&& mode == SDmode
&& REG_P (operands[0])
&& MEM_P (operands[1])
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
{
if (FP_REGNO_P (REGNO (operands[0])))
@@ -7185,7 +8381,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
}
else if (INT_REGNO_P (REGNO (operands[0])))
{
- rtx mem = adjust_address_nv (operands[1], mode, 4);
+ rtx mem = operands[1];
+ if (BYTES_BIG_ENDIAN)
+ mem = adjust_address_nv (mem, mode, 4);
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
emit_insn (gen_movsd_hardfloat (operands[0], mem));
}
@@ -7230,6 +8428,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
case V1DImode:
case V2DFmode:
case V2DImode:
+ case V1TImode:
if (CONSTANT_P (operands[1])
&& !easy_vector_constant (operands[1], mode))
operands[1] = force_const_mem (mode, operands[1]);
@@ -7388,6 +8587,11 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
break;
case TImode:
+ if (!VECTOR_MEM_VSX_P (TImode))
+ rs6000_eliminate_indexed_memrefs (operands);
+ break;
+
+ case PTImode:
rs6000_eliminate_indexed_memrefs (operands);
break;
@@ -7426,18 +8630,231 @@ rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
}
/* Nonzero if we can use a floating-point register to pass this arg. */
-#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
+#define USE_FP_FOR_ARG_P(CUM,MODE) \
(SCALAR_FLOAT_MODE_P (MODE) \
&& (CUM)->fregno <= FP_ARG_MAX_REG \
&& TARGET_HARD_FLOAT && TARGET_FPRS)
/* Nonzero if we can use an AltiVec register to pass this arg. */
-#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
+#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
(ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
&& (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
&& TARGET_ALTIVEC_ABI \
&& (NAMED))
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ or vector type. If a non-floating point or vector type is found, or
+ if a floating point or vector type that doesn't match a non-VOIDmode
+ *MODEP is found, then return -1, otherwise return the count in the
+ sub-tree. */
+
+static int
+rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
+
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (!SCALAR_FLOAT_MODE_P (mode))
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (!SCALAR_FLOAT_MODE_P (mode))
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
+ return -1;
+
+ /* Use V4SImode as representative of all 128-bit vector types. */
+ size = int_size_in_bytes (type);
+ switch (size)
+ {
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
+ }
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ default:
+ break;
+ }
+
+ return -1;
+}
+
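For illustration, a type this walk accepts as a homogeneous aggregate: the REAL_TYPE fields count one element each and the COMPLEX_TYPE field counts two, so the recursion returns 4 with *modep == DFmode, and the padding check passes because 4 * 64 bits equals the struct size.

struct homogeneous_example
{
  double a;            /* REAL_TYPE: contributes 1 */
  double b;            /* REAL_TYPE: contributes 1 */
  _Complex double c;   /* COMPLEX_TYPE: contributes 2 */
};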
+/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
+ float or vector aggregate that shall be passed in FP/vector registers
+ according to the ELFv2 ABI, return the homogeneous element mode in
+ *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
+
+ Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
+
+static bool
+rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
+ enum machine_mode *elt_mode,
+ int *n_elts)
+{
+ /* Note that we do not accept complex types at the top level as
+ homogeneous aggregates; these types are handled via the
+ targetm.calls.split_complex_arg mechanism. Complex types
+ can be elements of homogeneous aggregates, however. */
+ if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
+ {
+ enum machine_mode field_mode = VOIDmode;
+ int field_count = rs6000_aggregate_candidate (type, &field_mode);
+
+ if (field_count > 0)
+ {
+ int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
+ (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
+
+ /* The ELFv2 ABI allows homogeneous aggregates to occupy
+ up to AGGR_ARG_NUM_REG registers. */
+ if (field_count * n_regs <= AGGR_ARG_NUM_REG)
+ {
+ if (elt_mode)
+ *elt_mode = field_mode;
+ if (n_elts)
+ *n_elts = field_count;
+ return true;
+ }
+ }
+ }
+
+ if (elt_mode)
+ *elt_mode = mode;
+ if (n_elts)
+ *n_elts = 1;
+ return false;
+}
+
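The register-count check can be mimicked with a small sketch (assuming AGGR_ARG_NUM_REG is 8, as the ELFv2 support in this patch defines it): four _Decimal128 fields need 4 * 2 == 8 FPRs and still qualify, while a fifth field would push the aggregate to memory.

static int
fits_in_param_regs (int field_count, int field_size_bytes, int is_float)
{
  int n_regs = is_float ? (field_size_bytes + 7) >> 3 : 1;
  return field_count * n_regs <= 8;  /* AGGR_ARG_NUM_REG, assumed 8 */
}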
/* Return a nonzero value to say to return the function value in
memory, just as large structures are always returned. TYPE will be
the data type of the value, and FNTYPE will be the type of the
@@ -7490,6 +8907,16 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* Otherwise fall through to more conventional ABI rules. */
}
+ /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
+ NULL, NULL))
+ return false;
+
+ /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
+ if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
+ return false;
+
if (AGGREGATE_TYPE_P (type)
&& (aix_struct_return
|| (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
@@ -7521,6 +8948,19 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
return false;
}
+/* Specify whether values returned in registers should be at the most
+ significant end of a register. We want aggregates returned by
+ value to match the way aggregates are passed to functions. */
+
+static bool
+rs6000_return_in_msb (const_tree valtype)
+{
+ return (DEFAULT_ABI == ABI_ELFv2
+ && BYTES_BIG_ENDIAN
+ && AGGREGATE_TYPE_P (valtype)
+ && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
+}
+
#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Return TRUE if a call to function FNDECL may be one that
potentially affects the function calling ABI of the object file. */
@@ -7657,7 +9097,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
static bool
rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
- if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
return must_pass_in_stack_var_size (mode, type);
else
return must_pass_in_stack_var_size_or_pad (mode, type);
@@ -7738,6 +9178,11 @@ function_arg_padding (enum machine_mode mode, const_tree type)
static unsigned int
rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
{
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
if (DEFAULT_ABI == ABI_V4
&& (GET_MODE_SIZE (mode) == 8
|| (TARGET_HARD_FLOAT
@@ -7749,12 +9194,13 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
&& int_size_in_bytes (type) >= 8
&& int_size_in_bytes (type) < 16))
return 64;
- else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) >= 16))
return 128;
- else if (TARGET_MACHO
- && rs6000_darwin64_abi
+ else if (((TARGET_MACHO && rs6000_darwin64_abi)
+ || DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
&& mode == BLKmode
&& type && TYPE_ALIGN (type) > 64)
return 128;
@@ -7762,6 +9208,16 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
return PARM_BOUNDARY;
}
+/* The offset in words to the start of the parameter save area. */
+
+static unsigned int
+rs6000_parm_offset (void)
+{
+ return (DEFAULT_ABI == ABI_V4 ? 2
+ : DEFAULT_ABI == ABI_ELFv2 ? 4
+ : 6);
+}
+
/* For a function parm of MODE and TYPE, return the starting word in
the parameter area. NWORDS of the parameter area are already used. */
@@ -7770,11 +9226,9 @@ rs6000_parm_start (enum machine_mode mode, const_tree type,
unsigned int nwords)
{
unsigned int align;
- unsigned int parm_offset;
align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
- parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
- return nwords + (-(parm_offset + nwords) & align);
+ return nwords + (-(rs6000_parm_offset () + nwords) & align);
}
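A hedged worked example of this rounding, using the ELFv2 offset of 4 words from rs6000_parm_offset: with 3 words already used and a 16-byte-aligned argument (align mask 1 in 64-bit mode), the result is 3 + (-(4 + 3) & 1) == 4, so offset plus start is 8 words and the 16-byte alignment holds. The model below is a standalone restatement, not patch code.

static unsigned int
parm_start_model (unsigned int parm_offset, unsigned int nwords,
                  unsigned int align_mask)
{
  return nwords + (-(parm_offset + nwords) & align_mask);
}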
/* Compute the size (in words) of a function argument. */
@@ -7881,7 +9335,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
if (TREE_CODE (ftype) == RECORD_TYPE)
rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
- else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
+ else if (USE_FP_FOR_ARG_P (cum, mode))
{
unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
@@ -7922,7 +9376,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
else
cum->words += n_fpregs;
}
- else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
+ else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
{
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
cum->vregno++;
@@ -7959,6 +9413,11 @@ static void
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
const_tree type, bool named, int depth)
{
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
/* Only tick off an argument if we're not recursing. */
if (depth == 0)
cum->nargs_prototype--;
@@ -7979,15 +9438,16 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
#endif
if (TARGET_ALTIVEC_ABI
- && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) == 16)))
{
bool stack = false;
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
{
- cum->vregno++;
+ cum->vregno += n_elts;
+
if (!TARGET_ALTIVEC)
error ("cannot pass argument in vector register because"
" altivec instructions are disabled, use -maltivec"
@@ -7996,7 +9456,8 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
/* PowerPC64 Linux and AIX allocate GPRs for a vector argument
even if it is going to be passed in a vector register.
Darwin does the same for variable-argument functions. */
- if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && TARGET_64BIT)
|| (cum->stdarg && DEFAULT_ABI != ABI_V4))
stack = true;
}
@@ -8007,15 +9468,13 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
{
int align;
- /* Vector parameters must be 16-byte aligned. This places
- them at 2 mod 4 in terms of words in 32-bit mode, since
- the parameter save area starts at offset 24 from the
- stack. In 64-bit mode, they just have to start on an
- even word, since the parameter save area is 16-byte
- aligned. Space for GPRs is reserved even if the argument
- will be passed in memory. */
+ /* Vector parameters must be 16-byte aligned. In 32-bit
+ mode this means we need to take into account the offset
+ to the parameter save area. In 64-bit mode, they just
+ have to start on an even word, since the parameter save
+ area is 16-byte aligned. */
if (TARGET_32BIT)
- align = (2 - cum->words) & 3;
+ align = -(rs6000_parm_offset () + cum->words) & 3;
else
align = cum->words & 1;
cum->words += align + rs6000_arg_size (mode, type);
@@ -8140,15 +9599,15 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
cum->words = align_words + n_words;
- if (SCALAR_FLOAT_MODE_P (mode)
+ if (SCALAR_FLOAT_MODE_P (elt_mode)
&& TARGET_HARD_FLOAT && TARGET_FPRS)
{
/* _Decimal128 must be passed in an even/odd float register pair.
This assumes that the register number is odd when fregno is
odd. */
- if (mode == TDmode && (cum->fregno % 2) == 1)
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
cum->fregno++;
- cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+ cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
}
if (TARGET_DEBUG_ARG)
@@ -8358,7 +9817,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
if (TREE_CODE (ftype) == RECORD_TYPE)
rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
- else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
+ else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
{
unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
#if 0
@@ -8386,7 +9845,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
if (mode == TFmode || mode == TDmode)
cum->fregno++;
}
- else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
+ else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
{
rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
rvec[(*k)++]
@@ -8503,6 +9962,84 @@ rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}
+/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
+ but must also be copied into the parameter save area starting at
+ offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
+ to the GPRs and/or memory. Return the number of elements used. */
+
+static int
+rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
+ int align_words, rtx *rvec)
+{
+ int k = 0;
+
+ if (align_words < GP_ARG_NUM_REG)
+ {
+ int n_words = rs6000_arg_size (mode, type);
+
+ if (align_words + n_words > GP_ARG_NUM_REG
+ || mode == BLKmode
+ || (TARGET_32BIT && TARGET_POWERPC64))
+ {
+ /* If this is partially on the stack, then we only
+ include the portion actually in registers here. */
+ enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
+ int i = 0;
+
+ if (align_words + n_words > GP_ARG_NUM_REG)
+ {
+ /* Not all of the arg fits in gprs. Say that it goes in memory
+ too, using a magic NULL_RTX component. Also see comment in
+ rs6000_mixed_function_arg for why the normal
+ function_arg_partial_nregs scheme doesn't work in this case. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+
+ do
+ {
+ rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
+ rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
+ }
+ else
+ {
+ /* The whole arg fits in gprs. */
+ rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+ }
+ }
+ else
+ {
+ /* It's entirely in memory. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+
+ return k;
+}
+
+/* RVEC is a vector of K components of an argument of mode MODE.
+ Construct the final function_arg return value from it. */
+
+static rtx
+rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
+{
+ gcc_assert (k >= 1);
+
+ /* Avoid returning a PARALLEL in the trivial cases. */
+ if (k == 1)
+ {
+ if (XEXP (rvec[0], 0) == NULL_RTX)
+ return NULL_RTX;
+
+ if (GET_MODE (XEXP (rvec[0], 0)) == mode)
+ return XEXP (rvec[0], 0);
+ }
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+}
+
/* Determine where to put an argument to a function.
Value is zero to push the argument on the stack,
or a hard register in which to store the argument.
@@ -8537,6 +10074,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
enum rs6000_abi abi = DEFAULT_ABI;
+ enum machine_mode elt_mode;
+ int n_elts;
/* Return a marker to indicate whether CR1 needs to set or clear the
bit that V.4 uses to say fp args were passed in registers.
@@ -8563,6 +10102,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
}
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
{
rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
@@ -8571,33 +10112,30 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* Else fall through to usual handling. */
}
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
- if (TARGET_64BIT && ! cum->prototype)
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
{
- /* Vector parameters get passed in vector register
- and also in GPRs or memory, in absence of prototype. */
- int align_words;
- rtx slot;
- align_words = (cum->words + 1) & ~1;
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+ rtx r, off;
+ int i, k = 0;
- if (align_words >= GP_ARG_NUM_REG)
+ /* Do we also need to pass this argument in the parameter
+ save area? */
+ if (TARGET_64BIT && ! cum->prototype)
{
- slot = NULL_RTX;
+ int align_words = (cum->words + 1) & ~1;
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
}
- else
+
+ /* Describe where this argument goes in the vector registers. */
+ for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
{
- slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ r = gen_rtx_REG (elt_mode, cum->vregno + i);
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
}
- return gen_rtx_PARALLEL (mode,
- gen_rtvec (2,
- gen_rtx_EXPR_LIST (VOIDmode,
- slot, const0_rtx),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (mode, cum->vregno),
- const0_rtx)));
+
+ return rs6000_finish_function_arg (mode, rvec, k);
}
- else
- return gen_rtx_REG (mode, cum->vregno);
else if (TARGET_ALTIVEC_ABI
&& (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
@@ -8612,13 +10150,13 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
int align, align_words, n_words;
enum machine_mode part_mode;
- /* Vector parameters must be 16-byte aligned. This places them at
- 2 mod 4 in terms of words in 32-bit mode, since the parameter
- save area starts at offset 24 from the stack. In 64-bit mode,
- they just have to start on an even word, since the parameter
- save area is 16-byte aligned. */
+ /* Vector parameters must be 16-byte aligned. In 32-bit
+ mode this means we need to take into account the offset
+ to the parameter save area. In 64-bit mode, they just
+ have to start on an even word, since the parameter save
+ area is 16-byte aligned. */
if (TARGET_32BIT)
- align = (2 - cum->words) & 3;
+ align = -(rs6000_parm_offset () + cum->words) & 3;
else
align = cum->words & 1;
align_words = cum->words + align;
@@ -8696,101 +10234,50 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* _Decimal128 must be passed in an even/odd float register pair.
This assumes that the register number is odd when fregno is odd. */
- if (mode == TDmode && (cum->fregno % 2) == 1)
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
cum->fregno++;
- if (USE_FP_FOR_ARG_P (cum, mode, type))
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
{
- rtx rvec[GP_ARG_NUM_REG + 1];
- rtx r;
- int k;
- bool needs_psave;
- enum machine_mode fmode = mode;
- unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+ rtx r, off;
+ int i, k = 0;
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
- if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
- {
- /* Currently, we only ever need one reg here because complex
- doubles are split. */
- gcc_assert (cum->fregno == FP_ARG_MAX_REG
- && (fmode == TFmode || fmode == TDmode));
+ /* Do we also need to pass this argument in the parameter
+ save area? */
+ if (type && (cum->nargs_prototype <= 0
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && TARGET_XL_COMPAT
+ && align_words >= GP_ARG_NUM_REG)))
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
- /* Long double or _Decimal128 split over regs and memory. */
+ /* Describe where this argument goes in the fprs. */
+ for (i = 0; i < n_elts
+ && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
+ {
+ /* Check if the argument is split over registers and memory.
+ This can only ever happen for long double or _Decimal128;
+ complex types are handled via split_complex_arg. */
+ enum machine_mode fmode = elt_mode;
+ if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
+ {
+ gcc_assert (fmode == TFmode || fmode == TDmode);
fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
}
- /* Do we also need to pass this arg in the parameter save
- area? */
- needs_psave = (type
- && (cum->nargs_prototype <= 0
- || (DEFAULT_ABI == ABI_AIX
- && TARGET_XL_COMPAT
- && align_words >= GP_ARG_NUM_REG)));
-
- if (!needs_psave && mode == fmode)
- return gen_rtx_REG (fmode, cum->fregno);
-
- k = 0;
- if (needs_psave)
- {
- /* Describe the part that goes in gprs or the stack.
- This piece must come first, before the fprs. */
- if (align_words < GP_ARG_NUM_REG)
- {
- unsigned long n_words = rs6000_arg_size (mode, type);
-
- if (align_words + n_words > GP_ARG_NUM_REG
- || (TARGET_32BIT && TARGET_POWERPC64))
- {
- /* If this is partially on the stack, then we only
- include the portion actually in registers here. */
- enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
- rtx off;
- int i = 0;
- if (align_words + n_words > GP_ARG_NUM_REG)
- /* Not all of the arg fits in gprs. Say that it
- goes in memory too, using a magic NULL_RTX
- component. Also see comment in
- rs6000_mixed_function_arg for why the normal
- function_arg_partial_nregs scheme doesn't work
- in this case. */
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
- const0_rtx);
- do
- {
- r = gen_rtx_REG (rmode,
- GP_ARG_MIN_REG + align_words);
- off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
- }
- while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
- }
- else
- {
- /* The whole arg fits in gprs. */
- r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
- }
- }
- else
- /* It's entirely in memory. */
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
}
- /* Describe where this piece goes in the fprs. */
- r = gen_rtx_REG (fmode, cum->fregno);
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
-
- return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+ return rs6000_finish_function_arg (mode, rvec, k);
}
else if (align_words < GP_ARG_NUM_REG)
- {
+ {
if (TARGET_32BIT && TARGET_POWERPC64)
return rs6000_mixed_function_arg (mode, type, align_words);
- if (mode == BLKmode)
- mode = Pmode;
-
return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
}
else
@@ -8809,42 +10296,62 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ bool passed_in_gprs = true;
int ret = 0;
int align_words;
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
if (DEFAULT_ABI == ABI_V4)
return 0;
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
- && cum->nargs_prototype >= 0)
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
+ {
+ /* If we are passing this arg in the fixed parameter save area
+ (gprs or memory) as well as VRs, we do not use the partial
+ bytes mechanism; instead, rs6000_function_arg will return a
+ PARALLEL including a memory element as necessary. */
+ if (TARGET_64BIT && ! cum->prototype)
return 0;
+ /* Otherwise, we pass in VRs only. Check for partial copies. */
+ passed_in_gprs = false;
+ if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
+ ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
+ }
+
/* In this complicated case we just disable the partial_nregs code. */
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
return 0;
align_words = rs6000_parm_start (mode, type, cum->words);
- if (USE_FP_FOR_ARG_P (cum, mode, type))
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
{
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
+
/* If we are passing this arg in the fixed parameter save area
- (gprs or memory) as well as fprs, then this function should
- return the number of partial bytes passed in the parameter
- save area rather than partial bytes passed in fprs. */
+ (gprs or memory) as well as FPRs, we do not use the partial
+ bytes mechanism; instead, rs6000_function_arg will return a
+ PARALLEL including a memory element as necessary. */
if (type
&& (cum->nargs_prototype <= 0
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& TARGET_XL_COMPAT
&& align_words >= GP_ARG_NUM_REG)))
return 0;
- else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
- > FP_ARG_MAX_REG + 1)
- ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
- else if (cum->nargs_prototype >= 0)
- return 0;
+
+ /* Otherwise, we pass in FPRs only. Check for partial copies. */
+ passed_in_gprs = false;
+ if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
+ ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
+ * MIN (8, GET_MODE_SIZE (elt_mode)));
}
- if (align_words < GP_ARG_NUM_REG
+ if (passed_in_gprs
+ && align_words < GP_ARG_NUM_REG
&& GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
@@ -8925,6 +10432,139 @@ rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
return 0;
}
+/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
+ already processed. Return true if the parameter must be passed
+ (fully or partially) on the stack. */
+
+static bool
+rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
+{
+ enum machine_mode mode;
+ int unsignedp;
+ rtx entry_parm;
+
+ /* Catch errors. */
+ if (type == NULL || type == error_mark_node)
+ return true;
+
+ /* Handle types with no storage requirement. */
+ if (TYPE_MODE (type) == VOIDmode)
+ return false;
+
+ /* Handle complex types. */
+ if (TREE_CODE (type) == COMPLEX_TYPE)
+ return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
+ || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
+
+ /* Handle transparent aggregates. */
+ if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
+ && TYPE_TRANSPARENT_AGGR (type))
+ type = TREE_TYPE (first_field (type));
+
+ /* See if this arg was passed by invisible reference. */
+ if (pass_by_reference (get_cumulative_args (args_so_far),
+ TYPE_MODE (type), type, true))
+ type = build_pointer_type (type);
+
+ /* Find mode as it is passed by the ABI. */
+ unsignedp = TYPE_UNSIGNED (type);
+ mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
+
+ /* If we must pass in stack, we need a stack. */
+ if (rs6000_must_pass_in_stack (mode, type))
+ return true;
+
+ /* If there is no incoming register, we need a stack. */
+ entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
+ if (entry_parm == NULL)
+ return true;
+
+ /* Likewise if we need to pass both in registers and on the stack. */
+ if (GET_CODE (entry_parm) == PARALLEL
+ && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
+ return true;
+
+ /* Also true if we're partially in registers and partially not. */
+ if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
+ return true;
+
+ /* Update info on where next arg arrives in registers. */
+ rs6000_function_arg_advance (args_so_far, mode, type, true);
+ return false;
+}
+
+/* Return true if FUN has no prototype, has a variable argument
+ list, or passes any parameter in memory. */
+
+static bool
+rs6000_function_parms_need_stack (tree fun)
+{
+ function_args_iterator args_iter;
+ tree arg_type;
+ CUMULATIVE_ARGS args_so_far_v;
+ cumulative_args_t args_so_far;
+
+ if (!fun)
+ /* Must be a libcall, all of which only use reg parms. */
+ return false;
+ if (!TYPE_P (fun))
+ fun = TREE_TYPE (fun);
+
+ /* Varargs functions need the parameter save area. */
+ if (!prototype_p (fun) || stdarg_p (fun))
+ return true;
+
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
+ args_so_far = pack_cumulative_args (&args_so_far_v);
+
+ if (aggregate_value_p (TREE_TYPE (fun), fun))
+ {
+ tree type = build_pointer_type (TREE_TYPE (fun));
+ rs6000_parm_needs_stack (args_so_far, type);
+ }
+
+ FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
+ return true;
+
+ return false;
+}
+
+/* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
+ usually a constant depending on the ABI. However, in the ELFv2 ABI
+ the register parameter area is optional when calling a function that
+ has a prototype in scope, has no variable argument list, and passes
+ all parameters in registers. */
+
+int
+rs6000_reg_parm_stack_space (tree fun)
+{
+ int reg_parm_stack_space;
+
+ switch (DEFAULT_ABI)
+ {
+ default:
+ reg_parm_stack_space = 0;
+ break;
+
+ case ABI_AIX:
+ case ABI_DARWIN:
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
+ break;
+
+ case ABI_ELFv2:
+ /* ??? Recomputing this every time is a bit expensive. Is there
+ a place to cache this information? */
+ if (rs6000_function_parms_need_stack (fun))
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
+ else
+ reg_parm_stack_space = 0;
+ break;
+ }
+
+ return reg_parm_stack_space;
+}
+
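Illustrative consequence of the ELFv2 case (declarations only, chosen arbitrarily): a prototyped, non-variadic callee whose parameters all fit in registers gets no parameter save area, while a varargs callee still gets the full 64 bytes on 64-bit.

extern double dot2 (double x, double y);   /* reg_parm_stack_space == 0 */
extern int printf (const char *fmt, ...);  /* reg_parm_stack_space == 64 */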
static void
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
{
@@ -9306,8 +10946,10 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
We don't need to check for pass-by-reference because of the test above.
We can return a simplified answer, since we know there's no offset to add. */
- if (TARGET_MACHO
- && rs6000_darwin64_abi
+ if (((TARGET_MACHO
+ && rs6000_darwin64_abi)
+ || DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
&& integer_zerop (TYPE_SIZE (type)))
{
unsigned HOST_WIDE_INT align, boundary;
@@ -9602,6 +11244,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9615,6 +11258,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9633,6 +11277,7 @@ static const struct builtin_description bdesc_3arg[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9646,6 +11291,7 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK, ICODE, NAME, ENUM },
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9664,6 +11310,7 @@ static const struct builtin_description bdesc_dst[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9677,6 +11324,7 @@ static const struct builtin_description bdesc_dst[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9693,6 +11341,7 @@ static const struct builtin_description bdesc_2arg[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9704,6 +11353,7 @@ static const struct builtin_description bdesc_2arg[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
@@ -9725,6 +11375,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9736,6 +11387,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
@@ -9755,6 +11407,7 @@ static const struct builtin_description bdesc_spe_predicates[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9768,6 +11421,7 @@ static const struct builtin_description bdesc_spe_predicates[] =
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9785,6 +11439,7 @@ static const struct builtin_description bdesc_spe_evsel[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9796,6 +11451,7 @@ static const struct builtin_description bdesc_spe_evsel[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
@@ -9816,6 +11472,7 @@ static const struct builtin_description bdesc_paired_preds[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9829,6 +11486,7 @@ static const struct builtin_description bdesc_paired_preds[] =
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9846,8 +11504,9 @@ static const struct builtin_description bdesc_abs[] =
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
-#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_D
+#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9861,6 +11520,7 @@ static const struct builtin_description bdesc_abs[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9871,17 +11531,49 @@ static const struct builtin_description bdesc_1arg[] =
#include "rs6000-builtin.def"
};
+/* HTM builtins. */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
+#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
+ { MASK, ICODE, NAME, ENUM },
+
+#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
+
+static const struct builtin_description bdesc_htm[] =
+{
+#include "rs6000-builtin.def"
+};
+
+#undef RS6000_BUILTIN_1
+#undef RS6000_BUILTIN_2
+#undef RS6000_BUILTIN_3
+#undef RS6000_BUILTIN_A
+#undef RS6000_BUILTIN_D
+#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
+#undef RS6000_BUILTIN_P
+#undef RS6000_BUILTIN_Q
+#undef RS6000_BUILTIN_S
+
/* Return true if a builtin function is overloaded. */
bool
rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
@@ -10189,6 +11881,100 @@ paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
return target;
}
+/* Return a constant vector for use as a little-endian permute control vector
+ to reverse the order of elements of the given vector mode. */
+static rtx
+swap_selector_for_mode (enum machine_mode mode)
+{
+ /* These are little-endian vectors, so their elements are reversed
+ from what you would normally expect for a permute control vector. */
+ unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
+ unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
+ unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
+ unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned int *swaparray, i;
+ rtx perm[16];
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ swaparray = swap2;
+ break;
+ case V4SFmode:
+ case V4SImode:
+ swaparray = swap4;
+ break;
+ case V8HImode:
+ swaparray = swap8;
+ break;
+ case V16QImode:
+ swaparray = swap16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (swaparray[i]);
+
+ return force_reg (V16QImode,
+ gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
+}
+
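A hedged way to read these tables: since the CONST_VECTOR is emitted in little-endian element order, the control bytes a big-endian-numbered vperm actually sees are, roughly speaking, each array read back to front. Reversing swap2, for instance, yields {8,...,15,0,...,7}, the familiar swap-the-doublewords selector. A toy restatement:

static void
selector_as_seen_by_vperm (const unsigned int le[16], unsigned int be[16])
{
  int i;

  for (i = 0; i < 16; ++i)
    be[i] = le[15 - i];
}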
+/* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
+ with -maltivec=be specified. Issue the load followed by an element-reversing
+ permute. */
+void
+altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ rtx tmp = gen_reg_rtx (mode);
+ rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
+ rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
+
+ gcc_assert (REG_P (op0));
+ emit_insn (par);
+ emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
+}
+
+/* Generate code for a "stvx" or "stvxl" built-in for a little endian target
+ with -maltivec=be specified. Issue the store preceded by an element-reversing
+ permute. */
+void
+altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ rtx tmp = gen_reg_rtx (mode);
+ rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
+ rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm;
+
+ gcc_assert (REG_P (op1));
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
+ emit_insn (par);
+}
+
+/* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
+ specified. Issue the store preceded by an element-reversing permute. */
+void
+altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ rtx tmp = gen_reg_rtx (mode);
+ rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm;
+
+ gcc_assert (REG_P (op1));
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
+ emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
+}
+
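The net effect of all three expanders can be modeled in a few lines: the raw lvx/stvx is bracketed by a vperm with the selector above, so the program-visible vector has its elements reversed, matching the -maltivec=be numbering. Shown for a four-element vector (model only):

static void
reverse_elements_v4 (const unsigned int src[4], unsigned int dst[4])
{
  int i;

  for (i = 0; i < 4; ++i)
    dst[i] = src[3 - i];
}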
static rtx
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
{
@@ -10351,6 +12137,197 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
return NULL_RTX;
}
+/* Return the appropriate SPR number associated with the given builtin. */
+static inline HOST_WIDE_INT
+htm_spr_num (enum rs6000_builtins code)
+{
+ if (code == HTM_BUILTIN_GET_TFHAR
+ || code == HTM_BUILTIN_SET_TFHAR)
+ return TFHAR_SPR;
+ else if (code == HTM_BUILTIN_GET_TFIAR
+ || code == HTM_BUILTIN_SET_TFIAR)
+ return TFIAR_SPR;
+ else if (code == HTM_BUILTIN_GET_TEXASR
+ || code == HTM_BUILTIN_SET_TEXASR)
+ return TEXASR_SPR;
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
+ || code == HTM_BUILTIN_SET_TEXASRU);
+ return TEXASRU_SPR;
+}
+
+/* Return the appropriate SPR regno associated with the given builtin. */
+static inline HOST_WIDE_INT
+htm_spr_regno (enum rs6000_builtins code)
+{
+ if (code == HTM_BUILTIN_GET_TFHAR
+ || code == HTM_BUILTIN_SET_TFHAR)
+ return TFHAR_REGNO;
+ else if (code == HTM_BUILTIN_GET_TFIAR
+ || code == HTM_BUILTIN_SET_TFIAR)
+ return TFIAR_REGNO;
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASR
+ || code == HTM_BUILTIN_SET_TEXASR
+ || code == HTM_BUILTIN_GET_TEXASRU
+ || code == HTM_BUILTIN_SET_TEXASRU);
+ return TEXASR_REGNO;
+}
+
+/* Return the correct ICODE value depending on whether we are
+ setting or reading the HTM SPRs. */
+static inline enum insn_code
+rs6000_htm_spr_icode (bool nonvoid)
+{
+ if (nonvoid)
+ return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
+ else
+ return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
+}
+
+/* Expand the HTM builtin in EXP and store the result in TARGET.
+ Store true in *EXPANDEDP if we found a builtin to expand. */
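+/* For example, __builtin_tbegin (0) is nonvoid and takes one unsigned
+   argument, so it reaches the GEN_FCN call below with nopnds == 2: the
+   target register plus the constant 0.  */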
+static rtx
+htm_expand_builtin (tree exp, rtx target, bool * expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+ enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d;
+ size_t i;
+
+ *expandedp = false;
+
+ /* Expand the HTM builtins. */
+ d = bdesc_htm;
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
+ if (d->code == fcode)
+ {
+ rtx op[MAX_HTM_OPERANDS], pat;
+ int nopnds = 0;
+ tree arg;
+ call_expr_arg_iterator iter;
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ enum insn_code icode = d->icode;
+
+ if (attr & RS6000_BTC_SPR)
+ icode = rs6000_htm_spr_icode (nonvoid);
+
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+
+ if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
+ return NULL_RTX;
+
+ insn_op = &insn_data[icode].operand[nopnds];
+
+ op[nopnds] = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ int arg_num = (nonvoid) ? nopnds : nopnds + 1;
+ if (!CONST_INT_P (op[nopnds]))
+ error ("argument %d must be an unsigned literal", arg_num);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", arg_num);
+ return const0_rtx;
+ }
+ op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
+ }
+
+ nopnds++;
+ }
+
+ /* Handle the builtins for extended mnemonics. These accept
+ no arguments, but map to builtins that take arguments. */
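+	  /* For example, __builtin_tresume () is expanded exactly as if
+	     __builtin_tsr (1) had been written.  */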
+ switch (fcode)
+ {
+ case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
+ case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
+ op[nopnds++] = GEN_INT (1);
+#ifdef ENABLE_CHECKING
+ attr |= RS6000_BTC_UNARY;
+#endif
+ break;
+ case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
+ op[nopnds++] = GEN_INT (0);
+#ifdef ENABLE_CHECKING
+ attr |= RS6000_BTC_UNARY;
+#endif
+ break;
+ default:
+ break;
+ }
+
+ /* If this builtin accesses SPRs, then pass in the appropriate
+ SPR number and SPR regno as the last two operands. */
+ if (attr & RS6000_BTC_SPR)
+ {
+ op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
+ op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
+ }
+
+#ifdef ENABLE_CHECKING
+ int expected_nopnds = 0;
+ if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
+ expected_nopnds = 1;
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
+ expected_nopnds = 2;
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
+ expected_nopnds = 3;
+ if (!(attr & RS6000_BTC_VOID))
+ expected_nopnds += 1;
+ if (attr & RS6000_BTC_SPR)
+ expected_nopnds += 2;
+
+ gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
+#endif
+
+ switch (nopnds)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (NULL_RTX);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ *expandedp = true;
+ if (nonvoid)
+ return target;
+ return const0_rtx;
+ }
+
+ return NULL_RTX;
+}
+
static rtx
rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
{
@@ -10416,7 +12393,15 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
}
}
else if (icode == CODE_FOR_vsx_set_v2df
- || icode == CODE_FOR_vsx_set_v2di)
+ || icode == CODE_FOR_vsx_set_v2di
+ || icode == CODE_FOR_bcdadd
+ || icode == CODE_FOR_bcdadd_lt
+ || icode == CODE_FOR_bcdadd_eq
+ || icode == CODE_FOR_bcdadd_gt
+ || icode == CODE_FOR_bcdsub
+ || icode == CODE_FOR_bcdsub_lt
+ || icode == CODE_FOR_bcdsub_eq
+ || icode == CODE_FOR_bcdsub_gt)
{
/* Only allow 1-bit unsigned literals. */
STRIP_NOPS (arg2);
@@ -10427,6 +12412,65 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
return const0_rtx;
}
}
+ else if (icode == CODE_FOR_dfp_ddedpd_dd
+ || icode == CODE_FOR_dfp_ddedpd_td)
+ {
+ /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
+ STRIP_NOPS (arg0);
+      if (TREE_CODE (arg0) != INTEGER_CST
+	  || TREE_INT_CST_LOW (arg0) & ~0x3)
+	{
+	  error ("argument 1 must be 0 or 2");
+	  return const0_rtx;
+	}
+ }
+ else if (icode == CODE_FOR_dfp_denbcd_dd
+ || icode == CODE_FOR_dfp_denbcd_td)
+ {
+ /* Only allow 1-bit unsigned literals. */
+ STRIP_NOPS (arg0);
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x1)
+ {
+ error ("argument 1 must be a 1-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_dfp_dscli_dd
+ || icode == CODE_FOR_dfp_dscli_td
+ || icode == CODE_FOR_dfp_dscri_dd
+ || icode == CODE_FOR_dfp_dscri_td)
+ {
+ /* Only allow 6-bit unsigned literals. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg1) & ~0x3f)
+ {
+ error ("argument 2 must be a 6-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_crypto_vshasigmaw
+ || icode == CODE_FOR_crypto_vshasigmad)
+ {
+      /* Check that the 2nd and 3rd arguments are integer constants in the
+	 required range.  */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
+ {
+ error ("argument 2 must be 0 or 1");
+ return const0_rtx;
+ }
+
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
+ {
+ error ("argument 3 must be in the range 0..15");
+ return const0_rtx;
+ }
+ }
if (target == 0
|| GET_MODE (target) != tmode
@@ -10481,6 +12525,8 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
icode = CODE_FOR_vector_altivec_load_v2di;
+      break;
+    case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
+      icode = CODE_FOR_vector_altivec_load_v1ti;
break;
default:
*expandedp = false;
@@ -10540,6 +12586,8 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
icode = CODE_FOR_vector_altivec_store_v2di;
+      break;
+    case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
+      icode = CODE_FOR_vector_altivec_store_v1ti;
break;
default:
*expandedp = false;
@@ -10632,21 +12680,33 @@ altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
enum machine_mode tmode = TYPE_MODE (type);
enum machine_mode inner_mode = GET_MODE_INNER (tmode);
int i, n_elt = GET_MODE_NUNITS (tmode);
- rtvec v = rtvec_alloc (n_elt);
gcc_assert (VECTOR_MODE_P (tmode));
gcc_assert (n_elt == call_expr_nargs (exp));
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+  /* If we have a vector composed of a single element, such as V1TImode, do
+ the initialization directly. */
+ if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
+ emit_move_insn (target, gen_lowpart (tmode, x));
+ }
+ else
+ {
+ rtvec v = rtvec_alloc (n_elt);
+
for (i = 0; i < n_elt; ++i)
{
rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
}
- if (!target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
+ }
+
return target;
}
@@ -10769,16 +12829,38 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
switch (fcode)
{
+ case ALTIVEC_BUILTIN_STVX_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
+ case ALTIVEC_BUILTIN_STVX_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
+ case ALTIVEC_BUILTIN_STVX_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
case ALTIVEC_BUILTIN_STVX:
+ case ALTIVEC_BUILTIN_STVX_V4SI:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
+ case ALTIVEC_BUILTIN_STVX_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
+ case ALTIVEC_BUILTIN_STVX_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
case ALTIVEC_BUILTIN_STVEBX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
case ALTIVEC_BUILTIN_STVEHX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
case ALTIVEC_BUILTIN_STVEWX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
+ case ALTIVEC_BUILTIN_STVXL_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
+ case ALTIVEC_BUILTIN_STVXL_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
+ case ALTIVEC_BUILTIN_STVXL_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
case ALTIVEC_BUILTIN_STVXL:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
+ case ALTIVEC_BUILTIN_STVXL_V4SI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
+ case ALTIVEC_BUILTIN_STVXL_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
+ case ALTIVEC_BUILTIN_STVXL_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
case ALTIVEC_BUILTIN_STVLX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
@@ -10789,6 +12871,8 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_STVRXL:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
+ case VSX_BUILTIN_STXVD2X_V1TI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
case VSX_BUILTIN_STXVD2X_V2DF:
return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
case VSX_BUILTIN_STXVD2X_V2DI:
@@ -10869,6 +12953,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
case VSX_BUILTIN_VEC_INIT_V2DF:
case VSX_BUILTIN_VEC_INIT_V2DI:
+ case VSX_BUILTIN_VEC_INIT_V1TI:
return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
case ALTIVEC_BUILTIN_VEC_SET_V4SI:
@@ -10877,6 +12962,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_SET_V4SF:
case VSX_BUILTIN_VEC_SET_V2DF:
case VSX_BUILTIN_VEC_SET_V2DI:
+ case VSX_BUILTIN_VEC_SET_V1TI:
return altivec_expand_vec_set_builtin (exp);
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
@@ -10885,6 +12971,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
case VSX_BUILTIN_VEC_EXT_V2DF:
case VSX_BUILTIN_VEC_EXT_V2DI:
+ case VSX_BUILTIN_VEC_EXT_V1TI:
return altivec_expand_vec_ext_builtin (exp, target);
default:
@@ -10922,12 +13009,44 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_LVEWX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
+ exp, target, false);
case ALTIVEC_BUILTIN_LVXL:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
+ case ALTIVEC_BUILTIN_LVXL_V4SI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
exp, target, false);
case ALTIVEC_BUILTIN_LVX:
+ case ALTIVEC_BUILTIN_LVX_V4SI:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
+ exp, target, false);
case ALTIVEC_BUILTIN_LVLX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
exp, target, true);
@@ -10940,6 +13059,9 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_LVRXL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
exp, target, true);
+ case VSX_BUILTIN_LXVD2X_V1TI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
+ exp, target, false);
case VSX_BUILTIN_LXVD2X_V2DF:
return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
exp, target, false);
@@ -11411,12 +13533,24 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
error ("Builtin function %s is only valid for the cell processor", name);
else if ((fnmask & RS6000_BTM_VSX) != 0)
error ("Builtin function %s requires the -mvsx option", name);
+ else if ((fnmask & RS6000_BTM_HTM) != 0)
+ error ("Builtin function %s requires the -mhtm option", name);
else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
error ("Builtin function %s requires the -maltivec option", name);
else if ((fnmask & RS6000_BTM_PAIRED) != 0)
error ("Builtin function %s requires the -mpaired option", name);
else if ((fnmask & RS6000_BTM_SPE) != 0)
error ("Builtin function %s requires the -mspe option", name);
+ else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+ == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+    error ("Builtin function %s requires the -mhard-dfp and "
+	   "-mpower8-vector options", name);
+ else if ((fnmask & RS6000_BTM_DFP) != 0)
+ error ("Builtin function %s requires the -mhard-dfp option", name);
+ else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
+ error ("Builtin function %s requires the -mpower8-vector option", name);
+ else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
+ error ("Builtin function %s requires the -mhard-float option", name);
else
error ("Builtin function %s is not supported with the current options",
name);
@@ -11515,7 +13649,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (int) CODE_FOR_altivec_lvsr;
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
+ : (int) CODE_FOR_altivec_lvsl);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
@@ -11590,8 +13725,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (success)
return ret;
}
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
- gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
+ unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY);
/* Handle simple unary operations. */
d = bdesc_1arg;
@@ -11648,6 +13793,14 @@ rs6000_init_builtins (void)
opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
+ /* We use V1TI mode as a special container to hold __int128_t items that
+ must live in VSX registers. */
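+  /* A TImode value can also live in a GPR pair; the V1TI wrapper forces the
+     value to be allocated to a single VSX register.  */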
+ if (intTI_type_node)
+ {
+ V1TI_type_node = build_vector_type (intTI_type_node, 1);
+ unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
+ }
+
/* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
types, especially in C++ land. Similarly, 'vector pixel' is distinct from
'vector unsigned short'. */
@@ -11670,8 +13823,13 @@ rs6000_init_builtins (void)
uintSI_type_internal_node = unsigned_intSI_type_node;
intDI_type_internal_node = intDI_type_node;
uintDI_type_internal_node = unsigned_intDI_type_node;
+ intTI_type_internal_node = intTI_type_node;
+ uintTI_type_internal_node = unsigned_intTI_type_node;
float_type_internal_node = float_type_node;
double_type_internal_node = double_type_node;
+ long_double_type_internal_node = long_double_type_node;
+ dfloat64_type_internal_node = dfloat64_type_node;
+ dfloat128_type_internal_node = dfloat128_type_node;
void_type_internal_node = void_type_node;
/* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -11682,8 +13840,15 @@ rs6000_init_builtins (void)
builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
builtin_mode_to_type[DImode][0] = intDI_type_node;
builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
+ builtin_mode_to_type[TImode][0] = intTI_type_node;
+ builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
builtin_mode_to_type[SFmode][0] = float_type_node;
builtin_mode_to_type[DFmode][0] = double_type_node;
+ builtin_mode_to_type[TFmode][0] = long_double_type_node;
+ builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
+ builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
+ builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
+ builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
@@ -11752,14 +13917,41 @@ rs6000_init_builtins (void)
tdecl = add_builtin_type ("__vector double", V2DF_type_node);
TYPE_NAME (V2DF_type_node) = tdecl;
+ if (TARGET_POWERPC64)
+ {
tdecl = add_builtin_type ("__vector long", V2DI_type_node);
TYPE_NAME (V2DI_type_node) = tdecl;
- tdecl = add_builtin_type ("__vector unsigned long", unsigned_V2DI_type_node);
+ tdecl = add_builtin_type ("__vector unsigned long",
+ unsigned_V2DI_type_node);
TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+ else
+ {
+ tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned long long",
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector __bool long long",
+ bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+
+ if (V1TI_type_node)
+ {
+ tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
+ TYPE_NAME (V1TI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned __int128",
+ unsigned_V1TI_type_node);
+ TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
+ }
/* Paired and SPE builtins are only available if you build a compiler with
the appropriate options, so only create those builtins with the
@@ -11772,6 +13964,9 @@ rs6000_init_builtins (void)
spe_init_builtins ();
if (TARGET_EXTRA_BUILTINS)
altivec_init_builtins ();
+ if (TARGET_HTM)
+ htm_init_builtins ();
+
if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
rs6000_common_init_builtins ();
@@ -12117,6 +14312,10 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V4SI_type_node,
V4SI_type_node, NULL_TREE);
+ tree int_ftype_int_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V2DI_type_node,
+ V2DI_type_node, NULL_TREE);
tree void_ftype_v4si
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_void
@@ -12199,6 +14398,8 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V2DF_type_node,
V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
tree v4si_ftype_v4si
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_v8hi
@@ -12224,10 +14425,58 @@ altivec_init_builtins (void)
def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
+ def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V2DF);
+ def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V2DI);
+ def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V4SF);
+ def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V4SI);
+ def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V8HI);
+ def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V16QI);
def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
+ def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V2DF);
+ def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V2DI);
+ def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V4SF);
+ def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V4SI);
+ def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V8HI);
+ def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V16QI);
def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
+ def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V2DF);
+ def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V2DI);
+ def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V4SF);
+ def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V4SI);
+ def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V8HI);
+ def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V16QI);
def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
+ def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V2DF);
+ def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V2DI);
+ def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V4SF);
+ def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V4SI);
+ def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V8HI);
+ def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V16QI);
def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
@@ -12334,6 +14583,9 @@ altivec_init_builtins (void)
case VOIDmode:
type = int_ftype_int_opaque_opaque;
break;
+ case V2DImode:
+ type = int_ftype_int_v2di_v2di;
+ break;
case V4SImode:
type = int_ftype_int_v4si_v4si;
break;
@@ -12367,6 +14619,9 @@ altivec_init_builtins (void)
switch (mode0)
{
+ case V2DImode:
+ type = v2di_ftype_v2di;
+ break;
case V4SImode:
type = v4si_ftype_v4si;
break;
@@ -12497,6 +14752,107 @@ altivec_init_builtins (void)
ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
integer_type_node, NULL_TREE);
def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
+
+ if (V1TI_type_node)
+ {
+ tree v1ti_ftype_long_pcvoid
+ = build_function_type_list (V1TI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree void_ftype_v1ti_long_pvoid
+ = build_function_type_list (void_type_node,
+ V1TI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVD2X_V1TI);
+ def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
+ VSX_BUILTIN_STXVD2X_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
+ NULL_TREE, NULL_TREE);
+ def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
+ intTI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
+ ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
+ }
+}
+
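+/* Create the builtin function declarations for the HTM builtins described in
+   bdesc_htm, deriving each function type from the RS6000_BTC_* attributes of
+   the builtin.  */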
+static void
+htm_init_builtins (void)
+{
+ HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
+ const struct builtin_description *d;
+ size_t i;
+
+ d = bdesc_htm;
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
+ {
+ tree op[MAX_HTM_OPERANDS], type;
+ HOST_WIDE_INT mask = d->mask;
+ unsigned attr = rs6000_builtin_info[d->code].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ int attr_args = (attr & RS6000_BTC_TYPE_MASK);
+ int nopnds = 0;
+ tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
+ : unsigned_type_node;
+
+ if ((mask & builtin_mask) != mask)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+	  fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
+ continue;
+ }
+
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+	  fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
+ (long unsigned) i);
+ continue;
+ }
+
+ op[nopnds++] = (void_func) ? void_type_node : argtype;
+
+ if (attr_args == RS6000_BTC_UNARY)
+ op[nopnds++] = argtype;
+ else if (attr_args == RS6000_BTC_BINARY)
+ {
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ }
+ else if (attr_args == RS6000_BTC_TERNARY)
+ {
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ }
+
+ switch (nopnds)
+ {
+ case 1:
+ type = build_function_type_list (op[0], NULL_TREE);
+ break;
+ case 2:
+ type = build_function_type_list (op[0], op[1], NULL_TREE);
+ break;
+ case 3:
+ type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
+ break;
+ case 4:
+ type = build_function_type_list (op[0], op[1], op[2], op[3],
+ NULL_TREE);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->name, type, d->code);
+ }
}
/* Hash function for builtin functions with up to 3 arguments and a return
@@ -12572,11 +14928,34 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
are type correct. */
switch (builtin)
{
+ /* unsigned 1 argument functions. */
+ case CRYPTO_BUILTIN_VSBOX:
+ case P8V_BUILTIN_VGBBD:
+ case MISC_BUILTIN_CDTBCD:
+ case MISC_BUILTIN_CBCDTD:
+ h.uns_p[0] = 1;
+ h.uns_p[1] = 1;
+ break;
+
/* unsigned 2 argument functions. */
case ALTIVEC_BUILTIN_VMULEUB_UNS:
case ALTIVEC_BUILTIN_VMULEUH_UNS:
case ALTIVEC_BUILTIN_VMULOUB_UNS:
case ALTIVEC_BUILTIN_VMULOUH_UNS:
+ case CRYPTO_BUILTIN_VCIPHER:
+ case CRYPTO_BUILTIN_VCIPHERLAST:
+ case CRYPTO_BUILTIN_VNCIPHER:
+ case CRYPTO_BUILTIN_VNCIPHERLAST:
+ case CRYPTO_BUILTIN_VPMSUMB:
+ case CRYPTO_BUILTIN_VPMSUMH:
+ case CRYPTO_BUILTIN_VPMSUMW:
+ case CRYPTO_BUILTIN_VPMSUMD:
+ case CRYPTO_BUILTIN_VPMSUM:
+ case MISC_BUILTIN_ADDG6S:
+ case MISC_BUILTIN_DIVWEU:
+ case MISC_BUILTIN_DIVWEUO:
+ case MISC_BUILTIN_DIVDEU:
+ case MISC_BUILTIN_DIVDEUO:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12599,6 +14978,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case VSX_BUILTIN_XXSEL_8HI_UNS:
case VSX_BUILTIN_XXSEL_4SI_UNS:
case VSX_BUILTIN_XXSEL_2DI_UNS:
+ case CRYPTO_BUILTIN_VPERMXOR:
+ case CRYPTO_BUILTIN_VPERMXOR_V2DI:
+ case CRYPTO_BUILTIN_VPERMXOR_V4SI:
+ case CRYPTO_BUILTIN_VPERMXOR_V8HI:
+ case CRYPTO_BUILTIN_VPERMXOR_V16QI:
+ case CRYPTO_BUILTIN_VSHASIGMAW:
+ case CRYPTO_BUILTIN_VSHASIGMAD:
+ case CRYPTO_BUILTIN_VSHASIGMA:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12630,9 +15017,18 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
/* signed args, unsigned return. */
case VSX_BUILTIN_XVCVDPUXDS_UNS:
case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
+ case MISC_BUILTIN_UNPACK_TD:
+ case MISC_BUILTIN_UNPACK_V1TI:
h.uns_p[0] = 1;
break;
+ /* unsigned arguments for 128-bit pack instructions. */
+ case MISC_BUILTIN_PACK_TD:
+ case MISC_BUILTIN_PACK_V1TI:
+ h.uns_p[1] = 1;
+ h.uns_p[2] = 1;
+ break;
+
default:
break;
}
@@ -12740,8 +15136,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+	    fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
+ d->name);
+
continue;
+ }
type = builtin_function_type (insn_data[icode].operand[0].mode,
insn_data[icode].operand[1].mode,
@@ -12780,8 +15191,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+	    fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
+ d->name);
+
continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -12842,8 +15268,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+	    fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
+ d->name);
+
continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -13631,7 +16072,7 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
static bool eliminated = false;
rtx ret;
- if (mode != SDmode)
+ if (mode != SDmode || TARGET_NO_SDMODE_STACK)
ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
else
{
@@ -13660,6 +16101,17 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
return ret;
}
+/* Return the mode to be used for memory when a secondary memory
+ location is needed. For SDmode values we need to use DDmode, in
+ all other cases we can use the same mode. */
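+/* (An SDmode value is kept in a floating point register in DDmode format, so
+   the 8-byte slot lets lfd/stfd perform the copy.)  */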
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+ if (mode == SDmode)
+ return DDmode;
+ return mode;
+}
+
static tree
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
@@ -13690,29 +16142,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
-enum reload_reg_type {
- GPR_REGISTER_TYPE,
- VECTOR_REGISTER_TYPE,
- OTHER_REGISTER_TYPE
-};
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+ only work on the traditional altivec registers, note if an altivec register
+ was chosen. */
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
{
- switch (rclass)
+ HOST_WIDE_INT regno;
+ enum reg_class rclass;
+
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (!REG_P (reg))
+ return NO_REG_TYPE;
+
+ regno = REGNO (reg);
+ if (regno >= FIRST_PSEUDO_REGISTER)
{
- case GENERAL_REGS:
- case BASE_REGS:
- return GPR_REGISTER_TYPE;
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
+ return PSEUDO_REG_TYPE;
- case FLOAT_REGS:
- case ALTIVEC_REGS:
- case VSX_REGS:
- return VECTOR_REGISTER_TYPE;
+ regno = true_regnum (reg);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ return PSEUDO_REG_TYPE;
+ }
- default:
- return OTHER_REGISTER_TYPE;
+ gcc_assert (regno >= 0);
+
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
+ *is_altivec = true;
+
+ rclass = rs6000_regno_regclass[regno];
+ return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+   different register class is really a simple move.  */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode)
+{
+ int size;
+
+ /* Add support for various direct moves available. In this function, we only
+ look at cases where we don't need any extra registers, and one or more
+ simple move insns are issued. At present, 32-bit integers are not allowed
+     in FPR/VSX registers.  Single precision binary floating point is not a
+     simple move because we need to convert to the single precision memory
+     layout.  The 4-byte SDmode can be moved.  */
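+  /* For example, on a 64-bit power8 a DImode transfer between a GPR and a
+     VSX register is a single mtvsrd or mfvsrd, so it qualifies as simple.  */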
+ size = GET_MODE_SIZE (mode);
+ if (TARGET_DIRECT_MOVE
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ return false;
+}
+
+/* Power8 helper function for rs6000_secondary_reload.  Handle all of the
+   special direct moves that involve allocating an extra register.  Return
+   true if such a move exists, storing the insn code and extra cost of the
+   helper pattern in SRI; otherwise return false.  */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ bool ret = false;
+ enum insn_code icode = CODE_FOR_nothing;
+ int cost = 0;
+ int size = GET_MODE_SIZE (mode);
+
+ if (TARGET_POWERPC64)
+ {
+ if (size == 16)
+ {
+	  /* Handle moving 128-bit values from GPRs to VSX registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_vsx_gpr;
+ }
+
+	  /* Handle moving 128-bit values from VSX registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_gpr_vsx;
+ }
+ }
+
+ else if (mode == SFmode)
+ {
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
+ icode = reg_addr[mode].reload_gpr_vsx;
+ }
+
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 2; /* mtvsrz, xscvspdpn. */
+ icode = reg_addr[mode].reload_vsx_gpr;
+ }
+ }
+ }
+
+ else if (!TARGET_POWERPC64 && size == 8)
+ {
+ /* Handle moving 64-bit values from GPRs to floating point registers on
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+ values back together. Altivec register classes must be handled
+ specially since a different instruction is used, and the secondary
+ reload support requires a single instruction class in the scratch
+ register constraint. However, right now TFmode is not allowed in
+ Altivec registers, so the pattern will never match. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+ {
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
+ icode = reg_addr[mode].reload_fpr_gpr;
+ }
}
+
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = true;
+ if (sri)
+ {
+ sri->icode = icode;
+ sri->extra_cost = cost;
+ }
+ }
+
+ return ret;
+}
+
+/* Return whether a move between two register classes can be done either
+ directly (simple move) or via a pattern that uses a single extra temporary
+   (using power8's direct move in this case).  */
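+/* Both the rs6000_secondary_reload target hook and
+   rs6000_secondary_memory_needed funnel through this function, which keeps
+   the two hooks consistent with each other.  */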
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ /* Fall back to load/store reloads if either type is not a register. */
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+ return false;
+
+ /* If we haven't allocated registers yet, assume the move can be done for the
+ standard register types. */
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+ return true;
+
+  /* A move within the same set of registers is a simple move for
+     non-specialized registers.  */
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+ return true;
+
+ /* Check whether a simple move can be done directly. */
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+ {
+ if (sri)
+ {
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+ }
+ return true;
+ }
+
+ /* Now check if we can do it in a few steps. */
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+ altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
@@ -13738,11 +16387,35 @@ rs6000_secondary_reload (bool in_p,
bool default_p = false;
sri->icode = CODE_FOR_nothing;
+ icode = ((in_p)
+ ? reg_addr[mode].reload_load
+ : reg_addr[mode].reload_store);
- /* Convert vector loads and stores into gprs to use an additional base
- register. */
- icode = rs6000_vector_reload[mode][in_p != false];
- if (icode != CODE_FOR_nothing)
+ if (REG_P (x) || register_operand (x, mode))
+ {
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+ bool altivec_p = (rclass == ALTIVEC_REGS);
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+ if (!in_p)
+ {
+ enum rs6000_reg_type exchange = to_type;
+ to_type = from_type;
+ from_type = exchange;
+ }
+
+ /* Can we do a direct move of some sort? */
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+ altivec_p))
+ {
+ icode = (enum insn_code)sri->icode;
+ default_p = false;
+ ret = NO_REGS;
+ }
+ }
+
+ /* Handle vector moves with reload helper functions. */
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
{
ret = NO_REGS;
sri->icode = CODE_FOR_nothing;
@@ -13754,11 +16427,20 @@ rs6000_secondary_reload (bool in_p,
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). */
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
+ case load/store quad. */
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
{
- if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (TImode, addr,
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+ && GET_MODE_SIZE (mode) == 16
+ && quad_memory_operand (x, mode))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true))
{
sri->icode = icode;
@@ -13768,8 +16450,20 @@ rs6000_secondary_reload (bool in_p,
+ ((GET_CODE (addr) == AND) ? 1 : 0));
}
}
+ /* Allow scalar loads to/from the traditional floating point
+ registers, even if VSX memory is set. */
+ else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && (legitimate_indirect_address_p (addr, false)
+	       || legitimate_indexed_address_p (addr, false)
+ || rs6000_legitimate_offset_address_p (mode, addr,
+ false, true)))
+
+ ;
/* Loads to and stores from vector registers can only do reg+reg
- addressing. Altivec registers can also do (reg+reg)&(-16). */
+ addressing. Altivec registers can also do (reg+reg)&(-16). Allow
+ scalar modes loading up the traditional floating point registers
+ to use offset addresses. */
else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
|| rclass == FLOAT_REGS || rclass == NO_REGS)
{
@@ -13813,12 +16507,12 @@ rs6000_secondary_reload (bool in_p,
else
{
enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
/* If memory is needed, use default_secondary_reload to create the
stack slot. */
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
default_p = true;
else
ret = NO_REGS;
@@ -13828,7 +16522,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
@@ -13867,7 +16561,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (!TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
@@ -13945,6 +16639,36 @@ rs6000_secondary_reload (bool in_p,
return ret;
}
+/* Better tracing for rs6000_secondary_reload_inner. */
+
+static void
+rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
+ bool store_p)
+{
+ rtx set, clobber;
+
+ gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
+
+ fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
+ store_p ? "store" : "load");
+
+ if (store_p)
+ set = gen_rtx_SET (VOIDmode, mem, reg);
+ else
+ set = gen_rtx_SET (VOIDmode, reg, mem);
+
+ clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
+ debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+}
+
+static void
+rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
+ bool store_p)
+{
+ rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
+ gcc_unreachable ();
+}
+
/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
to SP+reg addressing. */
@@ -13963,21 +16687,16 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
rtx cc_clobber;
if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n",
- store_p ? "store" : "load");
- fprintf (stderr, "reg:\n");
- debug_rtx (reg);
- fprintf (stderr, "mem:\n");
- debug_rtx (mem);
- fprintf (stderr, "scratch:\n");
- debug_rtx (scratch);
- }
+ rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
+
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ if (GET_CODE (mem) != MEM)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
- gcc_assert (GET_CODE (mem) == MEM);
rclass = REGNO_REG_CLASS (regno);
- addr = XEXP (mem, 0);
+ addr = find_replacement (&XEXP (mem, 0));
switch (rclass)
{
@@ -13988,25 +16707,31 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
if (GET_CODE (addr) == AND)
{
and_op2 = XEXP (addr, 1);
- addr = XEXP (addr, 0);
+ addr = find_replacement (&XEXP (addr, 0));
}
if (GET_CODE (addr) == PRE_MODIFY)
{
- scratch_or_premodify = XEXP (addr, 0);
- gcc_assert (REG_P (scratch_or_premodify));
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
- addr = XEXP (addr, 1);
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
+ if (!REG_P (scratch_or_premodify))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ addr = find_replacement (&XEXP (addr, 1));
+ if (GET_CODE (addr) != PLUS)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
if (GET_CODE (addr) == PLUS
&& (and_op2 != NULL_RTX
- || !rs6000_legitimate_offset_address_p (TImode, addr,
+ || !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true)))
{
+ /* find_replacement already recurses into both operands of
+ PLUS so we don't need to call it here. */
addr_op1 = XEXP (addr, 0);
addr_op2 = XEXP (addr, 1);
- gcc_assert (legitimate_indirect_address_p (addr_op1, false));
+ if (!legitimate_indirect_address_p (addr_op1, false))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
if (!REG_P (addr_op2)
&& (GET_CODE (addr_op2) != CONST_INT
@@ -14034,7 +16759,7 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
scratch_or_premodify = scratch;
}
else if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (TImode, addr,
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true))
{
if (TARGET_DEBUG_ADDR)
@@ -14050,9 +16775,21 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
break;
- /* Float/Altivec registers can only handle reg+reg addressing. Move
- other addresses into a scratch register. */
+ /* Float registers can do offset+reg addressing for scalar types. */
case FLOAT_REGS:
+ if (legitimate_indirect_address_p (addr, false) /* reg */
+ || legitimate_indexed_address_p (addr, false) /* reg+reg */
+ || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && and_op2 == NULL_RTX
+ && scratch_or_premodify == scratch
+ && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
+ break;
+
+ /* If this isn't a legacy floating point load/store, fall through to the
+ VSX defaults. */
+
+ /* VSX/Altivec registers can only handle reg+reg addressing. Move other
+ addresses into a scratch register. */
case VSX_REGS:
case ALTIVEC_REGS:
@@ -14066,42 +16803,43 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
|| !VECTOR_MEM_ALTIVEC_P (mode)))
{
and_op2 = XEXP (addr, 1);
- addr = XEXP (addr, 0);
+ addr = find_replacement (&XEXP (addr, 0));
}
/* If we aren't using a VSX load, save the PRE_MODIFY register and use it
as the address later. */
if (GET_CODE (addr) == PRE_MODIFY
- && (!VECTOR_MEM_VSX_P (mode)
+ && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ && (rclass != FLOAT_REGS
+ || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
|| and_op2 != NULL_RTX
|| !legitimate_indexed_address_p (XEXP (addr, 1), false)))
{
- scratch_or_premodify = XEXP (addr, 0);
- gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
- false));
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
- addr = XEXP (addr, 1);
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
+ if (!legitimate_indirect_address_p (scratch_or_premodify, false))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ addr = find_replacement (&XEXP (addr, 1));
+ if (GET_CODE (addr) != PLUS)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
if (legitimate_indirect_address_p (addr, false) /* reg */
|| legitimate_indexed_address_p (addr, false) /* reg+reg */
- || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */
|| (GET_CODE (addr) == AND /* Altivec memory */
+ && rclass == ALTIVEC_REGS
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
&& INTVAL (XEXP (addr, 1)) == -16
- && VECTOR_MEM_ALTIVEC_P (mode))
- || (rclass == FLOAT_REGS /* legacy float mem */
- && GET_MODE_SIZE (mode) == 8
- && and_op2 == NULL_RTX
- && scratch_or_premodify == scratch
- && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
+ && (legitimate_indirect_address_p (XEXP (addr, 0), false)
+ || legitimate_indexed_address_p (XEXP (addr, 0), false))))
;
else if (GET_CODE (addr) == PLUS)
{
addr_op1 = XEXP (addr, 0);
addr_op2 = XEXP (addr, 1);
- gcc_assert (REG_P (addr_op1));
+ if (!REG_P (addr_op1))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
if (TARGET_DEBUG_ADDR)
{
@@ -14120,7 +16858,8 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
- || GET_CODE (addr) == CONST_INT || REG_P (addr))
+ || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
+ || REG_P (addr))
{
if (TARGET_DEBUG_ADDR)
{
@@ -14136,12 +16875,12 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
else
- gcc_unreachable ();
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
break;
default:
- gcc_unreachable ();
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
/* If the original address involved a pre-modify that we couldn't use the VSX
@@ -14188,7 +16927,7 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
/* Adjust the address if it changed. */
if (addr != XEXP (mem, 0))
{
- mem = change_address (mem, mode, addr);
+ mem = replace_equiv_address_nv (mem, addr);
if (TARGET_DEBUG_ADDR)
fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
}
@@ -14253,8 +16992,10 @@ rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
return;
}
-/* Allocate a 64-bit stack slot to be used for copying SDmode
- values through if this function has any SDmode references. */
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
+ this function has any SDmode references. If we are on a power7 or later, we
+   don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
+ can load/store the value. */
static void
rs6000_alloc_sdmode_stack_slot (void)
@@ -14264,6 +17005,13 @@ rs6000_alloc_sdmode_stack_slot (void)
gimple_stmt_iterator gsi;
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+ /* We use a different approach for dealing with the secondary
+ memory in LRA. */
+ if (ira_use_lra_p)
+ return;
+
+ if (TARGET_NO_SDMODE_STACK)
+ return;
FOR_EACH_BB (bb)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -14325,8 +17073,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
enum machine_mode mode = GET_MODE (x);
- if (VECTOR_UNIT_VSX_P (mode)
- && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+ if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
return rclass;
if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
@@ -14334,8 +17081,14 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
&& easy_vector_constant (x, mode))
return ALTIVEC_REGS;
- if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
+ {
+ if (reg_class_subset_p (GENERAL_REGS, rclass))
+ return GENERAL_REGS;
+ if (reg_class_subset_p (BASE_REGS, rclass))
+ return BASE_REGS;
return NO_REGS;
+ }
if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
return GENERAL_REGS;
@@ -14349,7 +17102,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
if (GET_MODE_SIZE (mode) <= 8)
return FLOAT_REGS;
- if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode))
+ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
+ || mode == V1TImode)
return ALTIVEC_REGS;
return rclass;
@@ -14381,42 +17135,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
set and vice versa. */
static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- if (class1 == class2)
- return false;
-
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
- between these classes. But we need memory for other things that can go in
- FLOAT_REGS like SFmode. */
- if (TARGET_VSX
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
- || class1 == FLOAT_REGS))
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
- && class2 != FLOAT_REGS);
+ enum rs6000_reg_type from_type, to_type;
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
+ || (to_class == ALTIVEC_REGS));
- if (class1 == VSX_REGS || class2 == VSX_REGS)
- return true;
-
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+  /* If a simple/direct move is available, we don't need secondary memory.  */
+ from_type = reg_class_to_reg_type[(int)from_class];
+ to_type = reg_class_to_reg_type[(int)to_class];
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
+ (secondary_reload_info *)0, altivec_p))
+ return false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ /* If we have a floating point or vector register class, we need to use
+ memory to transfer the data. */
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
return true;
return false;
@@ -14424,17 +17161,19 @@ rs6000_secondary_memory_needed (enum reg_class class1,
/* Debug version of rs6000_secondary_memory_needed. */
static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
fprintf (stderr,
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
- "class2 = %s, mode = %s\n",
- ret ? "true" : "false", reg_class_names[class1],
- reg_class_names[class2], GET_MODE_NAME (mode));
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+ "to_class = %s, mode = %s\n",
+ ret ? "true" : "false",
+ reg_class_names[from_class],
+ reg_class_names[to_class],
+ GET_MODE_NAME (mode));
return ret;
}
@@ -14498,14 +17237,20 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
/* Constants, memory, and FP registers can go into FP registers. */
if ((regno == -1 || FP_REGNO_P (regno))
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+ return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
/* Memory, and FP/altivec registers can go into fp/altivec registers under
- VSX. */
+ VSX. However, for scalar variables, use the traditional floating point
+ registers so that we can use offset+register addressing. */
if (TARGET_VSX
&& (regno == -1 || VSX_REGNO_P (regno))
&& VSX_REG_CLASS_P (rclass))
+ {
+ if (GET_MODE_SIZE (mode) < 16)
+ return FLOAT_REGS;
+
return NO_REGS;
+ }
/* Memory, and AltiVec registers can go into AltiVec registers. */
if ((regno == -1 || ALTIVEC_REGNO_P (regno))
@@ -14550,8 +17295,42 @@ rs6000_cannot_change_mode_class (enum machine_mode from,
if (from_size != to_size)
{
enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
- return ((from_size < 8 || to_size < 8 || TARGET_IEEEQUAD)
- && reg_classes_intersect_p (xclass, rclass));
+
+ if (reg_classes_intersect_p (xclass, rclass))
+ {
+ unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
+ unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+
+ /* Don't allow 64-bit types to overlap with 128-bit types that take a
+ single register under VSX because the scalar part of the register
+ is in the upper 64 bits, not the lower 64 bits. Types like
+ TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
+ IEEE floating point can't overlap, and neither can small
+ values. */
+
+ if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ return true;
+
+ /* TDmode in floating-point registers must always go into a register
+ pair with the most significant word in the even-numbered register
+ to match ISA requirements. In little-endian mode, this does not
+ match subreg numbering, so we cannot allow subregs. */
+ if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
+ return true;
+
+ if (from_size < 8 || to_size < 8)
+ return true;
+
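+ /* E.g., DImode may not overlap V2DImode, which occupies a single
+ VSX register (8 * 1 != 16), but it may overlap TFmode, which
+ takes two registers (8 * 2 == 16). */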
+ if (from_size == 8 && (8 * to_nregs) != to_size)
+ return true;
+
+ if (to_size == 8 && (8 * from_nregs) != from_size)
+ return true;
+
+ return false;
+ }
+ else
+ return false;
}
if (TARGET_E500_DOUBLE
@@ -14565,9 +17344,18 @@ rs6000_cannot_change_mode_class (enum machine_mode from,
/* Since the VSX register set includes traditional floating point registers
and altivec registers, just check for the size being different instead of
trying to check whether the modes are vector modes. Otherwise it won't
- allow say DF and DI to change classes. */
+ allow, say, DF and DI to change classes. For types like TFmode and TDmode
+ that take 2 64-bit registers, rather than a single 128-bit register, don't
+ allow subregs of those types to other 128-bit types. */
if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
+ {
+ unsigned num_regs = (from_size + 15) / 16;
+ if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
+ || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
+ return true;
+
return (from_size != 8 && from_size != 16);
+ }
if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
&& (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
@@ -14598,6 +17386,186 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from,
return ret;
}
+/* Return a string to do a move operation of 128 bits of data. */
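+/* An output template of "#" tells final that the insn must be split
+   into simpler moves by the insn splitters. */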
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
+
+ if (REG_P (dest))
+ {
+ dest_regno = REGNO (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_vmx_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src))
+ {
+ src_regno = REGNO (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_vmx_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_vmx_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
+ }
+
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ if (dest_gpr_p)
+ {
+ if (src_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
+ return "vor %0,%1,%1";
+
+ else if (dest_fp_p && src_fp_p)
+ return "#";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+ return "lq %0,%1";
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "lvx %0,%y1";
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
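+ /* Vectors of word or smaller elements use the word-oriented load;
+ other 128-bit data uses the doubleword-oriented load. */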
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "lxvw4x %x0,%y1";
+ else
+ return "lxvd2x %x0,%y1";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p)
+ return "lvx %0,%y1";
+
+ else if (dest_fp_p)
+ return "#";
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+ return "stq %1,%0";
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && src_vmx_p
+ && altivec_indexed_or_indirect_operand (dest, mode))
+ return "stvx %1,%y0";
+
+ else if (TARGET_VSX && src_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "stxvw4x %x1,%y0";
+ else
+ return "stxvd2x %x1,%y0";
+ }
+
+ else if (TARGET_ALTIVEC && src_vmx_p)
+ return "stvx %1,%y0";
+
+ else if (src_fp_p)
+ return "#";
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0
+ && (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE
+ || GET_CODE (src) == CONST_VECTOR))
+ {
+ if (dest_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ else if (TARGET_ALTIVEC && dest_vmx_p)
+ return output_vec_const_move (operands);
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+ }
+
+ gcc_unreachable ();
+}
+
+/* Validate a 128-bit move. */
+bool
+rs6000_move_128bit_ok_p (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ return (gpc_reg_operand (operands[0], mode)
+ || gpc_reg_operand (operands[1], mode));
+}
+
+/* Return true if a 128-bit move needs to be split. */
+bool
+rs6000_split_128bit_ok_p (rtx operands[])
+{
+ if (!reload_completed)
+ return false;
+
+ if (!gpr_or_gpr_p (operands[0], operands[1]))
+ return false;
+
+ if (quad_load_store_p (operands[0], operands[1]))
+ return false;
+
+ return true;
+}
+
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -14823,6 +17791,7 @@ rs6000_output_function_entry (FILE *file, const char *fname)
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
break;
+ case ABI_ELFv2:
case ABI_V4:
case ABI_DARWIN:
break;
@@ -15302,7 +18271,7 @@ print_operand (FILE *file, rtx x, int code)
return;
case 'Y':
- /* Like 'L', for third word of TImode */
+ /* Like 'L', for third word of TImode/PTImode */
if (REG_P (x))
fputs (reg_names[REGNO (x) + 2], file);
else if (MEM_P (x))
@@ -15352,7 +18321,7 @@ print_operand (FILE *file, rtx x, int code)
return;
case 'Z':
- /* Like 'L', for last word of TImode. */
+ /* Like 'L', for last word of TImode/PTImode. */
if (REG_P (x))
fputs (reg_names[REGNO (x) + 3], file);
else if (MEM_P (x))
@@ -15383,7 +18352,8 @@ print_operand (FILE *file, rtx x, int code)
if ((TARGET_SPE || TARGET_E500_DOUBLE)
&& (GET_MODE_SIZE (GET_MODE (x)) == 8
|| GET_MODE (x) == TFmode
- || GET_MODE (x) == TImode))
+ || GET_MODE (x) == TImode
+ || GET_MODE (x) == PTImode))
{
/* Handle [reg]. */
if (REG_P (tmp))
@@ -16766,7 +19736,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+ instruction in MODE. For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
@@ -16775,12 +19746,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
switch (mode)
{
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
@@ -16797,12 +19782,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -16949,7 +19943,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -16961,15 +19955,21 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
+ /* Before power8, we didn't have access to lbarx/lharx, so generate
+ lwarx plus shift/mask operations. With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
+ oldval = convert_modes (SImode, mode, oldval, 1);
+
+ if (!TARGET_SYNC_HI_QI)
+ {
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
/* Shift and mask OLDVAL into position with the word. */
- oldval = convert_modes (SImode, mode, oldval, 1);
oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
@@ -16977,6 +19977,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
newval = convert_modes (SImode, mode, newval, 1);
newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
@@ -17005,7 +20006,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
}
cond = gen_reg_rtx (CCmode);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
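+ /* Test 128-bit equality by XORing the 64-bit halves of the two
+ values and ORing the results; the OR is zero iff the full
+ 128-bit values are equal. */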
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
@@ -17015,7 +20034,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -17033,6 +20052,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -17056,7 +20077,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -17105,12 +20126,28 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+ /* On power8, we want to use SImode for the operation. On previous systems,
+ do the operation on the containing word and shift/mask to extract the
+ proper byte or halfword. */
if (mode == QImode || mode == HImode)
{
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
+ {
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
/* Shift and mask VAL into position with the word. */
@@ -17151,7 +20188,8 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
before = gen_reg_rtx (SImode);
if (after)
after = gen_reg_rtx (SImode);
- mode = SImode;
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
@@ -17184,9 +20222,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -17195,11 +20235,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+ then do the calculations in a SImode register. */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+ operation and then do the calculations in a SImode register. */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
@@ -17239,6 +20290,39 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
+ /* TDmode residing in FP registers is special, since the ISA requires that
+ the lower-numbered word of a register pair is always the most significant
+ word, even in little-endian mode. This does not match the usual subreg
+ semantics, so we cannot use simplify_gen_subreg in those cases. Access
+ the appropriate constituent registers "by hand" in little-endian mode.
+
+ Note we do not need to check for destructive overlap here since TDmode
+ can only reside in even/odd register pairs. */
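+ /* Concretely, with nregs == 2 subreg word I of the value lives in
+ hard register REGNO + 1 - I rather than REGNO + I. */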
+ if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
+ {
+ rtx p_src, p_dst;
+ int i;
+
+ for (i = 0; i < nregs; i++)
+ {
+ if (REG_P (src) && FP_REGNO_P (REGNO (src)))
+ p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
+ else
+ p_src = simplify_gen_subreg (reg_mode, src, mode,
+ i * reg_mode_size);
+
+ if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
+ p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
+ else
+ p_dst = simplify_gen_subreg (reg_mode, dst, mode,
+ i * reg_mode_size);
+
+ emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
+ }
+
+ return;
+ }
+
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
/* Move register range backwards, if we might have destructive
@@ -17693,7 +20777,7 @@ rs6000_savres_strategy (rs6000_stack_t *info,
}
else
{
- gcc_checking_assert (DEFAULT_ABI == ABI_AIX);
+ gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
if (info->first_fp_reg_save > 61)
strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
@@ -17704,7 +20788,8 @@ rs6000_savres_strategy (rs6000_stack_t *info,
by the static chain. It would require too much fiddling and the
static chain is rarely used anyway. FPRs are saved w.r.t the stack
pointer on Darwin, and AIX uses r1 or r12. */
- if (using_static_chain_p && DEFAULT_ABI != ABI_AIX)
+ if (using_static_chain_p
+ && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
| SAVE_INLINE_GPRS
| SAVE_INLINE_VRS | REST_INLINE_VRS);
@@ -17837,6 +20922,34 @@ rs6000_savres_strategy (rs6000_stack_t *info,
The required alignment for AIX configurations is two words (i.e., 8
or 16 bytes).
+ The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
+
+ SP----> +---------------------------------------+
+ | Back chain to caller | 0
+ +---------------------------------------+
+ | Save area for CR | 8
+ +---------------------------------------+
+ | Saved LR | 16
+ +---------------------------------------+
+ | Saved TOC pointer | 24
+ +---------------------------------------+
+ | Parameter save area (P) | 32
+ +---------------------------------------+
+ | Alloca space (A) | 32+P
+ +---------------------------------------+
+ | Local variable space (L) | 32+P+A
+ +---------------------------------------+
+ | Save area for AltiVec registers (W) | 32+P+A+L
+ +---------------------------------------+
+ | AltiVec alignment padding (Y) | 32+P+A+L+W
+ +---------------------------------------+
+ | Save area for GP registers (G) | 32+P+A+L+W+Y
+ +---------------------------------------+
+ | Save area for FP registers (F) | 32+P+A+L+W+Y+G
+ +---------------------------------------+
+ old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
+ +---------------------------------------+
+
V.4 stack frames look like:
@@ -17897,6 +21010,7 @@ rs6000_stack_info (void)
rs6000_stack_t *info_ptr = &stack_info;
int reg_size = TARGET_32BIT ? 4 : 8;
int ehrd_size;
+ int ehcr_size;
int save_align;
int first_gp;
HOST_WIDE_INT non_fixed_size;
@@ -17990,6 +21104,18 @@ rs6000_stack_info (void)
else
ehrd_size = 0;
+ /* In the ELFv2 ABI, we also need to allocate space for separate
+ CR field save areas if the function calls __builtin_eh_return. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ /* This hard-codes that we have three call-saved CR fields. */
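+ /* (The call-saved fields are CR2, CR3, and CR4.) */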
+ ehcr_size = 3 * reg_size;
+ /* We do *not* use the regular CR save mechanism. */
+ info_ptr->cr_save_p = 0;
+ }
+ else
+ ehcr_size = 0;
+
/* Determine various sizes. */
info_ptr->reg_size = reg_size;
info_ptr->fixed_size = RS6000_SAVE_AREA;
@@ -18029,6 +21155,7 @@ rs6000_stack_info (void)
gcc_unreachable ();
case ABI_AIX:
+ case ABI_ELFv2:
case ABI_DARWIN:
info_ptr->fp_save_offset = - info_ptr->fp_size;
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
@@ -18058,6 +21185,8 @@ rs6000_stack_info (void)
}
else
info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
+
+ info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
info_ptr->lr_save_offset = 2*reg_size;
break;
@@ -18120,6 +21249,7 @@ rs6000_stack_info (void)
+ info_ptr->spe_gp_size
+ info_ptr->spe_padding_size
+ ehrd_size
+ + ehcr_size
+ info_ptr->cr_size
+ info_ptr->vrsave_size,
save_align);
@@ -18133,7 +21263,7 @@ rs6000_stack_info (void)
/* Determine if we need to save the link register. */
if (info_ptr->calls_p
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& crtl->profile
&& !TARGET_PROFILE_KERNEL)
|| (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
@@ -18279,6 +21409,7 @@ debug_stack_info (rs6000_stack_t *info)
default: abi_string = "Unknown"; break;
case ABI_NONE: abi_string = "NONE"; break;
case ABI_AIX: abi_string = "AIX"; break;
+ case ABI_ELFv2: abi_string = "ELFv2"; break;
case ABI_DARWIN: abi_string = "Darwin"; break;
case ABI_V4: abi_string = "V.4"; break;
}
@@ -18400,7 +21531,8 @@ rs6000_return_addr (int count, rtx frame)
/* Currently we don't optimize very well between prolog and body
code and for PIC code the code can be actually quite bad, so
don't try to be too clever here. */
- if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
+ if (count != 0
+ || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
{
cfun->machine->ra_needs_full_frame = 1;
@@ -18459,13 +21591,13 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
return false;
}
- /* Under the AIX ABI we can't allow calls to non-local functions,
- because the callee may have a different TOC pointer to the
- caller and there's no way to ensure we restore the TOC when we
- return. With the secure-plt SYSV ABI we can't make non-local
+ /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
+ functions, because the callee may have a different TOC pointer to
+ the caller and there's no way to ensure we restore the TOC when
+ we return. With the secure-plt SYSV ABI we can't make non-local
calls when -fpic/PIC because the plt call stubs use r30. */
if (DEFAULT_ABI == ABI_DARWIN
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
&& (*targetm.binds_local_p) (decl))
@@ -18566,7 +21698,7 @@ rs6000_emit_load_toc_table (int fromprolog)
rtx dest;
dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
- if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
+ if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
{
char buf[30];
rtx lab, tmp1, tmp2, got;
@@ -18594,7 +21726,7 @@ rs6000_emit_load_toc_table (int fromprolog)
emit_insn (gen_load_toc_v4_pic_si ());
emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
}
- else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
+ else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
{
char buf[30];
rtx temp0 = (fromprolog
@@ -18642,7 +21774,7 @@ rs6000_emit_load_toc_table (int fromprolog)
}
else
{
- gcc_assert (DEFAULT_ABI == ABI_AIX);
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
if (TARGET_32BIT)
emit_insn (gen_load_toc_aix_si (dest));
@@ -19047,7 +22179,7 @@ output_probe_stack_range (rtx reg1, rtx reg2)
static rtx
rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
- rtx reg2, rtx rreg)
+ rtx reg2, rtx rreg, rtx split_reg)
{
rtx real, temp;
@@ -19138,6 +22270,11 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
}
}
+ /* If a store insn has been split into multiple insns, the
+ true source register is given by split_reg. */
+ if (split_reg != NULL_RTX)
+ real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
+
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
@@ -19245,7 +22382,7 @@ emit_frame_save (rtx frame_reg, enum machine_mode mode,
reg = gen_rtx_REG (mode, regno);
insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
/* Emit an offset memory reference suitable for a frame store, while
@@ -19361,7 +22498,7 @@ rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
if ((sel & SAVRES_LR))
suffix = "_x";
}
- else if (DEFAULT_ABI == ABI_AIX)
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
{
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
/* No out-of-line save/restore routines for GPRs on AIX. */
@@ -19502,7 +22639,7 @@ rs6000_emit_stack_reset (rs6000_stack_t *info,
static inline unsigned
ptr_regno_for_savres (int sel)
{
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
}
@@ -19587,6 +22724,43 @@ rs6000_emit_savres_rtx (rs6000_stack_t *info,
return insn;
}
+/* Emit code to store CR fields that need to be saved into REG. */
+
+static void
+rs6000_emit_move_from_cr (rtx reg)
+{
+ /* Only the ELFv2 ABI allows storing only selected fields. */
+ if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
+ {
+ int i, cr_reg[8], count = 0;
+
+ /* Collect CR fields that must be saved. */
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ cr_reg[count++] = i;
+
+ /* If it's just a single one, use mfcrf. */
+ if (count == 1)
+ {
+ rtvec p = rtvec_alloc (1);
+ rtvec r = rtvec_alloc (2);
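+ /* The second operand is the FXM mask, 1 << (7 - field), which
+ selects the one CR field for mfcrf. */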
+ RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
+ RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
+ RTVEC_ELT (p, 0)
+ = gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ return;
+ }
+
+ /* ??? It might be better to handle the count == 2 or 3 cases here
+ as well, using logical operations to combine the values. */
+ }
+
+ emit_insn (gen_movesi_from_cr (reg));
+}
+
/* Determine whether the gp REG is really used. */
static bool
@@ -19652,6 +22826,17 @@ rs6000_emit_prologue (void)
#define NOT_INUSE(R) do {} while (0)
#endif
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
+
+ /* With -mminimal-toc we may generate an extra use of r2 below. */
+ if (!TARGET_SINGLE_PIC_BASE
+ && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
+ cfun->machine->r2_setup_needed = true;
+ }
+
+
if (flag_stack_usage_info)
current_function_static_stack_size = info->total_size;
@@ -19766,7 +22951,7 @@ rs6000_emit_prologue (void)
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- treg, GEN_INT (-info->total_size));
+ treg, GEN_INT (-info->total_size), NULL_RTX);
sp_off = frame_off = info->total_size;
}
@@ -19851,14 +23036,14 @@ rs6000_emit_prologue (void)
insn = emit_move_insn (mem, reg);
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
END_USE (0);
}
}
/* If we need to save CR, put it into r12 or r11. Choose r12 except when
r12 will be needed by out-of-line gpr restore. */
- cr_save_regno = (DEFAULT_ABI == ABI_AIX
+ cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& !(strategy & (SAVE_INLINE_GPRS
| SAVE_NOINLINE_GPRS_SAVES_LR))
? 11 : 12);
@@ -19867,21 +23052,9 @@ rs6000_emit_prologue (void)
&& REGNO (frame_reg_rtx) != cr_save_regno
&& !(using_static_chain_p && cr_save_regno == 11))
{
- rtx set;
-
cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
START_USE (cr_save_regno);
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
- RTX_FRAME_RELATED_P (insn) = 1;
- /* Now, there's no way that dwarf2out_frame_debug_expr is going
- to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
- But that's OK. All we have to do is specify that _one_ condition
- code register is saved in this stack slot. The thrower's epilogue
- will then restore all the call-saved registers.
- We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
- set = gen_rtx_SET (VOIDmode, cr_save_rtx,
- gen_rtx_REG (SImode, CR2_REGNO));
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ rs6000_emit_move_from_cr (cr_save_rtx);
}
/* Do any required saving of fpr's. If only one or two to save, do
@@ -19919,7 +23092,7 @@ rs6000_emit_prologue (void)
info->lr_save_offset,
DFmode, sel);
rs6000_frame_related (insn, ptr_reg, sp_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (lr)
END_USE (0);
}
@@ -19998,7 +23171,7 @@ rs6000_emit_prologue (void)
SAVRES_SAVE | SAVRES_GPR);
rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
/* Move the static chain pointer back. */
@@ -20048,7 +23221,7 @@ rs6000_emit_prologue (void)
info->lr_save_offset + ptr_off,
reg_mode, sel);
rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (lr)
END_USE (0);
}
@@ -20064,7 +23237,7 @@ rs6000_emit_prologue (void)
info->gp_save_offset + frame_off + reg_size * i);
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
else if (!WORLD_SAVE_P (info))
{
@@ -20133,7 +23306,8 @@ rs6000_emit_prologue (void)
be updated if we arrived at this function via a plt call or
toc adjusting stub. */
emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
- toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
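+ /* 0x80410000 and 0xE8410000 are the images of "lwz 2,0(1)" and
+ "ld 2,0(1)"; adding RS6000_TOC_SAVE_SLOT gives the insn that
+ reloads the TOC from its save slot. */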
+ toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
+ + RS6000_TOC_SAVE_SLOT);
hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
@@ -20152,7 +23326,7 @@ rs6000_emit_prologue (void)
LABEL_NUSES (toc_save_done) += 1;
save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
- TOC_REGNUM, frame_off + 5 * reg_size,
+ TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
sp_off - frame_off);
emit_label (toc_save_done);
@@ -20192,26 +23366,121 @@ rs6000_emit_prologue (void)
rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
GEN_INT (info->cr_save_offset + frame_off));
rtx mem = gen_frame_mem (SImode, addr);
- /* See the large comment above about why CR2_REGNO is used. */
- rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
/* If we didn't copy cr before, do so now using r0. */
if (cr_save_rtx == NULL_RTX)
{
- rtx set;
-
START_USE (0);
cr_save_rtx = gen_rtx_REG (SImode, 0);
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
+ rs6000_emit_move_from_cr (cr_save_rtx);
+ }
+
+ /* Saving CR requires a two-instruction sequence: one instruction
+ to move the CR to a general-purpose register, and a second
+ instruction that stores the GPR to memory.
+
+ We do not emit any DWARF CFI records for the first of these,
+ because we cannot properly represent the fact that CR is saved in
+ a register. One reason is that we cannot express that multiple
+ CR fields are saved; another reason is that on 64-bit, the size
+ of the CR register in DWARF (4 bytes) differs from the size of
+ a general-purpose register.
+
+ This means if any intervening instruction were to clobber one of
+ the call-saved CR fields, we'd have incorrect CFI. To prevent
+ this from happening, we mark the store to memory as a use of
+ those CR fields, which prevents any such instruction from being
+ scheduled in between the two instructions. */
+ rtx crsave_v[9];
+ int n_crsave = 0;
+ int i;
+
+ crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ crsave_v[n_crsave++]
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (n_crsave, crsave_v)));
+ END_USE (REGNO (cr_save_rtx));
+
+ /* Now, there's no way that dwarf2out_frame_debug_expr is going to
+ understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
+ so we need to construct a frame expression manually. */
RTX_FRAME_RELATED_P (insn) = 1;
- set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
+
+ /* Update address to be stack-pointer relative, like
+ rs6000_frame_related would do. */
+ addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
+ GEN_INT (info->cr_save_offset + sp_off));
+ mem = gen_frame_mem (SImode, addr);
+
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ /* In the ELFv2 ABI we generate separate CFI records for each
+ CR field that was actually saved. They all point to the
+ same 32-bit stack slot. */
+ rtx crframe[8];
+ int n_crframe = 0;
+
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ {
+ crframe[n_crframe]
+ = gen_rtx_SET (VOIDmode, mem,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
+ n_crframe++;
+ }
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (n_crframe, crframe)));
+ }
+ else
+ {
+ /* In other ABIs, by convention, we use a single CR regnum to
+ represent the fact that all call-saved CR fields are saved.
+ We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
+ rtx set = gen_rtx_SET (VOIDmode, mem,
+ gen_rtx_REG (SImode, CR2_REGNO));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
}
- insn = emit_move_insn (mem, cr_save_rtx);
- END_USE (REGNO (cr_save_rtx));
+ }
- rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ /* In the ELFv2 ABI we need to save all call-saved CR fields into
+ *separate* slots if the routine calls __builtin_eh_return, so
+ that they can be independently restored by the unwinder. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ int i, cr_off = info->ehcr_offset;
+ rtx crsave;
+
+ /* ??? We might get better performance by using multiple mfocrf
+ instructions. */
+ crsave = gen_rtx_REG (SImode, 0);
+ emit_insn (gen_movesi_from_cr (crsave));
+
+ for (i = 0; i < 8; i++)
+ if (!call_used_regs[CR0_REGNO + i])
+ {
+ rtvec p = rtvec_alloc (2);
+ RTVEC_ELT (p, 0)
+ = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
+ RTVEC_ELT (p, 1)
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
+ sp_reg_rtx, cr_off + sp_off));
+
+ cr_off += reg_size;
+ }
}
/* Update stack and set back pointer unless this is V.4,
@@ -20291,7 +23560,7 @@ rs6000_emit_prologue (void)
info->altivec_save_offset + ptr_off,
0, V4SImode, SAVRES_SAVE | SAVRES_VR);
rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
{
/* The oddity mentioned above clobbered our frame reg. */
@@ -20307,7 +23576,7 @@ rs6000_emit_prologue (void)
for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
{
- rtx areg, savereg, mem;
+ rtx areg, savereg, mem, split_reg;
int offset;
offset = (info->altivec_save_offset + frame_off
@@ -20325,8 +23594,18 @@ rs6000_emit_prologue (void)
insn = emit_move_insn (mem, savereg);
+ /* When we split a VSX store into two insns, we need to make
+ sure the DWARF info knows which register we are storing.
+ Pass it in to be used on the appropriate note. */
+ if (!BYTES_BIG_ENDIAN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
+ split_reg = savereg;
+ else
+ split_reg = NULL_RTX;
+
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- areg, GEN_INT (offset));
+ areg, GEN_INT (offset), split_reg);
}
}
@@ -20350,7 +23629,8 @@ rs6000_emit_prologue (void)
be using r12 as frame_reg_rtx and r11 as the static chain
pointer for nested functions. */
save_regno = 12;
- if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && !using_static_chain_p)
save_regno = 11;
else if (REGNO (frame_reg_rtx) == 12)
{
@@ -20389,7 +23669,7 @@ rs6000_emit_prologue (void)
can use register 0. This allows us to use a plain 'blr' to return
from the procedure more often. */
int save_LR_around_toc_setup = (TARGET_ELF
- && DEFAULT_ABI != ABI_AIX
+ && DEFAULT_ABI == ABI_V4
&& flag_pic
&& ! info->lr_save_p
&& EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
@@ -20451,7 +23731,7 @@ rs6000_emit_prologue (void)
if (rs6000_save_toc_in_prologue_p ())
{
rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
- emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size));
+ emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
}
}
@@ -20492,6 +23772,49 @@ rs6000_output_function_prologue (FILE *file,
}
}
+ /* ELFv2 ABI r2 setup code and local entry point. This must follow
+ immediately after the global entry point label. */
+ if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
+ {
+ const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+
+ fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
+ fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
+
+ fputs ("\t.localentry\t", file);
+ assemble_name (file, name);
+ fputs (",.-", file);
+ assemble_name (file, name);
+ fputs ("\n", file);
+ }
+
+ /* Output -mprofile-kernel code. This needs to be done here instead of
+ in output_function_profiler since it must go after the ELFv2 ABI
+ local entry point. */
+ if (TARGET_PROFILE_KERNEL)
+ {
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
+ gcc_assert (!TARGET_32BIT);
+
+ asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
+ asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
+
+ /* In the ELFv2 ABI we have no compiler stack word. It must be
+ the responsibility of _mcount to preserve the static chain
+ register if required. */
+ if (DEFAULT_ABI != ABI_ELFv2
+ && cfun->static_chain_decl != NULL)
+ {
+ asm_fprintf (file, "\tstd %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ asm_fprintf (file, "\tld %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ }
+ else
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ }
+
rs6000_pic_labelno++;
}
@@ -20544,6 +23867,7 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
if (using_mfcr_multiple && count > 1)
{
+ rtx insn;
rtvec p;
int ndx;
@@ -20561,16 +23885,43 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
ndx++;
}
- emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
gcc_assert (ndx == count);
+
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+ CR field separately. */
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+ {
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
else
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
- emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
- reg));
+ {
+ rtx insn = emit_insn (gen_movsi_to_cr_one
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
- if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+ CR field separately, attached to the insn that in fact
+ restores this particular CR field. */
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
+ if (!exit_func && DEFAULT_ABI != ABI_ELFv2
+ && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
{
rtx insn = get_last_insn ();
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
@@ -20611,10 +23962,22 @@ restore_saved_lr (int regno, bool exit_func)
static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
- if (info->cr_save_p)
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ int i;
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ {
+ rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
+ cfa_restores);
+ }
+ }
+ else if (info->cr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (SImode, CR2_REGNO),
cfa_restores);
+
if (info->lr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (Pmode, LR_REGNO),
@@ -21112,6 +24475,35 @@ rs6000_emit_epilogue (int sibcall)
|| (!restoring_GPRs_inline
&& info->first_fp_reg_save == 64));
+ /* In the ELFv2 ABI we need to restore all call-saved CR fields from
+ *separate* slots if the routine calls __builtin_eh_return, so
+ that they can be independently restored by the unwinder. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ int i, cr_off = info->ehcr_offset;
+
+ for (i = 0; i < 8; i++)
+ if (!call_used_regs[CR0_REGNO + i])
+ {
+ rtx reg = gen_rtx_REG (SImode, 0);
+ emit_insn (gen_frame_load (reg, frame_reg_rtx,
+ cr_off + frame_off));
+
+ insn = emit_insn (gen_movsi_to_cr_one
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
+
+ if (!exit_func && flag_shrink_wrap)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ cr_off += reg_size;
+ }
+ }
+
/* Get the old lr if we saved it. If we are restoring registers
out-of-line, then the out-of-line routines can do this for us. */
if (restore_lr && restoring_GPRs_inline)
@@ -21155,7 +24547,7 @@ rs6000_emit_epilogue (int sibcall)
{
rtx reg = gen_rtx_REG (reg_mode, 2);
emit_insn (gen_frame_load (reg, frame_reg_rtx,
- frame_off + 5 * reg_size));
+ frame_off + RS6000_TOC_SAVE_SLOT));
}
for (i = 0; ; ++i)
@@ -21441,6 +24833,7 @@ rs6000_emit_epilogue (int sibcall)
if (! restoring_FPRs_inline)
{
int i;
+ int reg;
rtx sym;
if (flag_shrink_wrap)
@@ -21449,10 +24842,9 @@ rs6000_emit_epilogue (int sibcall)
sym = rs6000_savres_routine_sym (info,
SAVRES_FPR | (lr ? SAVRES_LR : 0));
RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
- RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
- gen_rtx_REG (Pmode,
- DEFAULT_ABI == ABI_AIX
- ? 1 : 11));
+ reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
+ RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
+
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
{
rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
@@ -21530,7 +24922,8 @@ rs6000_output_function_epilogue (FILE *file,
System V.4 Powerpc's (and the embedded ABI derived from it) use a
different traceback table. */
- if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && ! flag_inhibit_size_directive
&& rs6000_traceback != traceback_none && !cfun->is_thunk)
{
const char *fname = NULL;
@@ -21858,6 +25251,12 @@ rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
SIBLING_CALL_P (insn) = 1;
emit_barrier ();
+ /* Ensure we have a global entry point for the thunk. ??? We could
+ avoid that if the target routine doesn't need a global entry point,
+ but we do not know whether this is the case at this point. */
+ if (DEFAULT_ABI == ABI_ELFv2)
+ cfun->machine->r2_setup_needed = true;
+
/* Run just enough of rest_of_compilation to get the insns emitted.
There's not really enough bulk here to make other passes such as
instruction scheduling worth while. Note that use_thunk calls
@@ -22554,7 +25953,7 @@ output_profile_hook (int labelno ATTRIBUTE_UNUSED)
if (TARGET_PROFILE_KERNEL)
return;
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
{
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
@@ -22698,29 +26097,9 @@ output_function_profiler (FILE *file, int labelno)
break;
case ABI_AIX:
+ case ABI_ELFv2:
case ABI_DARWIN:
- if (!TARGET_PROFILE_KERNEL)
- {
/* Don't do anything, done in output_profile_hook (). */
- }
- else
- {
- gcc_assert (!TARGET_32BIT);
-
- asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
- asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
-
- if (cfun->static_chain_decl != NULL)
- {
- asm_fprintf (file, "\tstd %s,24(%s)\n",
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
- asm_fprintf (file, "\tld %s,24(%s)\n",
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
- }
- else
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
- }
break;
}
}
@@ -22846,6 +26225,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|| rs6000_cpu_attr == CPU_POWER4
|| rs6000_cpu_attr == CPU_POWER5
|| rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8
|| rs6000_cpu_attr == CPU_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -23128,7 +26508,8 @@ is_microcoded_insn (rtx insn)
if (rs6000_cpu_attr == CPU_CELL)
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_EXT_U
@@ -23153,7 +26534,8 @@ is_cracked_insn (rtx insn)
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_U || type == TYPE_STORE_U
@@ -23432,6 +26814,8 @@ rs6000_issue_rate (void)
case CPU_POWER6:
case CPU_POWER7:
return 5;
+ case CPU_POWER8:
+ return 7;
default:
return 1;
}
@@ -24059,6 +27443,39 @@ insn_must_be_first_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_CR_LOGICAL:
+ case TYPE_DELAYED_CR:
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_SYNC:
+ case TYPE_ISYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ case TYPE_VECSTORE:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24137,6 +27554,25 @@ insn_must_be_last_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24226,8 +27662,9 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
if (can_issue_more && !is_branch_slot_insn (next_insn))
can_issue_more--;
- /* Power6 and Power7 have special group ending nop. */
- if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
+ /* Do we have a special group ending nop? */
+ if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8)
{
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
@@ -24598,6 +28035,11 @@ rs6000_trampoline_size (void)
ret = (TARGET_32BIT) ? 12 : 24;
break;
+ case ABI_ELFv2:
+ gcc_assert (!TARGET_32BIT);
+ ret = 32;
+ break;
+
case ABI_DARWIN:
case ABI_V4:
ret = (TARGET_32BIT) ? 40 : 48;
@@ -24653,6 +28095,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
break;
/* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
+ case ABI_ELFv2:
case ABI_DARWIN:
case ABI_V4:
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
@@ -24743,6 +28186,9 @@ rs6000_handle_altivec_attribute (tree *node,
unsigned_p = TYPE_UNSIGNED (type);
switch (mode)
{
+ case TImode:
+ result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+ break;
case DImode:
result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
break;
@@ -24947,7 +28393,7 @@ rs6000_ms_bitfield_layout_p (const_tree record_type)
static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
- if (DEFAULT_ABI == ABI_AIX
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& TARGET_MINIMAL_TOC
&& !TARGET_RELOCATABLE)
{
@@ -24968,7 +28414,8 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
else
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
}
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
+ else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && !TARGET_RELOCATABLE)
fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
else
{
@@ -25518,7 +28965,7 @@ rs6000_elf_reloc_rw_mask (void)
{
if (flag_pic)
return 3;
- else if (DEFAULT_ABI == ABI_AIX)
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
return 2;
else
return 0;
@@ -25594,7 +29041,7 @@ rs6000_elf_asm_out_destructor (rtx symbol, int priority)
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
- if (TARGET_64BIT)
+ if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
{
fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
ASM_OUTPUT_LABEL (file, name);
@@ -25660,7 +29107,6 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
fprintf (file, "%s:\n", desc_name);
fprintf (file, "\t.long %s\n", orig_name);
fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
- if (DEFAULT_ABI == ABI_AIX)
fputs ("\t.long 0\n", file);
fprintf (file, "\t.previous\n");
}
@@ -25690,7 +29136,7 @@ rs6000_elf_file_end (void)
}
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
- if (TARGET_32BIT)
+ if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
file_end_indicate_exec_stack ();
#endif
}
@@ -25829,10 +29275,23 @@ rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
name, suffix[smclass], flags & SECTION_ENTSIZE);
}
+#define IN_NAMED_SECTION(DECL) \
+ ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
+ && DECL_SECTION_NAME (DECL) != NULL_TREE)
+
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
- unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+ unsigned HOST_WIDE_INT align)
{
+ /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
+ named section. */
+ if (align > BIGGEST_ALIGNMENT)
+ {
+ resolve_unique_section (decl, reloc, true);
+ if (IN_NAMED_SECTION (decl))
+ return get_named_section (decl, NULL, reloc);
+ }
+
if (decl_readonly_section (decl, reloc))
{
if (TREE_PUBLIC (decl))
@@ -25870,10 +29329,12 @@ rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
const char *name;
- /* Use select_section for private and uninitialized data. */
+ /* Use select_section for private data and uninitialized data with
+ alignment <= BIGGEST_ALIGNMENT. */
if (!TREE_PUBLIC (decl)
|| DECL_COMMON (decl)
- || DECL_INITIAL (decl) == NULL_TREE
+ || (DECL_INITIAL (decl) == NULL_TREE
+ && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
|| DECL_INITIAL (decl) == error_mark_node
|| (flag_zero_initialized_in_bss
&& initializer_zerop (DECL_INITIAL (decl))))
@@ -26430,7 +29891,8 @@ rs6000_register_move_cost (enum machine_mode mode,
/* For those processors that have slow LR/CTR moves, make them more
expensive than memory in order to bias spills to memory.  */
else if ((rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7)
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8)
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
ret = 6 * hard_regno_nregs[0][mode];
@@ -26440,7 +29902,7 @@ rs6000_register_move_cost (enum machine_mode mode,
}
/* If we have VSX, we can easily move between FPR or Altivec registers. */
- else if (VECTOR_UNIT_VSX_P (mode)
+ else if (VECTOR_MEM_VSX_P (mode)
&& reg_classes_intersect_p (to, VSX_REGS)
&& reg_classes_intersect_p (from, VSX_REGS))
ret = 2 * hard_regno_nregs[32][mode];
@@ -26481,7 +29943,8 @@ rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
if (reg_classes_intersect_p (rclass, GENERAL_REGS))
ret = 4 * hard_regno_nregs[0][mode];
- else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
+ else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
+ || reg_classes_intersect_p (rclass, VSX_REGS)))
ret = 4 * hard_regno_nregs[32][mode];
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
@@ -26643,54 +30106,26 @@ rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
emit_insn (gen_rtx_SET (VOIDmode, dst, r));
}
-/* Newton-Raphson approximation of floating point divide with just 2 passes
- (either single precision floating point, or newer machines with higher
- accuracy estimates). Support both scalar and vector divide. Assumes no
- trapping math and finite arguments. */
+/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
+ add a reg_note saying that this was a division. Support both scalar and
+ vector divide. Assumes no trapping math and finite arguments. */
-static void
-rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
+void
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
enum machine_mode mode = GET_MODE (dst);
- rtx x0, e0, e1, y1, u0, v0;
- enum insn_code code = optab_handler (smul_optab, mode);
- insn_gen_fn gen_mul = GEN_FCN (code);
- rtx one = rs6000_load_constant_and_splat (mode, dconst1);
-
- gcc_assert (code != CODE_FOR_nothing);
-
- /* x0 = 1./d estimate */
- x0 = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
- UNSPEC_FRES)));
-
- e0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - (d * x0) */
-
- e1 = gen_reg_rtx (mode);
- rs6000_emit_madd (e1, e0, e0, e0); /* e1 = (e0 * e0) + e0 */
-
- y1 = gen_reg_rtx (mode);
- rs6000_emit_madd (y1, e1, x0, x0); /* y1 = (e1 * x0) + x0 */
-
- u0 = gen_reg_rtx (mode);
- emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
-
- v0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - (d * u0) */
-
- rs6000_emit_madd (dst, v0, y1, u0); /* dst = (v0 * y1) + u0 */
-}
+ rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
+ int i;
-/* Newton-Raphson approximation of floating point divide that has a low
- precision estimate. Assumes no trapping math and finite arguments. */
+ /* Low precision estimates guarantee 5 bits of accuracy. High
+ precision estimates guarantee 14 bits of accuracy. SFmode
+ requires 23 bits of accuracy. DFmode requires 52 bits of
+ accuracy. Each pass at least doubles the accuracy, leading
+ to the following. */
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
+ if (mode == DFmode || mode == V2DFmode)
+ passes++;
-static void
-rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
-{
- enum machine_mode mode = GET_MODE (dst);
- rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
enum insn_code code = optab_handler (smul_optab, mode);
insn_gen_fn gen_mul = GEN_FCN (code);
@@ -26704,46 +30139,44 @@ rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
UNSPEC_FRES)));
- e0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - d * x0 */
-
- y1 = gen_reg_rtx (mode);
- rs6000_emit_madd (y1, e0, x0, x0); /* y1 = x0 + e0 * x0 */
+ /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
+ if (passes > 1) {
- e1 = gen_reg_rtx (mode);
- emit_insn (gen_mul (e1, e0, e0)); /* e1 = e0 * e0 */
+ /* e0 = 1. - d * x0 */
+ e0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (e0, d, x0, one);
- y2 = gen_reg_rtx (mode);
- rs6000_emit_madd (y2, e1, y1, y1); /* y2 = y1 + e1 * y1 */
+ /* x1 = x0 + e0 * x0 */
+ x1 = gen_reg_rtx (mode);
+ rs6000_emit_madd (x1, e0, x0, x0);
- e2 = gen_reg_rtx (mode);
- emit_insn (gen_mul (e2, e1, e1)); /* e2 = e1 * e1 */
+ for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
+ ++i, xprev = xnext, eprev = enext) {
- y3 = gen_reg_rtx (mode);
- rs6000_emit_madd (y3, e2, y2, y2); /* y3 = y2 + e2 * y2 */
+ /* enext = eprev * eprev */
+ enext = gen_reg_rtx (mode);
+ emit_insn (gen_mul (enext, eprev, eprev));
- u0 = gen_reg_rtx (mode);
- emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
+ /* xnext = xprev + enext * xprev */
+ xnext = gen_reg_rtx (mode);
+ rs6000_emit_madd (xnext, enext, xprev, xprev);
+ }
- v0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - d * u0 */
+ } else
+ xprev = x0;
- rs6000_emit_madd (dst, v0, y3, u0); /* dst = u0 + v0 * y3 */
-}
+ /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
-/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
- add a reg_note saying that this was a division. Support both scalar and
- vector divide. Assumes no trapping math and finite arguments. */
+ /* u = n * xprev */
+ u = gen_reg_rtx (mode);
+ emit_insn (gen_mul (u, n, xprev));
-void
-rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
-{
- enum machine_mode mode = GET_MODE (dst);
+ /* v = n - (d * u) */
+ v = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (v, d, u, n);
- if (RS6000_RECIP_HIGH_PRECISION_P (mode))
- rs6000_emit_swdiv_high_precision (dst, n, d);
- else
- rs6000_emit_swdiv_low_precision (dst, n, d);
+ /* dst = (v * xprev) + u */
+ rs6000_emit_madd (dst, v, xprev, u);
if (note_p)
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
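
To make the pass-count arithmetic above concrete: each Newton-Raphson step for the reciprocal computes x_(i+1) = x_i * (2 - d * x_i) and roughly doubles the bits of accuracy, so a 5-bit estimate needs 3 passes for SFmode (5 -> 10 -> 20 -> 40 >= 23) and one more for DFmode (80 >= 52), while a 14-bit estimate needs 1 and 2 passes respectively. A minimal value-level sketch of the iteration (editorial; recip_nr is an illustrative name, and the real code additionally folds the numerator n into the final pass):

#include <stdio.h>

static double
recip_nr (double d, double x0, int passes)
{
  double x = x0;                   /* initial estimate, as from fres */
  for (int i = 0; i < passes; i++)
    x = x * (2.0 - d * x);         /* one Newton-Raphson refinement */
  return x;
}

int
main (void)
{
  printf ("%.17g\n", recip_nr (3.0, 0.3, 4));   /* converges to 1/3 */
  return 0;
}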
@@ -26758,7 +30191,16 @@ rs6000_emit_swrsqrt (rtx dst, rtx src)
enum machine_mode mode = GET_MODE (src);
rtx x0 = gen_reg_rtx (mode);
rtx y = gen_reg_rtx (mode);
- int passes = (TARGET_RECIP_PRECISION) ? 2 : 3;
+
+ /* Low precision estimates guarantee 5 bits of accuracy. High
+ precision estimates guarantee 14 bits of accuracy. SFmode
+ requires 23 bits of accuracy. DFmode requires 52 bits of
+ accuracy. Each pass at least doubles the accuracy, leading
+ to the following. */
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
+ if (mode == DFmode || mode == V2DFmode)
+ passes++;
+
REAL_VALUE_TYPE dconst3_2;
int i;
rtx halfthree;
@@ -26920,6 +30362,136 @@ rs6000_emit_parity (rtx dst, rtx src)
}
}
+/* Expand an Altivec constant permutation for little endian mode.
+ There are two issues: First, the two input operands must be
+ swapped so that together they form a double-wide array in LE
+ order. Second, the vperm instruction has surprising behavior
+ in LE mode: it interprets the elements of the source vectors
+ in BE mode ("left to right") and interprets the elements of
+ the destination vector in LE mode ("right to left"). To
+ correct for this, we must subtract each element of the permute
+ control vector from 31.
+
+ For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
+ with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
+ We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
+ serve as the permute control vector. Then, in BE mode,
+
+ vperm 9,10,11,12
+
+ places the desired result in vr9. However, in LE mode the
+ vector contents will be
+
+ vr10 = 00000003 00000002 00000001 00000000
+ vr11 = 00000007 00000006 00000005 00000004
+
+ The result of the vperm using the same permute control vector is
+
+ vr9 = 05000000 07000000 01000000 03000000
+
+ That is, the leftmost 4 bytes of vr10 are interpreted as the
+ source for the rightmost 4 bytes of vr9, and so on.
+
+ If we change the permute control vector to
+
+ vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
+
+ and issue
+
+ vperm 9,11,10,12
+
+ we get the desired
+
+ vr9 = 00000006 00000004 00000002 00000000. */
+
+void
+altivec_expand_vec_perm_const_le (rtx operands[4])
+{
+ unsigned int i;
+ rtx perm[16];
+ rtx constv, unspec;
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx sel = operands[3];
+
+ /* Unpack and adjust the constant selector. */
+ for (i = 0; i < 16; ++i)
+ {
+ rtx e = XVECEXP (sel, 0, i);
+ unsigned int elt = 31 - (INTVAL (e) & 31);
+ perm[i] = GEN_INT (elt);
+ }
+
+ /* Expand to a permute, swapping the inputs and using the
+ adjusted selector. */
+ if (!REG_P (op0))
+ op0 = force_reg (V16QImode, op0);
+ if (!REG_P (op1))
+ op1 = force_reg (V16QImode, op1);
+
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
+ UNSPEC_VPERM);
+ if (!REG_P (target))
+ {
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp, unspec);
+ unspec = tmp;
+ }
+
+ emit_move_insn (target, unspec);
+}
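
The selector adjustment above is easy to check by hand. A standalone rendition (editorial; adjust_sel is an illustrative name), applied to the control vector from the example in the comment:

#include <stdio.h>

static void
adjust_sel (unsigned char sel[16])
{
  int i;
  for (i = 0; i < 16; i++)
    sel[i] = 31 - (sel[i] & 31);   /* mirrors 31 - (INTVAL (e) & 31) */
}

int
main (void)
{
  unsigned char sel[16] = { 0, 1, 2, 3, 8, 9, 10, 11,
                            16, 17, 18, 19, 24, 25, 26, 27 };
  int i;
  adjust_sel (sel);
  for (i = 0; i < 16; i++)         /* prints 31 30 29 28 23 22 21 20 ... */
    printf ("%d ", sel[i]);
  printf ("\n");
  return 0;
}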
+
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
+ permute control vector. But here it's not a constant, so we must
+ generate a vector NAND or NOR to do the adjustment. */
+
+void
+altivec_expand_vec_perm_le (rtx operands[4])
+{
+ rtx notx, iorx, unspec;
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx sel = operands[3];
+ rtx tmp = target;
+ rtx norreg = gen_reg_rtx (V16QImode);
+ enum machine_mode mode = GET_MODE (target);
+
+ /* Get everything in regs so the pattern matches. */
+ if (!REG_P (op0))
+ op0 = force_reg (mode, op0);
+ if (!REG_P (op1))
+ op1 = force_reg (mode, op1);
+ if (!REG_P (sel))
+ sel = force_reg (V16QImode, sel);
+ if (!REG_P (target))
+ tmp = gen_reg_rtx (mode);
+
+ /* Invert the selector with a VNAND if available, else a VNOR.
+ The VNAND is preferred for future fusion opportunities. */
+ notx = gen_rtx_NOT (V16QImode, sel);
+ iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
+ UNSPEC_VPERM);
+
+ /* Copy into target, possibly by way of a register. */
+ if (!REG_P (target))
+ {
+ emit_move_insn (tmp, unspec);
+ unspec = tmp;
+ }
+
+ emit_move_insn (target, unspec);
+}
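
Why either form computes the complement: by De Morgan's laws, ~a | ~b == ~(a & b) (the vnand pattern) and ~a & ~b == ~(a | b) (the vnor pattern), so with both inputs equal to sel each one reduces to ~sel. An editorial check in C:

static unsigned char vnand (unsigned char a, unsigned char b) { return ~a | ~b; }
static unsigned char vnor  (unsigned char a, unsigned char b) { return ~a & ~b; }
/* vnand (sel, sel) == vnor (sel, sel) == (unsigned char) ~sel */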
+
/* Expand an Altivec constant permutation. Return true if we match
an efficient implementation; false to fall back to VPERM. */
@@ -26927,26 +30499,43 @@ bool
altivec_expand_vec_perm_const (rtx operands[4])
{
struct altivec_perm_insn {
+ HOST_WIDE_INT mask;
enum insn_code impl;
unsigned char perm[16];
};
static const struct altivec_perm_insn patterns[] = {
- { CODE_FOR_altivec_vpkuhum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
- { CODE_FOR_altivec_vpkuwum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
- { CODE_FOR_altivec_vmrghb,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
+ : CODE_FOR_altivec_vmrglb_direct),
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
- { CODE_FOR_altivec_vmrghh,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
+ : CODE_FOR_altivec_vmrglh_direct),
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
- { CODE_FOR_altivec_vmrghw,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
+ : CODE_FOR_altivec_vmrglw_direct),
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
- { CODE_FOR_altivec_vmrglb,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
+ : CODE_FOR_altivec_vmrghb_direct),
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
- { CODE_FOR_altivec_vmrglh,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
+ : CODE_FOR_altivec_vmrghh_direct),
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
- { CODE_FOR_altivec_vmrglw,
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
+ : CODE_FOR_altivec_vmrghw_direct),
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
};
unsigned int i, j, elt, which;
@@ -27003,7 +30592,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
- emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
+ if (!BYTES_BIG_ENDIAN)
+ elt = 15 - elt;
+ emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
return true;
}
@@ -27014,9 +30605,10 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
+ int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
x = gen_reg_rtx (V8HImode);
- emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
- GEN_INT (elt / 2)));
+ emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
+ GEN_INT (field)));
emit_move_insn (target, gen_lowpart (V16QImode, x));
return true;
}
@@ -27032,9 +30624,10 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
+ int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
x = gen_reg_rtx (V4SImode);
- emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
- GEN_INT (elt / 4)));
+ emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
+ GEN_INT (field)));
emit_move_insn (target, gen_lowpart (V16QImode, x));
return true;
}
@@ -27046,6 +30639,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
{
bool swapped;
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
+ continue;
+
elt = patterns[j].perm[0];
if (perm[0] == elt)
swapped = false;
@@ -27069,7 +30665,30 @@ altivec_expand_vec_perm_const (rtx operands[4])
enum machine_mode omode = insn_data[icode].operand[0].mode;
enum machine_mode imode = insn_data[icode].operand[1].mode;
- if (swapped)
+ /* For little-endian, don't use vpkuwum and vpkuhum if the
+ underlying vector type is not V4SI and V8HI, respectively.
+ For example, using vpkuwum with a V8HI picks up the even
+ halfwords (BE numbering) when the even halfwords (LE
+ numbering) are what we need. */
+ if (!BYTES_BIG_ENDIAN
+ && icode == CODE_FOR_altivec_vpkuwum_direct
+ && ((GET_CODE (op0) == REG
+ && GET_MODE (op0) != V4SImode)
+ || (GET_CODE (op0) == SUBREG
+ && GET_MODE (XEXP (op0, 0)) != V4SImode)))
+ continue;
+ if (!BYTES_BIG_ENDIAN
+ && icode == CODE_FOR_altivec_vpkuhum_direct
+ && ((GET_CODE (op0) == REG
+ && GET_MODE (op0) != V8HImode)
+ || (GET_CODE (op0) == SUBREG
+ && GET_MODE (XEXP (op0, 0)) != V8HImode)))
+ continue;
+
+ /* For little-endian, the two input operands must be swapped
+ (or swapped back) to ensure proper right-to-left numbering
+ from 0 to 2N-1. */
+ if (swapped ^ !BYTES_BIG_ENDIAN)
x = op0, op0 = op1, op1 = x;
if (imode != V16QImode)
{
@@ -27087,6 +30706,12 @@ altivec_expand_vec_perm_const (rtx operands[4])
}
}
+ if (!BYTES_BIG_ENDIAN)
+ {
+ altivec_expand_vec_perm_const_le (operands);
+ return true;
+ }
+
return false;
}
@@ -27135,7 +30760,6 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
vmode = GET_MODE (target);
gcc_assert (GET_MODE_NUNITS (vmode) == 2);
dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
-
x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
@@ -27231,7 +30855,7 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
rtx perm[16];
- high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2);
+ high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)
{
perm[i * 2] = GEN_INT (i + high);
@@ -27286,6 +30910,8 @@ rs6000_function_value (const_tree valtype,
{
enum machine_mode mode;
unsigned int regno;
+ enum machine_mode elt_mode;
+ int n_elts;
/* Special handling for structs in darwin64. */
if (TARGET_MACHO
@@ -27305,6 +30931,36 @@ rs6000_function_value (const_tree valtype,
/* Otherwise fall through to standard ABI rules. */
}
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
+ &elt_mode, &n_elts))
+ {
+ int first_reg, n_regs, i;
+ rtx par;
+
+ if (SCALAR_FLOAT_MODE_P (elt_mode))
+ {
+ /* _Decimal128 must use even/odd register pairs. */
+ first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
+ n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
+ }
+ else
+ {
+ first_reg = ALTIVEC_ARG_RETURN;
+ n_regs = 1;
+ }
+
+ par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
+ rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+
+ return par;
+ }
+
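
As a rough illustration of the new return path (editorial example; the exact registers follow FP_ARG_RETURN and ALTIVEC_ARG_RETURN above), an ELFv2 homogeneous aggregate of scalar floats such as

struct point { double x, y; };   /* homogeneous: two DFmode elements */

struct point
negate (struct point p)          /* returned in consecutive FPRs */
{
  struct point r = { -p.x, -p.y };
  return r;
}

is now described by a PARALLEL of two DFmode registers at byte offsets 0 and 8 instead of being returned in memory.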
if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
{
/* Long long return value needs to be split in -mpowerpc64, 32-bit ABI. */
@@ -27417,6 +31073,13 @@ rs6000_libcall_value (enum machine_mode mode)
}
+/* Return true if we use LRA instead of the reload pass. */
+static bool
+rs6000_lra_p (void)
+{
+ return rs6000_lra_flag;
+}
+
/* Given FROM and TO register numbers, say whether this elimination is allowed.
Frame pointer elimination is automatically handled.
@@ -27679,22 +31342,33 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
{ "cmpb", OPTION_MASK_CMPB, false, true },
+ { "crypto", OPTION_MASK_CRYPTO, false, true },
+ { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
+ { "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
- { "update", OPTION_MASK_NO_UPDATE, true , true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
+ { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
+ { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
+ { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
+ { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
+ { "update", OPTION_MASK_NO_UPDATE, true , true },
+ { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
+ { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
{ "aix64", OPTION_MASK_64BIT, false, false },
@@ -27734,6 +31408,11 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
{ "popcntd", RS6000_BTM_POPCNTD, false, false },
{ "cell", RS6000_BTM_CELL, false, false },
+ { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
+ { "crypto", RS6000_BTM_CRYPTO, false, false },
+ { "htm", RS6000_BTM_HTM, false, false },
+ { "hard-dfp", RS6000_BTM_DFP, false, false },
+ { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
@@ -28250,7 +31929,6 @@ rs6000_print_options_internal (FILE *file,
size_t cur_column;
size_t max_column = 76;
const char *comma = "";
- const char *nl = "\n";
if (indent)
start_column += fprintf (file, "%*s", indent, "");
@@ -28281,7 +31959,6 @@ rs6000_print_options_internal (FILE *file,
fprintf (stderr, ", \\\n%*s", (int)start_column, "");
cur_column = start_column + len;
comma = "";
- nl = "\n\n";
}
fprintf (file, "%s%s%s%s", comma, prefix, no_str,
@@ -28291,7 +31968,7 @@ rs6000_print_options_internal (FILE *file,
}
}
- fputs (nl, file);
+ fputs ("\n", file);
}
/* Helper function to print the current isa options on a line. */
@@ -28467,118 +32144,149 @@ rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
}
-/* A function pointer under AIX is a pointer to a data area whose first word
- contains the actual address of the function, whose second word contains a
- pointer to its TOC, and whose third word contains a value to place in the
- static chain register (r11). Note that if we load the static chain, our
- "trampoline" need not have any executable code. */
+
+/* Expand code to perform a call under the AIX or ELFv2 ABI. */
void
-rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag)
+rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
+ rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
+ rtx toc_load = NULL_RTX;
+ rtx toc_restore = NULL_RTX;
rtx func_addr;
- rtx toc_reg;
- rtx sc_reg;
- rtx stack_ptr;
- rtx stack_toc_offset;
- rtx stack_toc_mem;
- rtx func_toc_offset;
- rtx func_toc_mem;
- rtx func_sc_offset;
- rtx func_sc_mem;
+ rtx abi_reg = NULL_RTX;
+ rtx call[4];
+ int n_call;
rtx insn;
- rtx (*call_func) (rtx, rtx, rtx, rtx);
- rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx);
- stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
- toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
+ /* Handle longcall attributes. */
+ if (INTVAL (cookie) & CALL_LONG)
+ func_desc = rs6000_longcall_ref (func_desc);
+
+ /* Handle indirect calls. */
+ if (GET_CODE (func_desc) != SYMBOL_REF
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
+ {
+ /* Save the TOC into its reserved slot before the call,
+ and prepare to restore it after the call. */
+ rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
+ rtx stack_toc_mem = gen_frame_mem (Pmode,
+ gen_rtx_PLUS (Pmode, stack_ptr,
+ stack_toc_offset));
+ toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
+
+ /* Can we optimize saving the TOC in the prologue or
+ do we need to do it at every call? */
+ if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
+ cfun->machine->save_toc_in_prologue = true;
+ else
+ {
+ MEM_VOLATILE_P (stack_toc_mem) = 1;
+ emit_move_insn (stack_toc_mem, toc_reg);
+ }
+
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ /* A function pointer in the ELFv2 ABI is just a plain address, but
+ the ABI requires it to be loaded into r12 before the call. */
+ func_addr = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (func_addr, func_desc);
+ abi_reg = func_addr;
+ }
+ else
+ {
+ /* A function pointer under AIX is a pointer to a data area whose
+ first word contains the actual address of the function, whose
+ second word contains a pointer to its TOC, and whose third word
+ contains a value to place in the static chain register (r11).
+ Note that if we load the static chain, our "trampoline" need
+ not have any executable code. */
/* Load up address of the actual function. */
func_desc = force_reg (Pmode, func_desc);
func_addr = gen_reg_rtx (Pmode);
emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
- if (TARGET_32BIT)
- {
-
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT);
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT);
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT);
+ /* Prepare to load the TOC of the called function. Note that the
+ TOC load must happen immediately before the actual call so
+ that unwinding the TOC registers works correctly. See the
+ comment in frob_update_context. */
+ rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
+ rtx func_toc_mem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, func_desc,
+ func_toc_offset));
+ toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
+
+ /* If we have a static chain, load it up. */
if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
{
- call_func = gen_call_indirect_aix32bit;
- call_value_func = gen_call_value_indirect_aix32bit;
- }
- else
- {
- call_func = gen_call_indirect_aix32bit_nor11;
- call_value_func = gen_call_value_indirect_aix32bit_nor11;
+ rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
+ rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
+ rtx func_sc_mem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, func_desc,
+ func_sc_offset));
+ emit_move_insn (sc_reg, func_sc_mem);
+ abi_reg = sc_reg;
}
}
- else
- {
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT);
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT);
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT);
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
- {
- call_func = gen_call_indirect_aix64bit;
- call_value_func = gen_call_value_indirect_aix64bit;
}
else
{
- call_func = gen_call_indirect_aix64bit_nor11;
- call_value_func = gen_call_value_indirect_aix64bit_nor11;
- }
+ /* Direct calls use the TOC: for local calls, the callee will
+ assume the TOC register is set; for non-local calls, the
+ PLT stub needs the TOC register. */
+ abi_reg = toc_reg;
+ func_addr = func_desc;
}
- /* Reserved spot to store the TOC. */
- stack_toc_mem = gen_frame_mem (Pmode,
- gen_rtx_PLUS (Pmode,
- stack_ptr,
- stack_toc_offset));
+ /* Create the call. */
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
+ if (value != NULL_RTX)
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
+ n_call = 1;
- gcc_assert (cfun);
- gcc_assert (cfun->machine);
+ if (toc_load)
+ call[n_call++] = toc_load;
+ if (toc_restore)
+ call[n_call++] = toc_restore;
- /* Can we optimize saving the TOC in the prologue or do we need to do it at
- every call? */
- if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
- cfun->machine->save_toc_in_prologue = true;
+ call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
- else
- {
- MEM_VOLATILE_P (stack_toc_mem) = 1;
- emit_move_insn (stack_toc_mem, toc_reg);
- }
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
+ insn = emit_call_insn (insn);
- /* Calculate the address to load the TOC of the called function. We don't
- actually load this until the split after reload. */
- func_toc_mem = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode,
- func_desc,
- func_toc_offset));
+ /* Mention all registers defined by the ABI to hold information
+ as uses in CALL_INSN_FUNCTION_USAGE. */
+ if (abi_reg)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
+}
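
For reference, the descriptor words dereferenced above can be pictured with the following layout (editorial sketch; the field names are illustrative). For 64-bit, GET_MODE_SIZE (Pmode) is 8, giving the offsets 8 and 16 used for the TOC and static-chain loads:

struct aix_func_desc
{
  void *entry;          /* code address, loaded into func_addr */
  void *toc;            /* callee's TOC, loaded just before the call */
  void *static_chain;   /* loaded into r11 when pointers to nested
                           functions are in use */
};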
- /* If we have a static chain, load it up. */
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
- {
- func_sc_mem = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode,
- func_desc,
- func_sc_offset));
+/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
- sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
- emit_move_insn (sc_reg, func_sc_mem);
- }
+void
+rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
+{
+ rtx call[2];
+ rtx insn;
+
+ gcc_assert (INTVAL (cookie) == 0);
/* Create the call. */
- if (value)
- insn = call_value_func (value, func_addr, flag, func_toc_mem,
- stack_toc_mem);
- else
- insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem);
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
+ if (value != NULL_RTX)
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
- emit_call_insn (insn);
+ call[1] = simple_return_rtx;
+
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
+ insn = emit_call_insn (insn);
+
+ /* Note use of the TOC register. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
+ /* We need to also mark a use of the link register since the function we
+ sibling-call to will use it to return to our caller. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
}
/* Return whether we need to always update the saved TOC pointer when we update
@@ -28679,6 +32387,661 @@ rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
}
+
+/* Helper function for rs6000_split_logical to emit a logical instruction after
+ spliting the operation to single GPR registers.
+
+ DEST is the destination register.
+ OP1 and OP2 are the input source registers.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+static void
+rs6000_split_logical_inner (rtx dest,
+ rtx op1,
+ rtx op2,
+ enum rtx_code code,
+ enum machine_mode mode,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ rtx bool_rtx;
+ rtx set_rtx;
+
+ /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
+ if (op2 && GET_CODE (op2) == CONST_INT
+ && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
+ && !complement_final_p && !complement_op1_p && !complement_op2_p)
+ {
+ HOST_WIDE_INT mask = GET_MODE_MASK (mode);
+ HOST_WIDE_INT value = INTVAL (op2) & mask;
+
+ /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
+ if (code == AND)
+ {
+ if (value == 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ return;
+ }
+
+ else if (value == mask)
+ {
+ if (!rtx_equal_p (dest, op1))
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
+ return;
+ }
+ }
+
+ /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
+ into separate ORI/ORIS or XORI/XORIS instructions. */
+ else if (code == IOR || code == XOR)
+ {
+ if (value == 0)
+ {
+ if (!rtx_equal_p (dest, op1))
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
+ return;
+ }
+ }
+ }
+
+ if (complement_op1_p)
+ op1 = gen_rtx_NOT (mode, op1);
+
+ if (complement_op2_p)
+ op2 = gen_rtx_NOT (mode, op2);
+
+ bool_rtx = ((code == NOT)
+ ? gen_rtx_NOT (mode, op1)
+ : gen_rtx_fmt_ee (code, mode, op1, op2));
+
+ if (complement_final_p)
+ bool_rtx = gen_rtx_NOT (mode, bool_rtx);
+
+ set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
+
+ /* Is this AND with an explicit clobber? */
+ if (clobber_reg)
+ {
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
+ set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
+ }
+
+ emit_insn (set_rtx);
+ return;
+}
+
+/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
+ operations are split immediately during RTL generation to allow for more
+ optimizations of the AND/IOR/XOR.
+
+ OPERANDS is an array containing the destination and two input operands.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+static void
+rs6000_split_logical_di (rtx operands[3],
+ enum rtx_code code,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
+ const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
+ const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
+ enum hi_lo { hi = 0, lo = 1 };
+ rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
+ size_t i;
+
+ op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
+ op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
+ op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
+ op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
+
+ if (code == NOT)
+ op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
+ else
+ {
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
+ op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
+ }
+ else
+ {
+ HOST_WIDE_INT value = INTVAL (operands[2]);
+ HOST_WIDE_INT value_hi_lo[2];
+
+ gcc_assert (!complement_final_p);
+ gcc_assert (!complement_op1_p);
+ gcc_assert (!complement_op2_p);
+
+ value_hi_lo[hi] = value >> 32;
+ value_hi_lo[lo] = value & lower_32bits;
+
+ for (i = 0; i < 2; i++)
+ {
+ HOST_WIDE_INT sub_value = value_hi_lo[i];
+
+ if (sub_value & sign_bit)
+ sub_value |= upper_32bits;
+
+ op2_hi_lo[i] = GEN_INT (sub_value);
+
+ /* If this is an AND instruction, check to see if we need to load
+ the value in a register. */
+ if (code == AND && sub_value != -1 && sub_value != 0
+ && !and_operand (op2_hi_lo[i], SImode))
+ op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
+ }
+ }
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ /* Split large IOR/XOR operations. */
+ if ((code == IOR || code == XOR)
+ && GET_CODE (op2_hi_lo[i]) == CONST_INT
+ && !complement_final_p
+ && !complement_op1_p
+ && !complement_op2_p
+ && clobber_reg == NULL_RTX
+ && !logical_const_operand (op2_hi_lo[i], SImode))
+ {
+ HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
+ HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
+ HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* Make sure the constant is sign extended. */
+ if ((hi_16bits & sign_bit) != 0)
+ hi_16bits |= upper_32bits;
+
+ rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
+ code, SImode, false, false, false,
+ NULL_RTX);
+
+ rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
+ code, SImode, false, false, false,
+ NULL_RTX);
+ }
+ else
+ rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
+ code, SImode, complement_final_p,
+ complement_op1_p, complement_op2_p,
+ clobber_reg);
+ }
+
+ return;
+}
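
The ORI/ORIS-style split in the loop above amounts to the following value-level computation (editorial sketch; or_split is an illustrative name), shown for a 32-bit OR whose constant fits neither a single ORI nor a single ORIS:

static unsigned int
or_split (unsigned int x, unsigned int value)
{
  unsigned int tmp = x | (value & 0xffff0000u);   /* first inner call: high 16 bits */
  return tmp | (value & 0x0000ffffu);             /* second inner call: low 16 bits */
}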
+
+/* Split the insns that make up boolean operations operating on multiple GPR
+ registers. The boolean MD patterns ensure that the inputs either are
+ exactly the same as the output registers, or there is no overlap.
+
+ OPERANDS is an array containing the destination and two input operands.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+void
+rs6000_split_logical (rtx operands[3],
+ enum rtx_code code,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode sub_mode;
+ rtx op0, op1, op2;
+ int sub_size, regno0, regno1, nregs, i;
+
+ /* If this is DImode, use the specialized version that can run before
+ register allocation. */
+ if (mode == DImode && !TARGET_POWERPC64)
+ {
+ rs6000_split_logical_di (operands, code, complement_final_p,
+ complement_op1_p, complement_op2_p,
+ clobber_reg);
+ return;
+ }
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = (code == NOT) ? NULL_RTX : operands[2];
+ sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
+ sub_size = GET_MODE_SIZE (sub_mode);
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+
+ gcc_assert (reload_completed);
+ gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+ gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+
+ nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
+ gcc_assert (nregs > 1);
+
+ if (op2 && REG_P (op2))
+ gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
+
+ for (i = 0; i < nregs; i++)
+ {
+ int offset = i * sub_size;
+ rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
+ rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
+ rtx sub_op2 = ((code == NOT)
+ ? NULL_RTX
+ : simplify_subreg (sub_mode, op2, mode, offset));
+
+ rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
+ complement_final_p, complement_op1_p,
+ complement_op2_p, clobber_reg);
+ }
+
+ return;
+}
+
+
+/* Return true if the peephole2 can combine a load involving a combination of
+ an addis instruction and a load with an offset that can be fused together on
+ a power8.
+
+ The operands are:
+ operands[0] register set with addis
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0].
+
+ In addition, we are passed a boolean that is true if this is a peephole2,
+ and we can check whether the addis_reg is dead after the insn and can be
+ replaced by the target register. */
+
+bool
+fusion_gpr_load_p (rtx *operands, bool peep2_p)
+{
+ rtx addis_reg = operands[0];
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx mem = operands[3];
+ rtx addr;
+ rtx base_reg;
+
+ /* Validate arguments. */
+ if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
+ return false;
+
+ if (!base_reg_operand (target, GET_MODE (target)))
+ return false;
+
+ if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
+ return false;
+
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
+ return false;
+
+ /* Allow sign/zero extension. */
+ if (GET_CODE (mem) == ZERO_EXTEND
+ || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
+ mem = XEXP (mem, 0);
+
+ if (!MEM_P (mem))
+ return false;
+
+ addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ return false;
+
+ /* Validate that the register used to load the high value is either the
+ register being loaded, or we can safely replace its use in a peephole2.
+
+ If this is a peephole2, we assume that there are 2 instructions in the
+ peephole (addis and load), so we want to check if the target register was
+ not used in the memory address and the register to hold the addis result
+ is dead after the peephole. */
+ if (REGNO (addis_reg) != REGNO (target))
+ {
+ if (!peep2_p)
+ return false;
+
+ if (reg_mentioned_p (target, mem))
+ return false;
+
+ if (!peep2_reg_dead_p (2, addis_reg))
+ return false;
+
+ /* If the target register being loaded is the stack pointer, we must
+ avoid loading any other value into it, even temporarily. */
+ if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
+ return false;
+ }
+
+ base_reg = XEXP (addr, 0);
+ return REGNO (addis_reg) == REGNO (base_reg);
+}
+
+/* During the peephole2 pass, adjust and expand the insns for a load fusion
+ sequence. We adjust the addis register to use the target register. If the
+ load sign extends, we emit a zero-extending load followed by an explicit
+ sign extension, since the fusion only covers zero-extending loads.
+
+ The operands are:
+ operands[0] register set with addis (to be replaced with target)
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0]. */
+
+void
+expand_fusion_gpr_load (rtx *operands)
+{
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx orig_mem = operands[3];
+ rtx new_addr, new_mem, orig_addr, offset;
+ enum rtx_code plus_or_lo_sum;
+ enum machine_mode target_mode = GET_MODE (target);
+ enum machine_mode extend_mode = target_mode;
+ enum machine_mode ptr_mode = Pmode;
+ enum rtx_code extend = UNKNOWN;
+ rtx addis_reg = ((ptr_mode == target_mode)
+ ? target
+ : simplify_subreg (ptr_mode, target, target_mode, 0));
+
+ if (GET_CODE (orig_mem) == ZERO_EXTEND
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
+ {
+ extend = GET_CODE (orig_mem);
+ orig_mem = XEXP (orig_mem, 0);
+ target_mode = GET_MODE (orig_mem);
+ }
+
+ gcc_assert (MEM_P (orig_mem));
+
+ orig_addr = XEXP (orig_mem, 0);
+ plus_or_lo_sum = GET_CODE (orig_addr);
+ gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
+
+ offset = XEXP (orig_addr, 1);
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
+ new_mem = change_address (orig_mem, target_mode, new_addr);
+
+ if (extend != UNKNOWN)
+ new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
+
+ emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
+ emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
+
+ if (extend == SIGN_EXTEND)
+ {
+ int sub_off = ((BYTES_BIG_ENDIAN)
+ ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
+ : 0);
+ rtx sign_reg
+ = simplify_subreg (target_mode, target, extend_mode, sub_off);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
+ }
+
+ return;
+}
+
+/* Return a string to fuse an addis instruction with a gpr load into the same
+ register that the addis instruction set. The code is complicated,
+ so we call output_asm_insn directly, and just return "".
+
+ The operands are:
+ operands[0] register set with addis (must be same reg as target).
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0]. */
+
+const char *
+emit_fusion_gpr_load (rtx *operands)
+{
+ rtx addis_reg = operands[0];
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx mem = operands[3];
+ rtx fuse_ops[10];
+ rtx addr;
+ rtx load_offset;
+ const char *addis_str = NULL;
+ const char *load_str = NULL;
+ const char *extend_insn = NULL;
+ const char *mode_name = NULL;
+ char insn_template[80];
+ enum machine_mode mode;
+ const char *comment_str = ASM_COMMENT_START;
+ bool sign_p = false;
+
+ gcc_assert (REG_P (addis_reg) && REG_P (target));
+ gcc_assert (REGNO (addis_reg) == REGNO (target));
+
+ if (*comment_str == ' ')
+ comment_str++;
+
+ /* Allow sign/zero extension. */
+ if (GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+
+ else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
+ {
+ sign_p = true;
+ mem = XEXP (mem, 0);
+ }
+
+ gcc_assert (MEM_P (mem));
+ addr = XEXP (mem, 0);
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ gcc_unreachable ();
+
+ load_offset = XEXP (addr, 1);
+
+ /* Now emit the load instruction to the same register. */
+ mode = GET_MODE (mem);
+ switch (mode)
+ {
+ case QImode:
+ mode_name = "char";
+ load_str = "lbz";
+ extend_insn = "extsb %0,%0";
+ break;
+
+ case HImode:
+ mode_name = "short";
+ load_str = "lhz";
+ extend_insn = "extsh %0,%0";
+ break;
+
+ case SImode:
+ mode_name = "int";
+ load_str = "lwz";
+ extend_insn = "extsw %0,%0";
+ break;
+
+ case DImode:
+ if (TARGET_POWERPC64)
+ {
+ mode_name = "long";
+ load_str = "ld";
+ }
+ else
+ gcc_unreachable ();
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Emit the addis instruction. */
+ fuse_ops[0] = target;
+ if (satisfies_constraint_L (addis_value))
+ {
+ fuse_ops[1] = addis_value;
+ addis_str = "lis %0,%v1";
+ }
+
+ else if (GET_CODE (addis_value) == PLUS)
+ {
+ rtx op0 = XEXP (addis_value, 0);
+ rtx op1 = XEXP (addis_value, 1);
+
+ if (REG_P (op0) && CONST_INT_P (op1)
+ && satisfies_constraint_L (op1))
+ {
+ fuse_ops[1] = op0;
+ fuse_ops[2] = op1;
+ addis_str = "addis %0,%1,%v2";
+ }
+ }
+
+ else if (GET_CODE (addis_value) == HIGH)
+ {
+ rtx value = XEXP (addis_value, 0);
+ if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
+ {
+ fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
+ fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
+ if (TARGET_ELF)
+ addis_str = "addis %0,%2,%1@toc@ha";
+
+ else if (TARGET_XCOFF)
+ addis_str = "addis %0,%1@u(%2)";
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (GET_CODE (value) == PLUS)
+ {
+ rtx op0 = XEXP (value, 0);
+ rtx op1 = XEXP (value, 1);
+
+ if (GET_CODE (op0) == UNSPEC
+ && XINT (op0, 1) == UNSPEC_TOCREL
+ && CONST_INT_P (op1))
+ {
+ fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
+ fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
+ fuse_ops[3] = op1;
+ if (TARGET_ELF)
+ addis_str = "addis %0,%2,%1+%3@toc@ha";
+
+ else if (TARGET_XCOFF)
+ addis_str = "addis %0,%1+%3@u(%2)";
+
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ else if (satisfies_constraint_L (value))
+ {
+ fuse_ops[1] = value;
+ addis_str = "lis %0,%v1";
+ }
+
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
+ {
+ fuse_ops[1] = value;
+ addis_str = "lis %0,%1@ha";
+ }
+ }
+
+ if (!addis_str)
+ fatal_insn ("Could not generate addis value for fusion", addis_value);
+
+ sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
+ comment_str, mode_name);
+ output_asm_insn (insn_template, fuse_ops);
+
+ /* Emit the D-form load instruction. */
+ if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
+ {
+ sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
+ fuse_ops[1] = load_offset;
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (GET_CODE (load_offset) == UNSPEC
+ && XINT (load_offset, 1) == UNSPEC_TOCREL)
+ {
+ if (TARGET_ELF)
+ sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
+
+ else if (TARGET_XCOFF)
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
+
+ else
+ gcc_unreachable ();
+
+ fuse_ops[1] = XVECEXP (load_offset, 0, 0);
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (GET_CODE (load_offset) == PLUS
+ && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
+ && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
+ && CONST_INT_P (XEXP (load_offset, 1)))
+ {
+ rtx tocrel_unspec = XEXP (load_offset, 0);
+ if (TARGET_ELF)
+ sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
+
+ else if (TARGET_XCOFF)
+ sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
+
+ else
+ gcc_unreachable ();
+
+ fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
+ fuse_ops[2] = XEXP (load_offset, 1);
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
+ {
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
+
+ fuse_ops[1] = load_offset;
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else
+ fatal_insn ("Unable to generate load offset for fusion", load_offset);
+
+ /* Handle sign extension. The peephole2 pass generates this as a separate
+ insn, but we handle it just in case it got reattached. */
+ if (sign_p)
+ {
+ gcc_assert (extend_insn != NULL);
+ output_asm_insn (extend_insn, fuse_ops);
+ }
+
+ return "";
+}
+
+
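
Putting the templates above together, a TOC-relative SImode load on ELF would be emitted roughly as (editorial example; var and register 9 are illustrative):

	addis 9,2,var@toc@ha		# gpr load fusion, type int
	lwz 9,var@toc@l(9)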
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000.h b/gcc-4.8/gcc/config/rs6000/rs6000.h
index 8a3608410..1dc71f6db 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000.h
+++ b/gcc-4.8/gcc/config/rs6000/rs6000.h
@@ -92,7 +92,7 @@
#ifdef HAVE_AS_POWER8
#define ASM_CPU_POWER8_SPEC "-mpower8"
#else
-#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec"
+#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC
#endif
#ifdef HAVE_AS_DCI
@@ -164,6 +164,7 @@
%{mcpu=e6500: -me6500} \
%{maltivec: -maltivec} \
%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
+%{mpower8-vector|mcrypto|mdirect-move|mhtm: %{!mcpu*: %(asm_cpu_power8)}} \
-many"
#define CPP_DEFAULT_SPEC ""
@@ -277,6 +278,21 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define TARGET_POPCNTD 0
#endif
+/* Define the ISA 2.07 flags as 0 if the target assembler does not support the
+ waitasecond instruction. Allow -mpower8-fusion, since it does not add new
+ instructions. */
+
+#ifndef HAVE_AS_POWER8
+#undef TARGET_DIRECT_MOVE
+#undef TARGET_CRYPTO
+#undef TARGET_HTM
+#undef TARGET_P8_VECTOR
+#define TARGET_DIRECT_MOVE 0
+#define TARGET_CRYPTO 0
+#define TARGET_HTM 0
+#define TARGET_P8_VECTOR 0
+#endif
+
/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If
not, generate the lwsync code as an integer constant. */
#ifdef HAVE_AS_LWSYNC
@@ -386,6 +402,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET)
#define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN)
+/* Describe the vector unit used for arithmetic operations. */
extern enum rs6000_vector rs6000_vector_unit[];
#define VECTOR_UNIT_NONE_P(MODE) \
@@ -394,12 +411,25 @@ extern enum rs6000_vector rs6000_vector_unit[];
#define VECTOR_UNIT_VSX_P(MODE) \
(rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+#define VECTOR_UNIT_P8_VECTOR_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR)
+
#define VECTOR_UNIT_ALTIVEC_P(MODE) \
(rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
+#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
+ (int)VECTOR_VSX, \
+ (int)VECTOR_P8_VECTOR))
+
+/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either
+ altivec (VMX) or VSX vector instructions. P8 vector support is upwards
+ compatible, so allow it as well, rather than changing all of the uses of the
+ macro. */
#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \
- (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \
- || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
+ (int)VECTOR_ALTIVEC, \
+ (int)VECTOR_P8_VECTOR))
/* Describe whether to use VSX loads or Altivec loads. For now, just use the
same unit as the vector unit we are using, but we may want to migrate to
@@ -412,12 +442,21 @@ extern enum rs6000_vector rs6000_vector_mem[];
#define VECTOR_MEM_VSX_P(MODE) \
(rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+#define VECTOR_MEM_P8_VECTOR_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+
#define VECTOR_MEM_ALTIVEC_P(MODE) \
(rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
+#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
+ (int)VECTOR_VSX, \
+ (int)VECTOR_P8_VECTOR))
+
#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \
- (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \
- || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
+ (int)VECTOR_ALTIVEC, \
+ (int)VECTOR_P8_VECTOR))
/* Return the alignment of a given vector type, which is set based on the
vector unit use. VSX for instance can load 32 or 64 bit aligned words
@@ -429,6 +468,15 @@ extern int rs6000_vector_align[];
? rs6000_vector_align[(MODE)] \
: (int)GET_MODE_BITSIZE ((MODE)))
+/* Determine the element order to use for vector instructions. By
+ default we use big-endian element order when targeting big-endian,
+ and little-endian element order when targeting little-endian. For
+ programs being ported from BE Power to LE Power, it can sometimes
+ be useful to use big-endian element order when targeting little-endian.
+ This is set via -maltivec=be, for example. */
+#define VECTOR_ELT_ORDER_BIG \
+ (BYTES_BIG_ENDIAN || (rs6000_altivec_element_order == 2))
+
/* Alignment options for fields in structures for sub-targets following
AIX-like ABI.
ALIGN_POWER word-aligns FP doubles (default AIX ABI).
@@ -479,22 +527,45 @@ extern int rs6000_vector_align[];
#define TARGET_FCTIDUZ TARGET_POPCNTD
#define TARGET_FCTIWUZ TARGET_POPCNTD
+#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64)
+
+/* Byte/char syncs were phased in for ISA 2.06B, but are not present
+ in power7, so conditionalize them on p8 features. TImode syncs need quad
+ memory support. */
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY \
+ || TARGET_QUAD_MEMORY_ATOMIC \
+ || TARGET_DIRECT_MOVE)
+
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY_ATOMIC
+
+/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
+ to allocate the SDmode stack slot to get the value into the proper location
+ in the register. */
+#define TARGET_NO_SDMODE_STACK (TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP)
+
/* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>. For now map
OPTION_MASK_<xxx> back into MASK_<xxx>. */
#define MASK_ALTIVEC OPTION_MASK_ALTIVEC
#define MASK_CMPB OPTION_MASK_CMPB
+#define MASK_CRYPTO OPTION_MASK_CRYPTO
#define MASK_DFP OPTION_MASK_DFP
+#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE
#define MASK_DLMZB OPTION_MASK_DLMZB
#define MASK_EABI OPTION_MASK_EABI
#define MASK_FPRND OPTION_MASK_FPRND
+#define MASK_P8_FUSION OPTION_MASK_P8_FUSION
#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT
+#define MASK_HTM OPTION_MASK_HTM
#define MASK_ISEL OPTION_MASK_ISEL
#define MASK_MFCRF OPTION_MASK_MFCRF
#define MASK_MFPGPR OPTION_MASK_MFPGPR
#define MASK_MULHW OPTION_MASK_MULHW
#define MASK_MULTIPLE OPTION_MASK_MULTIPLE
#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE
+#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR
#define MASK_POPCNTB OPTION_MASK_POPCNTB
#define MASK_POPCNTD OPTION_MASK_POPCNTD
#define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT
@@ -505,6 +576,7 @@ extern int rs6000_vector_align[];
#define MASK_STRING OPTION_MASK_STRING
#define MASK_UPDATE OPTION_MASK_UPDATE
#define MASK_VSX OPTION_MASK_VSX
+#define MASK_VSX_TIMODE OPTION_MASK_VSX_TIMODE
#ifndef IN_LIBGCC2
#define MASK_POWERPC64 OPTION_MASK_POWERPC64
@@ -551,13 +623,33 @@ extern int rs6000_vector_align[];
|| TARGET_CMPB /* ISA 2.05 */ \
|| TARGET_POPCNTD /* ISA 2.06 */ \
|| TARGET_ALTIVEC \
- || TARGET_VSX)))
+ || TARGET_VSX \
+ || TARGET_HARD_FLOAT)))
/* E500 cores only support plain "sync", not lwsync. */
#define TARGET_NO_LWSYNC (rs6000_cpu == PROCESSOR_PPC8540 \
|| rs6000_cpu == PROCESSOR_PPC8548)
+/* Whether SF/DF operations are supported on the E500. */
+#define TARGET_SF_SPE (TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT \
+ && !TARGET_FPRS)
+
+#define TARGET_DF_SPE (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT \
+ && !TARGET_FPRS && TARGET_E500_DOUBLE)
+
+/* Whether SF/DF operations are supported by the normal floating point unit
+ (or the vector/scalar unit). */
+#define TARGET_SF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
+ && TARGET_SINGLE_FLOAT)
+
+#define TARGET_DF_FPR (TARGET_HARD_FLOAT && TARGET_FPRS \
+ && TARGET_DOUBLE_FLOAT)
+
+/* Whether SF/DF operations are supported by any hardware. */
+#define TARGET_SF_INSN (TARGET_SF_FPR || TARGET_SF_SPE)
+#define TARGET_DF_INSN (TARGET_DF_FPR || TARGET_DF_SPE)
+
/* Which machine supports the various reciprocal estimate instructions. */
#define TARGET_FRES (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \
&& TARGET_FPRS && TARGET_SINGLE_FLOAT)
@@ -595,9 +687,6 @@ extern unsigned char rs6000_recip_bits[];
#define RS6000_RECIP_AUTO_RSQRTE_P(MODE) \
(rs6000_recip_bits[(int)(MODE)] & RS6000_RECIP_MASK_AUTO_RSQRTE)
-#define RS6000_RECIP_HIGH_PRECISION_P(MODE) \
- ((MODE) == SFmode || (MODE) == V4SFmode || TARGET_RECIP_PRECISION)
-
/* The default CPU for TARGET_OPTION_OVERRIDE. */
#define OPTION_TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT
@@ -760,12 +849,6 @@ extern unsigned rs6000_pointer_size;
/* No data type wants to be aligned rounder than this. */
#define BIGGEST_ALIGNMENT 128
-/* A C expression to compute the alignment for a variables in the
- local store. TYPE is the data type, and ALIGN is the alignment
- that the object would ordinarily have. */
-#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
- DATA_ALIGNMENT (TYPE, ALIGN)
-
/* Alignment of field after `int : 0' in a structure. */
#define EMPTY_FIELD_BOUNDARY 32
@@ -775,8 +858,15 @@ extern unsigned rs6000_pointer_size;
/* A bit-field declared as `int' forces `int' alignment for the struct. */
#define PCC_BITFIELD_TYPE_MATTERS 1
-/* Make strings word-aligned so strcpy from constants will be faster.
- Make vector constants quadword aligned. */
+enum data_align { align_abi, align_opt, align_both };
+
+/* A C expression to compute the alignment for a variable in the
+ local store. TYPE is the data type, and ALIGN is the alignment
+ that the object would ordinarily have. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ rs6000_data_alignment (TYPE, ALIGN, align_both)
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
(TREE_CODE (EXP) == STRING_CST \
&& (STRICT_ALIGNMENT || !optimize_size) \
@@ -784,21 +874,14 @@ extern unsigned rs6000_pointer_size;
? BITS_PER_WORD \
: (ALIGN))
-/* Make arrays of chars word-aligned for the same reasons.
- Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
- 64 bits. */
+/* Make arrays of chars word-aligned for the same reasons. */
#define DATA_ALIGNMENT(TYPE, ALIGN) \
- (TREE_CODE (TYPE) == VECTOR_TYPE \
- ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \
- || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \
- ? 64 : 128) \
- : ((TARGET_E500_DOUBLE \
- && TREE_CODE (TYPE) == REAL_TYPE \
- && TYPE_MODE (TYPE) == DFmode) \
- ? 64 \
- : (TREE_CODE (TYPE) == ARRAY_TYPE \
- && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
- && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN)))
+ rs6000_data_alignment (TYPE, ALIGN, align_opt)
+
+/* Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
+ 64 bits. */
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
+ rs6000_data_alignment (TYPE, ALIGN, align_abi)
/* Nonzero if move instructions will actually fail to work
when given unaligned data. */
@@ -842,15 +925,17 @@ extern unsigned rs6000_pointer_size;
in inline functions.
Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame
- pointer, which is eventually eliminated in favor of SP or FP. */
+ pointer, which is eventually eliminated in favor of SP or FP.
-#define FIRST_PSEUDO_REGISTER 114
+ The 3 HTM registers are likewise not included in DWARF_FRAME_REGISTERS. */
+
+#define FIRST_PSEUDO_REGISTER 117
/* This must be included for pre gcc 3.0 glibc compatibility. */
#define PRE_GCC3_DWARF_FRAME_REGISTERS 77
/* Add 32 dwarf columns for synthetic SPE registers. */
-#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 1) + 32)
+#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 4) + 32)
/* The SPE has an additional 32 synthetic registers, with DWARF debug
info numbering for these registers starting at 1200. While eh_frame
@@ -866,7 +951,7 @@ extern unsigned rs6000_pointer_size;
We must map them here to avoid huge unwinder tables mostly consisting
of unused space. */
#define DWARF_REG_TO_UNWIND_COLUMN(r) \
- ((r) > 1200 ? ((r) - 1200 + FIRST_PSEUDO_REGISTER - 1) : (r))
+ ((r) > 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r))
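
Editorial arithmetic for the two changes above: with FIRST_PSEUDO_REGISTER now 117, DWARF_FRAME_REGISTERS is (117 - 4) + 32 = 145 (dropping the soft frame pointer and the three HTM registers while keeping the 32 synthetic SPE columns), so the remapped SPE unwind columns now start at DWARF_FRAME_REGISTERS - 32 = 113.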
/* Use standard DWARF numbering for DWARF debugging information. */
#define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO)
@@ -906,7 +991,7 @@ extern unsigned rs6000_pointer_size;
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
1, 1 \
- , 1, 1, 1 \
+ , 1, 1, 1, 1, 1, 1 \
}
/* 1 for registers not available across function calls.
@@ -926,7 +1011,7 @@ extern unsigned rs6000_pointer_size;
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
1, 1 \
- , 1, 1, 1 \
+ , 1, 1, 1, 1, 1, 1 \
}
/* Like `CALL_USED_REGISTERS' except this macro doesn't require that
@@ -945,7 +1030,7 @@ extern unsigned rs6000_pointer_size;
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0 \
- , 0, 0, 0 \
+ , 0, 0, 0, 0, 0, 0 \
}
#define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
@@ -984,6 +1069,9 @@ extern unsigned rs6000_pointer_size;
vrsave, vscr (fixed)
spe_acc, spefscr (fixed)
sfp (fixed)
+ tfhar (fixed)
+ tfiar (fixed)
+ texasr (fixed)
*/
#if FIXED_R2 == 1
@@ -1004,7 +1092,9 @@ extern unsigned rs6000_pointer_size;
#define REG_ALLOC_ORDER \
{32, \
- 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
+ /* move fr13 (i.e. 45) later, so if we need TFmode, it does */ \
+ /* not use fr14 which is a saved register. */ \
+ 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \
33, \
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
50, 49, 48, 47, 46, \
@@ -1023,7 +1113,7 @@ extern unsigned rs6000_pointer_size;
96, 95, 94, 93, 92, 91, \
108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \
109, 110, \
- 111, 112, 113 \
+ 111, 112, 113, 114, 115, 116 \
}
/* True if register is floating-point. */
@@ -1064,8 +1154,11 @@ extern unsigned rs6000_pointer_size;
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
/* Alternate name for any vector register supporting logical operations, no
- matter which instruction set(s) are available. */
-#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
+ matter which instruction set(s) are available. Allow GPRs as well as the
+ vector registers. */
+#define VLOGICAL_REGNO_P(N) \
+ (INT_REGNO_P (N) || ALTIVEC_REGNO_P (N) \
+ || (TARGET_VSX && FP_REGNO_P (N))) \
/* Return number of consecutive hard regs needed starting at reg REGNO
to hold something of mode MODE. */
@@ -1106,7 +1199,7 @@ extern unsigned rs6000_pointer_size;
#define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \
(ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \
- || (MODE) == V2DImode)
+ || (MODE) == V2DImode || (MODE) == V1TImode)
#define SPE_VECTOR_MODE(MODE) \
((MODE) == V4HImode \
@@ -1125,28 +1218,32 @@ extern unsigned rs6000_pointer_size;
/* Value is 1 if it is a good idea to tie two pseudo registers
when one has mode MODE1 and one has mode MODE2.
If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
- for any hard reg, then this must be 0 for correct output. */
+ for any hard reg, then this must be 0 for correct output.
+
+ PTImode cannot tie with other modes because PTImode is restricted to even
+ GPR registers, and TImode can go in any GPR as well as VSX registers (PR
+ 57744). */
#define MODES_TIEABLE_P(MODE1, MODE2) \
- (SCALAR_FLOAT_MODE_P (MODE1) \
+ ((MODE1) == PTImode \
+ ? (MODE2) == PTImode \
+ : (MODE2) == PTImode \
+ ? 0 \
+ : SCALAR_FLOAT_MODE_P (MODE1) \
? SCALAR_FLOAT_MODE_P (MODE2) \
: SCALAR_FLOAT_MODE_P (MODE2) \
- ? SCALAR_FLOAT_MODE_P (MODE1) \
+ ? 0 \
: GET_MODE_CLASS (MODE1) == MODE_CC \
? GET_MODE_CLASS (MODE2) == MODE_CC \
: GET_MODE_CLASS (MODE2) == MODE_CC \
- ? GET_MODE_CLASS (MODE1) == MODE_CC \
+ ? 0 \
: SPE_VECTOR_MODE (MODE1) \
? SPE_VECTOR_MODE (MODE2) \
: SPE_VECTOR_MODE (MODE2) \
- ? SPE_VECTOR_MODE (MODE1) \
- : ALTIVEC_VECTOR_MODE (MODE1) \
- ? ALTIVEC_VECTOR_MODE (MODE2) \
- : ALTIVEC_VECTOR_MODE (MODE2) \
- ? ALTIVEC_VECTOR_MODE (MODE1) \
+ ? 0 \
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
- ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
+ ? 0 \
: 1)
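
The rewritten MODES_TIEABLE_P chain above also drops the redundant re-tests of MODE1 (each trailing `? PREDICATE (MODE1)` arm becomes `? 0`, since that branch is only reached after MODE1 has already failed the predicate). A toy standalone C model of the resulting logic, with GCC's machine modes reduced to a single enum for illustration:

#include <assert.h>

enum mclass { PTI, FLOAT_SCALAR, CC_CLASS, SPE_VEC, VSX_VEC, OTHER };

static int tieable (enum mclass m1, enum mclass m2)
{
  if (m1 == PTI || m2 == PTI)
    return m1 == m2;            /* PTImode ties only with itself */
  if (m1 == FLOAT_SCALAR || m2 == FLOAT_SCALAR)
    return m1 == m2;
  if (m1 == CC_CLASS || m2 == CC_CLASS)
    return m1 == m2;
  if (m1 == SPE_VEC || m2 == SPE_VEC)
    return m1 == m2;
  if (m1 == VSX_VEC || m2 == VSX_VEC)
    return m1 == m2;
  return 1;                     /* anything else may be tied */
}

int main (void)
{
  assert (tieable (PTI, PTI));
  assert (!tieable (PTI, OTHER) && !tieable (OTHER, PTI)); /* PR 57744 case */
  assert (tieable (OTHER, OTHER));
  return 0;
}
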
/* Post-reload, we can't use any new AltiVec registers, as we already
@@ -1240,6 +1337,7 @@ enum reg_class
VSCR_REGS,
SPE_ACC_REGS,
SPEFSCR_REGS,
+ SPR_REGS,
NON_SPECIAL_REGS,
LINK_REGS,
CTR_REGS,
@@ -1270,6 +1368,7 @@ enum reg_class
"VSCR_REGS", \
"SPE_ACC_REGS", \
"SPEFSCR_REGS", \
+ "SPR_REGS", \
"NON_SPECIAL_REGS", \
"LINK_REGS", \
"CTR_REGS", \
@@ -1299,6 +1398,7 @@ enum reg_class
{ 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, /* SPEFSCR_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, /* SPR_REGS */ \
{ 0xffffffff, 0xffffffff, 0x00000008, 0x00020000 }, /* NON_SPECIAL_REGS */ \
{ 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, /* LINK_REGS */ \
{ 0x00000000, 0x00000000, 0x00000004, 0x00000000 }, /* CTR_REGS */ \
@@ -1309,7 +1409,7 @@ enum reg_class
{ 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \
{ 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000 }, /* NON_FLOAT_REGS */ \
{ 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \
- { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0003ffff } /* ALL_REGS */ \
+ { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0007ffff } /* ALL_REGS */ \
}
/* The same information, inverted:
@@ -1337,7 +1437,18 @@ enum r6000_reg_class_enum {
RS6000_CONSTRAINT_wa, /* Any VSX register */
RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
+ RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */
+ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */
+ RS6000_CONSTRAINT_wm, /* VSX register for direct move */
+ RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
RS6000_CONSTRAINT_ws, /* VSX register for DF */
+ RS6000_CONSTRAINT_wt, /* VSX register for TImode */
+ RS6000_CONSTRAINT_wu, /* Altivec register for float load/stores. */
+ RS6000_CONSTRAINT_wv, /* Altivec register for double load/stores. */
+ RS6000_CONSTRAINT_ww, /* FP or VSX register for vsx float ops. */
+ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
+ RS6000_CONSTRAINT_wy, /* VSX register for SF */
+ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */
RS6000_CONSTRAINT_MAX
};
@@ -1425,21 +1536,14 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
arguments. */
#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 || flag_asan != 0)
-/* Size of the outgoing register save area */
-#define RS6000_REG_SAVE ((DEFAULT_ABI == ABI_AIX \
- || DEFAULT_ABI == ABI_DARWIN) \
- ? (TARGET_64BIT ? 64 : 32) \
- : 0)
-
/* Size of the fixed area on the stack */
#define RS6000_SAVE_AREA \
- (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_DARWIN) ? 24 : 8) \
+ ((DEFAULT_ABI == ABI_V4 ? 8 : DEFAULT_ABI == ABI_ELFv2 ? 16 : 24) \
<< (TARGET_64BIT ? 1 : 0))
-/* MEM representing address to save the TOC register */
-#define RS6000_SAVE_TOC gen_rtx_MEM (Pmode, \
- plus_constant (Pmode, stack_pointer_rtx, \
- (TARGET_32BIT ? 20 : 40)))
+/* Stack offset for toc save slot. */
+#define RS6000_TOC_SAVE_SLOT \
+ ((DEFAULT_ABI == ABI_ELFv2 ? 12 : 20) << (TARGET_64BIT ? 1 : 0))
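
Worked values for the two collapsed macros above, as a small self-contained C check (the ABI and word size are plain parameters here, not GCC's DEFAULT_ABI/TARGET_64BIT):

#include <assert.h>

enum abi { ABI_V4, ABI_ELFv2, ABI_AIX };

static int save_area (enum abi a, int is64)
{ return (a == ABI_V4 ? 8 : a == ABI_ELFv2 ? 16 : 24) << (is64 ? 1 : 0); }

static int toc_save_slot (enum abi a, int is64)
{ return (a == ABI_ELFv2 ? 12 : 20) << (is64 ? 1 : 0); }

int main (void)
{
  assert (save_area (ABI_AIX,   1) == 48);  /* 6 doublewords */
  assert (save_area (ABI_ELFv2, 1) == 32);  /* ELFv2 trims this to 4 */
  assert (save_area (ABI_V4,    0) == 8);
  /* The TOC slot lands where the old TOC_SAVE_OFFSET_{32,64}BIT
     constants (removed from rs6000.md below) put it on AIX.  */
  assert (toc_save_slot (ABI_AIX,   0) == 20);
  assert (toc_save_slot (ABI_AIX,   1) == 40);
  assert (toc_save_slot (ABI_ELFv2, 1) == 24);
  return 0;
}
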
/* Align an address */
#define RS6000_ALIGN(n,a) (((n) + (a) - 1) & ~((a) - 1))
@@ -1489,7 +1593,7 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
/* Define this if stack space is still allocated for a parameter passed
in a register. The value is the number of bytes allocated to this
area. */
-#define REG_PARM_STACK_SPACE(FNDECL) RS6000_REG_SAVE
+#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL))
/* Define this if the above stack space is to be considered part of the
space allocated by the caller. */
@@ -1522,7 +1626,7 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
NONLOCAL needs twice Pmode to maintain both backchain and SP. */
#define STACK_SAVEAREA_MODE(LEVEL) \
(LEVEL == SAVE_FUNCTION ? VOIDmode \
- : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : TImode) : Pmode)
+ : LEVEL == SAVE_NONLOCAL ? (TARGET_32BIT ? DImode : PTImode) : Pmode)
/* Minimum and maximum general purpose registers used to hold arguments. */
#define GP_ARG_MIN_REG 3
@@ -1533,9 +1637,8 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
#define FP_ARG_MIN_REG 33
#define FP_ARG_AIX_MAX_REG 45
#define FP_ARG_V4_MAX_REG 40
-#define FP_ARG_MAX_REG ((DEFAULT_ABI == ABI_AIX \
- || DEFAULT_ABI == ABI_DARWIN) \
- ? FP_ARG_AIX_MAX_REG : FP_ARG_V4_MAX_REG)
+#define FP_ARG_MAX_REG (DEFAULT_ABI == ABI_V4 \
+ ? FP_ARG_V4_MAX_REG : FP_ARG_AIX_MAX_REG)
#define FP_ARG_NUM_REG (FP_ARG_MAX_REG - FP_ARG_MIN_REG + 1)
/* Minimum and maximum AltiVec registers used to hold arguments. */
@@ -1543,10 +1646,17 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
#define ALTIVEC_ARG_MAX_REG (ALTIVEC_ARG_MIN_REG + 11)
#define ALTIVEC_ARG_NUM_REG (ALTIVEC_ARG_MAX_REG - ALTIVEC_ARG_MIN_REG + 1)
+/* Maximum number of registers per ELFv2 homogeneous aggregate argument. */
+#define AGGR_ARG_NUM_REG 8
+
/* Return registers */
#define GP_ARG_RETURN GP_ARG_MIN_REG
#define FP_ARG_RETURN FP_ARG_MIN_REG
#define ALTIVEC_ARG_RETURN (FIRST_ALTIVEC_REGNO + 2)
+#define FP_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? FP_ARG_RETURN \
+ : (FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
+#define ALTIVEC_ARG_MAX_RETURN (DEFAULT_ABI != ABI_ELFv2 ? ALTIVEC_ARG_RETURN \
+ : (ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1))
/* Flags for the call/call_value rtl operations set up by function_arg */
#define CALL_NORMAL 0x00000000 /* no special processing */
@@ -1566,8 +1676,10 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
On RS/6000, this is r3, fp1, and v2 (for AltiVec). */
#define FUNCTION_VALUE_REGNO_P(N) \
((N) == GP_ARG_RETURN \
- || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT && TARGET_FPRS) \
- || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
+ || ((N) >= FP_ARG_RETURN && (N) <= FP_ARG_MAX_RETURN \
+ && TARGET_HARD_FLOAT && TARGET_FPRS) \
+ || ((N) >= ALTIVEC_ARG_RETURN && (N) <= ALTIVEC_ARG_MAX_RETURN \
+ && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
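
With ELFv2 homogeneous aggregates returned in up to AGGR_ARG_NUM_REG consecutive registers, the predicate above becomes a range check: fp1-fp8 (regnos 33-40) and v2-v9 (regnos 79-86). A hedged C sketch of that check, with the regnos hard-coded from this header and the TARGET_* gates omitted:

#include <assert.h>
#include <stdbool.h>

#define AGGR_ARG_NUM_REG 8
#define FP_ARG_RETURN 33          /* fp1 */
#define ALTIVEC_ARG_RETURN 79     /* v2 */

static bool value_regno_p (int n, bool elfv2)
{
  int fp_max = elfv2 ? FP_ARG_RETURN + AGGR_ARG_NUM_REG - 1 : FP_ARG_RETURN;
  int vr_max = elfv2 ? ALTIVEC_ARG_RETURN + AGGR_ARG_NUM_REG - 1
                     : ALTIVEC_ARG_RETURN;
  return n == 3                                        /* r3 */
         || (n >= FP_ARG_RETURN && n <= fp_max)        /* fp1[..fp8] */
         || (n >= ALTIVEC_ARG_RETURN && n <= vr_max);  /* v2[..v9] */
}

int main (void)
{
  assert (value_regno_p (40, true));    /* fp8 can carry part of an ELFv2
                                           homogeneous float aggregate */
  assert (!value_regno_p (40, false));  /* other ABIs: only fp1 */
  assert (value_regno_p (86, true) && !value_regno_p (86, false)); /* v9 */
  return 0;
}
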
/* 1 if N is a possible register number for function argument passing.
On RS/6000, these are r3-r10 and fp1-fp13.
@@ -1692,10 +1804,7 @@ typedef struct rs6000_args
rs6000_stack_info in rs6000.c for more information on how the different
abi's store the return address. */
#define RETURN_ADDRESS_OFFSET \
- ((DEFAULT_ABI == ABI_AIX \
- || DEFAULT_ABI == ABI_DARWIN) ? (TARGET_32BIT ? 8 : 16) : \
- (DEFAULT_ABI == ABI_V4) ? 4 : \
- (internal_error ("RETURN_ADDRESS_OFFSET not supported"), 0))
+ ((DEFAULT_ABI == ABI_V4 ? 4 : 8) << (TARGET_64BIT ? 1 : 0))
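
This is the same shift-by-wordsize trick as RS6000_SAVE_AREA; the collapsed expression reproduces the old per-ABI cases:

#include <assert.h>

static int ra_offset (int is_v4, int is64)
{ return (is_v4 ? 4 : 8) << (is64 ? 1 : 0); }

int main (void)
{
  assert (ra_offset (1, 0) == 4);   /* V4, 32-bit (V4 is 32-bit only) */
  assert (ra_offset (0, 0) == 8);   /* AIX/Darwin, 32-bit */
  assert (ra_offset (0, 1) == 16);  /* AIX/ELFv2, 64-bit */
  return 0;
}
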
/* The current return address is in link register (65). The return address
of anything farther back is accessed normally at an offset of 8 from the
@@ -2215,6 +2324,9 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
&rs6000_reg_names[111][0], /* spe_acc */ \
&rs6000_reg_names[112][0], /* spefscr */ \
&rs6000_reg_names[113][0], /* sfp */ \
+ &rs6000_reg_names[114][0], /* tfhar */ \
+ &rs6000_reg_names[115][0], /* tfiar */ \
+ &rs6000_reg_names[116][0], /* texasr */ \
}
/* Table of additional register names to use in user input. */
@@ -2268,7 +2380,9 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
{"vs48", 93}, {"vs49", 94}, {"vs50", 95}, {"vs51", 96}, \
{"vs52", 97}, {"vs53", 98}, {"vs54", 99}, {"vs55", 100}, \
{"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \
- {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108} }
+ {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108}, \
+ /* Transactional Memory Facility (HTM) Registers. */ \
+ {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116} }
/* This is how to output an element of a case-vector that is relative. */
@@ -2357,7 +2471,12 @@ extern int frame_pointer_needed;
#define RS6000_BTC_ATTR_MASK 0x00000700 /* Mask of the attributes. */
/* Miscellaneous information. */
-#define RS6000_BTC_OVERLOADED 0x4000000 /* function is overloaded. */
+#define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */
+#define RS6000_BTC_VOID 0x02000000 /* function has no return value. */
+#define RS6000_BTC_OVERLOADED 0x04000000 /* function is overloaded. */
+#define RS6000_BTC_32BIT 0x08000000 /* function restricted to 32-bit. */
+#define RS6000_BTC_64BIT 0x10000000 /* function restricted to 64-bit. */
+#define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */
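
A quick consistency check on the widened misc field (bit values copied from the lines above; RS6000_BTC_ATTR_MASK is 0x00000700 per the unchanged context):

#include <assert.h>

#define BTC_SPR        0x01000000
#define BTC_VOID       0x02000000
#define BTC_OVERLOADED 0x04000000
#define BTC_32BIT      0x08000000
#define BTC_64BIT      0x10000000
#define BTC_MISC_MASK  0x1f000000

int main (void)
{
  /* The mask is exactly the union of the five misc bits...  */
  assert (BTC_MISC_MASK == (BTC_SPR | BTC_VOID | BTC_OVERLOADED
                            | BTC_32BIT | BTC_64BIT));
  /* ...and does not overlap the attribute mask below it.  */
  assert ((BTC_MISC_MASK & 0x00000700) == 0);
  return 0;
}
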
/* Convenience macros to document the instruction type. */
#define RS6000_BTC_MEM RS6000_BTC_MISC /* load/store touches mem. */
@@ -2369,6 +2488,9 @@ extern int frame_pointer_needed;
#define RS6000_BTM_ALWAYS 0 /* Always enabled. */
#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */
#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */
+#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */
+#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */
+#define RS6000_BTM_HTM MASK_HTM /* hardware TM funcs. */
#define RS6000_BTM_SPE MASK_STRING /* E500 */
#define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */
#define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */
@@ -2377,15 +2499,22 @@ extern int frame_pointer_needed;
#define RS6000_BTM_FRSQRTES MASK_POPCNTB /* FRSQRTES instruction. */
#define RS6000_BTM_POPCNTD MASK_POPCNTD /* Target supports ISA 2.06. */
#define RS6000_BTM_CELL MASK_FPRND /* Target is cell powerpc. */
+#define RS6000_BTM_DFP MASK_DFP /* Decimal floating point. */
+#define RS6000_BTM_HARD_FLOAT MASK_SOFT_FLOAT /* Hardware floating point. */
#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \
| RS6000_BTM_VSX \
+ | RS6000_BTM_P8_VECTOR \
+ | RS6000_BTM_CRYPTO \
| RS6000_BTM_FRE \
| RS6000_BTM_FRES \
| RS6000_BTM_FRSQRTE \
| RS6000_BTM_FRSQRTES \
+ | RS6000_BTM_HTM \
| RS6000_BTM_POPCNTD \
- | RS6000_BTM_CELL)
+ | RS6000_BTM_CELL \
+ | RS6000_BTM_DFP \
+ | RS6000_BTM_HARD_FLOAT)
/* Define builtin enum index. */
@@ -2395,6 +2524,7 @@ extern int frame_pointer_needed;
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -2406,6 +2536,7 @@ extern int frame_pointer_needed;
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) ENUM,
@@ -2424,6 +2555,7 @@ enum rs6000_builtins
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -2437,6 +2569,7 @@ enum rs6000_builtin_type_index
RS6000_BTI_opaque_p_V2SI,
RS6000_BTI_opaque_V4SI,
RS6000_BTI_V16QI,
+ RS6000_BTI_V1TI,
RS6000_BTI_V2SI,
RS6000_BTI_V2SF,
RS6000_BTI_V2DI,
@@ -2446,6 +2579,7 @@ enum rs6000_builtin_type_index
RS6000_BTI_V4SF,
RS6000_BTI_V8HI,
RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V1TI,
RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V2DI,
@@ -2471,8 +2605,13 @@ enum rs6000_builtin_type_index
RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */
RS6000_BTI_INTDI, /* intDI_type_node */
RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */
+ RS6000_BTI_INTTI, /* intTI_type_node */
+ RS6000_BTI_UINTTI, /* unsigned_intTI_type_node */
RS6000_BTI_float, /* float_type_node */
RS6000_BTI_double, /* double_type_node */
+ RS6000_BTI_long_double, /* long_double_type_node */
+ RS6000_BTI_dfloat64, /* dfloat64_type_node */
+ RS6000_BTI_dfloat128, /* dfloat128_type_node */
RS6000_BTI_void, /* void_type_node */
RS6000_BTI_MAX
};
@@ -2483,6 +2622,7 @@ enum rs6000_builtin_type_index
#define opaque_p_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI])
#define opaque_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V4SI])
#define V16QI_type_node (rs6000_builtin_types[RS6000_BTI_V16QI])
+#define V1TI_type_node (rs6000_builtin_types[RS6000_BTI_V1TI])
#define V2DI_type_node (rs6000_builtin_types[RS6000_BTI_V2DI])
#define V2DF_type_node (rs6000_builtin_types[RS6000_BTI_V2DF])
#define V2SI_type_node (rs6000_builtin_types[RS6000_BTI_V2SI])
@@ -2492,6 +2632,7 @@ enum rs6000_builtin_type_index
#define V4SF_type_node (rs6000_builtin_types[RS6000_BTI_V4SF])
#define V8HI_type_node (rs6000_builtin_types[RS6000_BTI_V8HI])
#define unsigned_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V16QI])
+#define unsigned_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V1TI])
#define unsigned_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V8HI])
#define unsigned_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V4SI])
#define unsigned_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V2DI])
@@ -2518,8 +2659,13 @@ enum rs6000_builtin_type_index
#define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI])
#define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI])
#define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI])
+#define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI])
+#define uintTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTTI])
#define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float])
#define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double])
+#define long_double_type_internal_node (rs6000_builtin_types[RS6000_BTI_long_double])
+#define dfloat64_type_internal_node (rs6000_builtin_types[RS6000_BTI_dfloat64])
+#define dfloat128_type_internal_node (rs6000_builtin_types[RS6000_BTI_dfloat128])
#define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void])
extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX];
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000.md b/gcc-4.8/gcc/config/rs6000/rs6000.md
index ade39ea5f..8f1fd769b 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000.md
+++ b/gcc-4.8/gcc/config/rs6000/rs6000.md
@@ -25,10 +25,14 @@
;;
(define_constants
- [(STACK_POINTER_REGNUM 1)
+ [(FIRST_GPR_REGNO 0)
+ (STACK_POINTER_REGNUM 1)
(TOC_REGNUM 2)
(STATIC_CHAIN_REGNUM 11)
(HARD_FRAME_POINTER_REGNUM 31)
+ (LAST_GPR_REGNO 31)
+ (FIRST_FPR_REGNO 32)
+ (LAST_FPR_REGNO 63)
(LR_REGNO 65)
(CTR_REGNO 66)
(ARG_POINTER_REGNUM 67)
@@ -49,18 +53,9 @@
(SPE_ACC_REGNO 111)
(SPEFSCR_REGNO 112)
(FRAME_POINTER_REGNUM 113)
-
- ; ABI defined stack offsets for storing the TOC pointer with AIX calls.
- (TOC_SAVE_OFFSET_32BIT 20)
- (TOC_SAVE_OFFSET_64BIT 40)
-
- ; Function TOC offset in the AIX function descriptor.
- (AIX_FUNC_DESC_TOC_32BIT 4)
- (AIX_FUNC_DESC_TOC_64BIT 8)
-
- ; Static chain offset in the AIX function descriptor.
- (AIX_FUNC_DESC_SC_32BIT 8)
- (AIX_FUNC_DESC_SC_64BIT 16)
+ (TFHAR_REGNO 114)
+ (TFIAR_REGNO 115)
+ (TEXASR_REGNO 116)
])
;;
@@ -123,6 +118,22 @@
UNSPEC_LFIWZX
UNSPEC_FCTIWUZ
UNSPEC_GRP_END_NOP
+ UNSPEC_P8V_FMRGOW
+ UNSPEC_P8V_MTVSRWZ
+ UNSPEC_P8V_RELOAD_FROM_GPR
+ UNSPEC_P8V_MTVSRD
+ UNSPEC_P8V_XXPERMDI
+ UNSPEC_P8V_RELOAD_FROM_VSX
+ UNSPEC_ADDG6S
+ UNSPEC_CDTBCD
+ UNSPEC_CBCDTD
+ UNSPEC_DIVE
+ UNSPEC_DIVEO
+ UNSPEC_DIVEU
+ UNSPEC_DIVEUO
+ UNSPEC_UNPACK_128BIT
+ UNSPEC_PACK_128BIT
+ UNSPEC_LSQ
])
;;
@@ -142,7 +153,7 @@
;; Define an insn type attribute. This is used in function unit delay
;; computations.
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt"
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto,htm"
(const_string "integer"))
;; Define floating point instruction sub-types for use with Xfpu.md
@@ -164,7 +175,7 @@
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in rs6000.h.
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan"
+(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -197,6 +208,7 @@
(include "power5.md")
(include "power6.md")
(include "power7.md")
+(include "power8.md")
(include "cell.md")
(include "xfpu.md")
(include "a2.md")
@@ -215,7 +227,7 @@
(define_mode_iterator GPR [SI (DI "TARGET_POWERPC64")])
; Any supported integer mode.
-(define_mode_iterator INT [QI HI SI DI TI])
+(define_mode_iterator INT [QI HI SI DI TI PTI])
; Any supported integer mode that fits in one register.
(define_mode_iterator INT1 [QI HI SI (DI "TARGET_POWERPC64")])
@@ -223,6 +235,12 @@
; extend modes for DImode
(define_mode_iterator QHSI [QI HI SI])
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
; SImode or DImode, even if DImode doesn't fit in GPRs.
(define_mode_iterator SDI [SI DI])
@@ -230,6 +248,10 @@
; (one with a '.') will compare; and the size used for arithmetic carries.
(define_mode_iterator P [(SI "TARGET_32BIT") (DI "TARGET_64BIT")])
+; Iterator to add PTImode along with TImode (TImode can go in VSX registers,
+; PTImode is GPR only)
+(define_mode_iterator TI2 [TI PTI])
+
; Any hardware-supported floating-point mode
(define_mode_iterator FP [
(SF "TARGET_HARD_FLOAT
@@ -253,6 +275,50 @@
(V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
])
+; Floating point move iterators to combine binary and decimal moves
+(define_mode_iterator FMOVE32 [SF SD])
+(define_mode_iterator FMOVE64 [DF DD])
+(define_mode_iterator FMOVE64X [DI DF DD])
+(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+
+; Iterators for 128 bit types for direct move
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
+ (V16QI "")
+ (V8HI "")
+ (V4SI "")
+ (V4SF "")
+ (V2DI "")
+ (V2DF "")
+ (V1TI "")])
+
+; Whether a floating point move is OK; don't allow SD without hardware FP
+(define_mode_attr fmove_ok [(SF "")
+ (DF "")
+ (SD "TARGET_HARD_FLOAT && TARGET_FPRS")
+ (DD "")])
+
+; Convert REAL_VALUE to the appropriate bits
+(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE")
+ (DF "REAL_VALUE_TO_TARGET_DOUBLE")
+ (SD "REAL_VALUE_TO_TARGET_DECIMAL32")
+ (DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
+
+; Definitions for load to 32-bit fpr register
+(define_mode_attr f32_lr [(SF "f") (SD "wz")])
+(define_mode_attr f32_lm [(SF "m") (SD "Z")])
+(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %x0,%y1") (SD "lxsiwzx %x0,%y1")])
+
+; Definitions for store from 32-bit fpr register
+(define_mode_attr f32_sr [(SF "f") (SD "wx")])
+(define_mode_attr f32_sm [(SF "m") (SD "Z")])
+(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")])
+
+; Definitions for 32-bit fpr direct move
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
+
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -263,6 +329,25 @@
; Iterator for just SF/DF
(define_mode_iterator SFDF [SF DF])
+; SF/DF suffix for traditional floating instructions
+(define_mode_attr Ftrad [(SF "s") (DF "")])
+
+; SF/DF suffix for VSX instructions
+(define_mode_attr Fvsx [(SF "sp") (DF "dp")])
+
+; SF/DF constraint for arithmetic on traditional floating point registers
+(define_mode_attr Ff [(SF "f") (DF "d")])
+
+; SF/DF constraint for arithmetic on VSX registers
+(define_mode_attr Fv [(SF "wy") (DF "ws")])
+
+; s/d suffix for things like fp_addsub_s/fp_addsub_d
+(define_mode_attr Fs [(SF "s") (DF "d")])
+
+; FRE/FRES support
+(define_mode_attr Ffre [(SF "fres") (DF "fre")])
+(define_mode_attr FFRE [(SF "FRES") (DF "FRE")])
+
; Conditional returns.
(define_code_iterator any_return [return simple_return])
(define_code_attr return_pred [(return "direct_return ()")
@@ -271,7 +356,14 @@
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
-(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
+(define_mode_attr wd [(QI "b")
+ (HI "h")
+ (SI "w")
+ (DI "d")
+ (V16QI "b")
+ (V8HI "h")
+ (V4SI "w")
+ (V2DI "d")])
; DImode bits
(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")])
@@ -297,6 +389,8 @@
(define_mode_attr rreg [(SF "f")
(DF "ws")
+ (TF "f")
+ (TD "f")
(V4SF "wf")
(V2DF "wd")])
@@ -312,6 +406,87 @@
(define_mode_attr TARGET_FLOAT [(SF "TARGET_SINGLE_FLOAT")
(DF "TARGET_DOUBLE_FLOAT")])
+;; Mode iterator for logical operations on 128-bit types
+(define_mode_iterator BOOL_128 [TI
+ PTI
+ (V16QI "TARGET_ALTIVEC")
+ (V8HI "TARGET_ALTIVEC")
+ (V4SI "TARGET_ALTIVEC")
+ (V4SF "TARGET_ALTIVEC")
+ (V2DI "TARGET_ALTIVEC")
+ (V2DF "TARGET_ALTIVEC")
+ (V1TI "TARGET_ALTIVEC")])
+
+;; For the GPRs we use 3 constraints for register outputs, two that are the
+;; same as the output register, and a third where the output register is an
+;; early clobber, so we don't have to deal with register overlaps. For the
+;; vector types, we prefer to use the vector registers. For TI mode, allow
+;; either.
+
+;; Mode attribute for boolean operation register constraints for output
+(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v")
+ (PTI "&r,r,r")
+ (V16QI "wa,v,&?r,?r,?r")
+ (V8HI "wa,v,&?r,?r,?r")
+ (V4SI "wa,v,&?r,?r,?r")
+ (V4SF "wa,v,&?r,?r,?r")
+ (V2DI "wa,v,&?r,?r,?r")
+ (V2DF "wa,v,&?r,?r,?r")
+ (V1TI "wa,v,&?r,?r,?r")])
+
+;; Mode attribute for boolean operation register constraints for operand1
+(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v")
+ (PTI "r,0,r")
+ (V16QI "wa,v,r,0,r")
+ (V8HI "wa,v,r,0,r")
+ (V4SI "wa,v,r,0,r")
+ (V4SF "wa,v,r,0,r")
+ (V2DI "wa,v,r,0,r")
+ (V2DF "wa,v,r,0,r")
+ (V1TI "wa,v,r,0,r")])
+
+;; Mode attribute for boolean operation register constraints for operand2
+(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v")
+ (PTI "r,r,0")
+ (V16QI "wa,v,r,r,0")
+ (V8HI "wa,v,r,r,0")
+ (V4SI "wa,v,r,r,0")
+ (V4SF "wa,v,r,r,0")
+ (V2DI "wa,v,r,r,0")
+ (V2DF "wa,v,r,r,0")
+ (V1TI "wa,v,r,r,0")])
+
+;; Mode attribute for boolean operation register constraints for operand1
+;; for one_cmpl. To simplify things, we repeat the constraint where 0
+;; is used for operand1 or operand2
+(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
+ (PTI "r,0,0")
+ (V16QI "wa,v,r,0,0")
+ (V8HI "wa,v,r,0,0")
+ (V4SI "wa,v,r,0,0")
+ (V4SF "wa,v,r,0,0")
+ (V2DI "wa,v,r,0,0")
+ (V2DF "wa,v,r,0,0")
+ (V1TI "wa,v,r,0,0")])
+
+;; Mode attribute for the clobber of CC0 for AND expansion.
+;; For the 128-bit types, we never do AND immediate, but we need to
+;; get the correct number of X's for the number of operands.
+(define_mode_attr BOOL_REGS_AND_CR0 [(TI "X,X,X,X,X")
+ (PTI "X,X,X")
+ (V16QI "X,X,X,X,X")
+ (V8HI "X,X,X,X,X")
+ (V4SI "X,X,X,X,X")
+ (V4SF "X,X,X,X,X")
+ (V2DI "X,X,X,X,X")
+ (V2DF "X,X,X,X,X")
+ (V1TI "X,X,X,X,X")])
+
+;; Mode attribute to give the correct type for integer divides
+(define_mode_attr idiv_ldiv [(SI "idiv")
+ (DI "ldiv")])
+
+
;; Start with fixed-point load and store insns. Here we put only the more
;; complex forms. Basic data transfer is done later.
@@ -324,11 +499,19 @@
(define_insn "*zero_extend<mode>di2_internal1"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
"@
l<wd>z%U1%X1 %0,%1
rldicl %0,%1,0,<dbits>"
- [(set_attr "type" "load,*")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")])])
(define_insn "*zero_extend<mode>di2_internal2"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
@@ -382,6 +565,29 @@
(const_int 0)))]
"")
+(define_insn "*zero_extendsidi2_lfiwzx"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wu")
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
+ "@
+ lwz%U1%X1 %0,%1
+ rldicl %0,%1,0,32
+ mtvsrwz %x0,%1
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
(define_insn "extendqidi2"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -454,7 +660,15 @@
"@
lha%U1%X1 %0,%1
extsh %0,%1"
- [(set_attr "type" "load_ext,exts")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")])])
(define_insn ""
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
@@ -521,16 +735,47 @@
"TARGET_POWERPC64"
"")
-(define_insn ""
+(define_insn "*extendsidi2_lfiwax"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu")
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
+ "@
+ lwa%U1%X1 %0,%1
+ extsw %0,%1
+ mtvsrwa %x0,%1
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
extsw %0,%1"
- [(set_attr "type" "load_ext,exts")])
-
-(define_insn ""
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")])])
+
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
@@ -602,7 +847,15 @@
"@
lbz%U1%X1 %0,%1
rlwinm %0,%1,0,0xff"
- [(set_attr "type" "load,*")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")])])
(define_insn ""
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
@@ -722,7 +975,15 @@
"@
lbz%U1%X1 %0,%1
rlwinm %0,%1,0,0xff"
- [(set_attr "type" "load,*")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")])])
(define_insn ""
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
@@ -848,7 +1109,15 @@
"@
lhz%U1%X1 %0,%1
rlwinm %0,%1,0,0xffff"
- [(set_attr "type" "load,*")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")])])
(define_insn ""
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
@@ -915,7 +1184,15 @@
"@
lha%U1%X1 %0,%1
extsh %0,%1"
- [(set_attr "type" "load_ext,exts")])
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")])])
(define_insn ""
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
@@ -1658,7 +1935,19 @@
FAIL;
})
-(define_insn "one_cmpl<mode>2"
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:SDI 0 "gpc_reg_operand" "")
+ (not:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ ""
+{
+ if (<MODE>mode == DImode && !TARGET_POWERPC64)
+ {
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
+ DONE;
+ }
+})
+
+(define_insn "*one_cmpl<mode>2"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
""
@@ -1935,7 +2224,9 @@
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
"TARGET_CMPB && TARGET_POPCNTB"
- "prty<wd> %0,%1")
+ "prty<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "popcnt")])
(define_expand "parity<mode>2"
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
@@ -2412,7 +2703,7 @@
(match_operand:SI 2 "gpc_reg_operand" "r,r"))
(const_int 0)))
(clobber (match_scratch:SI 3 "=r,r"))]
- ""
+ "TARGET_32BIT"
"@
mullw. %3,%1,%2
#"
@@ -2425,7 +2716,7 @@
(match_operand:SI 2 "gpc_reg_operand" ""))
(const_int 0)))
(clobber (match_scratch:SI 3 ""))]
- "reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 3)
(mult:SI (match_dup 1) (match_dup 2)))
(set (match_dup 0)
@@ -2440,7 +2731,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(mult:SI (match_dup 1) (match_dup 2)))]
- ""
+ "TARGET_32BIT"
"@
mullw. %0,%1,%2
#"
@@ -2454,7 +2745,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(mult:SI (match_dup 1) (match_dup 2)))]
- "reload_completed"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0)
(mult:SI (match_dup 1) (match_dup 2)))
(set (match_dup 3)
@@ -2469,10 +2760,7 @@
(match_operand:GPR 2 "gpc_reg_operand" "r")))]
""
"div<wd>u %0,%1,%2"
- [(set (attr "type")
- (cond [(match_operand:SI 0 "" "")
- (const_string "idiv")]
- (const_string "ldiv")))])
+ [(set_attr "type" "<idiv_ldiv>")])
;; For powers of two we can do srai/aze for divide and then adjust for
@@ -2496,10 +2784,7 @@
(match_operand:GPR 2 "gpc_reg_operand" "r")))]
""
"div<wd> %0,%1,%2"
- [(set (attr "type")
- (cond [(match_operand:SI 0 "" "")
- (const_string "idiv")]
- (const_string "ldiv")))])
+ [(set_attr "type" "<idiv_ldiv>")])
(define_expand "mod<mode>3"
[(use (match_operand:GPR 0 "gpc_reg_operand" ""))
@@ -3698,20 +3983,33 @@
(const_int 0)))]
"")
-(define_insn "*rotlsi3_internal7"
+(define_insn "*rotlsi3_internal7le"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(zero_extend:SI
(subreg:QI
(rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
(match_operand:SI 2 "reg_or_cint_operand" "ri")) 0)))]
- ""
+ "!BYTES_BIG_ENDIAN"
"rlw%I2nm %0,%1,%h2,0xff"
[(set (attr "cell_micro")
(if_then_else (match_operand:SI 2 "const_int_operand" "")
(const_string "not")
(const_string "always")))])
-(define_insn "*rotlsi3_internal8"
+(define_insn "*rotlsi3_internal7be"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "reg_or_cint_operand" "ri")) 3)))]
+ "BYTES_BIG_ENDIAN"
+ "rlw%I2nm %0,%1,%h2,0xff"
+ [(set (attr "cell_micro")
+ (if_then_else (match_operand:SI 2 "const_int_operand" "")
+ (const_string "not")
+ (const_string "always")))])
+
+(define_insn "*rotlsi3_internal8le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:SI
(subreg:QI
@@ -3719,7 +4017,24 @@
(match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 "=r,r,r,r"))]
- ""
+ "!BYTES_BIG_ENDIAN"
+ "@
+ rlwnm. %3,%1,%2,0xff
+ rlwinm. %3,%1,%h2,0xff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotlsi3_internal8be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 3))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ "BYTES_BIG_ENDIAN"
"@
rlwnm. %3,%1,%2,0xff
rlwinm. %3,%1,%h2,0xff
@@ -3736,7 +4051,7 @@
(match_operand:SI 2 "reg_or_cint_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 ""))]
- "reload_completed"
+ "!BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 3)
(zero_extend:SI (subreg:QI
(rotate:SI (match_dup 1)
@@ -3746,7 +4061,25 @@
(const_int 0)))]
"")
-(define_insn "*rotlsi3_internal9"
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 3))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:QI
+ (rotate:SI (match_dup 1)
+ (match_dup 2)) 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal9le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:SI
(subreg:QI
@@ -3755,7 +4088,25 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
(zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
- ""
+ "!BYTES_BIG_ENDIAN"
+ "@
+ rlwnm. %0,%1,%2,0xff
+ rlwinm. %0,%1,%h2,0xff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotlsi3_internal9be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 3))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 3)))]
+ "BYTES_BIG_ENDIAN"
"@
rlwnm. %0,%1,%2,0xff
rlwinm. %0,%1,%h2,0xff
@@ -3773,7 +4124,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
- "reload_completed"
+ "!BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 0)
(zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -3781,20 +4132,48 @@
(const_int 0)))]
"")
-(define_insn "*rotlsi3_internal10"
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:QI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 3))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 3)))]
+ "BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:QI (rotate:SI (match_dup 1) (match_dup 2)) 3)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal10le"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(zero_extend:SI
(subreg:HI
(rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
(match_operand:SI 2 "reg_or_cint_operand" "r,i")) 0)))]
- ""
+ "!BYTES_BIG_ENDIAN"
"@
rlwnm %0,%1,%2,0xffff
rlwinm %0,%1,%h2,0xffff"
[(set_attr "type" "var_shift_rotate,integer")])
+(define_insn "*rotlsi3_internal10be"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i")) 2)))]
+ "BYTES_BIG_ENDIAN"
+ "@
+ rlwnm %0,%1,%2,0xffff
+ rlwinm %0,%1,%h2,0xffff"
+ [(set_attr "type" "var_shift_rotate,integer")])
-(define_insn "*rotlsi3_internal11"
+(define_insn "*rotlsi3_internal11le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:SI
(subreg:HI
@@ -3802,7 +4181,24 @@
(match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 "=r,r,r,r"))]
- ""
+ "!BYTES_BIG_ENDIAN"
+ "@
+ rlwnm. %3,%1,%2,0xffff
+ rlwinm. %3,%1,%h2,0xffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotlsi3_internal11be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 2))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r,r,r"))]
+ "BYTES_BIG_ENDIAN"
"@
rlwnm. %3,%1,%2,0xffff
rlwinm. %3,%1,%h2,0xffff
@@ -3819,7 +4215,7 @@
(match_operand:SI 2 "reg_or_cint_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 ""))]
- "reload_completed"
+ "!BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 3)
(zero_extend:SI (subreg:HI
(rotate:SI (match_dup 1)
@@ -3829,7 +4225,25 @@
(const_int 0)))]
"")
-(define_insn "*rotlsi3_internal12"
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 2))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:HI
+ (rotate:SI (match_dup 1)
+ (match_dup 2)) 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotlsi3_internal12le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:SI
(subreg:HI
@@ -3838,7 +4252,25 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
(zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
- ""
+ "!BYTES_BIG_ENDIAN"
+ "@
+ rlwnm. %0,%1,%2,0xffff
+ rlwinm. %0,%1,%h2,0xffff
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotlsi3_internal12be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:SI 2 "reg_or_cint_operand" "r,i,r,i")) 2))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 2)))]
+ "BYTES_BIG_ENDIAN"
"@
rlwnm. %0,%1,%2,0xffff
rlwinm. %0,%1,%h2,0xffff
@@ -3856,7 +4288,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))]
- "reload_completed"
+ "!BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 0)
(zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -3864,6 +4296,23 @@
(const_int 0)))]
"")
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:SI
+ (subreg:HI
+ (rotate:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "reg_or_cint_operand" "")) 2))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 2)))]
+ "BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:HI (rotate:SI (match_dup 1) (match_dup 2)) 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
(define_insn "ashlsi3"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(ashift:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
@@ -4054,7 +4503,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -4086,7 +4535,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -4177,16 +4626,25 @@
(const_int 0)))]
"")
-(define_insn ""
+(define_insn "*lshiftrt_internal1le"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(zero_extend:SI
(subreg:QI
(lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
(match_operand:SI 2 "const_int_operand" "i")) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (255))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
"rlwinm %0,%1,%s2,0xff")
-(define_insn ""
+(define_insn "*lshiftrt_internal1be"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")) 3)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
+ "rlwinm %0,%1,%s2,0xff")
+
+(define_insn "*lshiftrt_internal2le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
(compare:CC
(zero_extend:SI
@@ -4195,7 +4653,23 @@
(match_operand:SI 2 "const_int_operand" "i,i")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 "=r,r"))]
- "includes_rshift_p (operands[2], GEN_INT (255))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
+ "@
+ rlwinm. %3,%1,%s2,0xff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_insn "*lshiftrt_internal2be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 3))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
"@
rlwinm. %3,%1,%s2,0xff
#"
@@ -4211,7 +4685,7 @@
(match_operand:SI 2 "const_int_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 ""))]
- "includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
[(set (match_dup 3)
(zero_extend:SI (subreg:QI
(lshiftrt:SI (match_dup 1)
@@ -4221,7 +4695,26 @@
(const_int 0)))]
"")
-(define_insn ""
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 3))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:QI
+ (lshiftrt:SI (match_dup 1)
+ (match_dup 2)) 3)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*lshiftrt_internal3le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
(compare:CC
(zero_extend:SI
@@ -4231,7 +4724,24 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (255))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
+ "@
+ rlwinm. %0,%1,%s2,0xff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_insn "*lshiftrt_internal3be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 3))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 3)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255))"
"@
rlwinm. %0,%1,%s2,0xff
#"
@@ -4248,7 +4758,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
[(set (match_dup 0)
(zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -4256,16 +4766,43 @@
(const_int 0)))]
"")
-(define_insn ""
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:QI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 3))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 3)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (255)) && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:QI (lshiftrt:SI (match_dup 1) (match_dup 2)) 3)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*lshiftrt_internal4le"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(zero_extend:SI
(subreg:HI
(lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
(match_operand:SI 2 "const_int_operand" "i")) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
"rlwinm %0,%1,%s2,0xffff")
-(define_insn ""
+(define_insn "*lshiftrt_internal4be"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")) 2)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
+ "rlwinm %0,%1,%s2,0xffff")
+
+(define_insn "*lshiftrt_internal5le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
(compare:CC
(zero_extend:SI
@@ -4274,7 +4811,23 @@
(match_operand:SI 2 "const_int_operand" "i,i")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 "=r,r"))]
- "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
+ "@
+ rlwinm. %3,%1,%s2,0xffff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_insn "*lshiftrt_internal5be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 2))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 "=r,r"))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
"@
rlwinm. %3,%1,%s2,0xffff
#"
@@ -4290,7 +4843,7 @@
(match_operand:SI 2 "const_int_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:SI 3 ""))]
- "includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
[(set (match_dup 3)
(zero_extend:SI (subreg:HI
(lshiftrt:SI (match_dup 1)
@@ -4300,7 +4853,26 @@
(const_int 0)))]
"")
-(define_insn ""
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 2))
+ (const_int 0)))
+ (clobber (match_scratch:SI 3 ""))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:SI (subreg:HI
+ (lshiftrt:SI (match_dup 1)
+ (match_dup 2)) 2)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*lshiftrt_internal5le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
(compare:CC
(zero_extend:SI
@@ -4310,7 +4882,24 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (65535))"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
+ "@
+ rlwinm. %0,%1,%s2,0xffff
+ #"
+ [(set_attr "type" "delayed_compare")
+ (set_attr "length" "4,8")])
+
+(define_insn "*lshiftrt_internal5be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
+ (match_operand:SI 2 "const_int_operand" "i,i")) 2))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 2)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535))"
"@
rlwinm. %0,%1,%s2,0xffff
#"
@@ -4327,7 +4916,7 @@
(const_int 0)))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))]
- "includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ "!BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
[(set (match_dup 0)
(zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -4335,6 +4924,24 @@
(const_int 0)))]
"")
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC
+ (zero_extend:SI
+ (subreg:HI
+ (lshiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")) 2))
+ (const_int 0)))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 2)))]
+ "BYTES_BIG_ENDIAN && includes_rshift_p (operands[2], GEN_INT (65535)) && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:SI (subreg:HI (lshiftrt:SI (match_dup 1) (match_dup 2)) 2)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
(define_insn "ashrsi3"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r,r")
(ashiftrt:SI (match_operand:SI 1 "gpc_reg_operand" "r,r")
@@ -4455,224 +5062,226 @@
(const_int 0)))]
"")
-;; Floating-point insns, excluding normal data motion.
-;;
-;; PowerPC has a full set of single-precision floating point instructions.
-;;
-;; For the POWER architecture, we pretend that we have both SFmode and
-;; DFmode insns, while, in fact, all fp insns are actually done in double.
-;; The only conversions we will do will be when storing to memory. In that
-;; case, we will use the "frsp" instruction before storing.
-;;
-;; Note that when we store into a single-precision memory location, we need to
-;; use the frsp insn first. If the register being stored isn't dead, we
-;; need a scratch register for the frsp. But this is difficult when the store
-;; is done by reload. It is not incorrect to do the frsp on the register in
-;; this case, we just lose precision that we would have otherwise gotten but
-;; is not guaranteed. Perhaps this should be tightened up at some point.
-
-(define_expand "extendsfdf2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn_and_split "*extendsfdf2_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d")
- (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
- "@
- #
- fmr %0,%1
- lfs%U1%X1 %0,%1"
- "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
- [(const_int 0)]
-{
- emit_note (NOTE_INSN_DELETED);
- DONE;
-}
- [(set_attr "type" "fp,fp,fpload")])
-
-(define_expand "truncdfsf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*truncdfsf2_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
- "frsp %0,%1"
- [(set_attr "type" "fp")])
-
-(define_expand "negsf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
- "")
-(define_insn "*negsf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fneg %0,%1"
- [(set_attr "type" "fp")])
+;; Floating-point insns, excluding normal data motion. We combine the SF/DF
+;; modes here, and also add in conditional vsx/power8-vector support to access
+;; values in the traditional Altivec registers if the appropriate
+;; -mupper-regs-{df,sf} option is enabled.
-(define_expand "abssf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+(define_expand "abs<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN"
"")
-(define_insn "*abssf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fabs %0,%1"
- [(set_attr "type" "fp")])
+(define_insn "*abs<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fabs %0,%1
+ xsabsdp %x0,%x1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (abs:SF (match_operand:SF 1 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fnabs %0,%1"
- [(set_attr "type" "fp")])
+(define_insn "*nabs<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (neg:SFDF
+ (abs:SFDF
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fnabs %0,%1
+ xsnabsdp %x0,%x1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_expand "addsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+(define_expand "neg<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN"
"")
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (plus:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fadds %0,%1,%2"
+(define_insn "*neg<mode>2_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (neg:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fneg %0,%1
+ xsnegdp %x0,%x1"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_s")])
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_expand "subsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+(define_expand "add<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN"
"")
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (minus:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fsubs %0,%1,%2"
+(define_insn "*add<mode>3_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (plus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fadd<Ftrad> %0,%1,%2
+ xsadd<Fvsx> %x0,%x1,%x2"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_s")])
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_expand "mulsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT"
+(define_expand "sub<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN"
"")
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fmuls %0,%1,%2"
+(define_insn "*sub<mode>3_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (minus:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fsub<Ftrad> %0,%1,%2
+ xssub<Fvsx> %x0,%x1,%x2"
[(set_attr "type" "fp")
- (set_attr "fp_type" "fp_mul_s")])
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_expand "divsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
+(define_expand "mul<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN"
"")
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (div:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
- "fdivs %0,%1,%2"
- [(set_attr "type" "sdiv")])
-
-(define_insn "fres"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
- "TARGET_FRES"
- "fres %0,%1"
+(define_insn "*mul<mode>3_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (mult:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fmul<Ftrad> %0,%1,%2
+ xsmul<Fvsx> %x0,%x1,%x2"
+ [(set_attr "type" "dmul")
+ (set_attr "fp_type" "fp_mul_<Fs>")])
+
+(define_expand "div<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" "")))]
+ "TARGET_<MODE>_INSN && !TARGET_SIMPLE_FPU"
+ "")
+
+(define_insn "*div<mode>3_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (div:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU"
+ "@
+ fdiv<Ftrad> %0,%1,%2
+ xsdiv<Fvsx> %x0,%x1,%x2"
+ [(set_attr "type" "<Fs>div")
+ (set_attr "fp_type" "fp_div_<Fs>")])
+
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (sqrt:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR && !TARGET_SIMPLE_FPU
+ && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
+ "@
+ fsqrt<Ftrad> %0,%1
+ xssqrt<Fvsx> %x0,%x1"
+ [(set_attr "type" "<Fs>sqrt")
+ (set_attr "fp_type" "fp_sqrt_<Fs>")])
+
+;; Floating point reciprocal approximation
+(define_insn "fre<Fs>"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
+ UNSPEC_FRES))]
+ "TARGET_<FFRE>"
+ "@
+ fre<Ftrad> %0,%1
+ xsre<Fvsx> %x0,%x1"
[(set_attr "type" "fp")])
-; builtin fmaf support
-(define_insn "*fmasf4_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")
- (match_operand:SF 3 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fmadds %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+(define_insn "*rsqrt<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
+ UNSPEC_RSQRT))]
+ "RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)"
+ "@
+ frsqrte<Ftrad> %0,%1
+ xsrsqrte<Fvsx> %x0,%x1"
+ [(set_attr "type" "fp")])
-(define_insn "*fmssf4_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fmsubs %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+;; Floating point comparisons
+(define_insn "*cmp<mode>_fpr"
+ [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,y")
+ (compare:CCFP (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fcmpu %0,%1,%2
+ xscmpudp %0,%x1,%x2"
+ [(set_attr "type" "fpcompare")])
-(define_insn "*nfmasf4_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")
- (match_operand:SF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fnmadds %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+;; Floating point conversions
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "")
+ (float_extend:DF (match_operand:SF 1 "reg_or_none500mem_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
+ "")
-(define_insn "*nfmssf4_fpr"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")
- (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fnmsubs %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_s")])
+(define_insn_and_split "*extendsfdf2_fpr"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv")
+ (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "@
+ #
+ fmr %0,%1
+ lfs%U1%X1 %0,%1
+ #
+ xxlor %x0,%x1,%x1
+ lxsspx %x0,%y1"
+ "&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+{
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+}
+ [(set_attr_alternative "type"
+ [(const_string "fp")
+ (const_string "fp")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (const_string "fp")
+ (const_string "vecsimple")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))])])
-(define_expand "sqrtsf2"
+(define_expand "truncdfsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU)
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
- && !TARGET_SIMPLE_FPU"
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "")))]
+ "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
"")
-(define_insn ""
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "f")))]
- "(TARGET_PPC_GPOPT || TARGET_XILINX_FPU) && TARGET_HARD_FLOAT
- && TARGET_FPRS && TARGET_SINGLE_FLOAT && !TARGET_SIMPLE_FPU"
- "fsqrts %0,%1"
- [(set_attr "type" "ssqrt")])
-
-(define_insn "*rsqrtsf_internal1"
+(define_insn "*truncdfsf2_fpr"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
- UNSPEC_RSQRT))]
- "TARGET_FRSQRTES"
- "frsqrtes %0,%1"
+ (float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+ "frsp %0,%1"
[(set_attr "type" "fp")])
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
@@ -4742,37 +5351,82 @@
;; Use an unspec rather than providing an if-then-else in RTL, to prevent the
;; compiler from optimizing -0.0
(define_insn "copysign<mode>3_fcpsgn"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")
- (match_operand:SFDF 2 "gpc_reg_operand" "<rreg2>")]
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")]
UNSPEC_COPYSIGN))]
- "TARGET_CMPB && !VECTOR_UNIT_VSX_P (<MODE>mode)"
- "fcpsgn %0,%2,%1"
+ "TARGET_<MODE>_FPR && TARGET_CMPB"
+ "@
+ fcpsgn %0,%2,%1
+ xscpsgn<Fvsx> %x0,%x2,%x1"
[(set_attr "type" "fp")])
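
Note the operand order in both templates: fcpsgn (and the VSX form) takes the sign from its first source operand, while RTL's copysign keeps the magnitude of operand 1 and the sign of operand 2, so the sign source %2 is printed first. In C terms (sketch):

#include <math.h>

/* copysign<mode>3: magnitude of a, sign of b; emitted as
   "fcpsgn %0,%2,%1", i.e. the sign source comes first.  */
double
rtl_copysign (double a, double b)
{
  return copysign (a, b);
}
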
;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a
;; fsel instruction and some auxiliary computations. Then we just have a
;; single DEFINE_INSN for fsel and the define_splits to make them if made by
;; combine.
-(define_expand "smaxsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" ""))
+;; For MIN, MAX on non-VSX machines, and conditional move all of the time, we
+;; use DEFINE_EXPAND's that involve a fsel instruction and some auxiliary
+;; computations. Then we just have a single DEFINE_INSN for fsel and the
+;; define_splits to make them if made by combine. On VSX machines we have the
+;; min/max instructions.
+;;
+;; On VSX, we only check for TARGET_VSX instead of checking for a vsx/p8 vector
+;; to allow either DF/SF to use only traditional registers.
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
(match_dup 1)
(match_dup 2)))]
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
+{
+ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]);
+ DONE;
+})
-(define_expand "sminsf3"
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (if_then_else:SF (ge (match_operand:SF 1 "gpc_reg_operand" "")
- (match_operand:SF 2 "gpc_reg_operand" ""))
+(define_insn "*smax<mode>3_vsx"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (smax:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR && TARGET_VSX"
+ "xsmaxdp %x0,%x1,%x2"
+ [(set_attr "type" "fp")])
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (if_then_else:SFDF (ge (match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" ""))
(match_dup 2)
(match_dup 1)))]
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_SINGLE_FLOAT && !flag_trapping_math"
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math"
+{
+ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn "*smin<mode>3_vsx"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (smin:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>")))]
+ "TARGET_<MODE>_FPR && TARGET_VSX"
+ "xsmindp %x0,%x1,%x2"
+ [(set_attr "type" "fp")])
+
+(define_split
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
+ (match_operator:SFDF 3 "min_max_operator"
+ [(match_operand:SFDF 1 "gpc_reg_operand" "")
+ (match_operand:SFDF 2 "gpc_reg_operand" "")]))]
+ "TARGET_<MODE>_FPR && TARGET_PPC_GFXOPT && !flag_trapping_math
+ && !TARGET_VSX"
+ [(const_int 0)]
+{
+ rs6000_emit_minmax (operands[0], GET_CODE (operands[3]), operands[1],
+ operands[2]);
+ DONE;
+})
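
The split and the two expanders above all funnel into rs6000_emit_minmax, which on non-VSX targets materializes the fsel sequence the comment describes: fsel selects on the sign of its first input, so max and min become a subtract plus a select. That is also why !flag_trapping_math is required; the subtract may trap and NaN handling differs from IEEE fmax. A rough C model:

/* fsel d,a,b,c: d = (a >= 0.0) ? b : c.  */
static double
fsel (double a, double b, double c)
{
  return a >= 0.0 ? b : c;
}

/* smax via fsel, as rs6000_emit_minmax arranges it on non-VSX
   targets (sketch; not IEEE fmax for NaN inputs).  */
double
smax_via_fsel (double x, double y)
{
  return fsel (x - y, x, y);
}
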
(define_split
[(set (match_operand:SF 0 "gpc_reg_operand" "")
@@ -4904,208 +5558,9 @@
"fsel %0,%1,%2,%3"
[(set_attr "type" "fp")])
-(define_expand "negdf2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*negdf2_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fneg %0,%1"
- [(set_attr "type" "fp")])
-
-(define_expand "absdf2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*absdf2_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fabs %0,%1"
- [(set_attr "type" "fp")])
-
-(define_insn "*nabsdf2_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (neg:DF (abs:DF (match_operand:DF 1 "gpc_reg_operand" "d"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fnabs %0,%1"
- [(set_attr "type" "fp")])
-
-(define_expand "adddf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*adddf3_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (plus:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fadd %0,%1,%2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
-(define_expand "subdf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*subdf3_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (minus:DF (match_operand:DF 1 "gpc_reg_operand" "d")
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fsub %0,%1,%2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
-(define_expand "muldf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)"
- "")
-
-(define_insn "*muldf3_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fmul %0,%1,%2"
- [(set_attr "type" "dmul")
- (set_attr "fp_type" "fp_mul_d")])
-
-(define_expand "divdf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" "")))]
- "TARGET_HARD_FLOAT
- && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)
- && !TARGET_SIMPLE_FPU"
- "")
-
-(define_insn "*divdf3_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (div:DF (match_operand:DF 1 "gpc_reg_operand" "d")
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && !TARGET_SIMPLE_FPU
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fdiv %0,%1,%2"
- [(set_attr "type" "ddiv")])
-
-(define_insn "*fred_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
- "TARGET_FRE && !VECTOR_UNIT_VSX_P (DFmode)"
- "fre %0,%1"
- [(set_attr "type" "fp")])
-
-(define_insn "*rsqrtdf_internal1"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")]
- UNSPEC_RSQRT))]
- "TARGET_FRSQRTE && !VECTOR_UNIT_VSX_P (DFmode)"
- "frsqrte %0,%1"
- [(set_attr "type" "fp")])
-
-; builtin fma support
-(define_insn "*fmadf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
- (match_operand:DF 2 "gpc_reg_operand" "f")
- (match_operand:DF 3 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "fmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*fmsdf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
- (match_operand:DF 2 "gpc_reg_operand" "f")
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "fmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*nfmadf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
- (match_operand:DF 2 "gpc_reg_operand" "f")
- (match_operand:DF 3 "gpc_reg_operand" "f"))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "fnmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_insn "*nfmsdf4_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
- (neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
- (match_operand:DF 2 "gpc_reg_operand" "f")
- (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_NONE_P (DFmode)"
- "fnmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
-(define_expand "sqrtdf2"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "")))]
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
- "")
-
-(define_insn "*sqrtdf2_fpr"
- [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
- (sqrt:DF (match_operand:DF 1 "gpc_reg_operand" "d")))]
- "TARGET_PPC_GPOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fsqrt %0,%1"
- [(set_attr "type" "dsqrt")])
-
;; The conditional move instructions allow us to perform max and min
;; operations even when
-(define_expand "smaxdf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" ""))
- (match_dup 1)
- (match_dup 2)))]
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !flag_trapping_math"
- "{ rs6000_emit_minmax (operands[0], SMAX, operands[1], operands[2]); DONE;}")
-
-(define_expand "smindf3"
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (if_then_else:DF (ge (match_operand:DF 1 "gpc_reg_operand" "")
- (match_operand:DF 2 "gpc_reg_operand" ""))
- (match_dup 2)
- (match_dup 1)))]
- "TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !flag_trapping_math"
- "{ rs6000_emit_minmax (operands[0], SMIN, operands[1], operands[2]); DONE;}")
-
(define_split
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(match_operator:DF 3 "min_max_operator"
@@ -5159,12 +5614,15 @@
; We don't define lfiwax/lfiwzx with the normal definition, because we
; don't want to support putting SImode in FPR registers.
(define_insn "lfiwax"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWAX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
- "lfiwax %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1
+ mtvsrwa %x0,%1"
+ [(set_attr "type" "fpload,fpload,mffgpr")])
; This split must be run before register allocation because it allocates the
; memory slot that is needed to move values to/from the FPR. We don't allocate
@@ -5186,7 +5644,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, false);
else
{
@@ -5235,12 +5694,15 @@
(set_attr "type" "fpload")])
(define_insn "lfiwzx"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWZX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
- "lfiwzx %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1
+ mtvsrwz %x0,%1"
+ [(set_attr "type" "fpload,fpload,mftgpr")])
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
@@ -5257,7 +5719,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, true);
else
{
@@ -5548,7 +6011,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -5642,7 +6105,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -5781,66 +6244,52 @@
[(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
UNSPEC_FCTID))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
"fctid %0,%1"
[(set_attr "type" "fp")])
-(define_expand "btrunc<mode>2"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
- UNSPEC_FRIZ))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
- "")
-
-(define_insn "*btrunc<mode>2_fpr"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+(define_insn "btrunc<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
UNSPEC_FRIZ))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
- "friz %0,%1"
- [(set_attr "type" "fp")])
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
+ "@
+ friz %0,%1
+ xsrdpiz %x0,%x1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_expand "ceil<mode>2"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
+(define_insn "ceil<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
UNSPEC_FRIP))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
- "")
-
-(define_insn "*ceil<mode>2_fpr"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
- UNSPEC_FRIP))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
- "frip %0,%1"
- [(set_attr "type" "fp")])
-
-(define_expand "floor<mode>2"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "")]
- UNSPEC_FRIM))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
- "")
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
+ "@
+ frip %0,%1
+ xsrdpip %x0,%x1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
-(define_insn "*floor<mode>2_fpr"
- [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
- (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
+(define_insn "floor<mode>2"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>")
+ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")]
UNSPEC_FRIM))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>
- && !VECTOR_UNIT_VSX_P (<MODE>mode)"
- "frim %0,%1"
- [(set_attr "type" "fp")])
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
+ "@
+ frim %0,%1
+ xsrdpim %x0,%x1"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
;; No VSX equivalent to frin
(define_insn "round<mode>2"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=<rreg2>")
(unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>")]
UNSPEC_FRIN))]
- "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT>"
+ "TARGET_<MODE>_FPR && TARGET_FPRND"
"frin %0,%1"
- [(set_attr "type" "fp")])
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_addsub_<Fs>")])
; An UNSPEC is used so we don't have to support SImode in FP registers.
(define_insn "stfiwx"
@@ -6084,6 +6533,49 @@
[(set_attr "length" "8")
(set_attr "type" "fpload")])
+;; Define the TImode operations that can be done in a small number
+;; of instructions. The & constraints are to prevent the register
+;; allocator from allocating registers that overlap with the inputs
+;; (for example, having an input in 7,8 and an output in 6,7). We
+;; also allow for the output being the same as one of the inputs.
+
+(define_insn "addti3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&r,r,r")
+ (plus:TI (match_operand:TI 1 "gpc_reg_operand" "%r,r,0,0")
+ (match_operand:TI 2 "reg_or_short_operand" "r,I,r,I")))]
+ "TARGET_64BIT"
+{
+ if (WORDS_BIG_ENDIAN)
+ return (GET_CODE (operands[2])) != CONST_INT
+ ? \"addc %L0,%L1,%L2\;adde %0,%1,%2\"
+ : \"addic %L0,%L1,%2\;add%G2e %0,%1\";
+ else
+ return (GET_CODE (operands[2])) != CONST_INT
+ ? \"addc %0,%1,%2\;adde %L0,%L1,%L2\"
+ : \"addic %0,%1,%2\;add%G2e %L0,%L1\";
+}
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+(define_insn "subti3"
+ [(set (match_operand:TI 0 "gpc_reg_operand" "=&r,&r,r,r,r")
+ (minus:TI (match_operand:TI 1 "reg_or_short_operand" "r,I,0,r,I")
+ (match_operand:TI 2 "gpc_reg_operand" "r,r,r,0,0")))]
+ "TARGET_64BIT"
+{
+ if (WORDS_BIG_ENDIAN)
+ return (GET_CODE (operands[1]) != CONST_INT)
+ ? \"subfc %L0,%L2,%L1\;subfe %0,%2,%1\"
+ : \"subfic %L0,%L2,%1\;subf%G1e %0,%2\";
+ else
+ return (GET_CODE (operands[1]) != CONST_INT)
+ ? \"subfc %0,%2,%1\;subfe %L0,%L2,%L1\"
+ : \"subfic %0,%2,%1\;subf%G1e %L0,%L2\";
+}
+ [(set_attr "type" "two")
+ (set_attr "length" "8")])
+
+
;; Define the DImode operations that can be done in a small number
;; of instructions. The & constraints are to prevent the register
;; allocator from allocating registers that overlap with the inputs
@@ -6260,11 +6752,11 @@
[(set_attr "type" "two,three")
(set_attr "length" "8,12")])
-(define_insn "*ashrdisi3_noppc64"
+(define_insn "*ashrdisi3_noppc64be"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(subreg:SI (ashiftrt:DI (match_operand:DI 1 "gpc_reg_operand" "r")
(const_int 32)) 4))]
- "TARGET_32BIT && !TARGET_POWERPC64"
+ "TARGET_32BIT && !TARGET_POWERPC64 && WORDS_BIG_ENDIAN"
"*
{
if (REGNO (operands[0]) == REGNO (operands[1]))
@@ -6551,19 +7043,31 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal7"
+(define_insn "*rotldi3_internal7le"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI
(subreg:QI
(rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
(match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl %0,%1,%2,56
+ rldicl %0,%1,%H2,56"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal7be"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 7)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN"
"@
rldcl %0,%1,%2,56
rldicl %0,%1,%H2,56"
[(set_attr "type" "var_shift_rotate,integer")])
-(define_insn "*rotldi3_internal8"
+(define_insn "*rotldi3_internal8le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:QI
@@ -6571,7 +7075,24 @@
(match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 "=r,r,r,r"))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %3,%1,%2,56
+ rldicl. %3,%1,%H2,56
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal8be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 7))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %3,%1,%2,56
rldicl. %3,%1,%H2,56
@@ -6588,7 +7109,7 @@
(match_operand:DI 2 "reg_or_cint_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 ""))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 3)
(zero_extend:DI (subreg:QI
(rotate:DI (match_dup 1)
@@ -6598,7 +7119,25 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal9"
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 7))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:QI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 7)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal9le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:QI
@@ -6607,7 +7146,25 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
(zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %0,%1,%2,56
+ rldicl. %0,%1,%H2,56
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal9be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 7))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 7)))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %0,%1,%2,56
rldicl. %0,%1,%H2,56
@@ -6625,7 +7182,7 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "")
(zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 0)
(zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -6633,19 +7190,48 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal10"
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:QI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 7))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 7)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:QI (rotate:DI (match_dup 1) (match_dup 2)) 7)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal10le"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI
(subreg:HI
(rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
(match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN"
"@
rldcl %0,%1,%2,48
rldicl %0,%1,%H2,48"
[(set_attr "type" "var_shift_rotate,integer")])
-(define_insn "*rotldi3_internal11"
+(define_insn "*rotldi3_internal10be"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 6)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN"
+ "@
+ rldcl %0,%1,%2,48
+ rldicl %0,%1,%H2,48"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal11le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:HI
@@ -6653,7 +7239,24 @@
(match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 "=r,r,r,r"))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %3,%1,%2,48
+ rldicl. %3,%1,%H2,48
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal11be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 6))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %3,%1,%2,48
rldicl. %3,%1,%H2,48
@@ -6670,7 +7273,7 @@
(match_operand:DI 2 "reg_or_cint_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 ""))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 3)
(zero_extend:DI (subreg:HI
(rotate:DI (match_dup 1)
@@ -6680,7 +7283,25 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal12"
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 6))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:HI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 6)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal12le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:HI
@@ -6689,7 +7310,25 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
(zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %0,%1,%2,48
+ rldicl. %0,%1,%H2,48
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal12be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 6))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 6)))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %0,%1,%2,48
rldicl. %0,%1,%H2,48
@@ -6707,7 +7346,7 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "")
(zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 0)
(zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -6715,19 +7354,48 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal13"
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:HI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 6))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 6)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:HI (rotate:DI (match_dup 1) (match_dup 2)) 6)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal13le"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI
(subreg:SI
(rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
(match_operand:DI 2 "reg_or_cint_operand" "r,i")) 0)))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl %0,%1,%2,32
+ rldicl %0,%1,%H2,32"
+ [(set_attr "type" "var_shift_rotate,integer")])
+
+(define_insn "*rotldi3_internal13be"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+ (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i")) 4)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN"
"@
rldcl %0,%1,%2,32
rldicl %0,%1,%H2,32"
[(set_attr "type" "var_shift_rotate,integer")])
-(define_insn "*rotldi3_internal14"
+(define_insn "*rotldi3_internal14le"
[(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:SI
@@ -6735,7 +7403,24 @@
(match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 "=r,r,r,r"))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %3,%1,%2,32
+ rldicl. %3,%1,%H2,32
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal14be"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 4))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 "=r,r,r,r"))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %3,%1,%2,32
rldicl. %3,%1,%H2,32
@@ -6752,7 +7437,7 @@
(match_operand:DI 2 "reg_or_cint_operand" "")) 0))
(const_int 0)))
(clobber (match_scratch:DI 3 ""))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 3)
(zero_extend:DI (subreg:SI
(rotate:DI (match_dup 1)
@@ -6762,7 +7447,25 @@
(const_int 0)))]
"")
-(define_insn "*rotldi3_internal15"
+(define_split
+ [(set (match_operand:CC 0 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 4))
+ (const_int 0)))
+ (clobber (match_scratch:DI 3 ""))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 3)
+ (zero_extend:DI (subreg:SI
+ (rotate:DI (match_dup 1)
+ (match_dup 2)) 4)))
+ (set (match_dup 0)
+ (compare:CC (match_dup 3)
+ (const_int 0)))]
+ "")
+
+(define_insn "*rotldi3_internal15le"
[(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
(compare:CC (zero_extend:DI
(subreg:SI
@@ -6771,7 +7474,25 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
(zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_64BIT"
+ "TARGET_64BIT && !BYTES_BIG_ENDIAN"
+ "@
+ rldcl. %0,%1,%2,32
+ rldicl. %0,%1,%H2,32
+ #
+ #"
+ [(set_attr "type" "var_delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ (set_attr "length" "4,4,8,8")])
+
+(define_insn "*rotldi3_internal15be"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x,x,?y,?y")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "r,r,r,r")
+ (match_operand:DI 2 "reg_or_cint_operand" "r,i,r,i")) 4))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r")
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 4)))]
+ "TARGET_64BIT && BYTES_BIG_ENDIAN"
"@
rldcl. %0,%1,%2,32
rldicl. %0,%1,%H2,32
@@ -6789,7 +7510,7 @@
(const_int 0)))
(set (match_operand:DI 0 "gpc_reg_operand" "")
(zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))]
- "TARGET_POWERPC64 && reload_completed"
+ "TARGET_POWERPC64 && !BYTES_BIG_ENDIAN && reload_completed"
[(set (match_dup 0)
(zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 0)))
(set (match_dup 3)
@@ -6797,6 +7518,23 @@
(const_int 0)))]
"")
+(define_split
+ [(set (match_operand:CC 3 "cc_reg_not_micro_cr0_operand" "")
+ (compare:CC (zero_extend:DI
+ (subreg:SI
+ (rotate:DI (match_operand:DI 1 "gpc_reg_operand" "")
+ (match_operand:DI 2 "reg_or_cint_operand" "")) 4))
+ (const_int 0)))
+ (set (match_operand:DI 0 "gpc_reg_operand" "")
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 4)))]
+ "TARGET_POWERPC64 && BYTES_BIG_ENDIAN && reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (rotate:DI (match_dup 1) (match_dup 2)) 4)))
+ (set (match_dup 3)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ "")
+
(define_expand "ashldi3"
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(ashift:DI (match_operand:DI 1 "gpc_reg_operand" "")
@@ -7195,10 +7933,19 @@
[(parallel
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(and:DI (match_operand:DI 1 "gpc_reg_operand" "")
- (match_operand:DI 2 "and64_2_operand" "")))
+ (match_operand:DI 2 "reg_or_cint_operand" "")))
(clobber (match_scratch:CC 3 ""))])]
- "TARGET_POWERPC64"
- "")
+ ""
+{
+ if (!TARGET_POWERPC64)
+ {
+ rtx cc = gen_rtx_SCRATCH (CCmode);
+ rs6000_split_logical (operands, AND, false, false, false, cc);
+ DONE;
+ }
+ else if (!and64_2_operand (operands[2], DImode))
+ operands[2] = force_reg (DImode, operands[2]);
+})
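
The expander no longer requires TARGET_POWERPC64: on 32-bit targets rs6000_split_logical decomposes the DImode AND into one 32-bit operation per word, which is safe because logical operations, unlike adds, carry nothing between words. Schematically:

#include <stdint.h>

/* What the split produces for a 64-bit AND on a 32-bit target:
   independent per-word operations, no carries.  */
void
anddi_split (const uint32_t a[2], const uint32_t b[2], uint32_t r[2])
{
  r[0] = a[0] & b[0];
  r[1] = a[1] & b[1];
}
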
(define_insn "anddi3_mc"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r,r,r,r")
@@ -7379,11 +8126,17 @@
(define_expand "iordi3"
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(ior:DI (match_operand:DI 1 "gpc_reg_operand" "")
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
- "TARGET_POWERPC64"
- "
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
+ ""
{
- if (non_logical_cint_operand (operands[2], DImode))
+ if (!TARGET_POWERPC64)
+ {
+ rs6000_split_logical (operands, IOR, false, false, false, NULL_RTX);
+ DONE;
+ }
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
+ operands[2] = force_reg (DImode, operands[2]);
+ else if (non_logical_cint_operand (operands[2], DImode))
{
HOST_WIDE_INT value;
rtx tmp = ((!can_create_pseudo_p ()
@@ -7408,15 +8161,21 @@
emit_insn (gen_iordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
DONE;
}
-}")
+})
(define_expand "xordi3"
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(xor:DI (match_operand:DI 1 "gpc_reg_operand" "")
- (match_operand:DI 2 "reg_or_logical_cint_operand" "")))]
- "TARGET_POWERPC64"
- "
+ (match_operand:DI 2 "reg_or_cint_operand" "")))]
+ ""
{
+ if (!TARGET_POWERPC64)
+ {
+ rs6000_split_logical (operands, XOR, false, false, false, NULL_RTX);
+ DONE;
+ }
+ else if (!reg_or_logical_cint_operand (operands[2], DImode))
+ operands[2] = force_reg (DImode, operands[2]);
if (non_logical_cint_operand (operands[2], DImode))
{
HOST_WIDE_INT value;
@@ -7442,7 +8201,7 @@
emit_insn (gen_xordi3 (operands[0], tmp, GEN_INT (value & 0xffff)));
DONE;
}
-}")
+})
(define_insn "*booldi3_internal1"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
@@ -7678,6 +8437,384 @@
(compare:CC (match_dup 0)
(const_int 0)))]
"")
+
+;; Eqv operation.
+(define_insn "*eqv<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (not:GPR
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
+ ""
+ "eqv %0,%1,%2"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
+
+;; 128-bit logical operations expanders
+
+(define_expand "and<mode>3"
+ [(parallel [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (and:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "")
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
+ ""
+ "")
+
+(define_expand "ior<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
+ ""
+ "")
+
+(define_expand "xor<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
+ (match_operand:BOOL_128 2 "vlogical_operand" "")))]
+ ""
+ "")
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")))]
+ ""
+ "")
+
+(define_expand "nor<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (and:BOOL_128
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
+ ""
+ "")
+
+(define_expand "andc<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (and:BOOL_128
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
+ ""
+ "")
+
+;; Power8 vector logical instructions.
+(define_expand "eqv<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (not:BOOL_128
+ (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "")
+ (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
+ "")
+
+;; Rewrite nand into canonical form
+(define_expand "nand<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (ior:BOOL_128
+ (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" ""))
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))))]
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
+ "")
+
+;; The canonical form is to have the negated element first, so we need to
+;; reverse arguments.
+(define_expand "orc<mode>3"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "")
+ (ior:BOOL_128
+ (not:BOOL_128 (match_operand:BOOL_128 2 "vlogical_operand" ""))
+ (match_operand:BOOL_128 1 "vlogical_operand" "")))]
+ "<MODE>mode == TImode || <MODE>mode == PTImode || TARGET_P8_VECTOR"
+ "")
+
+;; 128-bit logical operations insns and split operations
+(define_insn_and_split "*and<mode>3_internal"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (and:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")))
+ (clobber (match_scratch:CC 3 "<BOOL_REGS_AND_CR0>"))]
+ ""
+{
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
+ return "xxland %x0,%x1,%x2";
+
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
+ return "vand %0,%1,%2";
+
+ return "#";
+}
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, AND, false, false, false, operands[3]);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
+;; 128-bit IOR/XOR
+(define_insn_and_split "*bool<mode>3_internal"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (match_operator:BOOL_128 3 "boolean_or_operator"
+ [(match_operand:BOOL_128 1 "vlogical_operand" "%<BOOL_REGS_OP1>")
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
+ ""
+{
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
+ return "xxl%q3 %x0,%x1,%x2";
+
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
+ return "v%q3 %0,%1,%2";
+
+ return "#";
+}
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, false,
+ NULL_RTX);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
+;; 128-bit ANDC/ORC
+(define_insn_and_split "*boolc<mode>3_internal1"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (match_operator:BOOL_128 3 "boolean_operator"
+ [(not:BOOL_128
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP1>"))
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
+{
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
+ return "xxl%q3 %x0,%x1,%x2";
+
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
+ return "v%q3 %0,%1,%2";
+
+ return "#";
+}
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
+ NULL_RTX);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
+(define_insn_and_split "*boolc<mode>3_internal2"
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
+ (match_operator:TI2 3 "boolean_operator"
+ [(not:TI2
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))]
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
+ "#"
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
+ NULL_RTX);
+ DONE;
+}
+ [(set_attr "type" "integer")
+ (set (attr "length")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16")))])
+
+;; 128-bit NAND/NOR
+(define_insn_and_split "*boolcc<mode>3_internal1"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (match_operator:BOOL_128 3 "boolean_operator"
+ [(not:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>"))
+ (not:BOOL_128
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))]))]
+ "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
+{
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
+ return "xxl%q3 %x0,%x1,%x2";
+
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
+ return "v%q3 %0,%1,%2";
+
+ return "#";
+}
+ "(TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND))
+ && reload_completed && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
+ NULL_RTX);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
+(define_insn_and_split "*boolcc<mode>3_internal2"
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
+ (match_operator:TI2 3 "boolean_operator"
+ [(not:TI2
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
+ (not:TI2
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))]))]
+ "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
+ "#"
+ "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, true,
+ NULL_RTX);
+ DONE;
+}
+ [(set_attr "type" "integer")
+ (set (attr "length")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16")))])
+
+
+;; 128-bit EQV
+(define_insn_and_split "*eqv<mode>3_internal1"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (not:BOOL_128
+ (xor:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))))]
+ "TARGET_P8_VECTOR"
+{
+ if (vsx_register_operand (operands[0], <MODE>mode))
+ return "xxleqv %x0,%x1,%x2";
+
+ return "#";
+}
+ "TARGET_P8_VECTOR && reload_completed
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
+(define_insn_and_split "*eqv<mode>3_internal2"
+ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
+ (not:TI2
+ (xor:TI2
+ (match_operand:TI2 1 "int_reg_operand" "r,0,r")
+ (match_operand:TI2 2 "int_reg_operand" "r,r,0"))))]
+ "!TARGET_P8_VECTOR"
+ "#"
+ "reload_completed && !TARGET_P8_VECTOR"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, XOR, true, false, false, NULL_RTX);
+ DONE;
+}
+ [(set_attr "type" "integer")
+ (set (attr "length")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16")))])
+
+;; 128-bit one's complement
+(define_insn_and_split "*one_cmpl<mode>3_internal"
+ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
+ (not:BOOL_128
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))]
+ ""
+{
+ if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
+ return "xxlnor %x0,%x1,%x1";
+
+ if (TARGET_ALTIVEC && altivec_register_operand (operands[0], <MODE>mode))
+ return "vnor %0,%1,%1";
+
+ return "#";
+}
+ "reload_completed && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ rs6000_split_logical (operands, NOT, false, false, false, NULL_RTX);
+ DONE;
+}
+ [(set (attr "type")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "vecsimple")
+ (const_string "integer")))
+ (set (attr "length")
+ (if_then_else
+ (match_test "vsx_register_operand (operands[0], <MODE>mode)")
+ (const_string "4")
+ (if_then_else
+ (match_test "TARGET_POWERPC64")
+ (const_string "8")
+ (const_string "16"))))])
+
;; Now define ways of moving data around.
@@ -7765,7 +8902,31 @@
mt%0 %1
mt%0 %1
nop"
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*")
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "mfjmpr")
+ (const_string "mtjmpr")
+ (const_string "*")
+ (const_string "*")])
+
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4")])
(define_insn "*movsi_internal1_single"
@@ -7787,7 +8948,44 @@
nop
stfs%U0%X0 %1,%0
lfs%U1%X1 %0,%1"
- [(set_attr "type" "*,*,load,store,*,*,*,mfjmpr,mtjmpr,*,*,*,*")
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "mfjmpr")
+ (const_string "mtjmpr")
+ (const_string "*")
+ (const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))])
(set_attr "length" "4,4,4,4,4,4,8,4,4,4,4,4,4")])
;; Split a load of a large constant into the appropriate two-insn
@@ -7822,7 +9020,7 @@
cmp<wd>i %2,%0,0
mr. %0,%1
#"
- [(set_attr "type" "cmp,compare,cmp")
+ [(set_attr "type" "cmp,fast_compare,cmp")
(set_attr "length" "4,4,8")])
(define_split
@@ -7850,7 +9048,26 @@
mf%1 %0
mt%0 %1
nop"
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "*")
+ (const_string "mfjmpr")
+ (const_string "mtjmpr")
+ (const_string "*")])])
(define_expand "mov<mode>"
[(set (match_operand:INT 0 "general_operand" "")
@@ -7871,7 +9088,26 @@
mf%1 %0
mt%0 %1
nop"
- [(set_attr "type" "*,load,store,*,mfjmpr,mtjmpr,*")])
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "*")
+ (const_string "mfjmpr")
+ (const_string "mtjmpr")
+ (const_string "*")])])
;; Here is how to move condition codes around. When we store CC data in
;; an integer register or memory, we store just the high-order 4 bits.
@@ -7899,7 +9135,7 @@
mf%1 %0
mt%0 %1
lwz%U1%X1 %0,%1
- stw%U0%U1 %1,%0"
+ stw%U0%X0 %1,%0"
[(set (attr "type")
(cond [(eq_attr "alternative" "0,3")
(const_string "cr_logical")
@@ -7912,9 +9148,23 @@
(eq_attr "alternative" "9")
(const_string "mtjmpr")
(eq_attr "alternative" "10")
- (const_string "load")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1],
+ VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
(eq_attr "alternative" "11")
- (const_string "store")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0],
+ VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
(match_test "TARGET_MFCRF")
(const_string "mfcrf")
]
@@ -7926,15 +9176,17 @@
;; can produce floating-point values in fixed-point registers. Unless the
;; value is a simple constant or already in memory, we deal with this by
;; allocating memory and copying the value explicitly via that memory location.
-(define_expand "movsf"
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
- (match_operand:SF 1 "any_operand" ""))]
- ""
- "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
+
+;; Move 32-bit binary/decimal floating point
+(define_expand "mov<mode>"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "")
+ (match_operand:FMOVE32 1 "any_operand" ""))]
+ "<fmove_ok>"
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
(define_split
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (match_operand:SF 1 "const_double_operand" ""))]
+ [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "")
+ (match_operand:FMOVE32 1 "const_double_operand" ""))]
"reload_completed
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
|| (GET_CODE (operands[0]) == SUBREG
@@ -7947,42 +9199,89 @@
REAL_VALUE_TYPE rv;
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ <real_value_to_target> (rv, l);
if (! TARGET_POWERPC64)
- operands[2] = operand_subword (operands[0], 0, 0, SFmode);
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
else
operands[2] = gen_lowpart (SImode, operands[0]);
operands[3] = gen_int_mode (l, SImode);
}")
-(define_insn "*movsf_hardfloat"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,!r,*h,!r,!r")
- (match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,h,0,G,Fn"))]
- "(gpc_reg_operand (operands[0], SFmode)
- || gpc_reg_operand (operands[1], SFmode))
+(define_insn "mov<mode>_hardfloat"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))]
+ "(gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
"@
mr %0,%1
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0
fmr %0,%1
- lfs%U1%X1 %0,%1
- stfs%U0%X0 %1,%0
+ xxlor %x0,%x1,%x1
+ xxlxor %x0,%x0,%x0
+ <f32_li>
+ <f32_si>
+ <f32_lv>
+ <f32_sv>
+ mtvsrwz %x0,%1
+ mfvsrwz %0,%x1
mt%0 %1
mf%1 %0
nop
#
#"
- [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8")])
-
-(define_insn "*movsf_softfloat"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
- (match_operand:SF 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
- "(gpc_reg_operand (operands[0], SFmode)
- || gpc_reg_operand (operands[1], SFmode))
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "fp")
+ (const_string "vecsimple")
+ (const_string "vecsimple")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (const_string "fpload")
+ (const_string "fpstore")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mtjmpr")
+ (const_string "mfjmpr")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
+
+(define_insn "*mov<mode>_softfloat"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
+ "(gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
"@
mr %0,%1
@@ -7995,19 +9294,42 @@
#
#
nop"
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*")
+ [(set_attr_alternative "type"
+ [(const_string "*")
+ (const_string "mtjmpr")
+ (const_string "mfjmpr")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")])
(set_attr "length" "4,4,4,4,4,4,4,4,8,4")])
-(define_expand "movdf"
- [(set (match_operand:DF 0 "nonimmediate_operand" "")
- (match_operand:DF 1 "any_operand" ""))]
+;; Move 64-bit binary/decimal floating point
+(define_expand "mov<mode>"
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "")
+ (match_operand:FMOVE64 1 "any_operand" ""))]
""
- "{ rs6000_emit_move (operands[0], operands[1], DFmode); DONE; }")
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
(define_split
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (match_operand:DF 1 "const_int_operand" ""))]
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
+ (match_operand:FMOVE64 1 "const_int_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
|| (GET_CODE (operands[0]) == SUBREG
@@ -8020,8 +9342,8 @@
int endian = (WORDS_BIG_ENDIAN == 0);
HOST_WIDE_INT value = INTVAL (operands[1]);
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
#if HOST_BITS_PER_WIDE_INT == 32
operands[4] = (value & 0x80000000) ? constm1_rtx : const0_rtx;
#else
@@ -8031,8 +9353,8 @@
}")
(define_split
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (match_operand:DF 1 "const_double_operand" ""))]
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
|| (GET_CODE (operands[0]) == SUBREG
@@ -8047,17 +9369,17 @@
REAL_VALUE_TYPE rv;
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+ <real_value_to_target> (rv, l);
- operands[2] = operand_subword (operands[0], endian, 0, DFmode);
- operands[3] = operand_subword (operands[0], 1 - endian, 0, DFmode);
+ operands[2] = operand_subword (operands[0], endian, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1 - endian, 0, <MODE>mode);
operands[4] = gen_int_mode (l[endian], SImode);
operands[5] = gen_int_mode (l[1 - endian], SImode);
}")
(define_split
- [(set (match_operand:DF 0 "gpc_reg_operand" "")
- (match_operand:DF 1 "const_double_operand" ""))]
+ [(set (match_operand:FMOVE64 0 "gpc_reg_operand" "")
+ (match_operand:FMOVE64 1 "const_double_operand" ""))]
"TARGET_POWERPC64 && reload_completed
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
|| (GET_CODE (operands[0]) == SUBREG
@@ -8074,7 +9396,7 @@
#endif
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+ <real_value_to_target> (rv, l);
operands[2] = gen_lowpart (DImode, operands[0]);
/* HIGHPART is lower memory address when WORDS_BIG_ENDIAN. */
@@ -8099,21 +9421,18 @@
;; since the D-form version of the memory instructions does not need a GPR for
;; reloading.
-(define_insn "*movdf_hardfloat32"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,!r,!r,!r")
- (match_operand:DF 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,G,H,F"))]
+(define_insn "*mov<mode>_hardfloat32"
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && (gpc_reg_operand (operands[0], DFmode)
- || gpc_reg_operand (operands[1], DFmode))"
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"@
stfd%U0%X0 %1,%0
lfd%U1%X1 %0,%1
fmr %0,%1
lxsd%U1x %x0,%y1
- lxsd%U1x %x0,%y1
stxsd%U0x %x1,%y0
- stxsd%U0x %x1,%y0
- xxlor %x0,%x1,%x1
xxlor %x0,%x1,%x1
xxlxor %x0,%x0,%x0
#
@@ -8122,115 +9441,141 @@
#
#
#"
- [(set_attr "type" "fpstore,fpload,fp,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,store,load,two,fp,fp,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8,8,8,8,12,16")])
-
-(define_insn "*movdf_softfloat32"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
- (match_operand:DF 1 "input_operand" "r,Y,r,G,H,F"))]
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (const_string "fp")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload"))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (const_string "fpstore"))
+ (const_string "vecsimple")
+ (const_string "vecsimple")
+ (const_string "store")
+ (const_string "load")
+ (const_string "two")
+ (const_string "fp")
+ (const_string "fp")
+ (const_string "*")])
+ (set_attr "length" "4,4,4,4,4,4,4,8,8,8,8,12,16")])
+
+(define_insn "*mov<mode>_softfloat32"
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,r,r,r")
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,G,H,F"))]
"! TARGET_POWERPC64
&& ((TARGET_FPRS && TARGET_SINGLE_FLOAT)
- || TARGET_SOFT_FLOAT || TARGET_E500_SINGLE)
- && (gpc_reg_operand (operands[0], DFmode)
- || gpc_reg_operand (operands[1], DFmode))"
+ || TARGET_SOFT_FLOAT || TARGET_E500_SINGLE
+ || (<MODE>mode == DDmode && TARGET_E500_DOUBLE))
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "store,load,two,*,*,*")
(set_attr "length" "8,8,8,8,12,16")])
-;; Reload patterns to support gpr load/store with misaligned mem.
-;; and multiple gpr load/store at offset >= 0xfffc
-(define_expand "reload_<mode>_store"
- [(parallel [(match_operand 0 "memory_operand" "=m")
- (match_operand 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "register_operand" "=&b")])]
- ""
-{
- rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
- DONE;
-})
-
-(define_expand "reload_<mode>_load"
- [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
- (match_operand 1 "memory_operand" "m")
- (match_operand:GPR 2 "register_operand" "=b")])]
- ""
-{
- rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
- DONE;
-})
-
-; ld/std require word-aligned displacements -> 'Y' constraint.
-; List Y->r and r->Y before r->r for reload.
-(define_insn "*movdf_hardfloat64_mfpgpr"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,!r,ws,?wa,ws,?wa,Z,?Z,m,d,d,wa,*c*l,!r,*h,!r,!r,!r,r,d")
- (match_operand:DF 1 "input_operand" "r,Y,r,ws,?wa,Z,Z,ws,wa,d,m,d,j,r,h,0,G,H,F,d,r"))]
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_DOUBLE_FLOAT
- && (gpc_reg_operand (operands[0], DFmode)
- || gpc_reg_operand (operands[1], DFmode))"
- "@
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- xxlor %x0,%x1,%x1
- xxlor %x0,%x1,%x1
- lxsd%U1x %x0,%y1
- lxsd%U1x %x0,%y1
- stxsd%U0x %x1,%y0
- stxsd%U0x %x1,%y0
- stfd%U0%X0 %1,%0
- lfd%U1%X1 %0,%1
- fmr %0,%1
- xxlxor %x0,%x0,%x0
- mt%0 %1
- mf%1 %0
- nop
- #
- #
- #
- mftgpr %0,%1
- mffgpr %0,%1"
- [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fpstore,fpload,fp,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
-
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
-(define_insn "*movdf_hardfloat64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,d,d,Y,r,!r,ws,?wa,Z,?Z,ws,?wa,wa,*c*l,!r,*h,!r,!r,!r")
- (match_operand:DF 1 "input_operand" "d,m,d,r,Y,r,Z,Z,ws,wa,ws,wa,j,r,h,0,G,H,F"))]
- "TARGET_POWERPC64 && !TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && TARGET_DOUBLE_FLOAT
- && (gpc_reg_operand (operands[0], DFmode)
- || gpc_reg_operand (operands[1], DFmode))"
+(define_insn "*mov<mode>_hardfloat64"
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
+ "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"@
stfd%U0%X0 %1,%0
lfd%U1%X1 %0,%1
fmr %0,%1
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- lxsd%U1x %x0,%y1
lxsd%U1x %x0,%y1
stxsd%U0x %x1,%y0
- stxsd%U0x %x1,%y0
- xxlor %x0,%x1,%x1
xxlor %x0,%x1,%x1
xxlxor %x0,%x0,%x0
+ std%U0%X0 %1,%0
+ ld%U1%X1 %0,%1
+ mr %0,%1
mt%0 %1
mf%1 %0
nop
#
#
- #"
- [(set_attr "type" "fpstore,fpload,fp,store,load,*,fpload,fpload,fpstore,fpstore,vecsimple,vecsimple,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
-
-(define_insn "*movdf_softfloat64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
- (match_operand:DF 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
+ #
+ mftgpr %0,%1
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (const_string "fp")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload"))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (const_string "fpstore"))
+ (const_string "vecsimple")
+ (const_string "vecsimple")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mtjmpr")
+ (const_string "mfjmpr")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
+ (const_string "mffgpr")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
+
+(define_insn "*mov<mode>_softfloat64"
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
+ (match_operand:FMOVE64 1 "input_operand" "r,Y,r,r,h,G,H,F,0"))]
"TARGET_POWERPC64 && (TARGET_SOFT_FLOAT || !TARGET_FPRS)
- && (gpc_reg_operand (operands[0], DFmode)
- || gpc_reg_operand (operands[1], DFmode))"
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"@
std%U0%X0 %1,%0
ld%U1%X1 %0,%1
@@ -8241,38 +9586,87 @@
#
#
nop"
- [(set_attr "type" "store,load,*,mtjmpr,mfjmpr,*,*,*,*")
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mtjmpr")
+ (const_string "mfjmpr")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")])
(set_attr "length" "4,4,4,4,4,8,12,16,4")])
-(define_expand "movtf"
- [(set (match_operand:TF 0 "general_operand" "")
- (match_operand:TF 1 "any_operand" ""))]
- "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
- "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
+(define_expand "mov<mode>"
+ [(set (match_operand:FMOVE128 0 "general_operand" "")
+ (match_operand:FMOVE128 1 "any_operand" ""))]
+ ""
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
;; It's important to list Y->r and r->Y before r->r because otherwise
;; reload, given m->r, will try to pick r->r and reload it, which
;; doesn't make progress.
-(define_insn_and_split "*movtf_internal"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
- (match_operand:TF 1 "input_operand" "d,m,d,r,YGHF,r"))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
- && (gpc_reg_operand (operands[0], TFmode)
- || gpc_reg_operand (operands[1], TFmode))"
+
+;; We can't split little endian direct moves of TDmode, because the words are
+;; not swapped as they are for TImode or TFmode. Subregs are therefore
+;; problematic. Don't allow direct moves for this case.
+
+(define_insn_and_split "*mov<mode>_64bit_dm"
+ [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r,r,wm")
+ (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r,wm,r"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
+ && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "8,8,8,12,12,8,8,8")])
+
+(define_insn_and_split "*movtd_64bit_nodm"
+ [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
+ (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN
+ && (gpc_reg_operand (operands[0], TDmode)
+ || gpc_reg_operand (operands[1], TDmode))"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
+ [(set_attr "length" "8,8,8,12,12,8")])
+
+(define_insn_and_split "*mov<mode>_32bit"
+ [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
+ (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"#"
"&& reload_completed"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
[(set_attr "length" "8,8,8,20,20,16")])
-(define_insn_and_split "*movtf_softfloat"
- [(set (match_operand:TF 0 "rs6000_nonimmediate_operand" "=Y,r,r")
- (match_operand:TF 1 "input_operand" "r,YGHF,r"))]
- "!TARGET_IEEEQUAD
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
- && (gpc_reg_operand (operands[0], TFmode)
- || gpc_reg_operand (operands[1], TFmode))"
+(define_insn_and_split "*mov<mode>_softfloat"
+ [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r")
+ (match_operand:FMOVE128 1 "input_operand" "r,YGHF,r"))]
+ "(TARGET_SOFT_FLOAT || !TARGET_FPRS)
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"#"
"&& reload_completed"
[(pc)]
@@ -8557,6 +9951,252 @@
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")
+;; Reload helper functions used by rs6000_secondary_reload. The patterns all
+;; must have 3 arguments, and the scratch register constraint must be a
+;; single constraint.
+
+;; Reload patterns to support gpr load/store with misaligned mem
+;; and multiple gpr load/store at offsets >= 0xfffc.
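+;;
+;; For instance (a sketch of the mechanism, not output of this patch): an
+;; 8-byte gpr access at offset 0xfffc has its second word at offset 0x10000,
+;; which a 16-bit displacement can no longer reach from the same base, so
+;; the address is first formed in the scratch base register and both words
+;; are accessed relative to it.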
+(define_expand "reload_<mode>_store"
+ [(parallel [(match_operand 0 "memory_operand" "=m")
+ (match_operand 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "register_operand" "=&b")])]
+ ""
+{
+ rs6000_secondary_reload_gpr (operands[1], operands[0], operands[2], true);
+ DONE;
+})
+
+(define_expand "reload_<mode>_load"
+ [(parallel [(match_operand 0 "gpc_reg_operand" "=r")
+ (match_operand 1 "memory_operand" "m")
+ (match_operand:GPR 2 "register_operand" "=b")])]
+ ""
+{
+ rs6000_secondary_reload_gpr (operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+
+;; Power8 merge instructions to allow direct move to/from floating point
+;; registers in 32-bit mode. We use TF mode to get two registers to move
+;; the individual 32-bit parts across. Subreg doesn't work well on the TF
+;; value, since it is allocated in reload and not all of the flow
+;; information is set up for it. We have two patterns to do the two moves
+;; between gprs and fprs. There isn't a dependency between the two, but we
+;; could potentially schedule other instructions between the two
+;; instructions. TFmode is currently limited to traditional FPR registers.
+;; If/when this is changed, we will need to revisit %L to make sure it
+;; works with VSX registers, or add an %x version of %L.
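+;;
+;; A sketch of the sequence these patterns generate (register numbers are
+;; illustrative only), moving the gpr pair r3:r4 into fpr f2, with f0:f1 as
+;; the TFmode scratch:
+;;     mtvsrwz f0,r3     ; p8_mtvsrwz_1: high word into the scratch
+;;     mtvsrwz f1,r4     ; p8_mtvsrwz_2: low word into the pair's second reg
+;;     fmrgow  f2,f0,f1  ; p8_fmrgow: merge the two words into the result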
+
+(define_insn "p8_fmrgow_<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
+ UNSPEC_P8V_FMRGOW))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "fmrgow %0,%1,%L1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_mtvsrwz_1"
+ [(set (match_operand:TF 0 "register_operand" "=d")
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrwz_2"
+ [(set (match_operand:TF 0 "register_operand" "+d")
+ (unspec:TF [(match_dup 0)
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
+
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move 128-bit values from GPRs to VSX registers in 64-bit mode
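+;;
+;; A sketch of the generated sequence (illustrative registers), moving the
+;; gpr pair r3:r4 into vs32, with the fpr pair f0:f1 as the TFmode scratch:
+;;     mtvsrd   f0,r3          ; p8_mtvsrd_1: high doubleword
+;;     mtvsrd   f1,r4          ; p8_mtvsrd_2: low doubleword
+;;     xxpermdi vs32,vs0,vs1,0 ; combine the two doublewords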
+(define_insn "p8_mtvsrd_1"
+ [(set (match_operand:TF 0 "register_operand" "=ws")
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_2"
+ [(set (match_operand:TF 0 "register_operand" "+ws")
+ (unspec:TF [(match_dup 0)
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_xxpermdi_<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+ UNSPEC_P8V_XXPERMDI))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "xxpermdi %x0,%1,%L1,0"
+ [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+(define_split
+ [(set (match_operand:FMOVE128_GPR 0 "nonimmediate_operand" "")
+ (match_operand:FMOVE128_GPR 1 "input_operand" ""))]
+ "reload_completed
+ && (int_reg_operand (operands[0], <MODE>mode)
+ || int_reg_operand (operands[1], <MODE>mode))"
+ [(pc)]
+{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
+
+;; Move SFmode to a VSX register from a GPR. Because the scalar floating
+;; point type is stored internally as double precision in the VSX registers,
+;; we have to convert it from the vector format.
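+;;
+;; A sketch of the resulting sequence (illustrative registers, assuming r3
+;; holds the SFmode bit image and vs1 is the destination):
+;;     sldi      r0,r3,32   ; shift the SF image into the upper 32 bits
+;;     mtvsrd    vs1,r0     ; direct move into the VSX register
+;;     xscvspdpn vs1,vs1    ; convert single to double (non-signalling)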
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+ [(set (match_operand:SF 0 "register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_move_insn (op0_di, op2);
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "two")])
+
+;; Move 128-bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move of the upper doubleword, followed by an xxpermdi to
+;; get the bottom 64-bit value, and then moving that across as well.
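+;;
+;; A sketch with illustrative registers, moving vs0 into the gpr pair r3:r4
+;; (vs1 is the scratch):
+;;     mfvsrd   r3,vs0        ; upper 64 bits via a direct move
+;;     xxpermdi vs1,vs0,vs0,3 ; rotate the bottom 64 bits to the top
+;;     mfvsrd   r4,vs1        ; and move them across as well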
+(define_insn "p8_mfvsrd_3_<mode>"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register. Because the scalar floating
+;; point type is stored internally as double precision, we have to convert
+;; it to the vector format first.
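+;;
+;; A sketch with illustrative registers (vs1 holds the SF value, r3 is the
+;; destination, vs0 is the V4SF scratch):
+;;     xscvdpspn vs0,vs1  ; convert double to single (non-signalling)
+;;     mfvsrd    r3,vs0   ; direct move of the 64-bit image
+;;     srdi      r3,r3,32 ; the SF bits land in the low word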
+
+(define_insn_and_split "reload_gpr_from_vsxsf"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
+
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+(define_insn "p8_mfvsrd_4_disf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+
;; Next come the multi-word integer load and store and the load and store
;; multiple insns.
@@ -8565,8 +10205,8 @@
;; Use of fprs is disparaged slightly otherwise reload prefers to reload
;; a gpr into a fpr instead of reloading an invalid 'Y' address
(define_insn "*movdi_internal32"
- [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r,?wa")
- (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF,O"))]
+ [(set (match_operand:DI 0 "rs6000_nonimmediate_operand" "=Y,r,r,?m,?*d,?*d,r")
+ (match_operand:DI 1 "input_operand" "r,Y,r,d,m,d,IJKnGHF"))]
"! TARGET_POWERPC64
&& (gpc_reg_operand (operands[0], DImode)
|| gpc_reg_operand (operands[1], DImode))"
@@ -8577,15 +10217,34 @@
stfd%U0%X0 %1,%0
lfd%U1%X1 %0,%1
fmr %0,%1
- #
- xxlxor %x0,%x0,%x0"
- [(set_attr "type" "store,load,*,fpstore,fpload,fp,*,vecsimple")])
+ #"
+ [(set_attr_alternative "type"
+ [(const_string "store")
+ (const_string "load")
+ (const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (const_string "fp")
+ (const_string "*")])])
(define_split
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(match_operand:DI 1 "const_int_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 1))]
"
@@ -8607,38 +10266,15 @@
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
(match_operand:DIFD 1 "input_operand" ""))]
"reload_completed && !TARGET_POWERPC64
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
-(define_insn "*movdi_mfpgpr"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*d")
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*d,r"))]
- "TARGET_POWERPC64 && TARGET_MFPGPR && TARGET_HARD_FLOAT && TARGET_FPRS
- && (gpc_reg_operand (operands[0], DImode)
- || gpc_reg_operand (operands[1], DImode))"
- "@
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- li %0,%1
- lis %0,%v1
- #
- stfd%U0%X0 %1,%0
- lfd%U1%X1 %0,%1
- fmr %0,%1
- mf%1 %0
- mt%0 %1
- nop
- mftgpr %0,%1
- mffgpr %0,%1"
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,mftgpr,mffgpr")
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4")])
-
(define_insn "*movdi_internal64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,?wa")
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,O"))]
- "TARGET_POWERPC64 && (!TARGET_MFPGPR || !TARGET_HARD_FLOAT || !TARGET_FPRS)
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,r,*h,*h,r,?*wg,r,?*wm")
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,*h,r,0,*wg,r,*wm,r"))]
+ "TARGET_POWERPC64
&& (gpc_reg_operand (operands[0], DImode)
|| gpc_reg_operand (operands[1], DImode))"
"@
@@ -8654,9 +10290,52 @@
mf%1 %0
mt%0 %1
nop
- xxlxor %x0,%x0,%x0"
- [(set_attr "type" "store,load,*,*,*,*,fpstore,fpload,fp,mfjmpr,mtjmpr,*,vecsimple")
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4")])
+ mftgpr %0,%1
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (const_string "*")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "fpstore_u")
+ (const_string "fpstore")))
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_u")
+ (const_string "fpload")))
+ (const_string "fp")
+ (const_string "mfjmpr")
+ (const_string "mtjmpr")
+ (const_string "*")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
+ (const_string "mffgpr")])
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4")])
;; immediate value valid for a single instruction hiding in a const_double
(define_insn ""
@@ -8719,14 +10398,16 @@
FAIL;
}")
-;; TImode is similar, except that we usually want to compute the address into
-;; a register and use lsi/stsi (the exception is during reload).
+;; TImode/PTImode is similar, except that we usually want to compute the
+;; address into a register and use lsi/stsi (the exception is during reload).
-(define_insn "*movti_string"
- [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
- (match_operand:TI 1 "input_operand" "r,r,Q,Y,r,n"))]
+(define_insn "*mov<mode>_string"
+ [(set (match_operand:TI2 0 "reg_or_mem_operand" "=Q,Y,????r,????r,????r,r")
+ (match_operand:TI2 1 "input_operand" "r,r,Q,Y,r,n"))]
"! TARGET_POWERPC64
- && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
+ && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))"
"*
{
switch (which_alternative)
@@ -8756,27 +10437,32 @@
(const_string "always")
(const_string "conditional")))])
-(define_insn "*movti_ppc64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Y,r,r")
- (match_operand:TI 1 "input_operand" "r,Y,r"))]
- "(TARGET_POWERPC64 && (gpc_reg_operand (operands[0], TImode)
- || gpc_reg_operand (operands[1], TImode)))
- && VECTOR_MEM_NONE_P (TImode)"
- "#"
- [(set_attr "type" "store,load,*")])
+(define_insn "*mov<mode>_ppc64"
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+ (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,n"))]
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
+ && (gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode)))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "store,store,load,load,*,*")
+ (set_attr "length" "8")])
(define_split
- [(set (match_operand:TI 0 "gpc_reg_operand" "")
- (match_operand:TI 1 "const_double_operand" ""))]
- "TARGET_POWERPC64 && VECTOR_MEM_NONE_P (TImode)"
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
+ (match_operand:TI2 1 "const_double_operand" ""))]
+ "TARGET_POWERPC64
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"
{
operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
- TImode);
+ <MODE>mode);
operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
- TImode);
+ <MODE>mode);
if (GET_CODE (operands[1]) == CONST_DOUBLE)
{
operands[4] = GEN_INT (CONST_DOUBLE_HIGH (operands[1]));
@@ -8792,10 +10478,12 @@
}")
(define_split
- [(set (match_operand:TI 0 "nonimmediate_operand" "")
- (match_operand:TI 1 "input_operand" ""))]
- "reload_completed && VECTOR_MEM_NONE_P (TImode)
- && gpr_or_gpr_p (operands[0], operands[1])"
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "")
+ (match_operand:TI2 1 "input_operand" ""))]
+ "reload_completed
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
@@ -9651,7 +11339,7 @@
(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
UNSPEC_TLSGD)
(clobber (reg:SI LR_REGNO))]
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
{
if (TARGET_CMODEL != CMODEL_SMALL)
return "addis %0,%1,%2@got@tlsgd@ha\;addi %0,%0,%2@got@tlsgd@l\;"
@@ -9723,7 +11411,7 @@
(unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGD)))
(set (match_dup 0)
(lo_sum:TLSmode (match_dup 3)
- (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGD)))]
+ (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGD)))]
"
{
operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
@@ -9746,7 +11434,8 @@
(define_insn "*tls_gd_low<TLSmode:tls_abi_suffix>"
[(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
(lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
- (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
UNSPEC_TLSGD)))]
"HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
"addi %0,%1,%2@got@tlsgd@l"
@@ -9759,7 +11448,8 @@
(unspec:TLSmode [(match_operand:TLSmode 3 "rs6000_tls_symbol_ref" "")]
UNSPEC_TLSGD)
(clobber (reg:SI LR_REGNO))]
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
"bl %z1(%3@tlsgd)\;nop"
[(set_attr "type" "branch")
(set_attr "length" "8")])
@@ -9791,7 +11481,7 @@
(unspec:TLSmode [(match_operand:TLSmode 1 "gpc_reg_operand" "b")]
UNSPEC_TLSLD)
(clobber (reg:SI LR_REGNO))]
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX"
+ "HAVE_AS_TLS && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
{
if (TARGET_CMODEL != CMODEL_SMALL)
return "addis %0,%1,%&@got@tlsld@ha\;addi %0,%0,%&@got@tlsld@l\;"
@@ -9858,7 +11548,7 @@
(unspec:TLSmode [(const_int 0) (match_dup 1)] UNSPEC_TLSLD)))
(set (match_dup 0)
(lo_sum:TLSmode (match_dup 2)
- (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))]
+ (unspec:TLSmode [(const_int 0) (match_dup 1)] UNSPEC_TLSLD)))]
"
{
operands[2] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
@@ -9881,7 +11571,9 @@
(define_insn "*tls_ld_low<TLSmode:tls_abi_suffix>"
[(set (match_operand:TLSmode 0 "gpc_reg_operand" "=b")
(lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
- (unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)))]
+ (unspec:TLSmode [(const_int 0)
+ (match_operand:TLSmode 2 "gpc_reg_operand" "b")]
+ UNSPEC_TLSLD)))]
"HAVE_AS_TLS && TARGET_TLS_MARKERS && TARGET_CMODEL != CMODEL_SMALL"
"addi %0,%1,%&@got@tlsld@l"
[(set_attr "length" "4")])
@@ -9892,7 +11584,8 @@
(match_operand 2 "" "g")))
(unspec:TLSmode [(const_int 0)] UNSPEC_TLSLD)
(clobber (reg:SI LR_REGNO))]
- "HAVE_AS_TLS && DEFAULT_ABI == ABI_AIX && TARGET_TLS_MARKERS"
+ "HAVE_AS_TLS && TARGET_TLS_MARKERS
+ && (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)"
"bl %z1(%&@tlsld)\;nop"
[(set_attr "type" "branch")
(set_attr "length" "8")])
@@ -9953,7 +11646,7 @@
(unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTDTPREL)))
(set (match_dup 0)
(lo_sum:TLSmode (match_dup 3)
- (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTDTPREL)))]
+ (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTDTPREL)))]
"
{
operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
@@ -9976,7 +11669,8 @@
(define_insn "*tls_got_dtprel_low<TLSmode:tls_abi_suffix>"
[(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
(lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
- (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
UNSPEC_TLSGOTDTPREL)))]
"HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
"l<TLSmode:tls_insn_suffix> %0,%2@got@dtprel@l(%1)"
@@ -10022,7 +11716,7 @@
(unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTTPREL)))
(set (match_dup 0)
(lo_sum:TLSmode (match_dup 3)
- (unspec:TLSmode [(match_dup 2)] UNSPEC_TLSGOTTPREL)))]
+ (unspec:TLSmode [(match_dup 1) (match_dup 2)] UNSPEC_TLSGOTTPREL)))]
"
{
operands[3] = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
@@ -10045,7 +11739,8 @@
(define_insn "*tls_got_tprel_low<TLSmode:tls_abi_suffix>"
[(set (match_operand:TLSmode 0 "gpc_reg_operand" "=r")
(lo_sum:TLSmode (match_operand:TLSmode 1 "gpc_reg_operand" "b")
- (unspec:TLSmode [(match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
+ (unspec:TLSmode [(match_operand:TLSmode 3 "gpc_reg_operand" "b")
+ (match_operand:TLSmode 2 "rs6000_tls_symbol_ref" "")]
UNSPEC_TLSGOTTPREL)))]
"HAVE_AS_TLS && TARGET_CMODEL != CMODEL_SMALL"
"l<TLSmode:tls_insn_suffix> %0,%2@got@tprel@l(%1)"
@@ -10261,7 +11956,7 @@
[(parallel [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(unspec:SI [(const_int 0)] UNSPEC_TOC))
(use (reg:SI 2))])]
- "DEFAULT_ABI == ABI_AIX && TARGET_32BIT"
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_32BIT"
"*
{
char buf[30];
@@ -10276,7 +11971,7 @@
[(parallel [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(unspec:DI [(const_int 0)] UNSPEC_TOC))
(use (reg:DI 2))])]
- "DEFAULT_ABI == ABI_AIX && TARGET_64BIT"
+ "(DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) && TARGET_64BIT"
"*
{
char buf[30];
@@ -10306,7 +12001,7 @@
[(parallel [(set (reg:SI LR_REGNO)
(match_operand:SI 0 "immediate_operand" "s"))
(use (unspec [(match_dup 0)] UNSPEC_TOC))])]
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
"")
@@ -10314,7 +12009,7 @@
[(set (reg:SI LR_REGNO)
(match_operand:SI 0 "immediate_operand" "s"))
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
"bcl 20,31,%0\\n%0:"
[(set_attr "type" "branch")
@@ -10324,7 +12019,7 @@
[(set (reg:SI LR_REGNO)
(match_operand:SI 0 "immediate_operand" "s"))
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
"*
{
@@ -10344,7 +12039,7 @@
(label_ref (match_operand 1 "" ""))]
UNSPEC_TOCPTR))
(match_dup 1)])]
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
"")
(define_insn "load_toc_v4_PIC_1b_normal"
@@ -10353,7 +12048,7 @@
(label_ref (match_operand 1 "" ""))]
UNSPEC_TOCPTR))
(match_dup 1)]
- "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
"bcl 20,31,$+8\;.long %0-$"
[(set_attr "type" "branch")
(set_attr "length" "8")])
@@ -10364,7 +12059,7 @@
(label_ref (match_operand 1 "" ""))]
UNSPEC_TOCPTR))
(match_dup 1)]
- "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
"*
{
char name[32];
@@ -10382,7 +12077,7 @@
(mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")
(minus:SI (match_operand:SI 2 "immediate_operand" "s")
(match_operand:SI 3 "immediate_operand" "s")))))]
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2"
"lwz %0,%2-%3(%1)"
[(set_attr "type" "load")])
@@ -10392,7 +12087,7 @@
(high:SI
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
(match_operand:SI 3 "symbol_ref_operand" "s")))))]
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
"addis %0,%1,%2-%3@ha")
(define_insn "load_toc_v4_PIC_3c"
@@ -10400,7 +12095,7 @@
(lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b")
(minus:SI (match_operand:SI 2 "symbol_ref_operand" "s")
(match_operand:SI 3 "symbol_ref_operand" "s"))))]
- "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic"
+ "TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic"
"addi %0,%1,%2-%3@l")
;; If the TOC is shared over a translation unit, as happens with all
@@ -10542,8 +12237,13 @@
operands[0] = XEXP (operands[0], 0);
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ rs6000_call_aix (NULL_RTX, operands[0], operands[1], operands[2]);
+ DONE;
+ }
+
if (GET_CODE (operands[0]) != SYMBOL_REF
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[0]))
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[2]) & CALL_LONG) != 0))
{
if (INTVAL (operands[2]) & CALL_LONG)
@@ -10556,12 +12256,6 @@
operands[0] = force_reg (Pmode, operands[0]);
break;
- case ABI_AIX:
- /* AIX function pointers are really pointers to a three word
- area. */
- rs6000_call_indirect_aix (NULL_RTX, operands[0], operands[1]);
- DONE;
-
default:
gcc_unreachable ();
}
@@ -10587,8 +12281,13 @@
operands[1] = XEXP (operands[1], 0);
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ rs6000_call_aix (operands[0], operands[1], operands[2], operands[3]);
+ DONE;
+ }
+
if (GET_CODE (operands[1]) != SYMBOL_REF
- || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (operands[1]))
|| (DEFAULT_ABI != ABI_DARWIN && (INTVAL (operands[3]) & CALL_LONG) != 0))
{
if (INTVAL (operands[3]) & CALL_LONG)
@@ -10601,12 +12300,6 @@
operands[1] = force_reg (Pmode, operands[1]);
break;
- case ABI_AIX:
- /* AIX function pointers are really pointers to a three word
- area. */
- rs6000_call_indirect_aix (operands[0], operands[1], operands[2]);
- DONE;
-
default:
gcc_unreachable ();
}
@@ -10698,135 +12391,6 @@
[(set_attr "type" "branch")
(set_attr "length" "4,8")])
-;; Call to indirect functions with the AIX abi using a 3 word descriptor.
-;; Operand0 is the addresss of the function to call
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
-;; Operand2 is the location in the function descriptor to load r2 from
-;; Operand3 is the stack location to hold the current TOC pointer
-
-(define_insn "call_indirect_aix<ptrsize>"
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
- (match_operand 1 "" "g,g"))
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
- (use (reg:P STATIC_CHAIN_REGNUM))
- (clobber (reg:P LR_REGNO))]
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
- [(set_attr "type" "jmpreg")
- (set_attr "length" "12")])
-
-;; Like call_indirect_aix<ptrsize>, but no use of the static chain
-;; Operand0 is the addresss of the function to call
-;; Operand1 is the flag for System V.4 for unprototyped or FP registers
-;; Operand2 is the location in the function descriptor to load r2 from
-;; Operand3 is the stack location to hold the current TOC pointer
-
-(define_insn "call_indirect_aix<ptrsize>_nor11"
- [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
- (match_operand 1 "" "g,g"))
- (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
- (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
- (clobber (reg:P LR_REGNO))]
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
- "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
- [(set_attr "type" "jmpreg")
- (set_attr "length" "12")])
-
-;; Operand0 is the return result of the function
-;; Operand1 is the addresss of the function to call
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
-;; Operand3 is the location in the function descriptor to load r2 from
-;; Operand4 is the stack location to hold the current TOC pointer
-
-(define_insn "call_value_indirect_aix<ptrsize>"
- [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
- (match_operand 2 "" "g,g")))
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
- (use (reg:P STATIC_CHAIN_REGNUM))
- (clobber (reg:P LR_REGNO))]
- "DEFAULT_ABI == ABI_AIX && TARGET_POINTERS_TO_NESTED_FUNCTIONS"
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
- [(set_attr "type" "jmpreg")
- (set_attr "length" "12")])
-
-;; Like call_value_indirect_aix<ptrsize>, but no use of the static chain
-;; Operand0 is the return result of the function
-;; Operand1 is the addresss of the function to call
-;; Operand2 is the flag for System V.4 for unprototyped or FP registers
-;; Operand3 is the location in the function descriptor to load r2 from
-;; Operand4 is the stack location to hold the current TOC pointer
-
-(define_insn "call_value_indirect_aix<ptrsize>_nor11"
- [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
- (match_operand 2 "" "g,g")))
- (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
- (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
- (clobber (reg:P LR_REGNO))]
- "DEFAULT_ABI == ABI_AIX && !TARGET_POINTERS_TO_NESTED_FUNCTIONS"
- "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
- [(set_attr "type" "jmpreg")
- (set_attr "length" "12")])
-
-;; Call to function which may be in another module. Restore the TOC
-;; pointer (r2) after the call unless this is System V.
-;; Operand2 is nonzero if we are using the V.4 calling sequence and
-;; either the function was not prototyped, or it was prototyped as a
-;; variable argument function. It is > 0 if FP registers were passed
-;; and < 0 if they were not.
-
-(define_insn "*call_nonlocal_aix32"
- [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "s"))
- (match_operand 1 "" "g"))
- (use (match_operand:SI 2 "immediate_operand" "O"))
- (clobber (reg:SI LR_REGNO))]
- "TARGET_32BIT
- && DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
- "bl %z0\;nop"
- [(set_attr "type" "branch")
- (set_attr "length" "8")])
-
-(define_insn "*call_nonlocal_aix64"
- [(call (mem:SI (match_operand:DI 0 "symbol_ref_operand" "s"))
- (match_operand 1 "" "g"))
- (use (match_operand:SI 2 "immediate_operand" "O"))
- (clobber (reg:SI LR_REGNO))]
- "TARGET_64BIT
- && DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
- "bl %z0\;nop"
- [(set_attr "type" "branch")
- (set_attr "length" "8")])
-
-(define_insn "*call_value_nonlocal_aix32"
- [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "s"))
- (match_operand 2 "" "g")))
- (use (match_operand:SI 3 "immediate_operand" "O"))
- (clobber (reg:SI LR_REGNO))]
- "TARGET_32BIT
- && DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
- "bl %z1\;nop"
- [(set_attr "type" "branch")
- (set_attr "length" "8")])
-
-(define_insn "*call_value_nonlocal_aix64"
- [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:DI 1 "symbol_ref_operand" "s"))
- (match_operand 2 "" "g")))
- (use (match_operand:SI 3 "immediate_operand" "O"))
- (clobber (reg:SI LR_REGNO))]
- "TARGET_64BIT
- && DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
- "bl %z1\;nop"
- [(set_attr "type" "branch")
- (set_attr "length" "8")])
;; A function pointer under System V is just a normal pointer
;; operands[0] is the function pointer
@@ -11009,6 +12573,104 @@
[(set_attr "type" "branch,branch")
(set_attr "length" "4,8")])
+
+;; Call to AIX abi function in the same module.
+
+(define_insn "*call_local_aix<mode>"
+ [(call (mem:SI (match_operand:P 0 "current_file_function_operand" "s"))
+ (match_operand 1 "" "g"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "bl %z0"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*call_value_local_aix<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "current_file_function_operand" "s"))
+ (match_operand 2 "" "g")))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "bl %z1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+;; Call to AIX abi function which may be in another module.
+;; Restore the TOC pointer (r2) after the call.
+
+(define_insn "*call_nonlocal_aix<mode>"
+ [(call (mem:SI (match_operand:P 0 "symbol_ref_operand" "s"))
+ (match_operand 1 "" "g"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "bl %z0\;nop"
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
+
+(define_insn "*call_value_nonlocal_aix<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "symbol_ref_operand" "s"))
+ (match_operand 2 "" "g")))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "bl %z1\;nop"
+ [(set_attr "type" "branch")
+ (set_attr "length" "8")])
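
Illustrative usage (not part of the patch): a call that may resolve in
another module matches the nonlocal pattern above and is emitted as
"bl ... ; nop", where the linker can turn the nop into a TOC restore if
the callee really is external; a provably local call needs only the
plain "bl".

/* Hedged sketch; ext_fn is a placeholder external function.  */
extern void ext_fn (void);

void caller (void)
{
  ext_fn ();   /* expected: bl ext_fn ; nop (TOC-restore slot) */
}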
+
+;; Call to indirect functions with the AIX abi using a 3 word descriptor.
+;; Operand0 is the address of the function to call
+;; Operand2 is the location in the function descriptor to load r2 from
+;; Operand3 is the stack location to hold the current TOC pointer
+
+(define_insn "*call_indirect_aix<mode>"
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
+ (match_operand 1 "" "g,g"))
+ (use (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX"
+ "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
+
+(define_insn "*call_value_indirect_aix<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
+ (match_operand 2 "" "g,g")))
+ (use (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+ (set (reg:P TOC_REGNUM) (match_operand:P 4 "memory_operand" "<ptrm>,<ptrm>"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_AIX"
+ "<ptrload> 2,%3\;b%T1l\;<ptrload> 2,%4"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "12")])
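
Illustrative sketch (an assumption about typical 64-bit ELFv1 output,
not taken from the patch): an indirect call goes through the 3-word
descriptor, matching the "<ptrload> 2,%2\;b%T0l\;<ptrload> 2,%3"
template above.

/* Hedged sketch; expected sequence for fp () under -mabi=elfv1:
     ld 2,8(11)    load the callee's TOC pointer from the descriptor
     bctrl         branch to the entry address via CTR
     ld 2,40(1)    reload the caller's TOC from its stack slot  */
void call_fp (void (*fp) (void))
{
  fp ();
}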
+
+;; Call to indirect functions with the ELFv2 ABI.
+;; Operand0 is the address of the function to call
+;; Operand2 is the stack location to hold the current TOC pointer
+
+(define_insn "*call_indirect_elfv2<mode>"
+ [(call (mem:SI (match_operand:P 0 "register_operand" "c,*l"))
+ (match_operand 1 "" "g,g"))
+ (set (reg:P TOC_REGNUM) (match_operand:P 2 "memory_operand" "<ptrm>,<ptrm>"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_ELFv2"
+ "b%T0l\;<ptrload> 2,%2"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "8")])
+
+(define_insn "*call_value_indirect_elfv2<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "register_operand" "c,*l"))
+ (match_operand 2 "" "g,g")))
+ (set (reg:P TOC_REGNUM) (match_operand:P 3 "memory_operand" "<ptrm>,<ptrm>"))
+ (clobber (reg:P LR_REGNO))]
+ "DEFAULT_ABI == ABI_ELFv2"
+ "b%T1l\;<ptrload> 2,%3"
+ [(set_attr "type" "jmpreg")
+ (set_attr "length" "8")])
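
By contrast (again an illustrative assumption): under ELFv2 a function
pointer is the entry address itself, so there is no descriptor load and
the indirect call shrinks to the two-insn form above.

/* Hedged sketch; expected sequence under -mabi=elfv2:
     bctrl         branch via CTR
     ld 2,24(1)    restore the caller's TOC from the ELFv2 save slot  */
void call_fp2 (void (*fp) (void))
{
  fp ();
}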
+
+
;; Call subroutine returning any type.
(define_expand "untyped_call"
[(parallel [(call (match_operand 0 "" "")
@@ -11056,6 +12718,39 @@
gcc_assert (GET_CODE (operands[1]) == CONST_INT);
operands[0] = XEXP (operands[0], 0);
+
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ rs6000_sibcall_aix (NULL_RTX, operands[0], operands[1], operands[2]);
+ DONE;
+ }
+}")
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (call (mem:SI (match_operand 1 "address_operand" ""))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (use (reg:SI LR_REGNO))
+ (simple_return)])]
+ ""
+ "
+{
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT)
+ operands[1] = machopic_indirect_call_target (operands[1]);
+#endif
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+ gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+
+ operands[1] = XEXP (operands[1], 0);
+
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ rs6000_sibcall_aix (operands[0], operands[1], operands[2], operands[3]);
+ DONE;
+ }
}")
;; this and similar patterns must be marked as using LR, otherwise
@@ -11123,7 +12818,6 @@
[(set_attr "type" "branch")
(set_attr "length" "4,8")])
-
(define_insn "*sibcall_value_local64"
[(set (match_operand 0 "" "")
(call (mem:SI (match_operand:DI 1 "current_file_function_operand" "s,s"))
@@ -11145,35 +12839,6 @@
[(set_attr "type" "branch")
(set_attr "length" "4,8")])
-(define_insn "*sibcall_nonlocal_aix<mode>"
- [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
- (match_operand 1 "" "g,g"))
- (use (match_operand:SI 2 "immediate_operand" "O,O"))
- (use (reg:SI LR_REGNO))
- (simple_return)]
- "DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[2]) & CALL_LONG) == 0"
- "@
- b %z0
- b%T0"
- [(set_attr "type" "branch")
- (set_attr "length" "4")])
-
-(define_insn "*sibcall_value_nonlocal_aix<mode>"
- [(set (match_operand 0 "" "")
- (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
- (match_operand 2 "" "g,g")))
- (use (match_operand:SI 3 "immediate_operand" "O,O"))
- (use (reg:SI LR_REGNO))
- (simple_return)]
- "DEFAULT_ABI == ABI_AIX
- && (INTVAL (operands[3]) & CALL_LONG) == 0"
- "@
- b %z1
- b%T1"
- [(set_attr "type" "branch")
- (set_attr "length" "4")])
-
(define_insn "*sibcall_nonlocal_sysv<mode>"
[(call (mem:SI (match_operand:P 0 "call_operand" "s,s,c,c"))
(match_operand 1 "" ""))
@@ -11204,27 +12869,6 @@
[(set_attr "type" "branch")
(set_attr "length" "4,8,4,8")])
-(define_expand "sibcall_value"
- [(parallel [(set (match_operand 0 "register_operand" "")
- (call (mem:SI (match_operand 1 "address_operand" ""))
- (match_operand 2 "" "")))
- (use (match_operand 3 "" ""))
- (use (reg:SI LR_REGNO))
- (simple_return)])]
- ""
- "
-{
-#if TARGET_MACHO
- if (MACHOPIC_INDIRECT)
- operands[1] = machopic_indirect_call_target (operands[1]);
-#endif
-
- gcc_assert (GET_CODE (operands[1]) == MEM);
- gcc_assert (GET_CODE (operands[2]) == CONST_INT);
-
- operands[1] = XEXP (operands[1], 0);
-}")
-
(define_insn "*sibcall_value_nonlocal_sysv<mode>"
[(set (match_operand 0 "" "")
(call (mem:SI (match_operand:P 1 "call_operand" "s,s,c,c"))
@@ -11256,6 +12900,31 @@
[(set_attr "type" "branch")
(set_attr "length" "4,8,4,8")])
+;; AIX ABI sibling call patterns.
+
+(define_insn "*sibcall_aix<mode>"
+ [(call (mem:SI (match_operand:P 0 "call_operand" "s,c"))
+ (match_operand 1 "" "g,g"))
+ (simple_return)]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "@
+ b %z0
+ b%T0"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+(define_insn "*sibcall_value_aix<mode>"
+ [(set (match_operand 0 "" "")
+ (call (mem:SI (match_operand:P 1 "call_operand" "s,c"))
+ (match_operand 2 "" "g,g")))
+ (simple_return)]
+ "DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2"
+ "@
+ b %z1
+ b%T1"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
(define_expand "sibcall_epilogue"
[(use (const_int 0))]
""
@@ -11294,7 +12963,14 @@
operands[1] = gen_rtx_REG (Pmode, 0);
return "st<wd>%U0%X0 %1,%0";
}
- [(set_attr "type" "store")
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
+ (const_string "store_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[0], VOIDmode)")
+ (const_string "store_u")
+ (const_string "store"))))
(set_attr "length" "4")])
(define_insn "probe_stack_range<P:mode>"
@@ -11589,23 +13265,6 @@
[(set (match_dup 3) (compare:CCUNS (match_dup 1) (match_dup 2)))
(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 4)))])
-(define_insn "*cmpsf_internal1"
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
- (compare:CCFP (match_operand:SF 1 "gpc_reg_operand" "f")
- (match_operand:SF 2 "gpc_reg_operand" "f")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
- "fcmpu %0,%1,%2"
- [(set_attr "type" "fpcompare")])
-
-(define_insn "*cmpdf_internal1"
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "d")
- (match_operand:DF 2 "gpc_reg_operand" "d")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && !VECTOR_UNIT_VSX_P (DFmode)"
- "fcmpu %0,%1,%2"
- [(set_attr "type" "fpcompare")])
-
;; Only need to compare second words if first words equal
(define_insn "*cmptf_internal1"
[(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
@@ -13501,6 +15160,14 @@
"mfcr %0"
[(set_attr "type" "mfcr")])
+(define_insn "*crsave"
+ [(match_parallel 0 "crsave_operation"
+ [(set (match_operand:SI 1 "memory_operand" "=m")
+ (match_operand:SI 2 "gpc_reg_operand" "r"))])]
+ ""
+ "stw %2,%1"
+ [(set_attr "type" "store")])
+
(define_insn "*stmw"
[(match_parallel 0 "stmw_operation"
[(set (match_operand:SI 1 "memory_operand" "=m")
@@ -13885,7 +15552,7 @@
(match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
"TARGET_POPCNTD"
"bpermd %0,%1,%2"
- [(set_attr "type" "integer")])
+ [(set_attr "type" "popcnt")])
;; Builtin fma support. Handle
@@ -13900,6 +15567,20 @@
""
"")
+(define_insn "*fma<mode>4_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
+ (fma:SFDF
+ (match_operand:SFDF 1 "gpc_reg_operand" "%<Ff>,<Fv>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fmadd<Ftrad> %0,%1,%2,%3
+ xsmadda<Fvsx> %x0,%x1,%x2
+ xsmaddm<Fvsx> %x0,%x1,%x3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
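
Usage note (illustrative, not from the patch): the standard fma()
function should match this pattern, so a single fused multiply-add is
emitted, either the traditional fmadd/fmadds or one of the VSX
xsmadda/xsmaddm forms selected by the register alternatives.

#include <math.h>

/* Hedged sketch: expected to compile to one fmadd (or an xsmadda /
   xsmaddm variant when VSX registers are chosen).  */
double mad (double a, double b, double c)
{
  return fma (a, b, c);
}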
+
; Altivec only has fma and nfms.
(define_expand "fms<mode>4"
[(set (match_operand:FMA_F 0 "register_operand" "")
@@ -13910,6 +15591,20 @@
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"")
+(define_insn "*fms<mode>4_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
+ (fma:SFDF
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
+ (neg:SFDF (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fmsub<Ftrad> %0,%1,%2,%3
+ xsmsuba<Fvsx> %x0,%x1,%x2
+ xsmsubm<Fvsx> %x0,%x1,%x3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
+
;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
(define_expand "fnma<mode>4"
[(set (match_operand:FMA_F 0 "register_operand" "")
@@ -13943,6 +15638,21 @@
"!VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"")
+(define_insn "*nfma<mode>4_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
+ (neg:SFDF
+ (fma:SFDF
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>"))))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fnmadd<Ftrad> %0,%1,%2,%3
+ xsnmadda<Fvsx> %x0,%x1,%x2
+ xsnmaddm<Fvsx> %x0,%x1,%x3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
+
; Not an official optab name, but used from builtins.
(define_expand "nfms<mode>4"
[(set (match_operand:FMA_F 0 "register_operand" "")
@@ -13954,6 +15664,23 @@
""
"")
+(define_insn "*nfmssf4_fpr"
+ [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fv>,<Fv>")
+ (neg:SFDF
+ (fma:SFDF
+ (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>,<Fv>")
+ (match_operand:SFDF 2 "gpc_reg_operand" "<Ff>,<Fv>,0")
+ (neg:SFDF
+ (match_operand:SFDF 3 "gpc_reg_operand" "<Ff>,0,<Fv>")))))]
+ "TARGET_<MODE>_FPR"
+ "@
+ fnmsub<Ftrad> %0,%1,%2,%3
+ xsnmsuba<Fvsx> %x0,%x1,%x2
+ xsnmsubm<Fvsx> %x0,%x1,%x3"
+ [(set_attr "type" "fp")
+ (set_attr "fp_type" "fp_maddsub_<Fs>")])
+
+
(define_expand "rs6000_get_timebase"
[(use (match_operand:DI 0 "gpc_reg_operand" ""))]
""
@@ -14020,6 +15747,228 @@
})
+;; Power8 fusion support for fusing an addis instruction with a D-form load of
+;; a GPR.  The addis must be physically adjacent to the load and must use the
+;; same register that is being loaded.
+
+;; We use define_peephole for the actual addis/load, and the register used to
+;; hold the addis value must be the same as the register being loaded. We use
+;; define_peephole2 to change the register used for addis to be the register
+;; being loaded, since we can look at whether it is dead after the load insn.
+
+(define_peephole
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "fusion_gpr_addis" ""))
+ (set (match_operand:INT1 2 "base_reg_operand" "")
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
+ "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
+{
+ return emit_fusion_gpr_load (operands);
+}
+ [(set_attr "type" "load")
+ (set_attr "length" "8")])
+
+(define_peephole2
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "fusion_gpr_addis" ""))
+ (set (match_operand:INT1 2 "base_reg_operand" "")
+ (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
+ "TARGET_P8_FUSION
+ && (REGNO (operands[0]) != REGNO (operands[2])
+ || GET_CODE (operands[3]) == SIGN_EXTEND)
+ && fusion_gpr_load_p (operands, true)"
+ [(const_int 0)]
+{
+ expand_fusion_gpr_load (operands);
+ DONE;
+})
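
For illustration (an assumed example, not from the patch): a
medium-model TOC reference is a natural fusion candidate, since the
addis result is consumed, and then overwritten, by the adjacent load.

/* Hedged sketch: with -mcpu=power8 -mpower8-fusion this is expected to
   stay as the adjacent pair
       addis 9,2,var@toc@ha
       lwz   9,var@toc@l(9)
   which the hardware can fuse.  */
extern int var;

int get_var (void)
{
  return var;
}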
+
+
+;; Miscellaneous ISA 2.06 (power7) instructions
+(define_insn "addg6s"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")]
+ UNSPEC_ADDG6S))]
+ "TARGET_POPCNTD"
+ "addg6s %0,%1,%2"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
+(define_insn "cdtbcd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_CDTBCD))]
+ "TARGET_POPCNTD"
+ "cdtbcd %0,%1"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
+(define_insn "cbcdtd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_CBCDTD))]
+ "TARGET_POPCNTD"
+ "cbcdtd %0,%1"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
+(define_int_iterator UNSPEC_DIV_EXTEND [UNSPEC_DIVE
+ UNSPEC_DIVEO
+ UNSPEC_DIVEU
+ UNSPEC_DIVEUO])
+
+(define_int_attr div_extend [(UNSPEC_DIVE "e")
+ (UNSPEC_DIVEO "eo")
+ (UNSPEC_DIVEU "eu")
+ (UNSPEC_DIVEUO "euo")])
+
+(define_insn "div<div_extend>_<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (unspec:GPR [(match_operand:GPR 1 "register_operand" "r")
+ (match_operand:GPR 2 "register_operand" "r")]
+ UNSPEC_DIV_EXTEND))]
+ "TARGET_POPCNTD"
+ "div<wd><div_extend> %0,%1,%2"
+ [(set_attr "type" "<idiv_ldiv>")])
+
+
+;; Pack/unpack 128-bit floating point types that take 2 scalar registers
+
+; Type of the 64-bit part when packing/unpacking 128-bit floating point types
+(define_mode_attr FP128_64 [(TF "DF") (TD "DI")])
+
+(define_expand "unpack<mode>"
+ [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "")
+ (unspec:<FP128_64>
+ [(match_operand:FMOVE128 1 "register_operand" "")
+ (match_operand:QI 2 "const_0_to_1_operand" "")]
+ UNSPEC_UNPACK_128BIT))]
+ ""
+ "")
+
+;; The Advance Toolchain 7.0-3 added private builtins: __builtin_longdouble_dw0
+;; and __builtin_longdouble_dw1 to optimize glibc. Add support for these
+;; builtins here.
+
+(define_expand "unpacktf_0"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (unspec:DF [(match_operand:TF 1 "register_operand" "")
+ (const_int 0)]
+ UNSPEC_UNPACK_128BIT))]
+ ""
+ "")
+
+(define_expand "unpacktf_1"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (unspec:DF [(match_operand:TF 1 "register_operand" "")
+ (const_int 1)]
+ UNSPEC_UNPACK_128BIT))]
+ ""
+ "")
+
+(define_insn_and_split "unpack<mode>_dm"
+ [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m,d,r,m")
+ (unspec:<FP128_64>
+ [(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r")
+ (match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")]
+ UNSPEC_UNPACK_128BIT))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]);
+
+ if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno)
+ {
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+ }
+
+ operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno);
+}
+ [(set_attr "type" "fp,fpstore,mffgpr,mftgpr,store")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "unpack<mode>_nodm"
+ [(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m")
+ (unspec:<FP128_64>
+ [(match_operand:FMOVE128 1 "register_operand" "d,d")
+ (match_operand:QI 2 "const_0_to_1_operand" "i,i")]
+ UNSPEC_UNPACK_128BIT))]
+ "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ unsigned fp_regno = REGNO (operands[1]) + UINTVAL (operands[2]);
+
+ if (REG_P (operands[0]) && REGNO (operands[0]) == fp_regno)
+ {
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+ }
+
+ operands[3] = gen_rtx_REG (<FP128_64>mode, fp_regno);
+}
+ [(set_attr "type" "fp,fpstore")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "pack<mode>"
+ [(set (match_operand:FMOVE128 0 "register_operand" "=d,&d")
+ (unspec:FMOVE128
+ [(match_operand:<FP128_64> 1 "register_operand" "0,d")
+ (match_operand:<FP128_64> 2 "register_operand" "d,d")]
+ UNSPEC_PACK_128BIT))]
+ ""
+ "@
+ fmr %L0,%2
+ #"
+ "&& reload_completed && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (match_dup 2))]
+{
+ unsigned dest_hi = REGNO (operands[0]);
+ unsigned dest_lo = dest_hi + 1;
+
+ gcc_assert (!IN_RANGE (REGNO (operands[1]), dest_hi, dest_lo));
+ gcc_assert (!IN_RANGE (REGNO (operands[2]), dest_hi, dest_lo));
+
+ operands[3] = gen_rtx_REG (<FP128_64>mode, dest_hi);
+ operands[4] = gen_rtx_REG (<FP128_64>mode, dest_lo);
+}
+ [(set_attr "type" "fp,fp")
+ (set_attr "length" "4,8")])
+
+(define_insn "unpackv1ti"
+ [(set (match_operand:DI 0 "register_operand" "=d,d")
+ (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa")
+ (match_operand:QI 2 "const_0_to_1_operand" "O,i")]
+ UNSPEC_UNPACK_128BIT))]
+ "TARGET_VSX"
+{
+ if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0)
+ return ASM_COMMENT_START " xxpermdi to same register";
+
+ operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3);
+ return "xxpermdi %x0,%x1,%x1,%3";
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "4")])
+
+(define_insn "packv1ti"
+ [(set (match_operand:V1TI 0 "register_operand" "=wa")
+ (unspec:V1TI
+ [(match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")]
+ UNSPEC_PACK_128BIT))]
+ "TARGET_VSX"
+ "xxpermdi %x0,%x1,%x2,0"
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "4")])
+
+
(include "sync.md")
(include "vector.md")
@@ -14028,3 +15977,5 @@
(include "spe.md")
(include "dfp.md")
(include "paired.md")
+(include "crypto.md")
+(include "htm.md")
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000.opt b/gcc-4.8/gcc/config/rs6000/rs6000.opt
index 8e3cea121..5b56eb0da 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000.opt
+++ b/gcc-4.8/gcc/config/rs6000/rs6000.opt
@@ -1,6 +1,6 @@
; Options for the rs6000 port of the compiler
;
-; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+; Copyright (C) 2005-2014 Free Software Foundation, Inc.
; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
;
; This file is part of GCC.
@@ -137,6 +137,14 @@ maltivec
Target Report Mask(ALTIVEC) Var(rs6000_isa_flags)
Use AltiVec instructions
+maltivec=le
+Target Report RejectNegative Var(rs6000_altivec_element_order, 1) Save
+Generate Altivec instructions using little-endian element order
+
+maltivec=be
+Target Report RejectNegative Var(rs6000_altivec_element_order, 2)
+Generate Altivec instructions using big-endian element order
+
mhard-dfp
Target Report Mask(DFP) Var(rs6000_isa_flags)
Use decimal floating point instructions
@@ -181,13 +189,16 @@ mvsx
Target Report Mask(VSX) Var(rs6000_isa_flags)
Use vector/scalar (VSX) instructions
+mvsx-scalar-float
+Target Undocumented Report Var(TARGET_VSX_SCALAR_FLOAT) Init(1)
+; If -mpower8-vector, use VSX arithmetic instructions for SFmode (on by default)
+
mvsx-scalar-double
-Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(-1)
-; If -mvsx, use VSX arithmetic instructions for scalar double (on by default)
+Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1)
+; If -mvsx, use VSX arithmetic instructions for DFmode (on by default)
mvsx-scalar-memory
-Target Undocumented Report Var(TARGET_VSX_SCALAR_MEMORY)
-; If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
+Target Undocumented Report Alias(mupper-regs-df)
mvsx-align-128
Target Undocumented Report Var(TARGET_VSX_ALIGN_128)
@@ -363,6 +374,14 @@ mabi=no-spe
Target RejectNegative Var(rs6000_spe_abi, 0)
Do not use the SPE ABI extensions
+mabi=elfv1
+Target RejectNegative Var(rs6000_elf_abi, 1) Save
+Use the ELFv1 ABI
+
+mabi=elfv2
+Target RejectNegative Var(rs6000_elf_abi, 2)
+Use the ELFv2 ABI
+
; These are here for testing during development only, do not document
; in the manual please.
@@ -443,6 +462,10 @@ mlong-double-
Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
-mlong-double-<n> Specify size of long double (64 or 128 bits)
+mlra
+Target Report Var(rs6000_lra_flag) Init(0) Save
+Use LRA instead of reload
+
msched-costly-dep=
Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
Determine which dependences between insns are considered costly
@@ -514,3 +537,51 @@ Use/do not use r11 to hold the static link in calls to functions via pointers.
msave-toc-indirect
Target Report Var(TARGET_SAVE_TOC_INDIRECT) Save
Control whether we save the TOC in the prologue for indirect calls or generate the save inline
+
+mvsx-timode
+Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags)
+Allow 128-bit integers in VSX registers
+
+mpower8-fusion
+Target Report Mask(P8_FUSION) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power8
+
+mpower8-fusion-sign
+Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
+Allow sign extension in fusion operations
+
+mpower8-vector
+Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
+Use/do not use vector and scalar instructions added in ISA 2.07.
+
+mcrypto
+Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
+Use ISA 2.07 crypto instructions
+
+mdirect-move
+Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags)
+Use ISA 2.07 direct move between GPR & VSX register instructions
+
+mhtm
+Target Report Mask(HTM) Var(rs6000_isa_flags)
+Use ISA 2.07 transactional memory (HTM) instructions
+
+mquad-memory
+Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
+Generate the quad word memory instructions (lq/stq).
+
+mquad-memory-atomic
+Target Report Mask(QUAD_MEMORY_ATOMIC) Var(rs6000_isa_flags)
+Generate the quad word memory atomic instructions (lqarx/stqcx).
+
+mcompat-align-parm
+Target Report Var(rs6000_compat_align_parm) Init(1) Save
+Generate aggregate parameter passing code with at most 64-bit alignment.
+
+mupper-regs-df
+Target Undocumented Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
+Allow double variables in upper registers with -mcpu=power7 or -mvsx
+
+mupper-regs-sf
+Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
+Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector
diff --git a/gcc-4.8/gcc/config/rs6000/sync.md b/gcc-4.8/gcc/config/rs6000/sync.md
index 252e2690a..74b59ec2c 100644
--- a/gcc-4.8/gcc/config/rs6000/sync.md
+++ b/gcc-4.8/gcc/config/rs6000/sync.md
@@ -1,5 +1,5 @@
;; Machine description for PowerPC synchronization instructions.
-;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
;; Contributed by Geoffrey Keating.
;; This file is part of GCC.
@@ -18,14 +18,23 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+ (HI "lharx")
+ (SI "lwarx")
+ (DI "ldarx")
+ (TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+ (HI "sthcx.")
+ (SI "stwcx.")
+ (DI "stdcx.")
+ (TI "stqcx.")])
(define_code_iterator FETCHOP [plus minus ior xor and])
(define_code_attr fetchop_name
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
(define_code_attr fetchop_pred
- [(plus "add_operand") (minus "gpc_reg_operand")
+ [(plus "add_operand") (minus "int_reg_operand")
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
(define_expand "mem_thread_fence"
@@ -98,10 +107,17 @@
"isync"
[(set_attr "type" "isync")])
+;; Types that we should provide atomic instructions for.
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
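
Usage sketch (illustrative): with QImode/HImode in AINT, C11 atomics on
char and short can expand inline through lbarx/lharx and stbcx./sthcx.
when TARGET_SYNC_HI_QI holds (e.g. -mcpu=power8), instead of the older
word-sized mask-and-shift loops.

#include <stdatomic.h>

_Atomic short counter;

/* Hedged sketch: expected to use lharx/sthcx. on power8.  */
void bump (void)
{
  atomic_fetch_add (&counter, 1);
}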
+
;; The control dependency used for load dependency described
;; in B.2.3 of the Power ISA 2.06B.
(define_insn "loadsync_<mode>"
- [(unspec_volatile:BLK [(match_operand:INT1 0 "register_operand" "r")]
+ [(unspec_volatile:BLK [(match_operand:AINT 0 "register_operand" "r")]
UNSPECV_ISYNC)
(clobber (match_scratch:CC 1 "=y"))]
""
@@ -109,18 +125,73 @@
[(set_attr "type" "isync")
(set_attr "length" "12")])
+(define_insn "load_quadpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec:PTI
+ [(match_operand:TI 1 "quad_memory_operand" "wQ")] UNSPEC_LSQ))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])"
+ "lq %0,%1"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
(define_expand "atomic_load<mode>"
- [(set (match_operand:INT1 0 "register_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "")) ;; memory
+ [(set (match_operand:AINT 0 "register_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "")) ;; memory
(use (match_operand:SI 2 "const_int_operand" ""))] ;; model
""
{
+ if (<MODE>mode == TImode && !TARGET_SYNC_TI)
+ FAIL;
+
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
if (model == MEMMODEL_SEQ_CST)
emit_insn (gen_hwsync ());
+ if (<MODE>mode != TImode)
emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx pti_reg = gen_reg_rtx (PTImode);
+
+ /* Can't have indexed address for 'lq'.  */
+ if (indexed_address (XEXP (op1, 0), TImode))
+ {
+ rtx old_addr = XEXP (op1, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = op1 = replace_equiv_address (op1, new_addr);
+ }
+
+ emit_insn (gen_load_quadpti (pti_reg, op1));
+
+ /* For 4.8 we need to do explicit dword copies, even in big endian mode,
+ unless we are using the LRA register allocator. The 4.9 register
+ allocator is smart enough to assign an even/odd pair. */
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
+ emit_move_insn (op0, gen_lowpart (TImode, pti_reg));
+ else
+ {
+ rtx op0_lo = gen_lowpart (DImode, op0);
+ rtx op0_hi = gen_highpart (DImode, op0);
+ rtx pti_lo = gen_lowpart (DImode, pti_reg);
+ rtx pti_hi = gen_highpart (DImode, pti_reg);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (op0_hi, pti_hi);
+ emit_move_insn (op0_lo, pti_lo);
+ }
+ else
+ {
+ emit_move_insn (op0_hi, pti_lo);
+ emit_move_insn (op0_lo, pti_hi);
+ }
+ }
+ }
switch (model)
{
@@ -129,16 +200,7 @@
case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
- if (GET_MODE (operands[0]) == QImode)
- emit_insn (gen_loadsync_qi (operands[0]));
- else if (GET_MODE (operands[0]) == HImode)
- emit_insn (gen_loadsync_hi (operands[0]));
- else if (GET_MODE (operands[0]) == SImode)
- emit_insn (gen_loadsync_si (operands[0]));
- else if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_loadsync_di (operands[0]));
- else
- gcc_unreachable ();
+ emit_insn (gen_loadsync_<mode> (operands[0]));
break;
default:
gcc_unreachable ();
@@ -146,12 +208,24 @@
DONE;
})
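
Usage sketch (illustrative): a 16-byte atomic load that the expander
above can now handle inline.

#include <stdatomic.h>

/* Hedged sketch: with -mcpu=power8 -mquad-memory this is expected to go
   through load_quadpti (lq) plus the doubleword copies shown above.  */
__int128 load128 (_Atomic __int128 *p)
{
  return atomic_load_explicit (p, memory_order_relaxed);
}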
+(define_insn "store_quadpti"
+ [(set (match_operand:PTI 0 "quad_memory_operand" "=wQ")
+ (unspec:PTI
+ [(match_operand:PTI 1 "quad_int_reg_operand" "r")] UNSPEC_LSQ))]
+ "TARGET_SYNC_TI"
+ "stq %1,%0"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
(define_expand "atomic_store<mode>"
- [(set (match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "register_operand" "")) ;; input
+ [(set (match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "register_operand" "")) ;; input
(use (match_operand:SI 2 "const_int_operand" ""))] ;; model
""
{
+ if (<MODE>mode == TImode && !TARGET_SYNC_TI)
+ FAIL;
+
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
switch (model)
{
@@ -166,39 +240,224 @@
default:
gcc_unreachable ();
}
+ if (<MODE>mode != TImode)
emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx pti_reg = gen_reg_rtx (PTImode);
+
+ /* Can't have indexed address for 'stq'.  */
+ if (indexed_address (XEXP (op0, 0), TImode))
+ {
+ rtx old_addr = XEXP (op0, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[0] = op0 = replace_equiv_address (op0, new_addr);
+ }
+
+ /* For 4.8 we need to do explicit dword copies, even in big endian mode,
+ unless we are using the LRA register allocator. The 4.9 register
+ allocator is smart enough to assign an even/odd pair. */
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op1));
+ else
+ {
+ rtx op1_lo = gen_lowpart (DImode, op1);
+ rtx op1_hi = gen_highpart (DImode, op1);
+ rtx pti_lo = gen_lowpart (DImode, pti_reg);
+ rtx pti_hi = gen_highpart (DImode, pti_reg);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, pti_reg));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (pti_hi, op1_hi);
+ emit_move_insn (pti_lo, op1_lo);
+ }
+ else
+ {
+ emit_move_insn (pti_hi, op1_lo);
+ emit_move_insn (pti_lo, op1_hi);
+ }
+ }
+
+ emit_insn (gen_store_quadpti (gen_lowpart (PTImode, op0), pti_reg));
+ }
+
DONE;
})
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
-
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+ (HI "TARGET_SYNC_HI_QI")
+ SI
+ (DI "TARGET_POWERPC64")])
(define_insn "load_locked<mode>"
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
""
"<larx> %0,%y1"
[(set_attr "type" "load_l")])
+(define_insn "load_locked<QHI:mode>_si"
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_HI_QI"
+ "<QHI:larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs.
+
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Under AT 7.0, we need to use an explicit copy,
+;; even in big endian mode, unless we are using the LRA register allocator.  In
+;; GCC 4.9, the register allocator is smart enough to assign an even/odd
+;; register pair.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
+(define_expand "load_lockedti"
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx pti = gen_reg_rtx (PTImode);
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx old_addr = XEXP (op1, 0);
+ rtx new_addr = force_reg (Pmode, old_addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ }
+
+ emit_insn (gen_load_lockedpti (pti, op1));
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
+ emit_move_insn (op0, gen_lowpart (TImode, pti));
+ else
+ {
+ rtx op0_lo = gen_lowpart (DImode, op0);
+ rtx op0_hi = gen_highpart (DImode, op0);
+ rtx pti_lo = gen_lowpart (DImode, pti);
+ rtx pti_hi = gen_highpart (DImode, pti);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (op0_hi, pti_hi);
+ emit_move_insn (op0_lo, pti_lo);
+ }
+ else
+ {
+ emit_move_insn (op0_hi, pti_lo);
+ emit_move_insn (op0_lo, pti_hi);
+ }
+ }
+ DONE;
+})
+
+(define_insn "load_lockedpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec_volatile:PTI
+ [(match_operand:TI 1 "indexed_or_indirect_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])
+ && quad_int_reg_operand (operands[0], PTImode)"
+ "lqarx %0,%y1"
+ [(set_attr "type" "load_l")])
+
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
""
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+;; Use a temporary register to force getting an even register for the
+;; lqarx/stqcx. instructions.  Under AT 7.0, we need to use an explicit copy,
+;; even in big endian mode, unless we are using the LRA register allocator.  In
+;; GCC 4.9, the register allocator is smart enough to assign an even/odd
+;; register pair.
+
+;; On little endian systems where non-atomic quad word load/store instructions
+;; are not used, the address can be register+offset, so make sure the address
+;; is indexed or indirect before register allocation.
+
+(define_expand "store_conditionalti"
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx addr = XEXP (op1, 0);
+ rtx pti_mem;
+ rtx pti_reg;
+
+ if (!indexed_or_indirect_operand (op1, TImode))
+ {
+ rtx new_addr = force_reg (Pmode, addr);
+ operands[1] = op1 = change_address (op1, TImode, new_addr);
+ addr = new_addr;
+ }
+
+ pti_mem = change_address (op1, PTImode, addr);
+ pti_reg = gen_reg_rtx (PTImode);
+
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
+ emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
+ else
+ {
+ rtx op2_lo = gen_lowpart (DImode, op2);
+ rtx op2_hi = gen_highpart (DImode, op2);
+ rtx pti_lo = gen_lowpart (DImode, pti_reg);
+ rtx pti_hi = gen_highpart (DImode, pti_reg);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (pti_hi, op2_hi);
+ emit_move_insn (pti_lo, op2_lo);
+ }
+ else
+ {
+ emit_move_insn (pti_hi, op2_lo);
+ emit_move_insn (pti_lo, op2_hi);
+ }
+ }
+
+ emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));
+ DONE;
+})
+
+(define_insn "store_conditionalpti"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:PTI 1 "indexed_or_indirect_operand" "=Z")
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+ "stqcx. %2,%y1"
+ [(set_attr "type" "store_c")])
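
Usage sketch (illustrative): a 16-byte compare-and-swap that can sit on
top of the lqarx/stqcx. patterns above in the usual retry loop.

#include <stdatomic.h>
#include <stdbool.h>

/* Hedged sketch: assumes TARGET_SYNC_TI; without it the compiler falls
   back to a library call.  */
bool cas128 (_Atomic __int128 *p, __int128 *expected, __int128 desired)
{
  return atomic_compare_exchange_strong (p, expected, desired);
}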
+
(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
- (match_operand:INT1 2 "memory_operand" "") ;; memory
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; model succ
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
@@ -209,9 +468,9 @@
})
(define_expand "atomic_exchange<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -220,9 +479,9 @@
})
(define_expand "atomic_<fetchop_name><mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 0)
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 0)
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -232,8 +491,8 @@
})
(define_expand "atomic_nand<mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -243,10 +502,10 @@
})
(define_expand "atomic_fetch_<fetchop_name><mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -256,9 +515,9 @@
})
(define_expand "atomic_fetch_nand<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -268,10 +527,10 @@
})
(define_expand "atomic_<fetchop_name>_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -281,9 +540,9 @@
})
(define_expand "atomic_nand_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
diff --git a/gcc-4.8/gcc/config/rs6000/sysv4.h b/gcc-4.8/gcc/config/rs6000/sysv4.h
index fabc22c8c..965751b4f 100644
--- a/gcc-4.8/gcc/config/rs6000/sysv4.h
+++ b/gcc-4.8/gcc/config/rs6000/sysv4.h
@@ -45,7 +45,7 @@
& (OPTION_MASK_RELOCATABLE \
| OPTION_MASK_MINIMAL_TOC)) \
&& flag_pic > 1) \
- || DEFAULT_ABI == ABI_AIX)
+ || DEFAULT_ABI != ABI_V4)
#define TARGET_BITFIELD_TYPE (! TARGET_NO_BITFIELD_TYPE)
#define TARGET_BIG_ENDIAN (! TARGET_LITTLE_ENDIAN)
@@ -147,7 +147,7 @@ do { \
rs6000_sdata_name); \
} \
\
- else if (flag_pic && DEFAULT_ABI != ABI_AIX \
+ else if (flag_pic && DEFAULT_ABI == ABI_V4 \
&& (rs6000_sdata == SDATA_EABI \
|| rs6000_sdata == SDATA_SYSV)) \
{ \
@@ -173,14 +173,14 @@ do { \
error ("-mrelocatable and -mno-minimal-toc are incompatible"); \
} \
\
- if (TARGET_RELOCATABLE && rs6000_current_abi == ABI_AIX) \
+ if (TARGET_RELOCATABLE && rs6000_current_abi != ABI_V4) \
{ \
rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \
error ("-mrelocatable and -mcall-%s are incompatible", \
rs6000_abi_name); \
} \
\
- if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi == ABI_AIX) \
+ if (!TARGET_64BIT && flag_pic > 1 && rs6000_current_abi != ABI_V4) \
{ \
flag_pic = 0; \
error ("-fPIC and -mcall-%s are incompatible", \
@@ -193,7 +193,7 @@ do { \
} \
\
/* Treat -fPIC the same as -mrelocatable. */ \
- if (flag_pic > 1 && DEFAULT_ABI != ABI_AIX) \
+ if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \
{ \
rs6000_isa_flags |= OPTION_MASK_RELOCATABLE | OPTION_MASK_MINIMAL_TOC; \
TARGET_NO_FP_IN_TOC = 1; \
@@ -317,7 +317,7 @@ do { \
/* Put PC relative got entries in .got2. */
#define MINIMAL_TOC_SECTION_ASM_OP \
- (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI != ABI_AIX) \
+ (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI == ABI_V4) \
? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\""
@@ -522,8 +522,6 @@ extern int fixuplabelno;
#define ENDIAN_SELECT(BIG_OPT, LITTLE_OPT, DEFAULT_OPT) \
"%{mlittle|mlittle-endian:" LITTLE_OPT ";" \
"mbig|mbig-endian:" BIG_OPT ";" \
- "mcall-aixdesc|mcall-freebsd|mcall-netbsd|" \
- "mcall-openbsd|mcall-linux:" BIG_OPT ";" \
"mcall-i960-old:" LITTLE_OPT ";" \
":" DEFAULT_OPT "}"
@@ -536,25 +534,12 @@ extern int fixuplabelno;
%{memb|msdata=eabi: -memb}" \
ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
-#define CC1_ENDIAN_BIG_SPEC ""
-
-#define CC1_ENDIAN_LITTLE_SPEC "\
-%{!mstrict-align: %{!mno-strict-align: \
- %{!mcall-i960-old: \
- -mstrict-align \
- } \
-}}"
-
-#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_big)"
-
#ifndef CC1_SECURE_PLT_DEFAULT_SPEC
#define CC1_SECURE_PLT_DEFAULT_SPEC ""
#endif
-/* Pass -G xxx to the compiler and set correct endian mode. */
+/* Pass -G xxx to the compiler. */
#define CC1_SPEC "%{G*} %(cc1_cpu)" \
- ENDIAN_SELECT(" %(cc1_endian_big)", " %(cc1_endian_little)", \
- " %(cc1_endian_default)") \
"%{meabi: %{!mcall-*: -mcall-sysv }} \
%{!meabi: %{!mno-eabi: \
%{mrelocatable: -meabi } \
@@ -912,9 +897,6 @@ ncrtn.o%s"
{ "link_os_netbsd", LINK_OS_NETBSD_SPEC }, \
{ "link_os_openbsd", LINK_OS_OPENBSD_SPEC }, \
{ "link_os_default", LINK_OS_DEFAULT_SPEC }, \
- { "cc1_endian_big", CC1_ENDIAN_BIG_SPEC }, \
- { "cc1_endian_little", CC1_ENDIAN_LITTLE_SPEC }, \
- { "cc1_endian_default", CC1_ENDIAN_DEFAULT_SPEC }, \
{ "cc1_secure_plt_default", CC1_SECURE_PLT_DEFAULT_SPEC }, \
{ "cpp_os_ads", CPP_OS_ADS_SPEC }, \
{ "cpp_os_yellowknife", CPP_OS_YELLOWKNIFE_SPEC }, \
diff --git a/gcc-4.8/gcc/config/rs6000/sysv4le.h b/gcc-4.8/gcc/config/rs6000/sysv4le.h
index 3901122a7..28da1c99c 100644
--- a/gcc-4.8/gcc/config/rs6000/sysv4le.h
+++ b/gcc-4.8/gcc/config/rs6000/sysv4le.h
@@ -22,9 +22,6 @@
#undef TARGET_DEFAULT
#define TARGET_DEFAULT MASK_LITTLE_ENDIAN
-#undef CC1_ENDIAN_DEFAULT_SPEC
-#define CC1_ENDIAN_DEFAULT_SPEC "%(cc1_endian_little)"
-
#undef DEFAULT_ASM_ENDIAN
#define DEFAULT_ASM_ENDIAN " -mlittle"
@@ -34,3 +31,7 @@
#undef MULTILIB_DEFAULTS
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
+
+/* Little-endian PowerPC64 Linux uses the ELF v2 ABI by default. */
+#define LINUX64_DEFAULT_ABI_ELFv2
+
diff --git a/gcc-4.8/gcc/config/rs6000/t-linux64 b/gcc-4.8/gcc/config/rs6000/t-linux64
index 9175de2ff..70e928dd7 100644
--- a/gcc-4.8/gcc/config/rs6000/t-linux64
+++ b/gcc-4.8/gcc/config/rs6000/t-linux64
@@ -25,8 +25,8 @@
# it doesn't tell anything about the 32bit libraries on those systems. Set
# MULTILIB_OSDIRNAMES according to what is found on the target.
-MULTILIB_OPTIONS = m64/m32
-MULTILIB_DIRNAMES = 64 32
-MULTILIB_EXTRA_OPTS = fPIC
-MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:powerpc64-linux-gnu)
-MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu)
+MULTILIB_OPTIONS := m64/m32
+MULTILIB_DIRNAMES := 64 32
+MULTILIB_EXTRA_OPTS :=
+MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu)
+MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu)
diff --git a/gcc-4.8/gcc/config/rs6000/t-linux64bele b/gcc-4.8/gcc/config/rs6000/t-linux64bele
new file mode 100644
index 000000000..97c1ee6fb
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/t-linux64bele
@@ -0,0 +1,7 @@
+#rs6000/t-linux64end
+
+MULTILIB_OPTIONS += mlittle
+MULTILIB_DIRNAMES += le
+MULTILIB_OSDIRNAMES += $(subst =,.mlittle=,$(subst lible32,lib32le,$(subst lible64,lib64le,$(subst lib,lible,$(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES))))))
+MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mlittle%,$(MULTILIB_OSDIRNAMES)))
+MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN}
diff --git a/gcc-4.8/gcc/config/rs6000/t-linux64le b/gcc-4.8/gcc/config/rs6000/t-linux64le
new file mode 100644
index 000000000..0cf38e152
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/t-linux64le
@@ -0,0 +1,3 @@
+#rs6000/t-linux64le
+
+MULTILIB_OSDIRNAMES := $(subst -linux,le-linux,$(MULTILIB_OSDIRNAMES))
diff --git a/gcc-4.8/gcc/config/rs6000/t-linux64lebe b/gcc-4.8/gcc/config/rs6000/t-linux64lebe
new file mode 100644
index 000000000..2e63bdb9f
--- /dev/null
+++ b/gcc-4.8/gcc/config/rs6000/t-linux64lebe
@@ -0,0 +1,7 @@
+#rs6000/t-linux64leend
+
+MULTILIB_OPTIONS += mbig
+MULTILIB_DIRNAMES += be
+MULTILIB_OSDIRNAMES += $(subst =,.mbig=,$(subst libbe32,lib32be,$(subst libbe64,lib64be,$(subst lib,libbe,$(subst le-linux,-linux,$(MULTILIB_OSDIRNAMES))))))
+MULTILIB_OSDIRNAMES += $(subst $(if $(findstring 64,$(target)),m64,m32).,,$(filter $(if $(findstring 64,$(target)),m64,m32).mbig%,$(MULTILIB_OSDIRNAMES)))
+MULTILIB_MATCHES := ${MULTILIB_MATCHES_ENDIAN}
diff --git a/gcc-4.8/gcc/config/rs6000/t-rs6000 b/gcc-4.8/gcc/config/rs6000/t-rs6000
index 52c183915..81372dfb1 100644
--- a/gcc-4.8/gcc/config/rs6000/t-rs6000
+++ b/gcc-4.8/gcc/config/rs6000/t-rs6000
@@ -60,6 +60,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/power5.md \
$(srcdir)/config/rs6000/power6.md \
$(srcdir)/config/rs6000/power7.md \
+ $(srcdir)/config/rs6000/power8.md \
$(srcdir)/config/rs6000/cell.md \
$(srcdir)/config/rs6000/xfpu.md \
$(srcdir)/config/rs6000/a2.md \
@@ -70,6 +71,8 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/vector.md \
$(srcdir)/config/rs6000/vsx.md \
$(srcdir)/config/rs6000/altivec.md \
+ $(srcdir)/config/rs6000/crypto.md \
+ $(srcdir)/config/rs6000/htm.md \
$(srcdir)/config/rs6000/spe.md \
$(srcdir)/config/rs6000/dfp.md \
$(srcdir)/config/rs6000/paired.md
diff --git a/gcc-4.8/gcc/config/rs6000/vector.md b/gcc-4.8/gcc/config/rs6000/vector.md
index 5a6e1fb30..5c45ec3f9 100644
--- a/gcc-4.8/gcc/config/rs6000/vector.md
+++ b/gcc-4.8/gcc/config/rs6000/vector.md
@@ -24,28 +24,28 @@
;; Vector int modes
-(define_mode_iterator VEC_I [V16QI V8HI V4SI])
+(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
;; Vector float modes
(define_mode_iterator VEC_F [V4SF V2DF])
;; Vector arithmetic modes
-(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Vector modes that need alignment via permutes
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
;; Vector logical modes
-(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
;; Vector modes for moves. Don't do TImode here.
-(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
;; Vector modes for types that don't need a realignment under VSX
-(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
+(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI])
;; Vector comparison modes
-(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Vector init/extract modes
(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -54,7 +54,8 @@
(define_mode_iterator VEC_64 [V2DI V2DF])
;; Vector reload iterator
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
+ SF SD SI DF DD DI TI])
;; Base type from vector mode
(define_mode_attr VEC_base [(V16QI "QI")
@@ -63,6 +64,7 @@
(V2DI "DI")
(V4SF "SF")
(V2DF "DF")
+ (V1TI "TI")
(TI "TI")])
;; Same size integer type for floating point data
@@ -88,7 +90,8 @@
(smax "smax")])
-;; Vector move instructions.
+;; Vector move instructions. Little-endian VSX loads and stores require
+;; special handling to circumvent "element endianness."
(define_expand "mov<mode>"
[(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
(match_operand:VEC_M 1 "any_operand" ""))]
@@ -104,6 +107,15 @@
&& !vlogical_operand (operands[1], <MODE>mode))
operands[1] = force_reg (<MODE>mode, operands[1]);
}
+ if (!BYTES_BIG_ENDIAN
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && !gpr_or_gpr_p (operands[0], operands[1])
+ && (memory_operand (operands[0], <MODE>mode)
+ ^ memory_operand (operands[1], <MODE>mode)))
+ {
+ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
+ DONE;
+ }
})
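
For illustration (a hedged sketch of the little-endian handling, not
from the patch): rs6000_emit_le_vsx_move compensates for lxvd2x/stxvd2x
accessing doublewords in big-endian element order, so a plain vector
copy through memory becomes a load-swap / swap-store pair.

/* Hedged sketch: on little-endian VSX each side of this copy is
   expected to be an lxvd2x/stxvd2x plus an xxpermdi x,x,x,2 swap.  */
typedef double v2df __attribute__ ((vector_size (16)));

void copy (v2df *dst, v2df *src)
{
  *dst = *src;
}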
;; Generic vector floating point load/store instructions. These will match
@@ -126,7 +138,9 @@
(match_operand:VEC_L 1 "input_operand" ""))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
@@ -249,7 +263,7 @@
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
(mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
(match_operand:VEC_F 2 "vfloat_operand" "")))]
- "VECTOR_UNIT_VSX_P (<MODE>mode) || VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
if (<MODE>mode == V4SFmode && VECTOR_UNIT_ALTIVEC_P (<MODE>mode))
{
@@ -395,7 +409,7 @@
(match_operand:VEC_I 5 "vint_operand" "")])
(match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
@@ -451,7 +465,7 @@
(match_operand:VEC_I 5 "vint_operand" "")])
(match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"
{
if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],
@@ -505,14 +519,14 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_expand "vector_geu<mode>"
[(set (match_operand:VEC_I 0 "vint_operand" "")
(geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
(define_insn_and_split "*vector_uneq<mode>"
@@ -595,8 +609,8 @@
(ge:VEC_F (match_dup 2)
(match_dup 1)))
(set (match_dup 0)
- (not:VEC_F (ior:VEC_F (match_dup 3)
- (match_dup 4))))]
+ (and:VEC_F (not:VEC_F (match_dup 3))
+ (not:VEC_F (match_dup 4))))]
"
{
operands[3] = gen_reg_rtx (<MODE>mode);
@@ -708,47 +722,18 @@
"")
-;; Vector logical instructions
-(define_expand "xor<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
-
-(define_expand "ior<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
-
-(define_expand "and<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
+;; Vector count leading zeros
+(define_expand "clz<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
-(define_expand "one_cmpl<mode>2"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
+;; Vector population count
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
-(define_expand "nor<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
-
-(define_expand "andc<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))
- (match_operand:VEC_L 1 "vlogical_operand" "")))]
- "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "")
;; Same size conversions
(define_expand "float<VEC_int><mode>2"
@@ -889,7 +874,7 @@
{
rtx reg = gen_reg_rtx (V4SFmode);
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
DONE;
})
@@ -901,7 +886,7 @@
{
rtx reg = gen_reg_rtx (V4SFmode);
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
DONE;
})
@@ -913,7 +898,7 @@
{
rtx reg = gen_reg_rtx (V4SImode);
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
DONE;
})
@@ -925,7 +910,7 @@
{
rtx reg = gen_reg_rtx (V4SImode);
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
DONE;
})
@@ -937,7 +922,7 @@
{
rtx reg = gen_reg_rtx (V4SImode);
- rs6000_expand_interleave (reg, operands[1], operands[1], true);
+ rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
DONE;
})
@@ -949,7 +934,7 @@
{
rtx reg = gen_reg_rtx (V4SImode);
- rs6000_expand_interleave (reg, operands[1], operands[1], false);
+ rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
DONE;
})
@@ -963,8 +948,19 @@
(match_operand:V16QI 3 "vlogical_operand" "")]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
- emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[2],
- operands[3]));
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ /* We have changed lvsr to lvsl, so to complete the transformation
+ of vperm for LE, we must swap the inputs. */
+ rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec (3, operands[2],
+ operands[1], operands[3]),
+ UNSPEC_VPERM);
+ emit_move_insn (operands[0], unspec);
+ }
DONE;
})
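
For reference, vperm selects bytes from the 32-byte concatenation of its two vector inputs using the low five bits of each selector byte, in big-endian numbering. A minimal scalar model (an editorial sketch, not part of the patch) shows why swapping the inputs compensates for using lvsl in place of lvsr on little endian: selector values 0-15 and 16-31 exchange which source they address.

    /* Scalar model of vperm (big-endian byte numbering).  Swapping A
       and B flips which input the selector ranges 0-15 and 16-31 hit,
       which is the adjustment the LE arm of the expander above makes.  */
    static void
    vperm_model (const unsigned char *a, const unsigned char *b,
                 const unsigned char *sel, unsigned char *out)
    {
      unsigned char cat[32];
      int i;
      for (i = 0; i < 16; i++)
        {
          cat[i] = a[i];
          cat[i + 16] = b[i];
        }
      for (i = 0; i < 16; i++)
        out[i] = cat[sel[i] & 31];
    }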
@@ -1064,7 +1060,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for arithmetic shift left on each vector element
@@ -1072,7 +1068,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for logical shift right on each vector element
@@ -1080,7 +1076,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for arithmetic shift right on each vector element
@@ -1088,7 +1084,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Vector reduction expanders for VSX
diff --git a/gcc-4.8/gcc/config/rs6000/vsx.md b/gcc-4.8/gcc/config/rs6000/vsx.md
index 3fafd9b27..5f5e4a3b8 100644
--- a/gcc-4.8/gcc/config/rs6000/vsx.md
+++ b/gcc-4.8/gcc/config/rs6000/vsx.md
@@ -34,11 +34,20 @@
(define_mode_iterator VSX_F [V4SF V2DF])
;; Iterator for logical types supported by VSX
-(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
-(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])
+
+(define_mode_iterator VSX_M2 [V16QI
+ V8HI
+ V4SI
+ V2DI
+ V4SF
+ V2DF
+ V1TI
+ (TI "TARGET_VSX_TIMODE")])
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
@@ -48,7 +57,8 @@
(V2DF "vd2")
(V2DI "vd2")
(DF "d")
- (TI "vw4")])
+ (V1TI "vd2")
+ (TI "vd2")])
;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")
@@ -59,7 +69,8 @@
(V2DI "dp")
(DF "dp")
(SF "sp")
- (TI "sp")])
+ (V1TI "dp")
+ (TI "dp")])
;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
@@ -70,7 +81,8 @@
(V2DF "wd")
(DF "ws")
(SF "d")
- (TI "wd")])
+ (V1TI "v")
+ (TI "wt")])
;; Map the register class used for float<->int conversions
(define_mode_attr VSr2 [(V2DF "wd")
@@ -115,7 +127,7 @@
(V4SF "v")
(V2DI "v")
(V2DF "v")
- (TI "v")
+ (V1TI "v")
(DF "s")])
;; Appropriate type for add ops (and other simple FP ops)
@@ -173,7 +185,8 @@
(V2DF "vecdouble")])
;; Map the scalar mode for a vector type
-(define_mode_attr VS_scalar [(V2DF "DF")
+(define_mode_attr VS_scalar [(V1TI "TI")
+ (V2DF "DF")
(V2DI "DI")
(V4SF "SF")
(V4SI "SI")
@@ -184,7 +197,8 @@
(define_mode_attr VS_double [(V4SI "V8SI")
(V4SF "V8SF")
(V2DI "V4DI")
- (V2DF "V4DF")])
+ (V2DF "V4DF")
+ (V1TI "V2TI")])
;; Constants for creating unspecs
(define_c_enum "unspec"
@@ -192,6 +206,8 @@
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVSPDPN
+ UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
UNSPEC_VSX_CVUXWDP
UNSPEC_VSX_CVSXDSP
@@ -204,77 +220,394 @@
UNSPEC_VSX_ROUND_I
UNSPEC_VSX_ROUND_IC
UNSPEC_VSX_SLDWI
+ UNSPEC_VSX_XXSPLTW
])
;; VSX moves
-(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
+
+;; The patterns for LE permuted loads and stores come before the general
+;; VSX moves so they match first.
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (match_operand:VSX_D 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))]
+ "
{
- switch (which_alternative)
- {
- case 0:
- case 3:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stx<VSm>x %x1,%y0";
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 1:
- case 4:
- gcc_assert (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
- return "lx<VSm>x %x0,%y1";
+(define_insn_and_split "*vsx_le_perm_load_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (match_operand:VSX_W 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 2:
- case 5:
- return "xxlor %x0,%x1,%x1";
+(define_insn_and_split "*vsx_le_perm_load_v8hi"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (match_operand:V8HI 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 2)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 6:
- case 7:
- case 8:
- case 11:
- return "#";
+(define_insn_and_split "*vsx_le_perm_load_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (match_operand:V16QI 1 "memory_operand" "Z"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ [(set (match_dup 2)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 2)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0];
+}
+ "
+ [(set_attr "type" "vecload")
+ (set_attr "length" "8")])
- case 9:
- case 10:
- return "xxlxor %x0,%x0,%x0";
+(define_insn "*vsx_le_perm_store_<mode>"
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
+ (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
- case 12:
- return output_vec_const_move (operands);
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:VSX_D 0 "memory_operand" "")
+ (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 1) (const_int 0)])))]
+ "")
- case 13:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stvx %1,%y0";
+(define_insn "*vsx_le_perm_store_<mode>"
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+ (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
- case 14:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "lvx %0,%y1";
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:VSX_W 0 "memory_operand" "")
+ (match_operand:VSX_W 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 0)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (set (match_dup 1)
+ (vec_select:<MODE>
+ (match_dup 1)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "")
- default:
- gcc_unreachable ();
- }
+(define_insn "*vsx_le_perm_store_v8hi"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:V8HI 0 "memory_operand" "")
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 2)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
+
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:V8HI 0 "memory_operand" "")
+ (match_operand:V8HI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 0)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))
+ (set (match_dup 1)
+ (vec_select:V8HI
+ (match_dup 1)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "")
+
+(define_insn "*vsx_le_perm_store_v16qi"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX"
+ "#"
+ [(set_attr "type" "vecstore")
+ (set_attr "length" "12")])
+
+(define_split
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+ [(set (match_dup 2)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 2)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+{
+ operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+ : operands[1];
+})
+
+;; The post-reload split requires that we re-permute the source
+;; register in case it is still live.
+(define_split
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (match_operand:V16QI 1 "vsx_register_operand" ""))]
+ "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+ [(set (match_dup 1)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 0)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))
+ (set (match_dup 1)
+ (vec_select:V16QI
+ (match_dup 1)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "")
+
+
+(define_insn "*vsx_mov<mode>"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
-
-;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
-;; unions. However for plain data movement, slightly favor the vector loads
-(define_insn "*vsx_movti"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,?Y,?r,?r,wa,v,v,wZ")
- (match_operand:TI 1 "input_operand" "wa,Z,wa,r,Y,r,j,W,wZ,v"))]
- "VECTOR_MEM_VSX_P (TImode)
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
+
+;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
+;; use of TImode is for unions. However for plain data movement, slightly
+;; favor the vector loads
+(define_insn "*vsx_movti_64bit"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
+ && (register_operand (operands[0], TImode)
+ || register_operand (operands[1], TImode))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
+
+(define_insn "*vsx_movti_32bit"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
+ (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
+ "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
@@ -290,27 +623,45 @@
return "xxlor %x0,%x1,%x1";
case 3:
+ return "xxlxor %x0,%x0,%x0";
+
case 4:
+ return output_vec_const_move (operands);
+
case 5:
- return "#";
+ return "stvx %1,%y0";
case 6:
- return "xxlxor %x0,%x0,%x0";
+ return "lvx %0,%y1";
case 7:
- return output_vec_const_move (operands);
+ if (TARGET_STRING)
+ return \"stswi %1,%P0,16\";
case 8:
- return "stvx %1,%y0";
+ return \"#\";
case 9:
- return "lvx %0,%y1";
+ /* If the address is not used in the output, we can use lswi. Otherwise,
+ fall through to generating four loads. */
+ if (TARGET_STRING
+ && ! reg_overlap_mentioned_p (operands[0], operands[1]))
+ return \"lswi %0,%P1,16\";
+ /* ... fall through ... */
+ case 10:
+ case 11:
+ case 12:
+ return \"#\";
default:
gcc_unreachable ();
}
}
- [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
+ (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
+ (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
+ (const_string "always")
+ (const_string "conditional")))])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
@@ -320,46 +671,48 @@
"")
(define_expand "vsx_store_<mode>"
- [(set (match_operand:VEC_M 0 "memory_operand" "")
- (match_operand:VEC_M 1 "vsx_register_operand" ""))]
+ [(set (match_operand:VSX_M 0 "memory_operand" "")
+ (match_operand:VSX_M 1 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
"")
-;; VSX scalar and vector floating point arithmetic instructions
+;; VSX vector floating point arithmetic instructions. The VSX scalar
+;; instructions are now combined with the insn for the traditional floating
+;; point unit.
(define_insn "*vsx_add<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (plus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>add<VSs> %x0,%x1,%x2"
+ "xvadd<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_sub<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (minus:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>sub<VSs> %x0,%x1,%x2"
+ "xvsub<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_mul<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>mul<VSs> %x0,%x1,%x2"
- [(set_attr "type" "<VStype_mul>")
+ "xvmul<VSs> %x0,%x1,%x2"
+ [(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_mul>")])
(define_insn "*vsx_div<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>div<VSs> %x0,%x1,%x2"
+ "xvdiv<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_div>")
(set_attr "fp_type" "<VSfptype_div>")])
@@ -402,94 +755,72 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_fre<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRES))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>re<VSs> %x0,%x1"
+ "xvre<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_neg<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (neg:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>neg<VSs> %x0,%x1"
+ "xvneg<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_abs<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (abs:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>abs<VSs> %x0,%x1"
+ "xvabs<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_nabs<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (neg:VSX_B
- (abs:VSX_B
- (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa"))))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (neg:VSX_F
+ (abs:VSX_F
+ (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>nabs<VSs> %x0,%x1"
+ "xvnabs<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_smax<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (smax:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>max<VSs> %x0,%x1,%x2"
+ "xvmax<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "*vsx_smin<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (smin:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>min<VSs> %x0,%x1,%x2"
+ "xvmin<VSs> %x0,%x1,%x2"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Special VSX version of smin/smax for single precision floating point. Since
-;; both numbers are rounded to single precision, we can just use the DP version
-;; of the instruction.
-
-(define_insn "*vsx_smaxsf3"
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
- (smax:SF (match_operand:SF 1 "vsx_register_operand" "f")
- (match_operand:SF 2 "vsx_register_operand" "f")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "xsmaxdp %x0,%x1,%x2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
-(define_insn "*vsx_sminsf3"
- [(set (match_operand:SF 0 "vsx_register_operand" "=f")
- (smin:SF (match_operand:SF 1 "vsx_register_operand" "f")
- (match_operand:SF 2 "vsx_register_operand" "f")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "xsmindp %x0,%x1,%x2"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_addsub_d")])
-
(define_insn "*vsx_sqrt<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (sqrt:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>sqrt<VSs> %x0,%x1"
+ "xvsqrt<VSs> %x0,%x1"
[(set_attr "type" "<VStype_sqrt>")
(set_attr "fp_type" "<VSfptype_sqrt>")])
(define_insn "*vsx_rsqrte<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_RSQRT))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>rsqrte<VSs> %x0,%x1"
+ "xvrsqrte<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -528,26 +859,10 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Fused vector multiply/add instructions Support the classical DF versions of
-;; fma, which allows the target to be a separate register from the 3 inputs.
-;; Under VSX, the target must be either the addend or the first multiply.
-;; Where we can, also do the same for the Altivec V4SF fmas.
-
-(define_insn "*vsx_fmadf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsmaddadp %x0,%x1,%x2
- xsmaddmdp %x0,%x1,%x3
- xsmaddadp %x0,%x1,%x2
- xsmaddmdp %x0,%x1,%x3
- fmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
+;; Fused vector multiply/add instructions. Support the classical Altivec
+;; versions of fma, which allow the target to be a separate register from the
+;; 3 inputs. Under VSX, the target must be either the addend or the first
+;; multiply.
(define_insn "*vsx_fmav4sf4"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
@@ -578,23 +893,6 @@
xvmaddmdp %x0,%x1,%x3"
[(set_attr "type" "vecdouble")])
-(define_insn "*vsx_fmsdf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (neg:DF
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsmsubadp %x0,%x1,%x2
- xsmsubmdp %x0,%x1,%x3
- xsmsubadp %x0,%x1,%x2
- xsmsubmdp %x0,%x1,%x3
- fmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_fms<mode>4"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(fma:VSX_F
@@ -604,29 +902,12 @@
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3
- x<VSv>msuba<VSs> %x0,%x1,%x2
- x<VSv>msubm<VSs> %x0,%x1,%x3"
+ xvmsuba<VSs> %x0,%x1,%x2
+ xvmsubm<VSs> %x0,%x1,%x3
+ xvmsuba<VSs> %x0,%x1,%x2
+ xvmsubm<VSs> %x0,%x1,%x3"
[(set_attr "type" "<VStype_mul>")])
-(define_insn "*vsx_nfmadf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (neg:DF
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d"))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsnmaddadp %x0,%x1,%x2
- xsnmaddmdp %x0,%x1,%x3
- xsnmaddadp %x0,%x1,%x2
- xsnmaddmdp %x0,%x1,%x3
- fnmadd %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_nfma<mode>4"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
(neg:VSX_F
@@ -636,31 +917,13 @@
(match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"@
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3
- x<VSv>nmadda<VSs> %x0,%x1,%x2
- x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+ xvnmadda<VSs> %x0,%x1,%x2
+ xvnmaddm<VSs> %x0,%x1,%x3
+ xvnmadda<VSs> %x0,%x1,%x2
+ xvnmaddm<VSs> %x0,%x1,%x3"
[(set_attr "type" "<VStype_mul>")
(set_attr "fp_type" "<VSfptype_mul>")])
-(define_insn "*vsx_nfmsdf4"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,d")
- (neg:DF
- (fma:DF
- (match_operand:DF 1 "vsx_register_operand" "%ws,ws,wa,wa,d")
- (match_operand:DF 2 "vsx_register_operand" "ws,0,wa,0,d")
- (neg:DF
- (match_operand:DF 3 "vsx_register_operand" "0,ws,0,wa,d")))))]
- "VECTOR_UNIT_VSX_P (DFmode)"
- "@
- xsnmsubadp %x0,%x1,%x2
- xsnmsubmdp %x0,%x1,%x3
- xsnmsubadp %x0,%x1,%x2
- xsnmsubmdp %x0,%x1,%x3
- fnmsub %0,%1,%2,%3"
- [(set_attr "type" "fp")
- (set_attr "fp_type" "fp_maddsub_d")])
-
(define_insn "*vsx_nfmsv4sf4"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
(neg:V4SF
@@ -722,16 +985,6 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Floating point scalar compare
-(define_insn "*vsx_cmpdf_internal1"
- [(set (match_operand:CCFP 0 "cc_reg_operand" "=y,?y")
- (compare:CCFP (match_operand:DF 1 "gpc_reg_operand" "ws,wa")
- (match_operand:DF 2 "gpc_reg_operand" "ws,wa")))]
- "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
- && VECTOR_UNIT_VSX_P (DFmode)"
- "xscmpudp %0,%x1,%x2"
- [(set_attr "type" "fpcompare")])
-
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
@@ -798,13 +1051,13 @@
;; Copy sign
(define_insn "vsx_copysign<mode>3"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B
- [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F
+ [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_COPYSIGN))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>cpsgn<VSs> %x0,%x2,%x1"
+ "xvcpsgn<VSs> %x0,%x2,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -865,10 +1118,10 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_btrunc<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (fix:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")))]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>iz %x0,%x1"
+ "xvr<VSs>iz %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -882,20 +1135,20 @@
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_floor<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRIM))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>im %x0,%x1"
+ "xvr<VSs>im %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
(define_insn "vsx_ceil<mode>2"
- [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
- (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
+ [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
+ (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")]
UNSPEC_FRIP))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
- "x<VSv>r<VSs>ip %x0,%x1"
+ "xvr<VSs>ip %x0,%x1"
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_simple>")])
@@ -942,6 +1195,40 @@
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; ISA 2.07 xscvdpspn/xscvspdpn conversions that do not raise an error on signalling NaNs
+(define_insn "vsx_xscvdpspn"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Used by direct move to move a SFmode value from GPR to VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1027,73 +1314,21 @@
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Logical and permute operations
-(define_insn "*vsx_and<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxland %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_ior<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_xor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (xor:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlxor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_one_cmpl<mode>2"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlnor %x0,%x1,%x1"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_nor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (ior:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlnor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-(define_insn "*vsx_andc<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (not:VSX_L
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxlandc %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
-
-
;; Permute operations
;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
- (unspec:VSX_D
- [(match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")]
- UNSPEC_VSX_CONCAT))]
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa")
+ (vec_concat:VSX_D
+ (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxpermdi %x0,%x1,%x2,0"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxpermdi %x0,%x1,%x2,0";
+ else
+ return "xxpermdi %x0,%x2,%x1,0";
+}
[(set_attr "type" "vecperm")])
;; Special purpose concat using xxpermdi to glue two single precision values
@@ -1106,9 +1341,176 @@
(match_operand:SF 2 "vsx_register_operand" "f,f")]
UNSPEC_VSX_CONCAT))]
"VECTOR_MEM_VSX_P (V2DFmode)"
- "xxpermdi %x0,%x1,%x2,0"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxpermdi %x0,%x1,%x2,0";
+ else
+ return "xxpermdi %x0,%x2,%x1,0";
+}
+ [(set_attr "type" "vecperm")])
+
+;; xxpermdi for little endian loads and stores. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_xxpermdi2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_xxpermdi4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxpermdi %x0,%x1,%x1,2"
[(set_attr "type" "vecperm")])
+(define_insn "*vsx_xxpermdi8_le_V8HI"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_xxpermdi16_le_V16QI"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "xxpermdi %x0,%x1,%x1,2"
+ [(set_attr "type" "vecperm")])
+
+;; lxvd2x for little endian loads. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_lxvd2x2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "memory_operand" "Z")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "memory_operand" "Z")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x8_le_V8HI"
+ [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "memory_operand" "Z")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+(define_insn "*vsx_lxvd2x16_le_V16QI"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "memory_operand" "Z")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "lxvd2x %x0,%y1"
+ [(set_attr "type" "vecload")])
+
+;; stxvd2x for little endian stores. We need several of
+;; these since the form of the PARALLEL differs by mode.
+(define_insn "*vsx_stxvd2x2_le_<mode>"
+ [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
+ (vec_select:VSX_D
+ (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x4_le_<mode>"
+ [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+ (vec_select:VSX_W
+ (match_operand:VSX_W 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x8_le_V8HI"
+ [(set (match_operand:V8HI 0 "memory_operand" "=Z")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+(define_insn "*vsx_stxvd2x16_le_V16QI"
+ [(set (match_operand:V16QI 0 "memory_operand" "=Z")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "vsx_register_operand" "wa")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)
+ (const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+ "stxvd2x %x1,%y0"
+ [(set_attr "type" "vecstore")])
+
+;; Convert a TImode value into V1TImode
+(define_expand "vsx_set_v1ti"
+ [(match_operand:V1TI 0 "nonimmediate_operand" "")
+ (match_operand:V1TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "input_operand" "")
+ (match_operand:QI 3 "u5bit_cint_operand" "")]
+ "VECTOR_MEM_VSX_P (V1TImode)"
+{
+ if (operands[3] != const0_rtx)
+ gcc_unreachable ();
+
+ emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
+ DONE;
+})
+
;; Set the element of a V2DI/V2DF mode
(define_insn "vsx_set_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
@@ -1118,9 +1520,10 @@
UNSPEC_VSX_SET))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- if (INTVAL (operands[3]) == 0)
+ int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
+ if (INTVAL (operands[3]) == idx_first)
return \"xxpermdi %x0,%x2,%x1,1\";
- else if (INTVAL (operands[3]) == 1)
+ else if (INTVAL (operands[3]) == 1 - idx_first)
return \"xxpermdi %x0,%x1,%x2,0\";
else
gcc_unreachable ();
@@ -1135,8 +1538,12 @@
[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
+ int fldDM;
gcc_assert (UINTVAL (operands[2]) <= 1);
- operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
+ fldDM = INTVAL (operands[2]) << 1;
+ if (!BYTES_BIG_ENDIAN)
+ fldDM = 3 - fldDM;
+ operands[3] = GEN_INT (fldDM);
return \"xxpermdi %x0,%x1,%x1,%3\";
}
[(set_attr "type" "vecperm")])
@@ -1149,7 +1556,26 @@
(parallel [(const_int 0)])))]
"VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
"lxsd%U1x %x0,%y1"
- [(set_attr "type" "fpload")
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload")))
+ (set_attr "length" "4")])
+
+;; Optimize extracting element 1 from memory for little endian
+(define_insn "*vsx_extract_<mode>_one_le"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+ (parallel [(const_int 1)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !WORDS_BIG_ENDIAN"
+ "lxsd%U1x %x0,%y1"
+ [(set (attr "type")
+ (if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "fpload_ux")
+ (const_string "fpload")))
(set_attr "length" "4")])
;; Extract a SF element from V4SF
@@ -1172,7 +1598,7 @@
rtx op2 = operands[2];
rtx op3 = operands[3];
rtx tmp;
- HOST_WIDE_INT ele = INTVAL (op2);
+ HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
if (ele == 0)
tmp = op1;
@@ -1213,11 +1639,22 @@
if (<MODE>mode != V2DImode)
{
target = gen_lowpart (V2DImode, target);
- op0 = gen_lowpart (V2DImode, target);
- op1 = gen_lowpart (V2DImode, target);
+ op0 = gen_lowpart (V2DImode, op0);
+ op1 = gen_lowpart (V2DImode, op1);
}
}
+ /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
+ transformation we don't want; it is necessary for
+ rs6000_expand_vec_perm_const_1 but not for this use. So we
+ prepare for that by reversing the transformation here. */
+ if (BYTES_BIG_ENDIAN)
emit_insn (gen (target, op0, op1, perm0, perm1));
+ else
+ {
+ rtx p0 = GEN_INT (3 - INTVAL (perm1));
+ rtx p1 = GEN_INT (3 - INTVAL (perm0));
+ emit_insn (gen (target, op1, op0, p0, p1));
+ }
DONE;
})
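
The reversal works because selecting element S from the big-endian concatenation {op0, op1} is the same as selecting element 3-S from the concatenation of the swapped operands, and the two result positions exchange as well, hence p0 = 3 - perm1 and p1 = 3 - perm0. A scalar model of the underlying two-doubleword permute, as a sketch:

    /* Model: OUT[i] = element perm_i of the concatenation {op0,op1},
       big-endian numbering.  Under LE, swapping op0/op1 and using
       3-perm1, 3-perm0 (as the expander above does) selects the same
       data.  */
    typedef unsigned long long u64;
    static void
    dw_perm (u64 out[2], const u64 op0[2], const u64 op1[2],
             int perm0, int perm1)
    {
      const u64 *cat[2] = { op0, op1 };
      out[0] = cat[perm0 >> 1][perm0 & 1];
      out[1] = cat[perm1 >> 1][perm1 & 1];
    }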
@@ -1231,9 +1668,32 @@
(match_operand 4 "const_2_to_3_operand" "")])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2);
+ int op3, op4, mask;
+
+ /* For little endian, swap operands and invert/swap selectors
+ to get the correct xxpermdi. The operand swap sets up the
+ inputs as a little endian array. The selectors are swapped
+ because they are defined to use big endian ordering. The
+ selectors are inverted to get the correct doublewords for
+ little endian ordering. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ op3 = INTVAL (operands[3]);
+ op4 = INTVAL (operands[4]);
+ }
+ else
+ {
+ op3 = 3 - INTVAL (operands[4]);
+ op4 = 3 - INTVAL (operands[3]);
+ }
+
+ mask = (op3 << 1) | (op4 - 2);
operands[3] = GEN_INT (mask);
+
+ if (BYTES_BIG_ENDIAN)
return "xxpermdi %x0,%x1,%x2,%3";
+ else
+ return "xxpermdi %x0,%x2,%x1,%3";
}
[(set_attr "type" "vecperm")])
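
The same inversion drives the immediate computation: the selectors run 0..1 for the first input and 2..3 for the second, and on little endian each is replaced by 3 minus the other before being encoded into the 2-bit DM field. A small helper, written here only as an editorial sketch of the logic in the pattern above:

    /* Sketch of the mask computation: SEL3 in 0..1, SEL4 in 2..3.  */
    static int
    xxpermdi_mask (int sel3, int sel4, int big_endian)
    {
      int op3 = big_endian ? sel3 : 3 - sel4;
      int op4 = big_endian ? sel4 : 3 - sel3;
      return (op3 << 1) | (op4 - 2);
    }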
@@ -1252,24 +1712,56 @@
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
- (vec_select:VSX_D
- (vec_concat:<VS_double>
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
- (parallel [(const_int 1) (const_int 3)])))]
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
(define_expand "vsx_mergeh_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "")
- (vec_select:VSX_D
- (vec_concat:<VS_double>
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" ""))
- (parallel [(const_int 0) (const_int 2)])))]
+ [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
+ (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ rtvec v;
+ rtx x;
+
+ /* Special handling for LE with -maltivec=be. */
+ if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
+ {
+ v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
+ }
+ else
+ {
+ v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
+ x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
+ }
+
+ x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
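
In big-endian terms, mergeh gathers element 0 of each input and mergel gathers element 1; the expanders above merely choose the selectors and operand order that realize those semantics for the current endianness and -maltivec=be setting. A scalar statement of the BE semantics, as a sketch:

    /* BE semantics of the V2DF/V2DI merge builtins implemented by the
       expanders above.  */
    typedef unsigned long long u64;
    static void
    mergeh_model (u64 out[2], const u64 a[2], const u64 b[2])
    {
      out[0] = a[0];   /* high (even) elements */
      out[1] = b[0];
    }
    static void
    mergel_model (u64 out[2], const u64 a[2], const u64 b[2])
    {
      out[0] = a[1];   /* low (odd) elements */
      out[1] = b[1];
    }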
;; V2DF/V2DI splat
(define_insn "vsx_splat_<mode>"
@@ -1295,6 +1787,20 @@
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ if (!BYTES_BIG_ENDIAN)
+ operands[2] = GEN_INT (3 - INTVAL (operands[2]));
+
+ return "xxspltw %x0,%x1,%2";
+}
+ [(set_attr "type" "vecperm")])
+
+(define_insn "vsx_xxspltw_<mode>_direct"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
+ (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
+ UNSPEC_VSX_XXSPLTW))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
"xxspltw %x0,%x1,%2"
[(set_attr "type" "vecperm")])
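
xxspltw's immediate indexes words in big-endian order, so the first pattern rewrites a user-visible LE element number N as hardware word 3 - N, while the _direct form is for internal callers that already hold a hardware index. A sketch of the indexing rule:

    /* Sketch: splat one word of IN (stored in hardware/BE word order)
       across OUT.  The element a user calls N under LE numbering is
       hardware word 3-N, the adjustment made by the pattern above.  */
    typedef unsigned int u32;
    static void
    xxspltw_model (u32 out[4], const u32 in[4], int n, int big_endian)
    {
      int hw = big_endian ? n : 3 - n;
      int i;
      for (i = 0; i < 4; i++)
        out[i] = in[hw];
    }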
@@ -1308,7 +1814,12 @@
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxmrghw %x0,%x1,%x2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxmrghw %x0,%x1,%x2";
+ else
+ return "xxmrglw %x0,%x2,%x1";
+}
[(set_attr "type" "vecperm")])
(define_insn "vsx_xxmrglw_<mode>"
@@ -1320,7 +1831,12 @@
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxmrglw %x0,%x1,%x2"
+{
+ if (BYTES_BIG_ENDIAN)
+ return "xxmrglw %x0,%x1,%x2";
+ else
+ return "xxmrghw %x0,%x2,%x1";
+}
[(set_attr "type" "vecperm")])
;; Shift left double by word immediate
@@ -1483,3 +1999,27 @@
}"
[(set_attr "length" "20")
(set_attr "type" "veccomplex")])
+
+
+;; Power8 Vector fusion. The fused ops must be physically adjacent.
+(define_peephole
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "short_cint_operand" ""))
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
+ (mem:VSX_M2 (plus:P (match_dup 0)
+ (match_operand:P 3 "int_reg_operand" ""))))]
+ "TARGET_VSX && TARGET_P8_FUSION"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ [(set_attr "length" "8")
+ (set_attr "type" "vecload")])
+
+(define_peephole
+ [(set (match_operand:P 0 "base_reg_operand" "")
+ (match_operand:P 1 "short_cint_operand" ""))
+ (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
+ (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
+ (match_dup 0))))]
+ "TARGET_VSX && TARGET_P8_FUSION"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ [(set_attr "length" "8")
+ (set_attr "type" "vecload")])
diff --git a/gcc-4.8/gcc/config/s390/htmxlintrin.h b/gcc-4.8/gcc/config/s390/htmxlintrin.h
index bb142195b..952b40975 100644
--- a/gcc-4.8/gcc/config/s390/htmxlintrin.h
+++ b/gcc-4.8/gcc/config/s390/htmxlintrin.h
@@ -33,13 +33,20 @@ extern "C" {
the IBM XL compiler. For documentation please see the "z/OS XL
C/C++ Programming Guide" publicly available on the web. */
-extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+/* FIXME: __TM_simple_begin and __TM_begin should be marked
+ __always_inline__ as well but this currently produces an error
+ since the tbegin builtins are "returns_twice" and setjmp_call_p
+ (calls.c) therefore identifies the functions as calling setjmp.
+ The tree inliner currently refuses to inline functions calling
+ setjmp. */
+
+long
__TM_simple_begin ()
{
return __builtin_tbegin_nofloat (0);
}
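
For context, a typical use of these XL-style wrappers follows. This is a hedged sketch that assumes the remainder of htmxlintrin.h (notably __TM_end and the _HTM_TBEGIN_STARTED condition-code value), none of which this patch changes:

    /* Usage sketch: try a transaction, fall back to an atomic op.  */
    #include <htmxlintrin.h>

    long counter;

    void
    bump (void)
    {
      if (__TM_simple_begin () == _HTM_TBEGIN_STARTED)
        {
          counter++;                          /* transactional update */
          __TM_end ();
        }
      else
        __sync_fetch_and_add (&counter, 1);   /* fallback path */
    }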
-extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+long
__TM_begin (void* const tdb)
{
return __builtin_tbegin_nofloat (tdb);
@@ -78,7 +85,7 @@ __TM_nesting_depth (void* const tdb_ptr)
if (depth != 0)
return depth;
- if (tdb->format == 0)
+ if (tdb->format != 1)
return 0;
return tdb->nesting_depth;
}
@@ -90,7 +97,7 @@ __TM_is_user_abort (void* const tdb_ptr)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- if (tdb->format == 0)
+ if (tdb->format != 1)
return 0;
return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);
@@ -101,7 +108,7 @@ __TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- if (tdb->format == 0)
+ if (tdb->format != 1)
return 0;
if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)
@@ -117,7 +124,7 @@ __TM_is_illegal (void* const tdb_ptr)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- return (tdb->format == 0
+ return (tdb->format == 1
&& (tdb->abort_code == 4 /* unfiltered program interruption */
|| tdb->abort_code == 11 /* restricted instruction */));
}
@@ -127,7 +134,7 @@ __TM_is_footprint_exceeded (void* const tdb_ptr)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- return (tdb->format == 0
+ return (tdb->format == 1
&& (tdb->abort_code == 7 /* fetch overflow */
|| tdb->abort_code == 8 /* store overflow */));
}
@@ -137,7 +144,7 @@ __TM_is_nested_too_deep (void* const tdb_ptr)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */
+ return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */
}
extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -145,7 +152,7 @@ __TM_is_conflict (void* const tdb_ptr)
{
struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
- return (tdb->format == 0
+ return (tdb->format == 1
&& (tdb->abort_code == 9 /* fetch conflict */
|| tdb->abort_code == 10 /* store conflict */));
}
diff --git a/gcc-4.8/gcc/config/s390/s390-protos.h b/gcc-4.8/gcc/config/s390/s390-protos.h
index 67283df45..7b43ed01b 100644
--- a/gcc-4.8/gcc/config/s390/s390-protos.h
+++ b/gcc-4.8/gcc/config/s390/s390-protos.h
@@ -110,5 +110,6 @@ extern bool s390_decompose_shift_count (rtx, rtx *, HOST_WIDE_INT *);
extern int s390_branch_condition_mask (rtx);
extern int s390_compare_and_branch_condition_mask (rtx);
extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT);
+extern void s390_asm_output_function_label (FILE *, const char *, tree);
#endif /* RTX_CODE */
diff --git a/gcc-4.8/gcc/config/s390/s390.c b/gcc-4.8/gcc/config/s390/s390.c
index 273022778..836a3d4fd 100644
--- a/gcc-4.8/gcc/config/s390/s390.c
+++ b/gcc-4.8/gcc/config/s390/s390.c
@@ -407,6 +407,65 @@ struct GTY(()) machine_function
bytes on a z10 (or higher) CPU. */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
+static const int s390_hotpatch_trampoline_halfwords_default = 12;
+static const int s390_hotpatch_trampoline_halfwords_max = 1000000;
+static int s390_hotpatch_trampoline_halfwords = -1;
+
+/* Return the argument of the given hotpatch attribute or the default value if
+ no argument is present. */
+
+static inline int
+get_hotpatch_attribute (tree hotpatch_attr)
+{
+ const_tree args;
+
+ args = TREE_VALUE (hotpatch_attr);
+
+  return (args)
+    ? TREE_INT_CST_LOW (TREE_VALUE (args))
+    : s390_hotpatch_trampoline_halfwords_default;
+}
+
+/* Check whether the hotpatch attribute is applied to a function and, if it has
+ an argument, the argument is valid. */
+
+static tree
+s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (args)
+ {
+ tree expr = TREE_VALUE (args);
+
+ if (TREE_CODE (expr) != INTEGER_CST
+ || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
+ || TREE_INT_CST_HIGH (expr) != 0
+ || TREE_INT_CST_LOW (expr) > (unsigned int)
+ s390_hotpatch_trampoline_halfwords_max)
+ {
+ error ("requested %qE attribute is not a non-negative integer"
+ " constant or too large (max. %d)", name,
+ s390_hotpatch_trampoline_halfwords_max);
+ *no_add_attrs = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+static const struct attribute_spec s390_attribute_table[] = {
+ { "hotpatch", 0, 1, true, false, false, s390_handle_hotpatch_attribute, false
+ },
+ /* End element. */
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
+
/* Return the alignment for LABEL. We default to the -falign-labels
value except for the literal pool base label. */
int
@@ -883,7 +942,8 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
{
/* For CCRAWmode put the required cc mask into the second
operand. */
- if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode)
+ if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
+ && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
*op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
*op0 = XVECEXP (*op0, 0, 0);
*code = new_code;
@@ -1594,6 +1654,46 @@ s390_init_machine_status (void)
static void
s390_option_override (void)
{
+ unsigned int i;
+ cl_deferred_option *opt;
+ vec<cl_deferred_option> *v =
+ (vec<cl_deferred_option> *) s390_deferred_options;
+
+ if (v)
+ FOR_EACH_VEC_ELT (*v, i, opt)
+ {
+ switch (opt->opt_index)
+ {
+ case OPT_mhotpatch:
+ s390_hotpatch_trampoline_halfwords = (opt->value) ?
+ s390_hotpatch_trampoline_halfwords_default : -1;
+ break;
+ case OPT_mhotpatch_:
+ {
+ int val;
+
+ val = integral_argument (opt->arg);
+ if (val == -1)
+ {
/* The argument is not a plain number.  */
+ error ("argument to %qs should be a non-negative integer",
+ "-mhotpatch=");
+ break;
+ }
+ else if (val > s390_hotpatch_trampoline_halfwords_max)
+ {
+ error ("argument to %qs is too large (max. %d)",
+ "-mhotpatch=", s390_hotpatch_trampoline_halfwords_max);
+ break;
+ }
+ s390_hotpatch_trampoline_halfwords = val;
+ break;
+ }
+ default:
+ gcc_unreachable ();
+ }
+ }
+
/* Set up function hooks. */
init_machine_status = s390_init_machine_status;
@@ -3015,15 +3115,22 @@ s390_preferred_reload_class (rtx op, reg_class_t rclass)
prefer ADDR_REGS. If 'class' is not a superset
of ADDR_REGS, e.g. FP_REGS, reject this reload. */
case CONST:
- /* A larl operand with odd addend will get fixed via secondary
- reload. So don't request it to be pushed into literal
- pool. */
+ /* Symrefs cannot be pushed into the literal pool with -fPIC
+ so we *MUST NOT* return NO_REGS for these cases
+ (s390_cannot_force_const_mem will return true).
+
+ On the other hand we MUST return NO_REGS for symrefs with
+ invalid addend which might have been pushed to the literal
+ pool (no -fPIC). Usually we would expect them to be
+ handled via secondary reload but this does not happen if
+ they are used as literal pool slot replacement in reload
+ inheritance (see emit_input_reload_insns). */
if (TARGET_CPU_ZARCH
&& GET_CODE (XEXP (op, 0)) == PLUS
&& GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
&& GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
{
- if (reg_class_subset_p (ADDR_REGS, rclass))
+ if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
return ADDR_REGS;
else
return NO_REGS;
@@ -4683,7 +4790,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
int smode_bsize, mode_bsize;
rtx op, clobber;
- if (bitsize + bitpos > GET_MODE_SIZE (mode))
+ if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
return false;
/* Generate INSERT IMMEDIATE (IILL et al). */
@@ -5311,6 +5418,101 @@ get_some_local_dynamic_name (void)
gcc_unreachable ();
}
+/* Returns -1 if the function should not be made hotpatchable. Otherwise it
+ returns a number >= 0 that is the desired size of the hotpatch trampoline
+ in halfwords. */
+
+static int
+s390_function_num_hotpatch_trampoline_halfwords (tree decl, bool do_warn)
+{
+ tree attr;
+
+ if (DECL_DECLARED_INLINE_P (decl)
+ || DECL_ARTIFICIAL (decl)
+ || MAIN_NAME_P (DECL_NAME (decl)))
+ {
+ /* - Explicitly inlined functions cannot be hotpatched.
+ - Artificial functions need not be hotpatched.
+ - Making the main function hotpatchable is useless. */
+ return -1;
+ }
+ attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
+ if (attr || s390_hotpatch_trampoline_halfwords >= 0)
+ {
+ if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (decl)))
+ {
+ if (do_warn)
+ warning (OPT_Wattributes, "function %qE with the %qs attribute"
+ " is not hotpatchable", DECL_NAME (decl), "always_inline");
+ return -1;
+ }
+ else
+ {
+ return (attr) ?
+ get_hotpatch_attribute (attr) : s390_hotpatch_trampoline_halfwords;
+ }
+ }
+
+ return -1;
+}
+
+/* Hook to determine if one function can safely inline another. */
+
+static bool
+s390_can_inline_p (tree caller, tree callee)
+{
+ if (s390_function_num_hotpatch_trampoline_halfwords (callee, false) >= 0)
+ return false;
+
+ return default_target_can_inline_p (caller, callee);
+}
+
+/* Write the extra assembler code needed to declare a function properly. */
+
+void
+s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
+ tree decl)
+{
+ int hotpatch_trampoline_halfwords = -1;
+
+ if (decl)
+ {
+ hotpatch_trampoline_halfwords =
+ s390_function_num_hotpatch_trampoline_halfwords (decl, true);
+ if (hotpatch_trampoline_halfwords >= 0
+ && decl_function_context (decl) != NULL_TREE)
+ {
+ warning_at (DECL_SOURCE_LOCATION (decl), OPT_mhotpatch,
+ "hotpatching is not compatible with nested functions");
+ hotpatch_trampoline_halfwords = -1;
+ }
+ }
+
+ if (hotpatch_trampoline_halfwords > 0)
+ {
+ int i;
+
+ /* Add a trampoline code area before the function label and initialize it
+ with two-byte nop instructions. This area can be overwritten with code
+ that jumps to a patched version of the function. */
+ for (i = 0; i < hotpatch_trampoline_halfwords; i++)
+ asm_fprintf (asm_out_file, "\tnopr\t%%r7\n");
+ /* Note: The function label must be aligned so that (a) the bytes of the
+ following nop do not cross a cacheline boundary, and (b) a jump address
(eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
+ stored directly before the label without crossing a cacheline
+ boundary. All this is necessary to make sure the trampoline code can
+ be changed atomically. */
+ }
+
+ ASM_OUTPUT_LABEL (asm_out_file, fname);
+
+ /* Output a four-byte nop if hotpatching is enabled. This can be overwritten
+ atomically with a relative backwards jump to the trampoline area. */
+ if (hotpatch_trampoline_halfwords >= 0)
+ asm_fprintf (asm_out_file, "\tnop\t0\n");
+}
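With the default of 12 halfwords, the assembly emitted around a function label therefore looks roughly like this (label name hypothetical):

    nopr    %r7      # two-byte nop; emitted 12 times: trampoline area
    ...
    fn:
    nop     0        # four-byte nop; patchable with a backward jump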
+
/* Output machine-dependent UNSPECs occurring in address constant X
in assembler syntax to stdio stream FILE. Returns true if the
constant X could be recognized, false otherwise. */
@@ -7846,6 +8048,9 @@ s390_optimize_nonescaping_tx (void)
{
bb = BASIC_BLOCK (bb_index);
+ if (!bb)
+ continue;
+
FOR_BB_INSNS (bb, insn)
{
rtx ite, cc, pat, target;
@@ -7959,7 +8164,10 @@ s390_optimize_nonescaping_tx (void)
if (!result)
return;
- PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0);
+ PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ XVECEXP (PATTERN (tbegin_insn), 0, 0),
+ XVECEXP (PATTERN (tbegin_insn), 0, 1)));
INSN_CODE (tbegin_insn) = -1;
df_insn_rescan (tbegin_insn);
@@ -9568,61 +9776,47 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
void
s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
{
- const int CC0 = 1 << 3;
- const int CC1 = 1 << 2;
- const int CC3 = 1 << 0;
- rtx abort_label = gen_label_rtx ();
- rtx leave_label = gen_label_rtx ();
+ rtx retry_plus_two = gen_reg_rtx (SImode);
rtx retry_reg = gen_reg_rtx (SImode);
rtx retry_label = NULL_RTX;
- rtx jump;
- rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
if (retry != NULL_RTX)
{
emit_move_insn (retry_reg, retry);
+ emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
+ emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
retry_label = gen_label_rtx ();
emit_label (retry_label);
}
if (clobber_fprs_p)
- emit_insn (gen_tbegin_1 (tdb,
- gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
+ emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
else
- emit_insn (gen_tbegin_nofloat_1 (tdb,
- gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
-
- jump = s390_emit_jump (abort_label,
- gen_rtx_NE (VOIDmode,
- gen_rtx_REG (CCRAWmode, CC_REGNUM),
- gen_rtx_CONST_INT (VOIDmode, CC0)));
-
- JUMP_LABEL (jump) = abort_label;
- LABEL_NUSES (abort_label) = 1;
- add_reg_note (jump, REG_BR_PROB, very_unlikely);
+ emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
+ tdb));
- /* Initialize CC return value. */
- emit_move_insn (dest, const0_rtx);
-
- s390_emit_jump (leave_label, NULL_RTX);
- LABEL_NUSES (leave_label) = 1;
- emit_barrier ();
-
- /* Abort handler code. */
-
- emit_label (abort_label);
+ emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, gen_rtx_REG (CCRAWmode,
+ CC_REGNUM)),
+ UNSPEC_CC_TO_INT));
if (retry != NULL_RTX)
{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC3 = 1 << 0;
+ rtx jump;
rtx count = gen_reg_rtx (SImode);
+ rtx leave_label = gen_label_rtx ();
+
+ /* Exit for success and permanent failures. */
jump = s390_emit_jump (leave_label,
gen_rtx_EQ (VOIDmode,
gen_rtx_REG (CCRAWmode, CC_REGNUM),
- gen_rtx_CONST_INT (VOIDmode, CC1 | CC3)));
- LABEL_NUSES (leave_label) = 2;
- add_reg_note (jump, REG_BR_PROB, very_unlikely);
+ gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
+ LABEL_NUSES (leave_label) = 1;
/* CC2 - transient failure. Perform retry with ppa. */
- emit_move_insn (count, retry);
+ emit_move_insn (count, retry_plus_two);
emit_insn (gen_subsi3 (count, count, retry_reg));
emit_insn (gen_tx_assist (count));
jump = emit_jump_insn (gen_doloop_si64 (retry_label,
@@ -9630,13 +9824,8 @@ s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
retry_reg));
JUMP_LABEL (jump) = retry_label;
LABEL_NUSES (retry_label) = 1;
- }
-
- emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
- gen_rtvec (1, gen_rtx_REG (CCRAWmode,
- CC_REGNUM)),
- UNSPEC_CC_TO_INT));
emit_label (leave_label);
+ }
}
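In rough pseudocode, the sequence now emitted for a tbegin with a retry count is (a sketch of the generated RTL, not an exact listing):

    remaining = retry + 1;          /* retry_reg, the doloop counter */
    first     = retry + 2;          /* retry_plus_two */
  again:
    cc   = tbegin (tdb);
    dest = cc;                      /* UNSPEC_CC_TO_INT */
    if (cc != 2)                    /* anything but a transient failure */
      goto leave;
    ppa (first - remaining);        /* assist count grows per attempt */
    if (--remaining != 0)
      goto again;
  leave:

Note that dest is now set unconditionally from the CC, so the non-retry case needs no branches at all.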
/* Builtins. */
@@ -9674,6 +9863,9 @@ static void
s390_init_builtins (void)
{
tree ftype, uint64_type;
+ tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
+ NULL, NULL);
+ tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
/* void foo (void) */
ftype = build_function_type_list (void_type_node, NULL_TREE);
@@ -9684,17 +9876,17 @@ s390_init_builtins (void)
ftype = build_function_type_list (void_type_node, integer_type_node,
NULL_TREE);
add_builtin_function ("__builtin_tabort", ftype,
- S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE);
+ S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, noreturn_attr);
add_builtin_function ("__builtin_tx_assist", ftype,
S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
/* int foo (void *) */
ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
- BUILT_IN_MD, NULL, NULL_TREE);
+ BUILT_IN_MD, NULL, returns_twice_attr);
add_builtin_function ("__builtin_tbegin_nofloat", ftype,
S390_BUILTIN_TBEGIN_NOFLOAT,
- BUILT_IN_MD, NULL, NULL_TREE);
+ BUILT_IN_MD, NULL, returns_twice_attr);
/* int foo (void *, int) */
ftype = build_function_type_list (integer_type_node, ptr_type_node,
@@ -9702,11 +9894,11 @@ s390_init_builtins (void)
add_builtin_function ("__builtin_tbegin_retry", ftype,
S390_BUILTIN_TBEGIN_RETRY,
BUILT_IN_MD,
- NULL, NULL_TREE);
+ NULL, returns_twice_attr);
add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
BUILT_IN_MD,
- NULL, NULL_TREE);
+ NULL, returns_twice_attr);
/* int foo (void) */
ftype = build_function_type_list (integer_type_node, NULL_TREE);
@@ -11622,6 +11814,12 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE s390_attribute_table
+
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P s390_can_inline_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc-4.8/gcc/config/s390/s390.h b/gcc-4.8/gcc/config/s390/s390.h
index 03ab89000..a937c30d2 100644
--- a/gcc-4.8/gcc/config/s390/s390.h
+++ b/gcc-4.8/gcc/config/s390/s390.h
@@ -217,7 +217,7 @@ enum processor_flags
#define STACK_BOUNDARY 64
/* Allocation boundary (in *bits*) for the code of a function. */
-#define FUNCTION_BOUNDARY 32
+#define FUNCTION_BOUNDARY 64
/* There is no point aligning anything to a rounder boundary than this. */
#define BIGGEST_ALIGNMENT 64
@@ -878,6 +878,9 @@ do { \
fputc ('\n', (FILE)); \
} while (0)
+#undef ASM_OUTPUT_FUNCTION_LABEL
+#define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \
+ s390_asm_output_function_label (FILE, NAME, DECL)
/* Miscellaneous parameters. */
diff --git a/gcc-4.8/gcc/config/s390/s390.md b/gcc-4.8/gcc/config/s390/s390.md
index ad3ba27c5..fab189843 100644
--- a/gcc-4.8/gcc/config/s390/s390.md
+++ b/gcc-4.8/gcc/config/s390/s390.md
@@ -147,6 +147,7 @@
; Transactional Execution support
UNSPECV_TBEGIN
+ UNSPECV_TBEGIN_TDB
UNSPECV_TBEGINC
UNSPECV_TEND
UNSPECV_TABORT
@@ -9896,9 +9897,10 @@
(define_insn "tbegin_1"
[(set (reg:CCRAW CC_REGNUM)
- (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q")
- (match_operand 1 "const_int_operand" " D")]
+ (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")]
UNSPECV_TBEGIN))
+ (set (match_operand:BLK 1 "memory_operand" "=Q")
+ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB))
(clobber (reg:DF 16))
(clobber (reg:DF 17))
(clobber (reg:DF 18))
@@ -9917,18 +9919,19 @@
(clobber (reg:DF 31))]
; CONST_OK_FOR_CONSTRAINT_P does not work with D constraint since D is
; not supposed to be used for immediates (see genpreds.c).
- "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff"
- "tbegin\t%0,%x1"
+ "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff"
+ "tbegin\t%1,%x0"
[(set_attr "op_type" "SIL")])
; Same as above but without the FPR clobbers
(define_insn "tbegin_nofloat_1"
[(set (reg:CCRAW CC_REGNUM)
- (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q")
- (match_operand 1 "const_int_operand" " D")]
- UNSPECV_TBEGIN))]
- "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff"
- "tbegin\t%0,%x1"
+ (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" "D")]
+ UNSPECV_TBEGIN))
+ (set (match_operand:BLK 1 "memory_operand" "=Q")
+ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_TBEGIN_TDB))]
+ "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff"
+ "tbegin\t%1,%x0"
[(set_attr "op_type" "SIL")])
@@ -10012,15 +10015,12 @@
; Transaction perform processor assist
(define_expand "tx_assist"
- [(set (match_dup 1) (const_int 0))
- (unspec_volatile [(match_operand:SI 0 "register_operand" "")
- (match_dup 1)
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "")
+ (reg:SI GPR0_REGNUM)
(const_int 1)]
UNSPECV_PPA)]
"TARGET_HTM"
-{
- operands[1] = gen_reg_rtx (SImode);
-})
+ "")
(define_insn "*ppa"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "d")
@@ -10028,5 +10028,5 @@
(match_operand 2 "const_int_operand" "I")]
UNSPECV_PPA)]
"TARGET_HTM && INTVAL (operands[2]) < 16"
- "ppa\t%0,%1,1"
+ "ppa\t%0,%1,%2"
[(set_attr "op_type" "RRF")])
diff --git a/gcc-4.8/gcc/config/s390/s390.opt b/gcc-4.8/gcc/config/s390/s390.opt
index a4e6ef25d..65d17c334 100644
--- a/gcc-4.8/gcc/config/s390/s390.opt
+++ b/gcc-4.8/gcc/config/s390/s390.opt
@@ -96,6 +96,14 @@ mhard-float
Target Report RejectNegative Negative(msoft-float) InverseMask(SOFT_FLOAT, HARD_FLOAT)
Enable hardware floating point
+mhotpatch
+Target Report Var(s390_deferred_options) Defer
Prepend the function label with 12 two-byte Nop instructions, and add a four-byte Nop instruction after the label for hotpatching.
+
+mhotpatch=
+Target RejectNegative Report Joined Var(s390_deferred_options) Defer
Prepend the function label with the given number of two-byte Nop instructions, and add a four-byte Nop instruction after the label for hotpatching.
+
mlong-double-128
Target Report RejectNegative Negative(mlong-double-64) Mask(LONG_DOUBLE_128)
Use 128-bit long double
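A hypothetical usage example (file and function names made up): building with gcc -O2 -mhotpatch=8 foo.c makes every eligible function in foo.c hotpatchable with an 8-halfword trampoline, while the attribute overrides the option per function:

    /* Gets a 16-halfword trampoline regardless of -mhotpatch=.  */
    void __attribute__ ((hotpatch (16)))
    serviceable (void)
    {
      /* ... */
    }

With no argument, the attribute requests the default of 12 halfwords.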
diff --git a/gcc-4.8/gcc/config/sh/constraints.md b/gcc-4.8/gcc/config/sh/constraints.md
index 59bf6b5ad..df7823764 100644
--- a/gcc-4.8/gcc/config/sh/constraints.md
+++ b/gcc-4.8/gcc/config/sh/constraints.md
@@ -221,6 +221,7 @@
(define_constraint "Q"
"A pc relative load operand."
(and (match_code "mem")
+ (match_test "GET_MODE (op) != QImode")
(match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))")))
(define_constraint "Bsc"
@@ -295,13 +296,15 @@
(define_memory_constraint "Sdd"
"A memory reference that uses displacement addressing."
- (and (match_test "MEM_P (op) && GET_CODE (XEXP (op, 0)) == PLUS")
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
(match_test "REG_P (XEXP (XEXP (op, 0), 0))")
(match_test "CONST_INT_P (XEXP (XEXP (op, 0), 1))")))
(define_memory_constraint "Snd"
"A memory reference that excludes displacement addressing."
- (match_test "! satisfies_constraint_Sdd (op)"))
+ (and (match_code "mem")
+ (match_test "! satisfies_constraint_Sdd (op)")))
(define_memory_constraint "Sbv"
"A memory reference, as used in SH2A bclr.b, bset.b, etc."
diff --git a/gcc-4.8/gcc/config/sh/predicates.md b/gcc-4.8/gcc/config/sh/predicates.md
index dcbd75bb8..b1905fa0d 100644
--- a/gcc-4.8/gcc/config/sh/predicates.md
+++ b/gcc-4.8/gcc/config/sh/predicates.md
@@ -389,6 +389,12 @@
XEXP (XEXP (op, 0), 1),
TARGET_SH2A, true)")))
+;; Returns true if OP is a displacement address that can fit into a
+;; 16 bit (non-SH2A) memory load / store insn.
+(define_predicate "short_displacement_mem_operand"
+ (match_test "sh_disp_addr_displacement (op)
+ <= sh_max_mov_insn_displacement (GET_MODE (op), false)"))
+
;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn.
(define_predicate "zero_extend_movu_operand"
(and (match_operand 0 "displacement_mem_operand")
@@ -413,6 +419,11 @@
if (t_reg_operand (op, mode))
return 0;
+  /* Disallow PC relative QImode loads, since there is no insn to do that
+ and an imm8 load should be used instead. */
+ if (IS_PC_RELATIVE_LOAD_ADDR_P (op) && GET_MODE (op) == QImode)
+ return false;
+
if (MEM_P (op))
{
rtx inside = XEXP (op, 0);
diff --git a/gcc-4.8/gcc/config/sh/sh-protos.h b/gcc-4.8/gcc/config/sh/sh-protos.h
index 4671c5499..8f99caac0 100644
--- a/gcc-4.8/gcc/config/sh/sh-protos.h
+++ b/gcc-4.8/gcc/config/sh/sh-protos.h
@@ -159,6 +159,8 @@ extern bool sh_vector_mode_supported_p (enum machine_mode);
extern bool sh_cfun_trap_exit_p (void);
extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem);
extern int sh_eval_treg_value (rtx op);
+extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
+extern int sh_max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a);
/* Result value of sh_find_set_of_reg. */
struct set_of_reg
diff --git a/gcc-4.8/gcc/config/sh/sh.c b/gcc-4.8/gcc/config/sh/sh.c
index 44e1e4ce3..9ecaa926d 100644
--- a/gcc-4.8/gcc/config/sh/sh.c
+++ b/gcc-4.8/gcc/config/sh/sh.c
@@ -24,6 +24,9 @@ along with GCC; see the file COPYING3. If not see
malloc & co, which are poisoned by "system.h". The proper solution is
to include <cstdlib> in "system.h" instead of <stdlib.h>. */
#include <cstdlib>
+#include <sstream>
+#include <vector>
+#include <algorithm>
#include "config.h"
#include "system.h"
@@ -60,10 +63,6 @@ along with GCC; see the file COPYING3. If not see
#include "tm-constrs.h"
#include "opts.h"
-#include <sstream>
-#include <vector>
-#include <algorithm>
-
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
@@ -310,9 +309,7 @@ static rtx sh_trampoline_adjust_address (rtx);
static void sh_conditional_register_usage (void);
static bool sh_legitimate_constant_p (enum machine_mode, rtx);
static int mov_insn_size (enum machine_mode, bool);
-static int max_mov_insn_displacement (enum machine_mode, bool);
static int mov_insn_alignment_mask (enum machine_mode, bool);
-static HOST_WIDE_INT disp_addr_displacement (rtx);
static bool sequence_insn_p (rtx);
static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
@@ -3628,8 +3625,8 @@ mov_insn_size (enum machine_mode mode, bool consider_sh2a)
/* Determine the maximum possible displacement for a move insn for the
specified mode. */
-static int
-max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
+int
+sh_max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
{
/* The 4 byte displacement move insns are the same as the 2 byte
versions but take a 12 bit displacement. All we need to do is to
@@ -3665,8 +3662,8 @@ mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
}
/* Return the displacement value of a displacement address. */
-static inline HOST_WIDE_INT
-disp_addr_displacement (rtx x)
+HOST_WIDE_INT
+sh_disp_addr_displacement (rtx x)
{
gcc_assert (satisfies_constraint_Sdd (x));
return INTVAL (XEXP (XEXP (x, 0), 1));
@@ -3703,12 +3700,12 @@ sh_address_cost (rtx x, enum machine_mode mode,
HImode and QImode loads/stores with displacement put pressure on
R0 which will most likely require another reg copy. Thus account
a higher cost for that. */
- if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
+ if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
return (mode == HImode || mode == QImode) ? 2 : 1;
/* The displacement would fit into a 4 byte move insn (SH2A). */
if (TARGET_SH2A
- && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
+ && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
return 2;
/* The displacement is probably out of range and will require extra
@@ -10218,7 +10215,7 @@ sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
else
{
const HOST_WIDE_INT offset = INTVAL (op);
- const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
+ const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
/* If the mode does not support any displacement always return false.
@@ -10404,7 +10401,7 @@ sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
effectively disable the small displacement insns. */
const int mode_sz = GET_MODE_SIZE (mode);
const int mov_insn_sz = mov_insn_size (mode, false);
- const int max_disp = max_mov_insn_displacement (mode, false);
+ const int max_disp = sh_max_mov_insn_displacement (mode, false);
const int max_disp_next = max_disp + mov_insn_sz;
HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
HOST_WIDE_INT offset_adjust;
@@ -13165,7 +13162,8 @@ sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
the insns must have the appropriate alternatives. */
if ((mode == QImode || mode == HImode) && rclass != R0_REGS
&& satisfies_constraint_Sdd (x)
- && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
+ && sh_disp_addr_displacement (x)
+ <= sh_max_mov_insn_displacement (mode, false))
return R0_REGS;
/* When reload is trying to address a QImode or HImode subreg on the stack,
diff --git a/gcc-4.8/gcc/config/sh/sh.md b/gcc-4.8/gcc/config/sh/sh.md
index 8ae9cea76..300f99018 100644
--- a/gcc-4.8/gcc/config/sh/sh.md
+++ b/gcc-4.8/gcc/config/sh/sh.md
@@ -2152,6 +2152,7 @@
(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
(clobber (reg:SI R4_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
@@ -6831,34 +6832,9 @@ label:
prepare_move_operands (operands, QImode);
})
-;; If movqi_reg_reg is specified as an alternative of movqi, movqi will be
-;; selected to copy QImode regs. If one of them happens to be allocated
-;; on the stack, reload will stick to movqi insn and generate wrong
-;; displacement addressing because of the generic m alternatives.
-;; With the movqi_reg_reg being specified before movqi it will be initially
-;; picked to load/store regs. If the regs regs are on the stack reload
-;; try other insns and not stick to movqi_reg_reg, unless there were spilled
-;; pseudos in which case 'm' constraints pertain.
-;; The same applies to the movhi variants.
-;;
-;; Notice, that T bit is not allowed as a mov src operand here. This is to
-;; avoid things like (set (reg:QI) (subreg:QI (reg:SI T_REG) 0)), which
-;; introduces zero extensions after T bit stores and redundant reg copies.
-;;
-;; FIXME: We can't use 'arith_reg_operand' (which disallows T_REG) as a
-;; predicate for the mov src operand because reload will have trouble
-;; reloading MAC subregs otherwise. For that probably special patterns
-;; would be required.
-(define_insn "*mov<mode>_reg_reg"
- [(set (match_operand:QIHI 0 "arith_reg_dest" "=r,m,*z")
- (match_operand:QIHI 1 "register_operand" "r,*z,m"))]
- "TARGET_SH1 && !t_reg_operand (operands[1], VOIDmode)"
- "@
- mov %1,%0
- mov.<bw> %1,%0
- mov.<bw> %1,%0"
- [(set_attr "type" "move,store,load")])
-
+;; Specifying the displacement addressing load / store patterns separately
+;; before the generic movqi / movhi pattern allows controlling the order
+;; in which load / store insns are selected in a more fine-grained way.
;; FIXME: The non-SH2A and SH2A variants should be combined by adding
;; "enabled" attribute as it is done in other targets.
(define_insn "*mov<mode>_store_mem_disp04"
@@ -6908,38 +6884,44 @@ label:
[(set_attr "type" "load")
(set_attr "length" "2,2,4")])
-;; The m constraints basically allow any kind of addresses to be used with any
-;; source/target register as the other operand. This is not true for
-;; displacement addressing modes on anything but SH2A. That's why the
-;; specialized load/store insns are specified above.
-(define_insn "*movqi"
- [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,l")
- (match_operand:QI 1 "general_movsrc_operand" "i,m,r,l,r"))]
+;; The order of the constraint alternatives is important here.
+;; Q/r has to come first, otherwise PC relative loads might wrongly get
+;; placed into delay slots. Since there is no QImode PC relative load, the
+;; Q constraint and general_movsrc_operand will reject it for QImode.
+;; The Snd alternatives should come before Sdd in order to avoid a preference
+;; of using r0 as the register operand for addressing modes other than
+;; displacement addressing.
+;; The Sdd alternatives allow only r0 as register operand, even though on
+;; SH2A any register could be allowed by switching to a 32 bit insn.
+;; Generally sticking to r0 is preferable, since it generates smaller
+;; code. Obvious r0 reloads can then be eliminated with a peephole on SH2A.
+(define_insn "*mov<mode>"
+ [(set (match_operand:QIHI 0 "general_movdst_operand"
+ "=r,r,r,Snd,r, Sdd,z, r,l")
+ (match_operand:QIHI 1 "general_movsrc_operand"
+ "Q,r,i,r, Snd,z, Sdd,l,r"))]
"TARGET_SH1
- && (arith_reg_operand (operands[0], QImode)
- || arith_reg_operand (operands[1], QImode))"
+ && (arith_reg_operand (operands[0], <MODE>mode)
+ || arith_reg_operand (operands[1], <MODE>mode))"
"@
+ mov.<bw> %1,%0
mov %1,%0
- mov.b %1,%0
- mov.b %1,%0
- sts %1,%0
- lds %1,%0"
- [(set_attr "type" "movi8,load,store,prget,prset")])
-
-(define_insn "*movhi"
- [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,m,r,l")
- (match_operand:HI 1 "general_movsrc_operand" "Q,i,m,r,l,r"))]
- "TARGET_SH1
- && (arith_reg_operand (operands[0], HImode)
- || arith_reg_operand (operands[1], HImode))"
- "@
- mov.w %1,%0
mov %1,%0
- mov.w %1,%0
- mov.w %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
sts %1,%0
lds %1,%0"
- [(set_attr "type" "pcload,movi8,load,store,prget,prset")])
+ [(set_attr "type" "pcload,move,movi8,store,load,store,load,prget,prset")
+ (set (attr "length")
+ (cond [(and (match_operand 0 "displacement_mem_operand")
+ (not (match_operand 0 "short_displacement_mem_operand")))
+ (const_int 4)
+ (and (match_operand 1 "displacement_mem_operand")
+ (not (match_operand 1 "short_displacement_mem_operand")))
+ (const_int 4)]
+ (const_int 2)))])
(define_insn "*movqi_media"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
@@ -8427,11 +8409,9 @@ label:
while (true)
{
- /* It's not safe to go beyond the current basic block after reload. */
set_of_reg s1 = sh_find_set_of_reg (tested_reg, s0.insn,
- reload_completed
- ? prev_nonnote_insn_bb
- : prev_nonnote_insn);
+ prev_nonnote_insn);
+
if (s1.set_src == NULL_RTX)
break;
@@ -8454,8 +8434,18 @@ label:
T bit. Notice that some T bit stores such as negc also modify
the T bit. */
if (modified_between_p (get_t_reg_rtx (), s1.insn, testing_insn)
- || modified_in_p (get_t_reg_rtx (), s1.insn))
+ || modified_in_p (get_t_reg_rtx (), s1.insn)
+ || !no_labels_between_p (s1.insn, testing_insn))
operands[2] = NULL_RTX;
+ else
+ {
+ /* If the insn that sets the tested reg has a REG_DEAD note on
+ the T bit remove that note since we're extending the usage
+ of the T bit. */
+ rtx n = find_regno_note (s1.insn, REG_DEAD, T_REG);
+ if (n != NULL_RTX)
+ remove_note (s1.insn, n);
+ }
break;
}
diff --git a/gcc-4.8/gcc/config/sh/sh.opt b/gcc-4.8/gcc/config/sh/sh.opt
index c314e144c..8a6788eb3 100644
--- a/gcc-4.8/gcc/config/sh/sh.opt
+++ b/gcc-4.8/gcc/config/sh/sh.opt
@@ -21,7 +21,7 @@
;; Used for various architecture options.
Mask(SH_E)
-;; Set if the default precision of th FPU is single.
+;; Set if the default precision of the FPU is single.
Mask(FPU_SINGLE)
;; Set if a double-precision FPU is present but is restricted to
diff --git a/gcc-4.8/gcc/config/sparc/leon.md b/gcc-4.8/gcc/config/sparc/leon.md
index 60815079d..b511397fe 100644
--- a/gcc-4.8/gcc/config/sparc/leon.md
+++ b/gcc-4.8/gcc/config/sparc/leon.md
@@ -17,40 +17,48 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
+;; Leon is a single-issue processor.
(define_automaton "leon")
-(define_cpu_unit "leon_memory, leon_fpalu" "leon")
-(define_cpu_unit "leon_fpmds" "leon")
-(define_cpu_unit "write_buf" "leon")
+(define_cpu_unit "leon_memory" "leon")
(define_insn_reservation "leon_load" 1
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "load,sload,fpload"))
+ (and (eq_attr "cpu" "leon") (eq_attr "type" "load,sload"))
"leon_memory")
-(define_insn_reservation "leon_store" 1
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "store,fpstore"))
- "leon_memory+write_buf")
-
-(define_insn_reservation "leon_fp_alu" 1
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "fp,fpmove"))
- "leon_fpalu, nothing")
-
-(define_insn_reservation "leon_fp_mult" 1
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "fpmul"))
- "leon_fpmds, nothing")
-
-(define_insn_reservation "leon_fp_div" 16
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "fpdivs,fpdivd"))
- "leon_fpmds, nothing*15")
-
-(define_insn_reservation "leon_fp_sqrt" 23
- (and (eq_attr "cpu" "leon")
- (eq_attr "type" "fpsqrts,fpsqrtd"))
- "leon_fpmds, nothing*21")
+;; Use a double reservation to work around the load pipeline hazard on UT699.
+(define_insn_reservation "leon3_load" 1
+ (and (eq_attr "cpu" "leon3") (eq_attr "type" "load,sload"))
+ "leon_memory*2")
+(define_insn_reservation "leon_store" 2
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "store"))
+ "leon_memory*2")
+
+;; This describes Gaisler Research's FPU.
+
+(define_automaton "grfpu")
+
+(define_cpu_unit "grfpu_alu" "grfpu")
+(define_cpu_unit "grfpu_ds" "grfpu")
+
+(define_insn_reservation "leon_fp_alu" 4
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fp,fpcmp,fpmul"))
+ "grfpu_alu, nothing*3")
+
+(define_insn_reservation "leon_fp_divs" 16
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivs"))
+ "grfpu_ds*14, nothing*2")
+
+(define_insn_reservation "leon_fp_divd" 17
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivd"))
+ "grfpu_ds*15, nothing*2")
+
+(define_insn_reservation "leon_fp_sqrts" 24
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrts"))
+ "grfpu_ds*22, nothing*2")
+
+(define_insn_reservation "leon_fp_sqrtd" 25
+ (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrtd"))
+ "grfpu_ds*23, nothing*2")
diff --git a/gcc-4.8/gcc/config/sparc/sparc-opts.h b/gcc-4.8/gcc/config/sparc/sparc-opts.h
index 72791772e..b5e9761af 100644
--- a/gcc-4.8/gcc/config/sparc/sparc-opts.h
+++ b/gcc-4.8/gcc/config/sparc/sparc-opts.h
@@ -30,6 +30,7 @@ enum processor_type {
PROCESSOR_SUPERSPARC,
PROCESSOR_HYPERSPARC,
PROCESSOR_LEON,
+ PROCESSOR_LEON3,
PROCESSOR_SPARCLITE,
PROCESSOR_F930,
PROCESSOR_F934,
diff --git a/gcc-4.8/gcc/config/sparc/sparc-protos.h b/gcc-4.8/gcc/config/sparc/sparc-protos.h
index a0c1a7a05..adaeb6d9d 100644
--- a/gcc-4.8/gcc/config/sparc/sparc-protos.h
+++ b/gcc-4.8/gcc/config/sparc/sparc-protos.h
@@ -69,7 +69,6 @@ extern bool sparc_expand_move (enum machine_mode, rtx *);
extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx);
extern int sparc_splitdi_legitimate (rtx, rtx);
extern int sparc_split_regreg_legitimate (rtx, rtx);
-extern int sparc_absnegfloat_split_legitimate (rtx, rtx);
extern const char *output_ubranch (rtx, rtx);
extern const char *output_cbranch (rtx, rtx, int, int, int, rtx);
extern const char *output_return (rtx);
@@ -84,9 +83,9 @@ extern void emit_conditional_branch_insn (rtx []);
extern int mems_ok_for_ldd_peep (rtx, rtx, rtx);
extern int empty_delay_slot (rtx);
extern int emit_cbcond_nop (rtx);
+extern int eligible_for_call_delay (rtx);
extern int eligible_for_return_delay (rtx);
extern int eligible_for_sibcall_delay (rtx);
-extern int tls_call_delay (rtx);
extern int emit_move_sequence (rtx, enum machine_mode);
extern int fp_sethi_p (rtx);
extern int fp_mov_p (rtx);
diff --git a/gcc-4.8/gcc/config/sparc/sparc.c b/gcc-4.8/gcc/config/sparc/sparc.c
index 7e87b4716..25b7e53f2 100644
--- a/gcc-4.8/gcc/config/sparc/sparc.c
+++ b/gcc-4.8/gcc/config/sparc/sparc.c
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h"
#include "df.h"
#include "opts.h"
+#include "tree-pass.h"
/* Processor costs */
@@ -226,6 +227,30 @@ struct processor_costs leon_costs = {
};
static const
+struct processor_costs leon3_costs = {
+ COSTS_N_INSNS (1), /* int load */
+ COSTS_N_INSNS (1), /* int signed load */
+ COSTS_N_INSNS (1), /* int zeroed load */
+ COSTS_N_INSNS (1), /* float load */
+ COSTS_N_INSNS (1), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (1), /* fadd, fsub */
+ COSTS_N_INSNS (1), /* fcmp */
+ COSTS_N_INSNS (1), /* fmov, fmovr */
+ COSTS_N_INSNS (1), /* fmul */
+ COSTS_N_INSNS (14), /* fdivs */
+ COSTS_N_INSNS (15), /* fdivd */
+ COSTS_N_INSNS (22), /* fsqrts */
+ COSTS_N_INSNS (23), /* fsqrtd */
+ COSTS_N_INSNS (5), /* imul */
+ COSTS_N_INSNS (5), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (35), /* idiv */
+ COSTS_N_INSNS (35), /* idivX */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
struct processor_costs sparclet_costs = {
COSTS_N_INSNS (3), /* int load */
COSTS_N_INSNS (3), /* int signed load */
@@ -538,7 +563,6 @@ static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
HOST_WIDE_INT, const_tree);
-static void sparc_reorg (void);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
@@ -680,9 +704,6 @@ char sparc_hard_reg_printed[8];
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
-#undef TARGET_MACHINE_DEPENDENT_REORG
-#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg
-
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
@@ -804,6 +825,306 @@ char sparc_hard_reg_printed[8];
struct gcc_target targetm = TARGET_INITIALIZER;
+/* Return the memory reference contained in X if any, zero otherwise. */
+
+static rtx
+mem_ref (rtx x)
+{
+ if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
+ x = XEXP (x, 0);
+
+ if (MEM_P (x))
+ return x;
+
+ return NULL_RTX;
+}
+
+/* We use a machine specific pass to enable workarounds for errata.
+ We need to have the (essentially) final form of the insn stream in order
+ to properly detect the various hazards. Therefore, this machine specific
+ pass runs as late as possible. The pass is inserted in the pass pipeline
+ at the end of sparc_option_override. */
+
+static bool
+sparc_gate_work_around_errata (void)
+{
+ /* The only errata we handle are those of the AT697F and UT699. */
+ return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
+}
+
+static unsigned int
+sparc_do_work_around_errata (void)
+{
+ rtx insn, next;
+
+ /* Force all instructions to be split into their final form. */
+ split_all_insns_noflow ();
+
+ /* Now look for specific patterns in the insn stream. */
+ for (insn = get_insns (); insn; insn = next)
+ {
+ bool insert_nop = false;
+ rtx set;
+
+ /* Look into the instruction in a delay slot. */
+ if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, 1);
+
+ /* Look for a single-word load into an odd-numbered FP register. */
+ if (sparc_fix_at697f
+ && NONJUMP_INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+ && MEM_P (SET_SRC (set))
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) > 31
+ && REGNO (SET_DEST (set)) % 2 != 0)
+ {
+ /* The wrong dependency is on the enclosing double register. */
+ const unsigned int x = REGNO (SET_DEST (set)) - 1;
+ unsigned int src1, src2, dest;
+ int code;
+
+ next = next_active_insn (insn);
+ if (!next)
+ break;
+ /* If the insn is a branch, then it cannot be problematic. */
+ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
+ continue;
+
+ extract_insn (next);
+ code = INSN_CODE (next);
+
+ switch (code)
+ {
+ case CODE_FOR_adddf3:
+ case CODE_FOR_subdf3:
+ case CODE_FOR_muldf3:
+ case CODE_FOR_divdf3:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ src2 = REGNO (recog_data.operand[2]);
+ if (src1 != src2)
+ {
+ /* Case [1-4]:
+ ld [address], %fx+1
+ FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
+ if ((src1 == x || src2 == x)
+ && (dest == src1 || dest == src2))
+ insert_nop = true;
+ }
+ else
+ {
+ /* Case 5:
+ ld [address], %fx+1
+ FPOPd %fx, %fx, %fx */
+ if (src1 == x
+ && dest == src1
+ && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
+ insert_nop = true;
+ }
+ break;
+
+ case CODE_FOR_sqrtdf2:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ /* Case 6:
+ ld [address], %fx+1
+ fsqrtd %fx, %fx */
+ if (src1 == x && dest == src1)
+ insert_nop = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Look for a single-word load into an integer register. */
+ else if (sparc_fix_ut699
+ && NONJUMP_INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
+ && mem_ref (SET_SRC (set)) != NULL_RTX
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) < 32)
+ {
+ /* There is no problem if the second memory access has a data
+ dependency on the first single-cycle load. */
+ rtx x = SET_DEST (set);
+
+ next = next_active_insn (insn);
+ if (!next)
+ break;
+ /* If the insn is a branch, then it cannot be problematic. */
+ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
+ continue;
+
+ /* Look for a second memory access to/from an integer register. */
+ if ((set = single_set (next)) != NULL_RTX)
+ {
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ rtx mem;
+
+ /* LDD is affected. */
+ if ((mem = mem_ref (src)) != NULL_RTX
+ && REG_P (dest)
+ && REGNO (dest) < 32
+ && !reg_mentioned_p (x, XEXP (mem, 0)))
+ insert_nop = true;
+
+ /* STD is *not* affected. */
+ else if (MEM_P (dest)
+ && GET_MODE_SIZE (GET_MODE (dest)) <= 4
+ && (src == CONST0_RTX (GET_MODE (dest))
+ || (REG_P (src)
+ && REGNO (src) < 32
+ && REGNO (src) != REGNO (x)))
+ && !reg_mentioned_p (x, XEXP (dest, 0)))
+ insert_nop = true;
+ }
+ }
+
+ /* Look for a single-word load/operation into an FP register. */
+ else if (sparc_fix_ut699
+ && NONJUMP_INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) > 31)
+ {
+ /* Number of instructions in the problematic window. */
+ const int n_insns = 4;
+ /* The problematic combination is with the sibling FP register. */
+ const unsigned int x = REGNO (SET_DEST (set));
+ const unsigned int y = x ^ 1;
+ rtx after;
+ int i;
+
+ next = next_active_insn (insn);
+ if (!next)
+ break;
+ /* If the insn is a branch, then it cannot be problematic. */
+ if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
+ continue;
+
+ /* Look for a second load/operation into the sibling FP register. */
+ if (!((set = single_set (next)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) == y))
+ continue;
+
+ /* Look for a (possible) store from the FP register in the next N
+ instructions, but bail out if it is again modified or if there
+ is a store from the sibling FP register before this store. */
+ for (after = next, i = 0; i < n_insns; i++)
+ {
+ bool branch_p;
+
+ after = next_active_insn (after);
+ if (!after)
+ break;
+
+ /* This is a branch with an empty delay slot. */
+ if (!NONJUMP_INSN_P (after))
+ {
+ if (++i == n_insns)
+ break;
+ branch_p = true;
+ after = NULL_RTX;
+ }
+ /* This is a branch with a filled delay slot. */
+ else if (GET_CODE (PATTERN (after)) == SEQUENCE)
+ {
+ if (++i == n_insns)
+ break;
+ branch_p = true;
+ after = XVECEXP (PATTERN (after), 0, 1);
+ }
+ /* This is a regular instruction. */
+ else
+ branch_p = false;
+
+ if (after && (set = single_set (after)) != NULL_RTX)
+ {
+ const rtx src = SET_SRC (set);
+ const rtx dest = SET_DEST (set);
+ const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
+
+ /* If the FP register is again modified before the store,
+ then the store isn't affected. */
+ if (REG_P (dest)
+ && (REGNO (dest) == x
+ || (REGNO (dest) == y && size == 8)))
+ break;
+
+ if (MEM_P (dest) && REG_P (src))
+ {
+ /* If there is a store from the sibling FP register
+ before the store, then the store is not affected. */
+ if (REGNO (src) == y || (REGNO (src) == x && size == 8))
+ break;
+
+ /* Otherwise, the store is affected. */
+ if (REGNO (src) == x && size == 4)
+ {
+ insert_nop = true;
+ break;
+ }
+ }
+ }
+
+ /* If we have a branch in the first M instructions, then we
+ cannot see the (M+2)th instruction so we play safe. */
+ if (branch_p && i <= (n_insns - 2))
+ {
+ insert_nop = true;
+ break;
+ }
+ }
+ }
+
+ else
+ next = NEXT_INSN (insn);
+
+ if (insert_nop)
+ emit_insn_before (gen_nop (), next);
+ }
+
+ return 0;
+}
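As a hypothetical example of the UT699 integer-load hazard (register choices made up), the pass turns

    ld  [%o0], %o1        ! single-word load
    ldd [%o2], %o4        ! second access, no dependency on %o1

into the same sequence with a nop inserted between the two instructions, since the ldd neither depends on %o1 nor mentions it in its address.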
+
+struct rtl_opt_pass pass_work_around_errata =
+{
+ {
+ RTL_PASS,
+ "errata", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ sparc_gate_work_around_errata, /* gate */
+ sparc_do_work_around_errata, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_verify_rtl_sharing, /* todo_flags_finish */
+ }
+};
+
+struct register_pass_info insert_pass_work_around_errata =
+{
+ &pass_work_around_errata.pass, /* pass */
+ "dbr", /* reference_pass_name */
+ 1, /* ref_pass_instance_number */
+  PASS_POS_INSERT_AFTER /* pos_op */
+};
+
+/* Helpers for TARGET_DEBUG_OPTIONS. */
static void
dump_target_flag_bits (const int flags)
{
@@ -888,6 +1209,7 @@ sparc_option_override (void)
{ TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
{ TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
{ TARGET_CPU_leon, PROCESSOR_LEON },
+ { TARGET_CPU_leon3, PROCESSOR_LEON3 },
{ TARGET_CPU_sparclite, PROCESSOR_F930 },
{ TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
{ TARGET_CPU_sparclet, PROCESSOR_TSC701 },
@@ -902,7 +1224,7 @@ sparc_option_override (void)
};
const struct cpu_default *def;
/* Table of values for -m{cpu,tune}=. This must match the order of
- the PROCESSOR_* enumeration. */
+ the enum processor_type in sparc-opts.h. */
static struct cpu_table {
const char *const name;
const int disable;
@@ -914,8 +1236,8 @@ sparc_option_override (void)
/* TI TMS390Z55 supersparc */
{ "supersparc", MASK_ISA, MASK_V8 },
{ "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
- /* LEON */
- { "leon", MASK_ISA, MASK_V8|MASK_FPU },
+ { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
+ { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
{ "sparclite", MASK_ISA, MASK_SPARCLITE },
/* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
{ "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
@@ -1075,6 +1397,9 @@ sparc_option_override (void)
#ifndef HAVE_AS_SPARC4
& ~MASK_CBCOND
#endif
+#ifndef HAVE_AS_LEON
+ & ~(MASK_LEON | MASK_LEON3)
+#endif
);
/* If -mfpu or -mno-fpu was explicitly used, don't override with
@@ -1164,6 +1489,9 @@ sparc_option_override (void)
case PROCESSOR_LEON:
sparc_costs = &leon_costs;
break;
+ case PROCESSOR_LEON3:
+ sparc_costs = &leon3_costs;
+ break;
case PROCESSOR_SPARCLET:
case PROCESSOR_TSC701:
sparc_costs = &sparclet_costs;
@@ -1200,6 +1528,10 @@ sparc_option_override (void)
/* Choose the most relaxed model for the processor. */
else if (TARGET_V9)
sparc_memory_model = SMM_RMO;
+ else if (TARGET_LEON3)
+ sparc_memory_model = SMM_TSO;
+ else if (TARGET_LEON)
+ sparc_memory_model = SMM_SC;
else if (TARGET_V8)
sparc_memory_model = SMM_PSO;
else
@@ -1241,6 +1573,13 @@ sparc_option_override (void)
pessimizes for double floating-point registers. */
if (!global_options_set.x_flag_ira_share_save_slots)
flag_ira_share_save_slots = 0;
+
+ /* We register a machine specific pass to work around errata, if any.
The pass must be scheduled as late as possible so that we have the
+ (essentially) final form of the insn stream to work on.
+ Registering the pass must be done at start up. It's convenient to
+ do it here. */
+ register_pass (&insert_pass_work_around_errata);
}
/* Miscellaneous utilities. */
@@ -3090,10 +3429,13 @@ emit_cbcond_nop (rtx insn)
/* Return nonzero if TRIAL can go into the call delay slot. */
int
-tls_call_delay (rtx trial)
+eligible_for_call_delay (rtx trial)
{
rtx pat;
+ if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
+ return 0;
+
/* Binutils allows
call __tls_get_addr, %tgd_call (foo)
add %l7, %o0, %o0, %tgd_add (foo)
@@ -3175,11 +3517,7 @@ eligible_for_restore_insn (rtx trial, bool return_p)
/* If we have the 'return' instruction, anything that does not use
local or output registers and can go into a delay slot wins. */
- else if (return_p
- && TARGET_V9
- && !epilogue_renumber (&pat, 1)
- && get_attr_in_uncond_branch_delay (trial)
- == IN_UNCOND_BRANCH_DELAY_TRUE)
+ else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
return 1;
/* The 'restore src1,src2,dest' pattern for SImode. */
@@ -3222,21 +3560,20 @@ eligible_for_return_delay (rtx trial)
int regno;
rtx pat;
- if (GET_CODE (trial) != INSN)
- return 0;
-
- if (get_attr_length (trial) != 1)
- return 0;
-
/* If the function uses __builtin_eh_return, the eh_return machinery
occupies the delay slot. */
if (crtl->calls_eh_return)
return 0;
+ if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
+ return 0;
+
/* In the case of a leaf or flat function, anything can go into the slot. */
if (sparc_leaf_function_p || TARGET_FLAT)
- return
- get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
+ return 1;
+
+ if (!NONJUMP_INSN_P (trial))
+ return 0;
pat = PATTERN (trial);
if (GET_CODE (pat) == PARALLEL)
@@ -3256,9 +3593,7 @@ eligible_for_return_delay (rtx trial)
if (regno >= 8 && regno < 24)
return 0;
}
- return !epilogue_renumber (&pat, 1)
- && (get_attr_in_uncond_branch_delay (trial)
- == IN_UNCOND_BRANCH_DELAY_TRUE);
+ return !epilogue_renumber (&pat, 1);
}
if (GET_CODE (pat) != SET)
@@ -3278,10 +3613,7 @@ eligible_for_return_delay (rtx trial)
instruction, it can probably go in. But restore will not work
with FP_REGS. */
if (! SPARC_INT_REG_P (regno))
- return (TARGET_V9
- && !epilogue_renumber (&pat, 1)
- && get_attr_in_uncond_branch_delay (trial)
- == IN_UNCOND_BRANCH_DELAY_TRUE);
+ return TARGET_V9 && !epilogue_renumber (&pat, 1);
return eligible_for_restore_insn (trial, true);
}
@@ -3293,10 +3625,10 @@ eligible_for_sibcall_delay (rtx trial)
{
rtx pat;
- if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
return 0;
- if (get_attr_length (trial) != 1)
+ if (!NONJUMP_INSN_P (trial))
return 0;
pat = PATTERN (trial);
@@ -3315,6 +3647,9 @@ eligible_for_sibcall_delay (rtx trial)
return 1;
}
+ if (GET_CODE (pat) != SET)
+ return 0;
+
/* Otherwise, only operations which can be done in tandem with
a `restore' insn can go into the delay slot. */
if (GET_CODE (SET_DEST (pat)) != REG
@@ -8134,22 +8469,6 @@ sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
return 0;
}
-/* Return 1 if x and y are some kind of REG and they refer to
- different hard registers. This test is guaranteed to be
- run after reload. */
-
-int
-sparc_absnegfloat_split_legitimate (rtx x, rtx y)
-{
- if (GET_CODE (x) != REG)
- return 0;
- if (GET_CODE (y) != REG)
- return 0;
- if (REGNO (x) == REGNO (y))
- return 0;
- return 1;
-}
-
/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
This makes them candidates for using ldd and std insns.
@@ -10355,7 +10674,8 @@ sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
tmp = e0.add_with_sign (tmp, false, &add1_ovf);
if (tmp.is_negative ())
tmp = tmp.neg_with_overflow (&neg2_ovf);
-
+ else
+ neg2_ovf = false;
result = result.add_with_sign (tmp, false, &add2_ovf);
overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
}
@@ -10897,107 +11217,6 @@ sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
return (vcall_offset >= -32768 || ! fixed_regs[5]);
}
-/* We use the machine specific reorg pass to enable workarounds for errata. */
-
-static void
-sparc_reorg (void)
-{
- rtx insn, next;
-
- /* The only erratum we handle for now is that of the AT697F processor. */
- if (!sparc_fix_at697f)
- return;
-
- /* We need to have the (essentially) final form of the insn stream in order
- to properly detect the various hazards. Run delay slot scheduling. */
- if (optimize > 0 && flag_delayed_branch)
- {
- cleanup_barriers ();
- dbr_schedule (get_insns ());
- }
-
- /* Now look for specific patterns in the insn stream. */
- for (insn = get_insns (); insn; insn = next)
- {
- bool insert_nop = false;
- rtx set;
-
- /* Look for a single-word load into an odd-numbered FP register. */
- if (NONJUMP_INSN_P (insn)
- && (set = single_set (insn)) != NULL_RTX
- && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
- && MEM_P (SET_SRC (set))
- && REG_P (SET_DEST (set))
- && REGNO (SET_DEST (set)) > 31
- && REGNO (SET_DEST (set)) % 2 != 0)
- {
- /* The wrong dependency is on the enclosing double register. */
- unsigned int x = REGNO (SET_DEST (set)) - 1;
- unsigned int src1, src2, dest;
- int code;
-
- /* If the insn has a delay slot, then it cannot be problematic. */
- next = next_active_insn (insn);
- if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
- code = -1;
- else
- {
- extract_insn (next);
- code = INSN_CODE (next);
- }
-
- switch (code)
- {
- case CODE_FOR_adddf3:
- case CODE_FOR_subdf3:
- case CODE_FOR_muldf3:
- case CODE_FOR_divdf3:
- dest = REGNO (recog_data.operand[0]);
- src1 = REGNO (recog_data.operand[1]);
- src2 = REGNO (recog_data.operand[2]);
- if (src1 != src2)
- {
- /* Case [1-4]:
- ld [address], %fx+1
- FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
- if ((src1 == x || src2 == x)
- && (dest == src1 || dest == src2))
- insert_nop = true;
- }
- else
- {
- /* Case 5:
- ld [address], %fx+1
- FPOPd %fx, %fx, %fx */
- if (src1 == x
- && dest == src1
- && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
- insert_nop = true;
- }
- break;
-
- case CODE_FOR_sqrtdf2:
- dest = REGNO (recog_data.operand[0]);
- src1 = REGNO (recog_data.operand[1]);
- /* Case 6:
- ld [address], %fx+1
- fsqrtd %fx, %fx */
- if (src1 == x && dest == src1)
- insert_nop = true;
- break;
-
- default:
- break;
- }
- }
- else
- next = NEXT_INSN (insn);
-
- if (insert_nop)
- emit_insn_after (gen_nop (), insn);
- }
-}
-
/* How to allocate a 'struct machine_function'. */
static struct machine_function *
diff --git a/gcc-4.8/gcc/config/sparc/sparc.h b/gcc-4.8/gcc/config/sparc/sparc.h
index c6122c115..d96c1b6b4 100644
--- a/gcc-4.8/gcc/config/sparc/sparc.h
+++ b/gcc-4.8/gcc/config/sparc/sparc.h
@@ -136,21 +136,22 @@ extern enum cmodel sparc_cmodel;
#define TARGET_CPU_supersparc 2
#define TARGET_CPU_hypersparc 3
#define TARGET_CPU_leon 4
-#define TARGET_CPU_sparclite 5
-#define TARGET_CPU_f930 5 /* alias */
-#define TARGET_CPU_f934 5 /* alias */
-#define TARGET_CPU_sparclite86x 6
-#define TARGET_CPU_sparclet 7
-#define TARGET_CPU_tsc701 7 /* alias */
-#define TARGET_CPU_v9 8 /* generic v9 implementation */
-#define TARGET_CPU_sparcv9 8 /* alias */
-#define TARGET_CPU_sparc64 8 /* alias */
-#define TARGET_CPU_ultrasparc 9
-#define TARGET_CPU_ultrasparc3 10
-#define TARGET_CPU_niagara 11
-#define TARGET_CPU_niagara2 12
-#define TARGET_CPU_niagara3 13
-#define TARGET_CPU_niagara4 14
+#define TARGET_CPU_leon3 5
+#define TARGET_CPU_sparclite 6
+#define TARGET_CPU_f930 6 /* alias */
+#define TARGET_CPU_f934 6 /* alias */
+#define TARGET_CPU_sparclite86x 7
+#define TARGET_CPU_sparclet 8
+#define TARGET_CPU_tsc701 8 /* alias */
+#define TARGET_CPU_v9 9 /* generic v9 implementation */
+#define TARGET_CPU_sparcv9 9 /* alias */
+#define TARGET_CPU_sparc64 9 /* alias */
+#define TARGET_CPU_ultrasparc 10
+#define TARGET_CPU_ultrasparc3 11
+#define TARGET_CPU_niagara 12
+#define TARGET_CPU_niagara2 13
+#define TARGET_CPU_niagara3 14
+#define TARGET_CPU_niagara4 15
#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
|| TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
@@ -232,9 +233,10 @@ extern enum cmodel sparc_cmodel;
#define ASM_CPU32_DEFAULT_SPEC ""
#endif
-#if TARGET_CPU_DEFAULT == TARGET_CPU_leon
+#if TARGET_CPU_DEFAULT == TARGET_CPU_leon \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_leon3
#define CPP_CPU32_DEFAULT_SPEC "-D__leon__ -D__sparc_v8__"
-#define ASM_CPU32_DEFAULT_SPEC ""
+#define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG
#endif
#endif
@@ -282,6 +284,7 @@ extern enum cmodel sparc_cmodel;
%{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
%{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \
%{mcpu=leon:-D__leon__ -D__sparc_v8__} \
+%{mcpu=leon3:-D__leon__ -D__sparc_v8__} \
%{mcpu=v9:-D__sparc_v9__} \
%{mcpu=ultrasparc:-D__sparc_v9__} \
%{mcpu=ultrasparc3:-D__sparc_v9__} \
@@ -329,7 +332,8 @@ extern enum cmodel sparc_cmodel;
%{mcpu=v8:-Av8} \
%{mcpu=supersparc:-Av8} \
%{mcpu=hypersparc:-Av8} \
-%{mcpu=leon:-Av8} \
+%{mcpu=leon:" AS_LEON_FLAG "} \
+%{mcpu=leon3:" AS_LEON_FLAG "} \
%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \
%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
@@ -1754,6 +1758,12 @@ extern int sparc_indent_opcode;
#define AS_NIAGARA4_FLAG "-Av9" AS_NIAGARA3_FLAG
#endif
+#ifdef HAVE_AS_LEON
+#define AS_LEON_FLAG "-Aleon"
+#else
+#define AS_LEON_FLAG "-Av8"
+#endif
+
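AS_LEON_FLAG is spliced into the assembler specs by adjacent string literal concatenation, as in the "%{mcpu=leon:" AS_LEON_FLAG "}" fragment above. A small standalone illustration (the HAVE_AS_LEON define below is an assumption standing in for the configure test):

    #include <stdio.h>

    #define HAVE_AS_LEON 1          /* assumed: as(1) accepts -Aleon */
    #ifdef HAVE_AS_LEON
    #define AS_LEON_FLAG "-Aleon"
    #else
    #define AS_LEON_FLAG "-Av8"
    #endif

    int main (void)
    {
      /* Adjacent literals concatenate at compile time.  */
      puts ("%{mcpu=leon:" AS_LEON_FLAG "}");  /* %{mcpu=leon:-Aleon} */
      return 0;
    }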
/* We use gcc _mcount for profiling. */
#define NO_PROFILE_COUNTERS 0
diff --git a/gcc-4.8/gcc/config/sparc/sparc.md b/gcc-4.8/gcc/config/sparc/sparc.md
index b60af4333..f380a414f 100644
--- a/gcc-4.8/gcc/config/sparc/sparc.md
+++ b/gcc-4.8/gcc/config/sparc/sparc.md
@@ -206,7 +206,7 @@
;; 'f' for all DF/TFmode values, including those that are specific to the v8.
;; Attribute for cpu type.
-;; These must match the values for enum processor_type in sparc.h.
+;; These must match the values of the enum processor_type in sparc-opts.h.
(define_attr "cpu"
"v7,
cypress,
@@ -214,6 +214,7 @@
supersparc,
hypersparc,
leon,
+ leon3,
sparclite,
f930,
f934,
@@ -284,7 +285,8 @@
(const_string "none"))
(define_attr "pic" "false,true"
- (symbol_ref "(flag_pic != 0 ? PIC_TRUE : PIC_FALSE)"))
+ (symbol_ref "(flag_pic != 0
+ ? PIC_TRUE : PIC_FALSE)"))
(define_attr "calls_alloca" "false,true"
(symbol_ref "(cfun->calls_alloca != 0
@@ -306,6 +308,10 @@
(symbol_ref "(TARGET_FLAT != 0
? FLAT_TRUE : FLAT_FALSE)"))
+(define_attr "fix_ut699" "false,true"
+ (symbol_ref "(sparc_fix_ut699 != 0
+ ? FIX_UT699_TRUE : FIX_UT699_FALSE)"))
+
;; Length (in # of insns).
;; Beware that setting a length greater or equal to 3 for conditional branches
;; has a side-effect (see output_cbranch and output_v9branch).
@@ -412,6 +418,10 @@
(define_attr "fptype" "single,double"
(const_string "single"))
+;; FP precision specific to the UT699.
+(define_attr "fptype_ut699" "none,single"
+ (const_string "none"))
+
;; UltraSPARC-III integer load type.
(define_attr "us3load_type" "2cycle,3cycle"
(const_string "2cycle"))
@@ -420,32 +430,18 @@
[(set_attr "length" "2")
(set_attr "type" "multi")])
-;; Attributes for instruction and branch scheduling
-(define_attr "tls_call_delay" "false,true"
- (symbol_ref "(tls_call_delay (insn)
- ? TLS_CALL_DELAY_TRUE : TLS_CALL_DELAY_FALSE)"))
-
+;; Attributes for branch scheduling
(define_attr "in_call_delay" "false,true"
- (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
- (const_string "false")
- (eq_attr "type" "load,fpload,store,fpstore")
- (if_then_else (eq_attr "length" "1")
- (const_string "true")
- (const_string "false"))]
- (if_then_else (and (eq_attr "length" "1")
- (eq_attr "tls_call_delay" "true"))
- (const_string "true")
- (const_string "false"))))
-
-(define_attr "eligible_for_sibcall_delay" "false,true"
+ (symbol_ref "(eligible_for_call_delay (insn)
+ ? IN_CALL_DELAY_TRUE : IN_CALL_DELAY_FALSE)"))
+
+(define_attr "in_sibcall_delay" "false,true"
(symbol_ref "(eligible_for_sibcall_delay (insn)
- ? ELIGIBLE_FOR_SIBCALL_DELAY_TRUE
- : ELIGIBLE_FOR_SIBCALL_DELAY_FALSE)"))
+ ? IN_SIBCALL_DELAY_TRUE : IN_SIBCALL_DELAY_FALSE)"))
-(define_attr "eligible_for_return_delay" "false,true"
+(define_attr "in_return_delay" "false,true"
(symbol_ref "(eligible_for_return_delay (insn)
- ? ELIGIBLE_FOR_RETURN_DELAY_TRUE
- : ELIGIBLE_FOR_RETURN_DELAY_FALSE)"))
+ ? IN_RETURN_DELAY_TRUE : IN_RETURN_DELAY_FALSE)"))
;; ??? !v9: Should implement the notion of predelay slots for floating-point
;; branches. This would allow us to remove the nop always inserted before
@@ -460,39 +456,33 @@
;; because it prevents us from moving back the final store of inner loops.
(define_attr "in_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
- (eq_attr "length" "1"))
- (const_string "true")
- (const_string "false")))
-
-(define_attr "in_uncond_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
- (eq_attr "length" "1"))
- (const_string "true")
- (const_string "false")))
-
-(define_attr "in_annul_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
- (eq_attr "length" "1"))
+ (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
+ (const_string "false")
+ (and (eq_attr "fix_ut699" "true") (eq_attr "type" "load,sload"))
+ (const_string "false")
+ (and (eq_attr "fix_ut699" "true")
+ (and (eq_attr "type" "fpload,fp,fpmove,fpmul,fpdivs,fpsqrts")
+ (ior (eq_attr "fptype" "single")
+ (eq_attr "fptype_ut699" "single"))))
+ (const_string "false")
+ (eq_attr "length" "1")
(const_string "true")
- (const_string "false")))
+ ] (const_string "false")))
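Restated as plain C, the rewritten in_branch_delay attribute says: control insns never fill a branch delay slot; with -mfix-ut699, neither do integer loads nor single-precision FP operations; everything else qualifies if it is a single insn. A sketch under those assumptions (the struct is illustrative, not a GCC type):

    #include <stdbool.h>

    struct insn_info
    {
      bool is_control;      /* branch, call, sibcall, multi, ...   */
      bool is_load;         /* type load or sload                  */
      bool is_fp_single;    /* single-precision FP, incl. fptype_ut699 */
      int length;           /* length in insns                     */
    };

    static bool
    in_branch_delay (const struct insn_info *i, bool fix_ut699)
    {
      if (i->is_control)
        return false;
      if (fix_ut699 && (i->is_load || i->is_fp_single))
        return false;
      return i->length == 1;
    }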
(define_delay (eq_attr "type" "call")
[(eq_attr "in_call_delay" "true") (nil) (nil)])
(define_delay (eq_attr "type" "sibcall")
- [(eq_attr "eligible_for_sibcall_delay" "true") (nil) (nil)])
+ [(eq_attr "in_sibcall_delay" "true") (nil) (nil)])
+
+(define_delay (eq_attr "type" "return")
+ [(eq_attr "in_return_delay" "true") (nil) (nil)])
(define_delay (eq_attr "type" "branch")
- [(eq_attr "in_branch_delay" "true")
- (nil) (eq_attr "in_annul_branch_delay" "true")])
+ [(eq_attr "in_branch_delay" "true") (nil) (eq_attr "in_branch_delay" "true")])
(define_delay (eq_attr "type" "uncond_branch")
- [(eq_attr "in_uncond_branch_delay" "true")
- (nil) (nil)])
-
-(define_delay (eq_attr "type" "return")
- [(eq_attr "eligible_for_return_delay" "true") (nil) (nil)])
+ [(eq_attr "in_branch_delay" "true") (nil) (nil)])
;; Include SPARC DFA schedulers
@@ -3513,7 +3503,8 @@
"TARGET_FPU"
"fdtos\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "double")
+ (set_attr "fptype_ut699" "single")])
(define_expand "trunctfsf2"
[(set (match_operand:SF 0 "register_operand" "")
@@ -3554,7 +3545,7 @@
"TARGET_FPU"
"fitos\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "single")])
(define_insn "floatsidf2"
[(set (match_operand:DF 0 "register_operand" "=e")
@@ -3641,7 +3632,7 @@
"TARGET_FPU"
"fstoi\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "single")])
(define_insn "fix_truncdfsi2"
[(set (match_operand:SI 0 "register_operand" "=f")
@@ -3649,7 +3640,8 @@
"TARGET_FPU"
"fdtoi\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "double")
+ (set_attr "fptype_ut699" "single")])
(define_expand "fix_trunctfsi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -5548,7 +5540,7 @@
[(set (match_operand:DF 0 "register_operand" "=e")
(mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f"))
(float_extend:DF (match_operand:SF 2 "register_operand" "f"))))]
- "(TARGET_V8 || TARGET_V9) && TARGET_FPU"
+ "(TARGET_V8 || TARGET_V9) && TARGET_FPU && !sparc_fix_ut699"
"fsmuld\t%1, %2, %0"
[(set_attr "type" "fpmul")
(set_attr "fptype" "double")])
@@ -5575,73 +5567,89 @@
(match_operand:TF 2 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
"fdivq\t%1, %2, %0"
- [(set_attr "type" "fpdivd")])
+ [(set_attr "type" "fpdivs")])
-(define_insn "divdf3"
+(define_expand "divdf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(div:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
+ "")
+
+(define_insn "*divdf3_nofix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (div:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU && !sparc_fix_ut699"
"fdivd\t%1, %2, %0"
[(set_attr "type" "fpdivd")
(set_attr "fptype" "double")])
+(define_insn "*divdf3_fix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (div:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU && sparc_fix_ut699"
+ "fdivd\t%1, %2, %0\n\tstd\t%0, [%%sp-8]"
+ [(set_attr "type" "fpdivd")
+ (set_attr "fptype" "double")
+ (set_attr "length" "2")])
+
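The _fix variant differs from _nofix only in the trailing store of the just-computed result to [%sp-8], a dummy store that appears intended to push the result out of the FP pipeline before a later insn can trip the erratum. A hypothetical standalone reproduction (assumes GCC on SPARC accepts the machine constraint 'e' for DFmode values in inline asm):

    double
    div_ut699 (double a, double b)
    {
      double r;
      /* Same two-insn sequence as the *divdf3_fix template.  */
      __asm__ ("fdivd\t%1, %2, %0\n\tstd\t%0, [%%sp-8]"
               : "=e" (r) : "e" (a), "e" (b));
      return r;
    }

The sqrtdf2 patterns further down receive the identical two-insn treatment.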
(define_insn "divsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(div:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
- "TARGET_FPU"
+ "TARGET_FPU && !sparc_fix_ut699"
"fdivs\t%1, %2, %0"
[(set_attr "type" "fpdivs")])
(define_expand "negtf2"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ [(set (match_operand:TF 0 "register_operand" "")
+ (neg:TF (match_operand:TF 1 "register_operand" "")))]
"TARGET_FPU"
"")
-(define_insn_and_split "*negtf2_notv9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU
- && ! TARGET_V9"
- "@
- fnegs\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
- operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
-
-(define_insn_and_split "*negtf2_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU && TARGET_V9"
- "@
- fnegd\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:DF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")
- (set_attr "fptype" "double")])
+(define_insn "*negtf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (neg:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fnegq\t%1, %0"
+ [(set_attr "type" "fpmove")])
+
+(define_insn_and_split "*negtf2"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (neg:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ src1 = gen_df_reg (set_src, 0);
+ src2 = gen_df_reg (set_src, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_negdf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_negdf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
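What the split computes, restated on the raw register image (a sketch assuming SPARC's big-endian word order, where half 0 of a TFmode value holds the sign bit):

    #include <stdint.h>

    /* negtf2 by halves: negdf2 on half 0 flips the quad's sign bit,
       movdf on half 1 copies the rest unchanged.  */
    void
    negtf_by_halves (uint64_t dst[2], const uint64_t src[2])
    {
      dst[0] = src[0] ^ (UINT64_C (1) << 63);
      dst[1] = src[1];
    }

The reg_overlap_mentioned_p test above only chooses which half to emit first, so a destination that aliases the source's second half is never clobbered before that half has been read.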
(define_expand "negdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -5650,22 +5658,39 @@
"")
(define_insn_and_split "*negdf2_notv9"
- [(set (match_operand:DF 0 "register_operand" "=e,e")
- (neg:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU && ! TARGET_V9"
- "@
- fnegs\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (neg:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_V9"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_highpart (SFmode, set_dest);
+ dest2 = gen_lowpart (SFmode, set_dest);
+ src1 = gen_highpart (SFmode, set_src);
+ src2 = gen_lowpart (SFmode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsf (dest2, src2));
+ emit_insn (gen_negsf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_negsf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movsf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_insn "*negdf2_v9"
[(set (match_operand:DF 0 "register_operand" "=e")
@@ -5688,56 +5713,47 @@
"TARGET_FPU"
"")
-(define_insn_and_split "*abstf2_notv9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU && ! TARGET_V9"
- "@
- fabss\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
- operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
-
-(define_insn "*abstf2_hq_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- "TARGET_FPU && TARGET_V9 && TARGET_HARD_QUAD"
- "@
- fabsd\t%0, %0
- fabsq\t%1, %0"
- [(set_attr "type" "fpmove")
- (set_attr "fptype" "double,*")])
+(define_insn "*abstf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (abs:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fabsq\t%1, %0"
+ [(set_attr "type" "fpmove")])
-(define_insn_and_split "*abstf2_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- "TARGET_FPU && TARGET_V9 && !TARGET_HARD_QUAD"
- "@
- fabsd\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:DF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")
- (set_attr "fptype" "double,*")])
+(define_insn_and_split "*abstf2"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (abs:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ src1 = gen_df_reg (set_src, 0);
+ src2 = gen_df_reg (set_src, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_absdf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_absdf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
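The abs split is the same trick with the sign bit cleared instead of flipped (same big-endian assumption as the neg sketch earlier):

    #include <stdint.h>

    void
    abstf_by_halves (uint64_t dst[2], const uint64_t src[2])
    {
      dst[0] = src[0] & ~(UINT64_C (1) << 63);  /* absdf2 on half 0 */
      dst[1] = src[1];                          /* movdf on half 1  */
    }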
(define_expand "absdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -5746,22 +5762,39 @@
"")
(define_insn_and_split "*absdf2_notv9"
- [(set (match_operand:DF 0 "register_operand" "=e,e")
- (abs:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU && ! TARGET_V9"
- "@
- fabss\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (abs:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_V9"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_highpart (SFmode, set_dest);
+ dest2 = gen_lowpart (SFmode, set_dest);
+ src1 = gen_highpart (SFmode, set_src);
+ src2 = gen_lowpart (SFmode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsf (dest2, src2));
+ emit_insn (gen_abssf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_abssf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movsf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_insn "*absdf2_v9"
[(set (match_operand:DF 0 "register_operand" "=e")
@@ -5789,20 +5822,35 @@
(sqrt:TF (match_operand:TF 1 "register_operand" "e")))]
"TARGET_FPU && TARGET_HARD_QUAD"
"fsqrtq\t%1, %0"
- [(set_attr "type" "fpsqrtd")])
+ [(set_attr "type" "fpsqrts")])
-(define_insn "sqrtdf2"
+(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
"TARGET_FPU"
+ "")
+
+(define_insn "*sqrtdf2_nofix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !sparc_fix_ut699"
"fsqrtd\t%1, %0"
[(set_attr "type" "fpsqrtd")
(set_attr "fptype" "double")])
+(define_insn "*sqrtdf2_fix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && sparc_fix_ut699"
+ "fsqrtd\t%1, %0\n\tstd\t%0, [%%sp-8]"
+ [(set_attr "type" "fpsqrtd")
+ (set_attr "fptype" "double")
+ (set_attr "length" "2")])
+
(define_insn "sqrtsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
- "TARGET_FPU"
+ "TARGET_FPU && !sparc_fix_ut699"
"fsqrts\t%1, %0"
[(set_attr "type" "fpsqrts")])
@@ -5821,19 +5869,6 @@
}
[(set_attr "type" "shift")])
-(define_insn "*ashlsi3_extend"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
- (ashift:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "arith_operand" "rI"))))]
- "TARGET_ARCH64"
-{
- if (GET_CODE (operands[2]) == CONST_INT)
- operands[2] = GEN_INT (INTVAL (operands[2]) & 0x1f);
- return "sll\t%1, %2, %0";
-}
- [(set_attr "type" "shift")])
-
(define_expand "ashldi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(ashift:DI (match_operand:DI 1 "register_operand" "r")
diff --git a/gcc-4.8/gcc/config/sparc/sparc.opt b/gcc-4.8/gcc/config/sparc/sparc.opt
index 764c652e8..9de981b85 100644
--- a/gcc-4.8/gcc/config/sparc/sparc.opt
+++ b/gcc-4.8/gcc/config/sparc/sparc.opt
@@ -113,6 +113,10 @@ mrelax
Target
Optimize tail call instructions in assembler and linker
+muser-mode
+Target Report Mask(USER_MODE)
+Do not generate code that can only run in supervisor mode
+
mcpu=
Target RejectNegative Joined Var(sparc_cpu_and_features) Enum(sparc_processor_type) Init(PROCESSOR_V7)
Use features of and schedule code for given CPU
@@ -146,6 +150,9 @@ EnumValue
Enum(sparc_processor_type) String(leon) Value(PROCESSOR_LEON)
EnumValue
+Enum(sparc_processor_type) String(leon3) Value(PROCESSOR_LEON3)
+
+EnumValue
Enum(sparc_processor_type) String(sparclite) Value(PROCESSOR_SPARCLITE)
EnumValue
@@ -201,9 +208,19 @@ Target Report RejectNegative Var(sparc_fix_at697f)
Enable workaround for single erratum of AT697F processor
(corresponding to erratum #13 of AT697E processor)
+mfix-ut699
+Target Report RejectNegative Var(sparc_fix_ut699)
+Enable workarounds for the errata of the UT699 processor
+
Mask(LONG_DOUBLE_128)
;; Use 128-bit long double
+Mask(LEON)
+;; Generate code for LEON
+
+Mask(LEON3)
+;; Generate code for LEON3
+
Mask(SPARCLITE)
;; Generate code for SPARClite
diff --git a/gcc-4.8/gcc/config/sparc/sync.md b/gcc-4.8/gcc/config/sparc/sync.md
index 1c1b9774b..cf909853b 100644
--- a/gcc-4.8/gcc/config/sparc/sync.md
+++ b/gcc-4.8/gcc/config/sparc/sync.md
@@ -161,7 +161,8 @@
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; mod_s
(match_operand:SI 7 "const_int_operand" "")] ;; mod_f
- "TARGET_V9 && (<MODE>mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)"
+ "(TARGET_V9 || TARGET_LEON3)
+ && (<MODE>mode != DImode || TARGET_ARCH64 || TARGET_V8PLUS)"
{
sparc_expand_compare_and_swap (operands);
DONE;
@@ -176,7 +177,7 @@
[(match_operand:I48MODE 2 "register_operand" "")
(match_operand:I48MODE 3 "register_operand" "")]
UNSPECV_CAS))])]
- "TARGET_V9"
+ "TARGET_V9 || TARGET_LEON3"
"")
(define_insn "*atomic_compare_and_swap<mode>_1"
@@ -187,10 +188,27 @@
[(match_operand:I48MODE 2 "register_operand" "r")
(match_operand:I48MODE 3 "register_operand" "0")]
UNSPECV_CAS))]
- "TARGET_V9 && (<MODE>mode == SImode || TARGET_ARCH64)"
+ "TARGET_V9 && (<MODE>mode != DImode || TARGET_ARCH64)"
"cas<modesuffix>\t%1, %2, %0"
[(set_attr "type" "multi")])
+(define_insn "*atomic_compare_and_swap_leon3_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "mem_noofs_operand" "+w"))
+ (set (match_dup 1)
+ (unspec_volatile:SI
+ [(match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "0")]
+ UNSPECV_CAS))]
+ "TARGET_LEON3"
+{
+ if (TARGET_USER_MODE)
+ return "casa\t%1 0xa, %2, %0"; /* ASI for user data space. */
+ else
+ return "casa\t%1 0xb, %2, %0"; /* ASI for supervisor data space. */
+}
+ [(set_attr "type" "multi")])
+
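With this pattern, the standard atomic builtins can expand to a single casa on LEON3, using ASI 0xa when -muser-mode is given and 0xb otherwise. Illustrative caller (ordinary user code, shown only to name what now maps onto the insn):

    #include <stdbool.h>

    bool
    cas_word (int *p, int expected, int desired)
    {
      return __atomic_compare_exchange_n (p, &expected, desired,
                                          false /* strong */,
                                          __ATOMIC_SEQ_CST,
                                          __ATOMIC_SEQ_CST);
    }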
(define_insn "*atomic_compare_and_swapdi_v8plus"
[(set (match_operand:DI 0 "register_operand" "=h")
(match_operand:DI 1 "mem_noofs_operand" "+w"))
@@ -220,7 +238,7 @@
(match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "register_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
- "TARGET_V8 || TARGET_V9"
+ "(TARGET_V8 || TARGET_V9) && !sparc_fix_ut699"
{
enum memmodel model = (enum memmodel) INTVAL (operands[3]);
@@ -236,7 +254,7 @@
UNSPECV_SWAP))
(set (match_dup 1)
(match_operand:SI 2 "register_operand" "0"))]
- "TARGET_V8 || TARGET_V9"
+ "(TARGET_V8 || TARGET_V9) && !sparc_fix_ut699"
"swap\t%1, %0"
[(set_attr "type" "multi")])
@@ -244,7 +262,7 @@
[(match_operand:QI 0 "register_operand" "")
(match_operand:QI 1 "memory_operand" "")
(match_operand:SI 2 "const_int_operand" "")]
- ""
+ "!sparc_fix_ut699"
{
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
rtx ret;
@@ -268,6 +286,6 @@
(unspec_volatile:QI [(match_operand:QI 1 "memory_operand" "+m")]
UNSPECV_LDSTUB))
(set (match_dup 1) (const_int -1))]
- ""
+ "!sparc_fix_ut699"
"ldstub\t%1, %0"
[(set_attr "type" "multi")])
diff --git a/gcc-4.8/gcc/config/sparc/t-rtems b/gcc-4.8/gcc/config/sparc/t-rtems
index 63d021770..f1a3d845e 100644
--- a/gcc-4.8/gcc/config/sparc/t-rtems
+++ b/gcc-4.8/gcc/config/sparc/t-rtems
@@ -17,6 +17,6 @@
# <http://www.gnu.org/licenses/>.
#
-MULTILIB_OPTIONS = msoft-float mcpu=v8
-MULTILIB_DIRNAMES = soft v8
+MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3
+MULTILIB_DIRNAMES = soft v8 leon3
MULTILIB_MATCHES = msoft-float=mno-fpu
diff --git a/gcc-4.8/gcc/config/sparc/t-sparc b/gcc-4.8/gcc/config/sparc/t-sparc
index d7b17fbd0..664f4a424 100644
--- a/gcc-4.8/gcc/config/sparc/t-sparc
+++ b/gcc-4.8/gcc/config/sparc/t-sparc
@@ -23,7 +23,7 @@ sparc.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
insn-codes.h conditions.h output.h $(INSN_ATTR_H) $(FLAGS_H) \
$(FUNCTION_H) $(EXCEPT_H) $(EXPR_H) $(OPTABS_H) $(RECOG_H) \
$(DIAGNOSTIC_CORE_H) $(GGC_H) $(TM_P_H) debug.h $(TARGET_H) \
- $(TARGET_DEF_H) $(COMMON_TARGET_H) $(GIMPLE_H) \
+ $(TARGET_DEF_H) $(COMMON_TARGET_H) $(GIMPLE_H) $(TREE_PASS_H) \
langhooks.h reload.h $(PARAMS_H) $(DF_H) $(OPTS_H) \
gt-sparc.h
diff --git a/gcc-4.8/gcc/config/tilegx/sync.md b/gcc-4.8/gcc/config/tilegx/sync.md
index a4bea6b68..16f18922f 100644
--- a/gcc-4.8/gcc/config/tilegx/sync.md
+++ b/gcc-4.8/gcc/config/tilegx/sync.md
@@ -150,15 +150,22 @@
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
+ rtx addend;
enum memmodel model = (enum memmodel) INTVAL (operands[3]);
if (operands[2] != const0_rtx)
- emit_move_insn (operands[2], gen_rtx_NEG (<MODE>mode, operands[2]));
+ {
+ addend = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (addend,
+ gen_rtx_MINUS (<MODE>mode, const0_rtx, operands[2]));
+ }
+ else
+ addend = operands[2];
tilegx_pre_atomic_barrier (model);
emit_insn (gen_atomic_fetch_add_bare<mode> (operands[0],
operands[1],
- operands[2]));
+ addend));
tilegx_post_atomic_barrier (model);
DONE;
})
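The expander implements fetch_sub as fetch_add of a negated addend; the fix is that the negation now lands in a fresh register instead of overwriting operands[2], which may still be live in the caller. The same strategy in plain C (standalone sketch):

    #include <stdatomic.h>

    long
    fetch_sub_via_add (atomic_long *p, long v)
    {
      /* Negate into a temporary (-v), never into the caller's v.  */
      return atomic_fetch_add (p, -v);
    }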
diff --git a/gcc-4.8/gcc/config/tilegx/tilegx-c.c b/gcc-4.8/gcc/config/tilegx/tilegx-c.c
index 3ecec6e04..8d19d20f7 100644
--- a/gcc-4.8/gcc/config/tilegx/tilegx-c.c
+++ b/gcc-4.8/gcc/config/tilegx/tilegx-c.c
@@ -47,6 +47,9 @@ tilegx_cpu_cpp_builtins (struct cpp_reader *pfile)
if (TARGET_32BIT)
builtin_define ("__tilegx32__");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+
TILEGX_CPU_CPP_ENDIAN_BUILTINS ();
GNU_USER_TARGET_OS_CPP_BUILTINS ();
}
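These predefines advertise that 1- and 2-byte compare-and-swap are now usable, letting headers and user code pick lock-free paths. A hypothetical consumer:

    #include <stdbool.h>

    bool
    try_lock_byte (volatile unsigned char *lock)
    {
    #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
      return __sync_bool_compare_and_swap (lock, 0, 1);
    #else
      return false;   /* caller falls back to a heavier lock */
    #endif
    }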
diff --git a/gcc-4.8/gcc/config/tilegx/tilegx.c b/gcc-4.8/gcc/config/tilegx/tilegx.c
index beb1012cf..502b953cf 100644
--- a/gcc-4.8/gcc/config/tilegx/tilegx.c
+++ b/gcc-4.8/gcc/config/tilegx/tilegx.c
@@ -702,6 +702,16 @@ tilegx_init_expanders (void)
}
+/* Implement TARGET_EXPAND_TO_RTL_HOOK. */
+static void
+tilegx_expand_to_rtl_hook (void)
+{
+ /* Exclude earlier sets of crtl->uses_pic_offset_table, because we
+ only care about uses actually emitted. */
+ crtl->uses_pic_offset_table = 0;
+}
+
+
/* Implement TARGET_SHIFT_TRUNCATION_MASK. DImode shifts use the mode
matching insns and therefore guarantee that the shift count is
modulo 64. SImode shifts sometimes use the 64 bit version so do
@@ -3543,6 +3553,12 @@ tilegx_expand_builtin (tree exp,
}
if (!pat)
return NULL_RTX;
+
+ /* If we are generating a prefetch, tell the scheduler not to move
+ it around. */
+ if (GET_CODE (pat) == PREFETCH)
+ PREFETCH_SCHEDULE_BARRIER_P (pat) = true;
+
emit_insn (pat);
if (nonvoid)
@@ -4368,10 +4384,12 @@ tilegx_gen_bundles (void)
basic_block bb;
FOR_EACH_BB (bb)
{
- rtx insn, next;
+ rtx insn, next, prev;
rtx end = NEXT_INSN (BB_END (bb));
- for (insn = next_insn_to_bundle (BB_HEAD (bb), end); insn; insn = next)
+ prev = NULL_RTX;
+ for (insn = next_insn_to_bundle (BB_HEAD (bb), end); insn;
+ prev = insn, insn = next)
{
next = next_insn_to_bundle (NEXT_INSN (insn), end);
@@ -4396,6 +4414,18 @@ tilegx_gen_bundles (void)
PUT_MODE (insn, SImode);
}
}
+
+ /* Delete barrier insns, because they can mess up the
+ emitting of bundle braces.  If the deleted insn ended a
+ bundle, the previous insn must be marked end-of-bundle
+ in its place.  */
+ if (get_attr_type (insn) == TYPE_NOTHING)
+ {
+ if (GET_MODE (insn) == QImode && prev != NULL
+ && GET_MODE (prev) == SImode)
+ PUT_MODE (prev, QImode);
+ delete_insn (insn);
+ }
}
}
}
@@ -5498,6 +5528,9 @@ tilegx_file_end (void)
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS tilegx_rtx_costs
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK tilegx_expand_to_rtl_hook
+
#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK tilegx_shift_truncation_mask
diff --git a/gcc-4.8/gcc/config/tilegx/tilegx.md b/gcc-4.8/gcc/config/tilegx/tilegx.md
index 8dfcff603..5fe177619 100644
--- a/gcc-4.8/gcc/config/tilegx/tilegx.md
+++ b/gcc-4.8/gcc/config/tilegx/tilegx.md
@@ -5076,10 +5076,8 @@
;; Network intrinsics
-;; Note the "pseudo" text is handled specially by the
-;; asm_output_opcode routine. If the output is an empty string, the
-;; instruction would bypass the asm_output_opcode routine, bypassing
-;; the bundle handling code.
+;; Note that this barrier is of type "nothing," which is deleted after
+;; the final scheduling pass so that nothing is emitted for it.
(define_insn "tilegx_network_barrier"
[(unspec_volatile:SI [(const_int 0)] UNSPEC_NETWORK_BARRIER)]
""
diff --git a/gcc-4.8/gcc/config/tilepro/tilepro-c.c b/gcc-4.8/gcc/config/tilepro/tilepro-c.c
index 8f7aa00d7..b9bf24ab2 100644
--- a/gcc-4.8/gcc/config/tilepro/tilepro-c.c
+++ b/gcc-4.8/gcc/config/tilepro/tilepro-c.c
@@ -44,6 +44,11 @@ tilepro_cpu_cpp_builtins (struct cpp_reader *pfile)
builtin_define ("__tile_chip__=1");
builtin_define ("__tile_chip_rev__=0");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+ builtin_define ("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+
TILEPRO_CPU_CPP_ENDIAN_BUILTINS ();
GNU_USER_TARGET_OS_CPP_BUILTINS ();
}
diff --git a/gcc-4.8/gcc/config/tilepro/tilepro.c b/gcc-4.8/gcc/config/tilepro/tilepro.c
index 59618e438..957146dd5 100644
--- a/gcc-4.8/gcc/config/tilepro/tilepro.c
+++ b/gcc-4.8/gcc/config/tilepro/tilepro.c
@@ -3167,6 +3167,12 @@ tilepro_expand_builtin (tree exp,
}
if (!pat)
return NULL_RTX;
+
+ /* If we are generating a prefetch, tell the scheduler not to move
+ it around. */
+ if (GET_CODE (pat) == PREFETCH)
+ PREFETCH_SCHEDULE_BARRIER_P (pat) = true;
+
emit_insn (pat);
if (nonvoid)
diff --git a/gcc-4.8/gcc/config/tilepro/tilepro.md b/gcc-4.8/gcc/config/tilepro/tilepro.md
index ca8cf80ca..ba9bc76d4 100644
--- a/gcc-4.8/gcc/config/tilepro/tilepro.md
+++ b/gcc-4.8/gcc/config/tilepro/tilepro.md
@@ -795,7 +795,7 @@
(define_expand "ctzdi2"
[(set (match_operand:DI 0 "register_operand" "")
- (ctz:DI (match_operand:DI 1 "reg_or_0_operand" "")))]
+ (ctz:DI (match_operand:DI 1 "register_operand" "")))]
""
{
rtx lo, hi, ctz_lo, ctz_hi, ctz_hi_plus_32, result;
@@ -823,7 +823,7 @@
(define_expand "clzdi2"
[(set (match_operand:DI 0 "register_operand" "")
- (clz:DI (match_operand:DI 1 "reg_or_0_operand" "")))]
+ (clz:DI (match_operand:DI 1 "register_operand" "")))]
""
{
rtx lo, hi, clz_lo, clz_hi, clz_lo_plus_32, result;
@@ -851,7 +851,7 @@
(define_expand "ffsdi2"
[(set (match_operand:DI 0 "register_operand" "")
- (ffs:DI (match_operand:DI 1 "reg_or_0_operand" "")))]
+ (ffs:DI (match_operand:DI 1 "register_operand" "")))]
""
{
rtx lo, hi, ctz_lo, ctz_hi, ctz_hi_plus_32, ctz, ctz_plus_1,ctz_cond;