From 1d9fec7937f45dde5e04cac966a2d9a12f2fc15a Mon Sep 17 00:00:00 2001 From: Yiran Wang Date: Tue, 23 Jun 2015 15:33:17 -0700 Subject: Synchronize with google/gcc-4_9 to r224707 (from r214835) Change-Id: I3d6f06fc613c8f8b6a82143dc44b7338483aac5d --- gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 154 +++ gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h | 8 + gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 11 +- gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 1 + gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 39 +- gcc-4.9/gcc/config/aarch64/aarch64.c | 12 +- gcc-4.9/gcc/config/aarch64/aarch64.md | 55 +- gcc-4.9/gcc/config/aarch64/arm_neon.h | 494 +++++---- gcc-4.9/gcc/config/alpha/alpha.c | 6 - gcc-4.9/gcc/config/alpha/alpha.md | 63 +- gcc-4.9/gcc/config/arm/arm-protos.h | 3 + gcc-4.9/gcc/config/arm/arm.c | 88 +- gcc-4.9/gcc/config/arm/arm.h | 11 +- gcc-4.9/gcc/config/arm/arm.md | 64 +- gcc-4.9/gcc/config/arm/constraints.md | 11 +- gcc-4.9/gcc/config/arm/linux-grte.h | 27 - gcc-4.9/gcc/config/arm/t-aprofile | 3 + gcc-4.9/gcc/config/avr/avr-dimode.md | 7 + gcc-4.9/gcc/config/avr/avr-fixed.md | 41 +- gcc-4.9/gcc/config/avr/avr-protos.h | 9 + gcc-4.9/gcc/config/avr/avr.c | 109 ++ gcc-4.9/gcc/config/avr/avr.md | 155 ++- gcc-4.9/gcc/config/darwin-c.c | 25 +- gcc-4.9/gcc/config/darwin-driver.c | 35 +- gcc-4.9/gcc/config/gnu-user.h | 7 +- gcc-4.9/gcc/config/i386/i386.c | 349 ++++-- gcc-4.9/gcc/config/i386/i386.md | 62 +- gcc-4.9/gcc/config/i386/i386.opt | 8 - gcc-4.9/gcc/config/i386/linux.h | 15 - gcc-4.9/gcc/config/i386/linux64.h | 9 - gcc-4.9/gcc/config/i386/mmx.md | 36 +- gcc-4.9/gcc/config/i386/x86-tune.def | 8 +- gcc-4.9/gcc/config/ia64/ia64.c | 5 - gcc-4.9/gcc/config/linux-grte.h | 41 - gcc-4.9/gcc/config/linux.c | 2 - gcc-4.9/gcc/config/msp430/msp430.md | 6 +- gcc-4.9/gcc/config/nios2/nios2.c | 15 + gcc-4.9/gcc/config/pa/pa.c | 26 +- gcc-4.9/gcc/config/pa/pa.md | 25 +- gcc-4.9/gcc/config/pa/predicates.md | 21 +- gcc-4.9/gcc/config/rs6000/altivec.h | 6 + gcc-4.9/gcc/config/rs6000/altivec.md | 56 +- gcc-4.9/gcc/config/rs6000/darwin.h | 6 +- gcc-4.9/gcc/config/rs6000/linux-grte.h | 41 - gcc-4.9/gcc/config/rs6000/predicates.md | 75 +- gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 10 + gcc-4.9/gcc/config/rs6000/rs6000-c.c | 147 ++- gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 5 +- gcc-4.9/gcc/config/rs6000/rs6000.c | 1367 ++++++++++++++++++++++-- gcc-4.9/gcc/config/rs6000/rs6000.md | 88 +- gcc-4.9/gcc/config/rs6000/rs6000.opt | 4 + gcc-4.9/gcc/config/rs6000/rtems.h | 3 +- gcc-4.9/gcc/config/rs6000/sysv4.h | 6 - gcc-4.9/gcc/config/rs6000/t-rtems | 73 +- gcc-4.9/gcc/config/rs6000/vsx.md | 227 +++- gcc-4.9/gcc/config/rs6000/xcoff.h | 7 +- gcc-4.9/gcc/config/rx/rx.c | 2 +- gcc-4.9/gcc/config/rx/rx.h | 6 +- gcc-4.9/gcc/config/s390/s390.c | 19 +- gcc-4.9/gcc/config/s390/s390.md | 2 +- gcc-4.9/gcc/config/sh/predicates.md | 2 +- gcc-4.9/gcc/config/sh/sh-mem.cc | 24 +- gcc-4.9/gcc/config/sh/sh.c | 2 +- gcc-4.9/gcc/config/sh/sh.md | 47 +- gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 24 +- gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 49 +- gcc-4.9/gcc/config/sh/sync.md | 18 +- gcc-4.9/gcc/config/sparc/leon.md | 14 +- gcc-4.9/gcc/config/sparc/linux.h | 6 - gcc-4.9/gcc/config/sparc/linux64.h | 6 - gcc-4.9/gcc/config/sparc/sparc-opts.h | 1 + gcc-4.9/gcc/config/sparc/sparc.c | 40 +- gcc-4.9/gcc/config/sparc/sparc.h | 51 +- gcc-4.9/gcc/config/sparc/sparc.md | 1 + gcc-4.9/gcc/config/sparc/sparc.opt | 3 + gcc-4.9/gcc/config/sparc/t-rtems | 13 +-
76 files changed, 3459 insertions(+), 1058 deletions(-)
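The headline aarch64 change in this sync is the addition of the __builtin_aarch64_get_fpcr/__builtin_aarch64_set_fpcr and __builtin_aarch64_get_fpsr/__builtin_aarch64_set_fpsr builtins, wired into the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook so that C11 atomic compound assignment on floating-point types can save, neuter, and restore the FP environment. As a rough orientation sketch (not part of the patch, and compilable only with a GCC that provides these builtins), the "hold" step that aarch64_atomic_assign_expand_fenv emits below corresponds to this user-level C, using the constants the patch defines:

/* Sketch of the fenv "hold" sequence; constants mirror the patch.  */
#define AARCH64_FE_ALL_EXCEPT   0x1f  /* invalid|divbyzero|overflow|underflow|inexact */
#define AARCH64_FE_EXCEPT_SHIFT 8     /* trap-enable bits sit at FPCR[12:8] */

static inline void
aarch64_fenv_hold (unsigned *fpcr_save, unsigned *fpsr_save)
{
  *fpcr_save = __builtin_aarch64_get_fpcr ();  /* control: trap enables */
  *fpsr_save = __builtin_aarch64_get_fpsr ();  /* status: sticky flags */
  /* Set all exceptions to non-stop mode...  */
  __builtin_aarch64_set_fpcr (*fpcr_save
                              & ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  /* ...and clear any already-raised exception flags.  */
  __builtin_aarch64_set_fpsr (*fpsr_save & ~AARCH64_FE_ALL_EXCEPT);
}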
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c index 55cfe0ab2..a5af874bf 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { enum aarch64_builtins { AARCH64_BUILTIN_MIN, + + AARCH64_BUILTIN_GET_FPCR, + AARCH64_BUILTIN_SET_FPCR, + AARCH64_BUILTIN_GET_FPSR, + AARCH64_BUILTIN_SET_FPSR, + AARCH64_SIMD_BUILTIN_BASE, #include "aarch64-simd-builtins.def" AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE @@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void) void aarch64_init_builtins (void) { + tree ftype_set_fpr + = build_function_type_list (void_type_node, unsigned_type_node, NULL); + tree ftype_get_fpr + = build_function_type_list (unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] + = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] + = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] + = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] + = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + if (TARGET_SIMD) aarch64_init_simd_builtins (); } @@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp, { tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + rtx pat, op0; + tree arg0; + + switch (fcode) + { + case AARCH64_BUILTIN_GET_FPCR: + case AARCH64_BUILTIN_SET_FPCR: + case AARCH64_BUILTIN_GET_FPSR: + case AARCH64_BUILTIN_SET_FPSR: + if ((fcode == AARCH64_BUILTIN_GET_FPCR) + || (fcode == AARCH64_BUILTIN_GET_FPSR)) + { + icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? + CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; + target = gen_reg_rtx (SImode); + pat = GEN_FCN (icode) (target); + } + else + { + target = NULL_RTX; + icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + pat = GEN_FCN (icode) (op0); + } + emit_insn (pat); + return target; + } if (fcode >= AARCH64_SIMD_BUILTIN_BASE) return aarch64_simd_expand_builtin (fcode, exp, target); @@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) return changed; } +void +aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned AARCH64_FE_INVALID = 1; + const unsigned AARCH64_FE_DIVBYZERO = 2; + const unsigned AARCH64_FE_OVERFLOW = 4; + const unsigned AARCH64_FE_UNDERFLOW = 8; + const unsigned AARCH64_FE_INEXACT = 16; + const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID + | AARCH64_FE_DIVBYZERO + | AARCH64_FE_OVERFLOW + | AARCH64_FE_UNDERFLOW + | AARCH64_FE_INEXACT); + const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; + tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; + tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; + tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; + tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; + + /* Generate the equivalent of: + unsigned int fenv_cr; + fenv_cr = __builtin_aarch64_get_fpcr (); + + unsigned int fenv_sr; + fenv_sr = __builtin_aarch64_get_fpsr (); + + Now set all exceptions to non-stop: + unsigned int mask_cr + = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); + unsigned int masked_cr; + masked_cr = fenv_cr & mask_cr; + + And clear all exception flags: + unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT; + unsigned int masked_sr; + masked_sr = fenv_sr & mask_sr; + + __builtin_aarch64_set_fpcr (masked_cr); + __builtin_aarch64_set_fpsr (masked_sr); */ + + fenv_cr = create_tmp_var (unsigned_type_node, NULL); + fenv_sr = create_tmp_var (unsigned_type_node, NULL); + + get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; + set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; + get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; + set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; + + mask_cr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); + mask_sr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT)); + + ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_cr, build_call_expr (get_fpcr, 0)); + ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_sr, build_call_expr (get_fpsr, 0)); + + masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); + masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); + + hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); + hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, + hold_fnclex_sr); + masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, + masked_fenv_sr); + ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); + + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __builtin_aarch64_set_fpsr (masked_fenv_sr); */ + + *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + /* Generate the equivalent of: + unsigned int new_fenv_var; + new_fenv_var = __builtin_aarch64_get_fpsr (); + + __builtin_aarch64_set_fpsr (fenv_sr); + +
__atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var (unsigned_type_node, NULL); + reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, + new_fenv_var, build_call_expr (get_fpsr, 0)); + restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, + reload_fenv, restore_fnenv), update_call); +} + + #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT #undef BUILTIN_DX diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h index eafdd551d..bb5c88d53 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -33,6 +33,14 @@ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" #endif +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + #ifndef LINK_SPEC #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h index b77becd23..651abe3ce 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -50,7 +50,16 @@ #define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 CA53_ERR_835769_SPEC -#define LINK_SPEC LINUX_TARGET_LINK_SPEC +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ + CA53_ERR_835769_SPEC #define TARGET_OS_CPP_BUILTINS() \ do \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h index bef58bf71..8b0a70538 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -291,4 +291,5 @@ extern bool aarch64_madd_needs_nop (rtx); extern void aarch64_final_prescan_insn (rtx); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 851e77a02..7626ed31f 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,6 +934,41 @@ [(set_attr "type" "neon_minmax")] ) +(define_expand "<su><maxmin>v2di3" + [(parallel [ + (set (match_operand:V2DI 0 "register_operand" "") + (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") + (match_operand:V2DI 2 "register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_SIMD" +{ + enum rtx_code cmp_operator; + rtx cmp_fmt; + + switch (<CODE>) + { + case UMIN: + cmp_operator = LTU; + break; + case SMIN: + cmp_operator = LT; + break; + case UMAX: + cmp_operator = GTU; + break; + case SMAX: + cmp_operator = GT; + break; + default: + gcc_unreachable (); + } + + cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); + emit_insn
(gen_aarch64_vcond_internalv2div2di (operands[0], operands[1], + operands[2], cmp_fmt, operands[1], operands[2])); + DONE; +}) + ;; vec_concat gives a new vector with the low elements from operand 1, and ;; the high elements from operand 2. That is to say, given op1 = { a, b } ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. @@ -4565,8 +4600,8 @@ }) (define_insn "*aarch64_simd_ld1r<mode>" - [(set (match_operand:VALLDI 0 "register_operand" "=w") - (vec_duplicate:VALLDI + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] "TARGET_SIMD" "ld1r\\t{%0.<Vtype>}, %1" diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 2ff6c7cb8..029c54ca3 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -3874,7 +3874,7 @@ aarch64_print_operand_address (FILE *f, rtx x) switch (GET_CODE (x)) { case PRE_INC: - asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case POST_INC: @@ -5152,7 +5152,6 @@ aarch64_parse_cpu (void) if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) { selected_cpu = cpu; - selected_tune = cpu; aarch64_isa_flags = selected_cpu->flags; if (ext != NULL) @@ -5248,9 +5247,8 @@ aarch64_override_options (void) gcc_assert (selected_cpu); - /* The selected cpu may be an architecture, so lookup tuning by core ID. */ if (!selected_tune) - selected_tune = &all_cores[selected_cpu->core]; + selected_tune = selected_cpu; aarch64_tune_flags = selected_tune->flags; aarch64_tune = selected_tune->core; @@ -7194,7 +7192,7 @@ aarch64_expand_vector_init (rtx target, rtx vals) x = XVECEXP (vals, 0, 0); if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) n_var = 1, one_var = 0; - + for (i = 1; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); @@ -8642,6 +8640,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from, #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ aarch64_autovectorize_vector_sizes +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ + aarch64_atomic_assign_expand_fenv + /* Section anchor support. */ #undef TARGET_MIN_ANCHOR_OFFSET diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index 319f80591..05f5e1b35 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -107,6 +107,10 @@ (define_c_enum "unspecv" [ UNSPECV_EH_RETURN ; Represent EH_RETURN + UNSPECV_GET_FPCR ; Represent fetch of FPCR content. + UNSPECV_SET_FPCR ; Represent assign of FPCR content. + UNSPECV_GET_FPSR ; Represent fetch of FPSR content. + UNSPECV_SET_FPSR ; Represent assign of FPSR content.
] ) @@ -1102,7 +1106,7 @@ add\\t%x0, %x1, %x2 sub\\t%x0, %x1, #%n2 add\\t%d0, %d1, %d2" - [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") + [(set_attr "type" "alu_imm,alu_reg,alu_imm,neon_add") (set_attr "simd" "*,*,*,yes")] ) @@ -2782,7 +2786,7 @@ ;; Logical right shift using SISD or Integer instruction (define_insn "*aarch64_lshr_sisd_or_int_<mode>3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "w,w,r") (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] @@ -2801,11 +2805,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_USHL))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) (define_split @@ -2814,11 +2820,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_USHL_2S))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) ;; Arithmetic right shift using SISD or Integer instruction @@ -3642,6 +3650,37 @@ DONE; }) +;; Write Floating-point Control Register. +(define_insn "set_fpcr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] + "" + "msr\\tfpcr, %0\;isb" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Control Register. +(define_insn "get_fpcr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] + "" + "mrs\\t%0, fpcr" + [(set_attr "type" "mrs")]) + +;; Write Floating-point Status Register. +(define_insn "set_fpsr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] + "" + "msr\\tfpsr, %0" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Status Register.
+(define_insn "get_fpsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] + "" + "mrs\\t%0, fpsr" + [(set_attr "type" "mrs")]) + + ;; AdvSIMD Stuff (include "aarch64-simd.md") diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index c01669b2c..ae0ae9c1b 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -39,9 +39,6 @@ typedef __builtin_aarch64_simd_hi int16x4_t typedef __builtin_aarch64_simd_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef int64_t int64x1_t; -typedef int32_t int32x1_t; -typedef int16_t int16x1_t; -typedef int8_t int8x1_t; typedef double float64x1_t; typedef __builtin_aarch64_simd_sf float32x2_t __attribute__ ((__vector_size__ (8))); @@ -56,9 +53,6 @@ typedef __builtin_aarch64_simd_uhi uint16x4_t typedef __builtin_aarch64_simd_usi uint32x2_t __attribute__ ((__vector_size__ (8))); typedef uint64_t uint64x1_t; -typedef uint32_t uint32x1_t; -typedef uint16_t uint16x1_t; -typedef uint8_t uint8x1_t; typedef __builtin_aarch64_simd_qi int8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_hi int16x8_t @@ -8400,7 +8394,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s16(a, b, c) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ + int16x4_t b_ = (b); \ int16x8_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8413,7 +8407,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s32(a, b, c) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ + int32x2_t b_ = (b); \ int32x4_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -8426,7 +8420,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ + uint16x4_t b_ = (b); \ uint16x8_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8439,7 +8433,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u32(a, b, c) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ + uint32x2_t b_ = (b); \ uint32x4_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -20925,42 +20919,42 @@ vqabsq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqabsb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqabsb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqabsqi (__a); + return (int8_t) __builtin_aarch64_sqabsqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqabsh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqabsh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqabshi (__a); + return (int16_t) __builtin_aarch64_sqabshi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqabss_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqabss_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqabssi (__a); + return (int32_t) __builtin_aarch64_sqabssi (__a); } /* vqadd */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqaddb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqaddb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) 
__builtin_aarch64_sqaddqi (__a, __b); + return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqaddh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqaddh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b); + return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqadds_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqadds_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b); + return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -20969,22 +20963,22 @@ vqaddd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqaddb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqaddh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqadds_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqadds_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21095,26 +21089,26 @@ vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlalhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlalsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ 
-21221,26 +21215,26 @@ vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlslhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlslsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21271,26 +21265,26 @@ vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21393,26 +21387,26 @@ vqdmull_n_s32 (int32x2_t __a, int32_t __b) return __builtin_aarch64_sqdmull_nv2si (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_s16 (int16_t __a, int16_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, 
const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_s32 (int32x1_t __a, int32x1_t __b) +vqdmulls_s32 (int32_t __a, int32_t __b) { return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21455,40 +21449,40 @@ vqmovn_u64 (uint64x2_t __a) return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovnh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovnh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovnhi (__a); + return (int8_t) __builtin_aarch64_sqmovnhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovnsi (__a); + return (int16_t) __builtin_aarch64_sqmovnsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovnd_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovndi (__a); + return (int32_t) __builtin_aarch64_sqmovndi (__a); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqmovnh_u16 (uint16x1_t __a) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqmovnh_u16 (uint16_t __a) { - return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a); + return (uint8_t) __builtin_aarch64_uqmovnhi (__a); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqmovns_u32 (uint32x1_t __a) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqmovns_u32 (uint32_t __a) { - return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a); + return (uint16_t) __builtin_aarch64_uqmovnsi (__a); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqmovnd_u64 (uint64x1_t __a) { - return (uint32x1_t) __builtin_aarch64_uqmovndi (__a); + return (uint32_t) __builtin_aarch64_uqmovndi (__a); } /* vqmovun */ @@ -21511,22 +21505,22 @@ vqmovun_s64 (int64x2_t __a) return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovunh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovunh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovunhi (__a); + return (int8_t) __builtin_aarch64_sqmovunhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovuns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovuns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovunsi (__a); + return (int16_t) __builtin_aarch64_sqmovunsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) 
+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovund_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovundi (__a); + return (int32_t) __builtin_aarch64_sqmovundi (__a); } /* vqneg */ @@ -21537,22 +21531,22 @@ vqnegq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqnegb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqnegb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqnegqi (__a); + return (int8_t) __builtin_aarch64_sqnegqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqnegh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqnegh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqneghi (__a); + return (int16_t) __builtin_aarch64_sqneghi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqnegs_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqnegs_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqnegsi (__a); + return (int32_t) __builtin_aarch64_sqnegsi (__a); } /* vqrdmulh */ @@ -21581,26 +21575,26 @@ vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } @@ -21703,20 +21697,20 @@ vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqrshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshlh_s16 (int16_t __a, int16_t __b) { return 
__builtin_aarch64_sqrshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqrshlsi (__a, __b); } @@ -21727,22 +21721,22 @@ vqrshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqrshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqrshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21789,40 +21783,40 @@ vqrshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); + return (uint16_t) 
__builtin_aarch64_uqrshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqrshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); } /* vqrshrun */ @@ -21845,22 +21839,22 @@ vqrshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); } /* vqshl */ @@ -21961,20 +21955,20 @@ vqshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_s16 (int16_t __a, int16_t __b) { return __builtin_aarch64_sqshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqshlsi (__a, __b); } @@ -21985,22 +21979,22 @@ vqshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_u32 (uint32x1_t __a, uint32x1_t __b) 
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22105,22 +22099,22 @@ vqshlq_n_u64 (uint64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22129,22 +22123,22 @@ vqshld_n_s64 (int64x1_t __a, const int __b) return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_u8 (uint8x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_n_u8 (uint8_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_n_u16 (uint16_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_n_u32 (uint32_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22203,22 +22197,22 @@ vqshluq_n_s64 (int64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlub_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlub_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshluh_n_s16 (int16x1_t __a, const int __b) 
+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshluh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshlus_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshlus_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22265,40 +22259,40 @@ vqshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b); } /* vqshrun */ @@ -22321,42 +22315,42 @@ vqshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ 
((__always_inline__)) -vqshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); } /* vqsub */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqsubb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqsubb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b); + return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqsubh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqsubh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b); + return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqsubs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqsubs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b); + return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22365,22 +22359,22 @@ vqsubd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqsubb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqsubh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqsubs_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -23596,22 +23590,22 @@ vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) (int64x2_t) __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vsqaddb_u8 (uint8_t __a, int8_t __b) { - return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); + return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); } -__extension__ static 
__inline uint16x1_t __attribute__ ((__always_inline__)) -vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vsqaddh_u16 (uint16_t __a, int16_t __b) { - return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); + return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vsqadds_u32 (uint32x1_t __a, int32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsqadds_u32 (uint32_t __a, int32_t __b) { - return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); + return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -25251,22 +25245,22 @@ vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8_t __a, uint8_t __b) { - return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); + return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16_t __a, uint16_t __b) { - return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); + return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32_t __a, uint32_t __b) { - return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); + return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index d5c7908be..19ae3665a 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -9918,12 +9918,6 @@ alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start -/* The Alpha architecture does not require sequential consistency. See - http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html - for an example of how it can be violated in practice. 
*/ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE alpha_option_override diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md index 795b4df3f..1179d572d 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.md +++ b/gcc-4.9/gcc/config/alpha/alpha.md @@ -5984,16 +5984,38 @@ [(set_attr "type" "jsr") (set_attr "length" "*,*,8")]) -(define_insn_and_split "call_value_osf_tlsgd" +(define_int_iterator TLS_CALL + [UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL]) + +(define_int_attr tls + [(UNSPEC_TLSGD_CALL "tlsgd") + (UNSPEC_TLSLDM_CALL "tlsldm")]) + +(define_insn "call_value_osf_<tls>" [(set (match_operand 0) (call (mem:DI (match_operand:DI 1 "symbolic_operand")) (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) (use (reg:DI 29)) (clobber (reg:DI 26))] "HAVE_AS_TLS" - "#" - "&& reload_completed" + "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_<tls>!%2\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6001,10 +6023,9 @@ (parallel [(set (match_dup 0) (call (mem:DI (match_dup 3)) (const_int 0))) - (set (match_dup 5) - (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 5)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6012,19 +6033,18 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) -(define_insn_and_split "call_value_osf_tlsldm" - [(set (match_operand 0) - (call (mem:DI (match_operand:DI 1 "symbolic_operand")) - (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL) - (use (reg:DI 29)) - (clobber (reg:DI 26))] - "HAVE_AS_TLS" - "#" - "&& reload_completed" +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && !peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6035,7 +6055,7 @@ (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6043,8 +6063,7 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) (define_insn "*call_value_osf_1" [(set (match_operand 0) diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h
b/gcc-4.9/gcc/config/arm/arm-protos.h index 13874ee6e..2ac3b3009 100644 --- a/gcc-4.9/gcc/config/arm/arm-protos.h +++ b/gcc-4.9/gcc/config/arm/arm-protos.h @@ -56,6 +56,7 @@ extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, extern int legitimate_pic_operand_p (rtx); extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx legitimize_tls_address (rtx, rtx); +extern bool arm_legitimate_address_p (enum machine_mode, rtx, bool); extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, @@ -294,4 +295,6 @@ extern void arm_emit_eabi_attribute (const char *, int, int); /* Defined in gcc/common/config/arm-common.c. */ extern const char *arm_rewrite_selected_cpu (const char *name); +extern bool arm_is_constant_pool_ref (rtx); + #endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 3c237cb6d..b79bb48b1 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -89,7 +89,6 @@ static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); inline static int thumb1_index_register_rtx_p (rtx, int); -static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); static int thumb_far_jump_used_p (void); static bool thumb_force_lr_save (void); static unsigned arm_size_return_regs (void); @@ -13952,9 +13951,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, HOST_WIDE_INT srcoffset, dstoffset; HOST_WIDE_INT src_autoinc, dst_autoinc; rtx mem, addr; - + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); - + /* Use hard registers if we have aligned source or destination so we can use load/store multiple with contiguous registers. */ if (dst_aligned || src_aligned) @@ -13968,7 +13967,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, src = copy_addr_to_reg (XEXP (srcbase, 0)); srcoffset = dstoffset = 0; - + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. For copying the last bytes we want to subtract this offset again. */ src_autoinc = dst_autoinc = 0; @@ -14022,14 +14021,14 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= block_size_bytes; } - + /* Copy any whole words left (note these aren't interleaved with any subsequent halfword/byte load/stores in the interests of simplicity). */ - + words = remaining / UNITS_PER_WORD; gcc_assert (words < interleave_factor); - + if (src_aligned && words > 1) { emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, @@ -14069,11 +14068,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, } remaining -= words * UNITS_PER_WORD; - + gcc_assert (remaining < 4); - + /* Copy a halfword if necessary. */ - + if (remaining >= 2) { halfword_tmp = gen_reg_rtx (SImode); @@ -14097,11 +14096,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= 2; srcoffset += 2; } - + gcc_assert (remaining < 2); - + /* Copy last byte. */ - + if ((remaining & 1) != 0) { byte_tmp = gen_reg_rtx (SImode); @@ -14122,9 +14121,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining--; srcoffset++; } - + /* Store last halfword if we haven't done so already. 
*/ - + if (halfword_tmp) { addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); @@ -14143,7 +14142,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); dstoffset++; } - + gcc_assert (remaining == 0 && srcoffset == dstoffset); } @@ -14162,7 +14161,7 @@ arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, rtx *loop_mem) { *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); - + /* Although the new mem does not refer to a known location, it does keep up to LENGTH bytes of alignment. */ *loop_mem = change_address (mem, BLKmode, *loop_reg); @@ -14182,14 +14181,14 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, { rtx label, src_reg, dest_reg, final_src, test; HOST_WIDE_INT leftover; - + leftover = length % bytes_per_iter; length -= leftover; - + /* Create registers and memory references for use within the loop. */ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); - + /* Calculate the value that SRC_REG should have after the last iteration of the loop. */ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), @@ -14198,7 +14197,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Emit the start of the loop. */ label = gen_label_rtx (); emit_label (label); - + /* Emit the loop body. */ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, interleave_factor); @@ -14206,11 +14205,11 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Move on to the next block. */ emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); - + /* Emit the loop condition. */ test = gen_rtx_NE (VOIDmode, src_reg, final_src); emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); - + /* Mop up any left-over bytes. */ if (leftover) arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); @@ -14224,7 +14223,7 @@ static int arm_movmemqi_unaligned (rtx *operands) { HOST_WIDE_INT length = INTVAL (operands[2]); - + if (optimize_size) { bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; @@ -14235,7 +14234,7 @@ arm_movmemqi_unaligned (rtx *operands) resulting code can be smaller. */ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; - + if (length > 12) arm_block_move_unaligned_loop (operands[0], operands[1], length, interleave_factor, bytes_per_iter); @@ -14253,7 +14252,7 @@ arm_movmemqi_unaligned (rtx *operands) else arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); } - + return 1; } @@ -28520,7 +28519,11 @@ arm_set_return_address (rtx source, rtx scratch) addr = plus_constant (Pmode, addr, delta); } - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. 
*/ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } } @@ -28572,7 +28575,11 @@ thumb_set_return_address (rtx source, rtx scratch) else addr = plus_constant (Pmode, addr, delta); - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. */ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } else emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); @@ -29828,10 +29835,10 @@ int vfp3_const_double_for_fract_bits (rtx operand) { REAL_VALUE_TYPE r0; - + if (!CONST_DOUBLE_P (operand)) return 0; - + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); if (exact_real_inverse (DFmode, &r0)) { @@ -30825,7 +30832,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) else return false; } - + return true; case ARM_POST_DEC: @@ -30842,10 +30849,10 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) return false; return true; - + default: return false; - + } return false; @@ -30856,7 +30863,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) Additionally, the default expansion code is not available or suitable for post-reload insn splits (this can occur when the register allocator chooses not to do a shift in NEON). - + This function is used in both initial expand and post-reload splits, and handles all kinds of 64-bit shifts. @@ -31109,7 +31116,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); int code_int; - enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? GET_MODE (*op2) : GET_MODE (*op1); gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); @@ -31163,7 +31170,7 @@ arm_asan_shadow_offset (void) /* This is a temporary fix for PR60655. Ideally we need to handle most of these cases in the generic part but - currently we reject minus (..) (sym_ref). We try to + currently we reject minus (..) (sym_ref). We try to ameliorate the case with minus (sym_ref1) (sym_ref2) where they are in the same section. */ @@ -31393,4 +31400,13 @@ arm_load_global_address (rtx symbol, rtx offset_reg, df_insn_rescan (load_insn); } +/* return TRUE if x is a reference to a value in a constant pool */ +extern bool +arm_is_constant_pool_ref (rtx x) +{ + return (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); +} + #include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index ab5167a8b..433a3dd77 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -74,8 +74,8 @@ extern char arm_arch_name[]; builtin_define_with_int_value ( \ "__ARM_SIZEOF_MINIMAL_ENUM", \ flag_short_enums ? 1 : 4); \ - builtin_define_with_int_value ( \ - "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ + wchar_type_node); \ if (TARGET_ARM_ARCH_PROFILE) \ builtin_define_with_int_value ( \ "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ @@ -2139,9 +2139,10 @@ extern int making_const_table; ? reverse_condition_maybe_unordered (code) \ : reverse_condition (code)) -/* The arm5 clz instruction returns 32. 
*/ -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) #define CC_STATUS_INIT \ do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 467f9ce4e..1153a1e34 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -127,9 +127,10 @@ ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for ; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6" ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without -; arm_arch6. This attribute is used to compute attribute "enabled", -; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" +; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is +; used to compute attribute "enabled", use type "any" to enable an +; alternative in all cases. +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -164,6 +165,10 @@ (match_test "TARGET_32BIT && !arm_arch6")) (const_string "yes") + (and (eq_attr "arch" "v6t2") + (match_test "TARGET_32BIT && arm_arch6 && arm_arch_thumb2")) + (const_string "yes") + (and (eq_attr "arch" "avoid_neon_for_64bits") (match_test "TARGET_NEON") (not (match_test "TARGET_PREFER_NEON_64BITS"))) @@ -3631,7 +3636,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && optimize_function_for_size_p (cfun)" + "TARGET_32BIT && optimize_function_for_size_p (cfun) && !arm_restrict_it" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -4374,7 +4379,7 @@ (define_insn "unaligned_loadhis" [(set (match_operand:SI 0 "s_register_operand" "=l,r") (sign_extend:SI - (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,Uh")] UNSPEC_UNALIGNED_LOAD)))] "unaligned_access && TARGET_32BIT" "ldr%(sh%)\t%0, %1\t@ unaligned" @@ -5287,7 +5292,7 @@ (define_insn "*arm_zero_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxth%?\\t%0, %1 @@ -5381,7 +5386,7 @@ (define_insn "*arm_zero_extendqisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxtb%(%)\\t%0, %1 @@ -5615,31 +5620,27 @@ (define_insn "*arm_extendhisi2" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch4 && !arm_arch6" "@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) ;; ??? 
Check Thumb-2 pool range (define_insn "*arm_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_32BIT && arm_arch6" "@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable_short_it" "no")] ) (define_insn "*arm_extendhisi2addsi" @@ -5682,9 +5683,7 @@ "TARGET_ARM && arm_arch4" "ldr%(sb%)\\t%0, %1" [(set_attr "type" "load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "256") - (set_attr "neg_pool_range" "244")] + (set_attr "predicable" "yes")] ) (define_expand "extendqisi2" @@ -5724,9 +5723,7 @@ ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi_v6" @@ -5738,9 +5735,7 @@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" [(set_attr "type" "extend,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi2addsi" @@ -6973,8 +6968,8 @@ ;; Pattern to recognize insn generated default case above (define_insn "*movhi_insn_arch4" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") - (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,n,r,mi"))] "TARGET_ARM && arm_arch4 && (register_operand (operands[0], HImode) @@ -6982,16 +6977,19 @@ "@ mov%?\\t%0, %1\\t%@ movhi mvn%?\\t%0, #%B1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" [(set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,256") - (set_attr "neg_pool_range" "*,*,*,244") + (set_attr "pool_range" "*,*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,*,244") + (set_attr "arch" "*,*,v6t2,*,*") (set_attr_alternative "type" [(if_then_else (match_operand 1 "const_int_operand" "") (const_string "mov_imm" ) (const_string "mov_reg")) (const_string "mvn_imm") + (const_string "mov_imm") (const_string "store1") (const_string "load1")])] ) @@ -10944,10 +10942,16 @@ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), operands[3], operands[4]); enum rtx_code rc = GET_CODE (operands[5]); - operands[6] = gen_rtx_REG (mode, CC_REGNUM); gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); - rc = reverse_condition (rc); + if (REGNO (operands[2]) != REGNO (operands[0])) + rc = reverse_condition (rc); + else + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); } diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md index 85dd116ce..f848664d5 100644 --- a/gcc-4.9/gcc/config/arm/constraints.md +++ b/gcc-4.9/gcc/config/arm/constraints.md @@ -36,7 +36,7 @@ ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: -;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM/Thumb-2 state: Q, Uh, Ut, Uv, Uy, Un, Um, Us ;; in ARM state: Uq ;; in Thumb state: Uu, Uw @@ -348,6 +348,12 @@ An address valid for 
loading/storing register exclusive" (match_operand 0 "mem_noofs_operand")) +(define_memory_constraint "Uh" + "@internal + An address suitable for byte and half-word loads which does not point inside a constant pool" + (and (match_code "mem") + (match_test "arm_legitimate_address_p (GET_MODE (op), XEXP (op, 0), false) && !arm_is_constant_pool_ref (op)"))) + (define_memory_constraint "Ut" "@internal In ARM/Thumb-2 state an address valid for loading/storing opaque structure @@ -394,7 +400,8 @@ (and (match_code "mem") (match_test "TARGET_ARM && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), - SIGN_EXTEND, 0)"))) + SIGN_EXTEND, 0) + && !arm_is_constant_pool_ref (op)"))) (define_memory_constraint "Q" "@internal diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h index 7ee5806b7..e69de29bb 100644 --- a/gcc-4.9/gcc/config/arm/linux-grte.h +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -1,27 +0,0 @@ -/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2011 Free Software Foundation, Inc. - Contributed by Chris Demetriou. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#undef SUBSUBTARGET_EXTRA_SPECS -#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile index ff9e2e1b3..86741e6b0 100644 --- a/gcc-4.9/gcc/config/arm/t-aprofile +++ b/gcc-4.9/gcc/config/arm/t-aprofile @@ -88,6 +88,9 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 +# Arch Matches +MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc + # FPU matches MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md index 639810518..56cd30458 100644 --- a/gcc-4.9/gcc/config/avr/avr-dimode.md +++ b/gcc-4.9/gcc/config/avr/avr-dimode.md @@ -68,6 +68,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (DImode == mode @@ -145,6 +146,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -201,6 +203,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -249,6 +252,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -338,6 +342,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (s8_operand (operands[2], VOIDmode)) @@ -424,6 +429,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]); emit_insn (gen_3_insn ()); @@ -457,6 +463,7 @@ (clobber (any_extend:SI (match_dup 1)))])] "avr_have_dimode" { + avr_fix_inputs (operands, 1 << 2, regmask (SImode, 22)); emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); emit_insn (gen_mulsidi3_insn()); diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md index 9c8489edd..6763f596e 100644 --- a/gcc-4.9/gcc/config/avr/avr-fixed.md +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -231,7 +231,11 @@ (clobber (reg:HI 24))]) (set (match_operand:QQ 0 "register_operand" "") (reg:QQ 23))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (QQmode, 24)); + }) + (define_expand "muluqq3_nomul" [(set (reg:UQQ 22) @@ -246,7 +250,10 @@ (clobber (reg:HI 22))]) (set (match_operand:UQQ 0 "register_operand" "") (reg:UQQ 25))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (UQQmode, 22)); + }) (define_insn "*mulqq3.call" [(set (reg:QQ 23) @@ -274,7 +281,10 @@ (clobber (reg:HI 22))]) (set (match_operand:ALL2QA 0 "register_operand" "") (reg:ALL2QA 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 18)); + }) ;; "*mulhq3.call" "*muluhq3.call" ;; "*mulha3.call" "*muluha3.call" @@ -302,7 +312,10 @@ (reg:ALL4A 20))) (set (match_operand:ALL4A 0 "register_operand" "") (reg:ALL4A 24))] - "AVR_HAVE_MUL") + 
"AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 16)); + }) ;; "*mulsa3.call" "*mulusa3.call" (define_insn "*mul3.call" @@ -330,7 +343,12 @@ (reg:ALL1Q 22))) (clobber (reg:QI 25))]) (set (match_operand:ALL1Q 0 "register_operand" "") - (reg:ALL1Q 24))]) + (reg:ALL1Q 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 25)); + }) + ;; "*divqq3.call" "*udivuqq3.call" (define_insn "*3.call" @@ -356,7 +374,11 @@ (clobber (reg:HI 26)) (clobber (reg:QI 21))]) (set (match_operand:ALL2QA 0 "register_operand" "") - (reg:ALL2QA 24))]) + (reg:ALL2QA 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 26)); + }) ;; "*divhq3.call" "*udivuhq3.call" ;; "*divha3.call" "*udivuha3.call" @@ -385,7 +407,11 @@ (clobber (reg:HI 26)) (clobber (reg:HI 30))]) (set (match_operand:ALL4A 0 "register_operand" "") - (reg:ALL4A 22))]) + (reg:ALL4A 22))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 24)); + }) ;; "*divsa3.call" "*udivusa3.call" (define_insn "*3.call" @@ -435,6 +461,7 @@ operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); + avr_fix_inputs (operands, 1 << 2, regmask (mode, REGNO (operands[4]))); operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0); // $2 is no more needed, but is referenced for expand. operands[2] = const0_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h index c5ce78429..4a899a27c 100644 --- a/gcc-4.9/gcc/config/avr/avr-protos.h +++ b/gcc-4.9/gcc/config/avr/avr-protos.h @@ -124,6 +124,15 @@ extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); extern bool avr_xload_libgcc_p (enum machine_mode); +static inline unsigned +regmask (enum machine_mode mode, unsigned regno) +{ + return ((1u << GET_MODE_SIZE (mode)) - 1) << regno; +} + +extern void avr_fix_inputs (rtx*, unsigned, unsigned); +extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned); + extern rtx lpm_reg_rtx; extern rtx lpm_addr_reg_rtx; extern rtx tmp_reg_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c index fa979df46..4c65f5efa 100644 --- a/gcc-4.9/gcc/config/avr/avr.c +++ b/gcc-4.9/gcc/config/avr/avr.c @@ -11118,6 +11118,115 @@ avr_convert_to_type (tree type, tree expr) } +/* PR63633: The middle-end might come up with hard regs as input operands. + + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + For each element of OPMASK which is a hard register overlapping RMASK, + replace OP[n] with a newly created pseudo register + + HREG == 0: Also emit a move insn that copies the contents of that + hard register into the new pseudo. + + HREG != 0: Also set HREG[n] to the hard register. */ + +static void +avr_fix_operands (rtx *op, rtx *hreg, unsigned opmask, unsigned rmask) +{ + for (; opmask; opmask >>= 1, op++) + { + rtx reg = *op; + + if (hreg) + *hreg = NULL_RTX; + + if ((opmask & 1) + && REG_P (reg) + && REGNO (reg) < FIRST_PSEUDO_REGISTER + // This hard-reg overlaps other prohibited hard regs? 
+ && (rmask & regmask (GET_MODE (reg), REGNO (reg)))) + { + *op = gen_reg_rtx (GET_MODE (reg)); + if (hreg == NULL) + emit_move_insn (*op, reg); + else + *hreg = reg; + } + + if (hreg) + hreg++; + } +} + + +void +avr_fix_inputs (rtx *op, unsigned opmask, unsigned rmask) +{ + avr_fix_operands (op, NULL, opmask, rmask); +} + + +/* Helper for the function below: If bit n of MASK is set and + HREG[n] != NULL, then emit a move insn to copy OP[n] to HREG[n]. + Otherwise do nothing for that n. Return TRUE. */ + +static bool +avr_move_fixed_operands (rtx *op, rtx *hreg, unsigned mask) +{ + for (; mask; mask >>= 1, op++, hreg++) + if ((mask & 1) + && *hreg) + emit_move_insn (*hreg, *op); + + return true; +} + + +/* PR63633: The middle-end might come up with hard regs as output operands. + + GEN is a sequence generating function like gen_mulsi3 with 3 operands OP[]. + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + Emit the insn sequence as generated by GEN() with all elements of OPMASK + which are hard registers overlapping RMASK replaced by newly created + pseudo registers. After the sequence has been emitted, emit insns that + move the contents of respective pseudos to their hard regs. */ + +bool +avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, + unsigned opmask, unsigned rmask) +{ + const int n = 3; + rtx hreg[n]; + + /* It is legitimate for GEN to call this function, and in order not to + get self-recursive we use the following static kludge. This is the + only way not to duplicate all expanders and to avoid ugly and + hard-to-maintain C-code instead of the much more appreciated RTL + representation as supplied by define_expand. */ + static bool lock = false; + + gcc_assert (opmask < (1u << n)); + + if (lock) + return false; + + avr_fix_operands (op, hreg, opmask, rmask); + + lock = true; + emit_insn (gen (op[0], op[1], op[2])); + lock = false; + + return avr_move_fixed_operands (op, hreg, opmask); +} + + /* Worker function for movmemhi expander. 
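[Side note on the avr_fix_inputs/avr_emit3_fix_outputs machinery above: the regmask() helper added to avr-protos.h is simply a contiguous hard-register bit mask. A minimal standalone sketch, with the mode size passed as a plain byte count because GET_MODE_SIZE is a GCC internal:]

    /* A value SIZE bytes wide living at hard register REGNO occupies
       R[REGNO] .. R[REGNO+SIZE-1], i.e. SIZE consecutive bits shifted
       up by REGNO.  */
    static unsigned
    regmask_sketch (unsigned size, unsigned regno)
    {
      return ((1u << size) - 1) << regno;
    }

    /* regmask (HImode, 24) corresponds to regmask_sketch (2, 24)
       == 0x03000000: bits 24 and 25, i.e. hard registers R24/R25.
       That is exactly the mask the mulhi3 expander in avr.md below
       passes to avr_fix_inputs() so that middle-end-supplied hard-reg
       inputs are moved out of the R24/R25 result registers first.  */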
XOP[0] Destination as MEM:BLK XOP[1] Source " " diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md index 3bb2a914a..3f4181dab 100644 --- a/gcc-4.9/gcc/config/avr/avr.md +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -1482,7 +1482,11 @@ (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) (clobber (reg:QI 22))]) - (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); + }) (define_insn "*mulqi3_call" [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) @@ -2210,7 +2214,13 @@ (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) (clobber (reg:HI 22)) (clobber (reg:QI 21))]) - (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + { + avr_fix_inputs (operands, (1 << 2), regmask (HImode, 24)); + }) + (define_insn "*mulhi3_call" [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) @@ -2248,6 +2258,10 @@ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn_and_split "*mulsi3" @@ -2287,7 +2301,23 @@ ;; "muluqisi3" ;; "muluhisi3" -(define_insn_and_split "mulusi3" +(define_expand "mulusi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulusi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*muluqisi3" +;; "*muluhisi3" +(define_insn_and_split "*mulusi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2323,7 +2353,23 @@ ;; "mulsqisi3" ;; "mulshisi3" -(define_insn_and_split "mulssi3" +(define_expand "mulssi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulssi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*mulsqisi3" +;; "*mulshisi3" +(define_insn_and_split "*mulssi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2366,7 +2412,22 @@ ;; One-extend operand 1 -(define_insn_and_split "mulohisi3" +(define_expand "mulohisi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if 
(avr_emit3_fix_outputs (gen_mulohisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulohisi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (not:SI (zero_extend:SI (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) @@ -2394,7 +2455,12 @@ (any_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_mulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) (define_expand "usmulhisi3" [(parallel [(set (match_operand:SI 0 "register_operand" "") @@ -2402,7 +2468,12 @@ (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_usmulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) ;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" ;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" @@ -2474,7 +2545,10 @@ (clobber (reg:HI 22))]) (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (HImode, 18)); + }) (define_insn "*mulsi3_call" @@ -2697,6 +2771,10 @@ emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulpsi3, operands, 1u << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn "*umulqihipsi3" @@ -2729,7 +2807,21 @@ [(set_attr "length" "7") (set_attr "cc" "clobber")]) -(define_insn_and_split "mulsqipsi3" +(define_expand "mulsqipsi3" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand"""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulsqipsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulsqipsi3" [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -4931,8 +5023,9 @@ (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")] UNSPEC_INDEX_JMP)) (use (label_ref (match_operand 1 "" ""))) - (clobber (match_dup 0))] - "" + (clobber (match_dup 0)) + (clobber (const_int 0))] + "!AVR_HAVE_EIJMP_EICALL" "@ ijmp push %A0\;push %B0\;ret @@ -4941,6 +5034,19 @@ (set_attr "isa" "rjmp,rjmp,jmp") (set_attr "cc" "none,none,clobber")]) +(define_insn "*tablejump.3byte-pc" + [(set (pc) + (unspec:HI [(reg:HI REG_Z)] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 0 "" ""))) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI 24))] + "AVR_HAVE_EIJMP_EICALL" + "clr r24\;subi r30,pm_lo8(-(%0))\;sbci r31,pm_hi8(-(%0))\;sbci r24,pm_hh8(-(%0))\;jmp __tablejump2__" + [(set_attr "length" "6") + (set_attr "isa" "eijmp") + (set_attr "cc" "clobber")]) + (define_expand "casesi" [(parallel [(set (match_dup 6) @@ -4958,15 +5064,31 @@ (label_ref (match_operand 4 "" "")) (pc))) - (set (match_dup 6) - (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + (set (match_dup 10) + (match_dup 7)) - (parallel [(set (pc) (unspec:HI [(match_dup 6)] 
UNSPEC_INDEX_JMP)) + (parallel [(set (pc) + (unspec:HI [(match_dup 10)] UNSPEC_INDEX_JMP)) (use (label_ref (match_dup 3))) - (clobber (match_dup 6))])] + (clobber (match_dup 10)) + (clobber (match_dup 8))])] "" { operands[6] = gen_reg_rtx (HImode); + + if (AVR_HAVE_EIJMP_EICALL) + { + operands[7] = operands[6]; + operands[8] = all_regs_rtx[24]; + operands[10] = gen_rtx_REG (HImode, REG_Z); + } + else + { + operands[7] = gen_rtx_PLUS (HImode, operands[6], + gen_rtx_LABEL_REF (VOIDmode, operands[3])); + operands[8] = const0_rtx; + operands[10] = operands[6]; + } }) @@ -6034,6 +6156,7 @@ emit_insn (gen_fmul_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmul_insn" @@ -6077,6 +6200,7 @@ emit_insn (gen_fmuls_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmuls_insn" @@ -6120,6 +6244,7 @@ emit_insn (gen_fmulsu_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmulsu_insn" diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c index 892ba3547..7fe4b1f2e 100644 --- a/gcc-4.9/gcc/config/darwin-c.c +++ b/gcc-4.9/gcc/config/darwin-c.c @@ -571,21 +571,34 @@ find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) } /* Return the value of darwin_macosx_version_min suitable for the - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, - so '10.4.2' becomes 1040. The lowest digit is always zero. + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, so '10.4.2' + becomes 1040 and '10.10.0' becomes 101000. The lowest digit is + always zero, as is the second lowest for '10.10.x' and above. Print a warning if the version number can't be understood. */ static const char * version_as_macro (void) { - static char result[] = "1000"; + static char result[7] = "1000"; + int minorDigitIdx; if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) goto fail; if (! ISDIGIT (darwin_macosx_version_min[3])) goto fail; - result[2] = darwin_macosx_version_min[3]; - if (darwin_macosx_version_min[4] != '\0' - && darwin_macosx_version_min[4] != '.') + + minorDigitIdx = 3; + result[2] = darwin_macosx_version_min[minorDigitIdx++]; + if (ISDIGIT (darwin_macosx_version_min[minorDigitIdx])) + { + /* Starting with OS X 10.10, the macro ends '00' rather than '0', + i.e. 10.10.x becomes 101000 rather than 10100. */ + result[3] = darwin_macosx_version_min[minorDigitIdx++]; + result[4] = '0'; + result[5] = '0'; + result[6] = '\0'; + } + if (darwin_macosx_version_min[minorDigitIdx] != '\0' + && darwin_macosx_version_min[minorDigitIdx] != '.') goto fail; return result; diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c index 8b6ae9391..541e10bc0 100644 --- a/gcc-4.9/gcc/config/darwin-driver.c +++ b/gcc-4.9/gcc/config/darwin-driver.c @@ -29,8 +29,8 @@ along with GCC; see the file COPYING3. If not see #include #include "xregex.h" -static bool -darwin_find_version_from_kernel (char *new_flag) +static char * +darwin_find_version_from_kernel (void) { char osversion[32]; size_t osversion_len = sizeof (osversion) - 1; @@ -39,6 +39,7 @@ darwin_find_version_from_kernel (char *new_flag) char minor_vers[6]; char * version_p; char * version_pend; + char * new_flag; /* Determine the version of the running OS. If we can't, warn user, and do nothing. 
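[As an aside, the version_as_macro() change in darwin-c.c above can be summarized by a small self-contained sketch; this is illustrative only, not the GCC function: "10.4.2" becomes "1040" while "10.10.0" becomes "101000", i.e. one trailing zero for single-digit minor versions and two for 10.10 and later.]

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Map a "10.M[.x]" Darwin version string to the numeric form used
       for the version-min macro; return NULL if not understood.  */
    static const char *
    version_macro_sketch (const char *ver)
    {
      static char result[7];
      int i;

      if (strncmp (ver, "10.", 3) != 0 || !isdigit ((unsigned char) ver[3]))
        return NULL;
      if (isdigit ((unsigned char) ver[4]))
        {
          /* Two-digit minor (10.10 and up): "10MM00".  */
          snprintf (result, sizeof result, "10%c%c00", ver[3], ver[4]);
          i = 5;
        }
      else
        {
          /* Single-digit minor: "10M0".  */
          snprintf (result, sizeof result, "10%c0", ver[3]);
          i = 4;
        }

      /* Anything after the minor must be '.' or end of string.  */
      return (ver[i] == '\0' || ver[i] == '.') ? result : NULL;
    }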
*/ @@ -46,7 +47,7 @@ darwin_find_version_from_kernel (char *new_flag) &osversion_len, NULL, 0) == -1) { warning (0, "sysctl for kern.osversion failed: %m"); - return false; + return NULL; } /* Try to parse the first two parts of the OS version number. Warn @@ -57,8 +58,6 @@ darwin_find_version_from_kernel (char *new_flag) version_p = osversion + 1; if (ISDIGIT (*version_p)) major_vers = major_vers * 10 + (*version_p++ - '0'); - if (major_vers > 4 + 9) - goto parse_failed; if (*version_p++ != '.') goto parse_failed; version_pend = strchr(version_p, '.'); @@ -74,17 +73,16 @@ darwin_find_version_from_kernel (char *new_flag) if (major_vers - 4 <= 4) /* On 10.4 and earlier, the old linker is used which does not support three-component system versions. */ - sprintf (new_flag, "10.%d", major_vers - 4); + asprintf (&new_flag, "10.%d", major_vers - 4); else - sprintf (new_flag, "10.%d.%s", major_vers - 4, - minor_vers); + asprintf (&new_flag, "10.%d.%s", major_vers - 4, minor_vers); - return true; + return new_flag; parse_failed: warning (0, "couldn%'t understand kern.osversion %q.*s", (int) osversion_len, osversion); - return false; + return NULL; } #endif @@ -105,7 +103,7 @@ darwin_default_min_version (unsigned int *decoded_options_count, const unsigned int argc = *decoded_options_count; struct cl_decoded_option *const argv = *decoded_options; unsigned int i; - static char new_flag[sizeof ("10.0.0") + 6]; + const char *new_flag; /* If the command-line is empty, just return. */ if (argc <= 1) @@ -142,16 +140,16 @@ darwin_default_min_version (unsigned int *decoded_options_count, #ifndef CROSS_DIRECTORY_STRUCTURE - /* Try to find the version from the kernel, if we fail - we print a message - and give up. */ - if (!darwin_find_version_from_kernel (new_flag)) - return; + /* Try to find the version from the kernel, if we fail - we print a message + and give up. */ + new_flag = darwin_find_version_from_kernel (); + if (!new_flag) + return; #else - /* For cross-compilers, default to the target OS version. */ - - strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag)); + /* For cross-compilers, default to the target OS version. */ + new_flag = DEF_MIN_OSX_VERSION; #endif /* CROSS_DIRECTORY_STRUCTURE */ @@ -165,7 +163,6 @@ darwin_default_min_version (unsigned int *decoded_options_count, memcpy (*decoded_options + 2, argv + 1, (argc - 1) * sizeof (struct cl_decoded_option)); return; - } /* Translate -filelist and -framework options in *DECODED_OPTIONS diff --git a/gcc-4.9/gcc/config/gnu-user.h b/gcc-4.9/gcc/config/gnu-user.h index 2af0a54ed..1a9a487a8 100644 --- a/gcc-4.9/gcc/config/gnu-user.h +++ b/gcc-4.9/gcc/config/gnu-user.h @@ -118,7 +118,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Link -lasan early on the command line. For -static-libasan, don't link it for -shared link, the executable should be compiled with -static-libasan in that case, and for executable link link with --{,no-}whole-archive around - it to force everything into the executable. And similarly for -ltsan. */ + it to force everything into the executable. And similarly for -ltsan + and -llsan. */ #if defined(HAVE_LD_STATIC_DYNAMIC) #undef LIBASAN_EARLY_SPEC #define LIBASAN_EARLY_SPEC "%{!shared:libasan_preinit%O%s} " \ @@ -129,4 +130,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define LIBTSAN_EARLY_SPEC "%{static-libtsan:%{!shared:" \ LD_STATIC_OPTION " --whole-archive -ltsan --no-whole-archive " \ LD_DYNAMIC_OPTION "}}%{!static-libtsan:-ltsan}" +#undef LIBLSAN_EARLY_SPEC +#define LIBLSAN_EARLY_SPEC "%{static-liblsan:%{!shared:" \ + LD_STATIC_OPTION " --whole-archive -llsan --no-whole-archive " \ + LD_DYNAMIC_OPTION "}}%{!static-liblsan:-llsan}" #endif diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index a598b8eef..54942d520 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -2465,7 +2465,7 @@ struct ptt const int align_func; }; -/* This table must be in sync with enum processor_type in i386.h. */ +/* This table must be in sync with enum processor_type in i386.h. */ static const struct ptt processor_target_table[PROCESSOR_max] = { {"generic", &generic_cost, 16, 10, 16, 10, 16}, @@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p, | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C - | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 - | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 - | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, @@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p, /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is on and OPTION_MASK_ABI_64 is off. We turn off OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by - -m64. */ - if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; #endif } @@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p, opts->x_target_flags |= MASK_NO_RED_ZONE; } + if (!global_options_set.x_flag_shrink_wrap_frame_pointer) + flag_shrink_wrap_frame_pointer = 1; + + /* -fshrink-wrap-frame-pointer is an optimization based on + -fno-omit-frame-pointer mode, so it is only effective when + flag_omit_frame_pointer is false. + Frame pointer shrinkwrap may increase code size, so disable + it when optimize_size is true. */ + if (flag_omit_frame_pointer + || optimize == 0 + || optimize_size) + flag_shrink_wrap_frame_pointer = 0; + + /* If only no -mno-omit-leaf-frame-pointer is explicitly specified, + -fshrink_wrap_frame_pointer will enable omitting leaf frame + pointer by default. */ + if (flag_shrink_wrap_frame_pointer + && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags) + && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))) + opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER; + /* Keep nonleaf frame pointers. 
*/ if (opts->x_flag_omit_frame_pointer) opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; - else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) - opts->x_flag_omit_frame_pointer = 1; /* If we're doing fast math, we don't care about comparison order wrt NaNs. This lets us use a shorter comparison sequence. */ @@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p, /* For all chips supporting SSE2, -mfpmath=sse performs better than fpmath=387. The second is however default at many targets since the extra 80bit precision of temporaries is considered to be part of ABI. - Overwrite the default at least for -ffast-math. + Overwrite the default at least for -ffast-math. TODO: -mfpmath=both seems to produce same performing code with bit smaller binaries. It is however not clear if register allocation is ready for this setting. @@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void) c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3) : TARGET_64BIT ? (1 << 2) : (1 << 1)); - + CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) @@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl, tree old_optimize = build_optimization_node (&global_options); - /* Get the optimization options of the current function. */ + /* Get the optimization options of the current function. */ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - + if (!func_optimize) func_optimize = old_optimize; @@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl, memset (&func_options, 0, sizeof (func_options)); init_options_struct (&func_options, NULL); lang_hooks.init_options_struct (&func_options); - + cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize)); @@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp) if (TREE_CODE (exp) == FUNCTION_DECL) return false; + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); @@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *sname = NULL; unsigned int flags = SECTION_WRITE; @@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) static void ATTRIBUTE_UNUSED x86_64_elf_unique_section (tree decl, int reloc) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *prefix = NULL; /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
*/ @@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file, { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) - fputs (".largecomm\t", file); + fputs ("\t.largecomm\t", file); else fputs (COMMON_ASM_OP, file); assemble_name (file, name); @@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype) if (abi == SYSV_ABI) { if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; + { + if (TARGET_X32) + { + static bool warned = false; + if (!warned) + { + error ("X32 does not support ms_abi attribute"); + warned = true; + } + } + abi = MS_ABI; + } } else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) abi = SYSV_ABI; @@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ The midde-end can't deal with the vector types > 16 bytes. In this case, we return the original mode and warn ABI change if CUM isn't - NULL. + NULL. If INT_RETURN is true, warn ABI change if the vector mode isn't available for function return value. */ @@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void) if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) return true; - /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER - turns off the frame pointer by default. Turn it back on now if - we've not got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER - && (!crtl->is_leaf - || ix86_current_function_calls_tls_descriptor)) - return true; - if (crtl->profile && !flag_fentry) return true; return false; } +/* Return true if the frame pointer of the function could be omitted. */ + +static bool +ix86_can_omit_leaf_frame_pointer (void) +{ + return TARGET_OMIT_LEAF_FRAME_POINTER + && (crtl->is_leaf + && !ix86_current_function_calls_tls_descriptor); +} + /* Record that the current function accesses previous call frames. */ void @@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += UNITS_PER_WORD; /* Skip saved base pointer. */ - if (frame_pointer_needed) + if (frame_pointer_needed || frame_pointer_partially_needed) offset += UNITS_PER_WORD; frame->hfp_save_offset = offset; @@ -10890,6 +10925,26 @@ ix86_expand_prologue (void) m->fs.fp_valid = true; } } + else if (frame_pointer_partially_needed) + { + insn = emit_insn (gen_push (hard_frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + if (fpset_needed_in_prologue) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for + pushes in prologue, so use fp as cfa_reg to reduce .eh_frame + size when possible. 
*/ + if (!any_fp_def) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.sp_offset; + m->fs.fp_valid = true; + } + } + } if (!int_registers_saved) { @@ -11067,6 +11122,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11080,6 +11139,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11231,6 +11294,34 @@ ix86_expand_prologue (void) emit_insn (gen_prologue_use (stack_pointer_rtx)); } +/* Get frame pointer setting insn based on frame state. */ +static rtx +ix86_set_fp_insn () +{ + rtx r, seq; + struct ix86_frame frame; + HOST_WIDE_INT offset; + + ix86_compute_frame_layout (&frame); + gcc_assert (frame_pointer_partially_needed); + offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset; + + if (TARGET_64BIT && (offset > 0x7fffffff)) + { + r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset)); + emit_insn (r); + r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + else + { + r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + emit_insn (r); + return r; +} + /* Emit code to restore REG using a POP insn. */ static void @@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style) || m->fs.sp_offset == frame.stack_pointer_offset); /* The FP must be valid if the frame pointer is present. */ - gcc_assert (frame_pointer_needed == m->fs.fp_valid); + if (!frame_pointer_partially_needed) + gcc_assert (frame_pointer_needed == m->fs.fp_valid); + else + gcc_assert (!(any_fp_def && m->fs.fp_valid)); + gcc_assert (!m->fs.fp_valid || m->fs.fp_offset == frame.hard_frame_pointer_offset); @@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style) /* If we used a stack pointer and haven't already got rid of it, then do so now. */ - if (m->fs.fp_valid) + if (m->fs.fp_valid || frame_pointer_partially_needed) { /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. */ @@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style) ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ - else if (TARGET_USE_LEAVE - || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) - || !cfun->machine->use_fast_prologue_epilogue) + else if (m->fs.fp_valid + && (TARGET_USE_LEAVE + || optimize_function_for_size_p (cfun) + || !cfun->machine->use_fast_prologue_epilogue)) ix86_emit_leave (); else { + rtx dest, offset; + dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx; + offset = (m->fs.fp_valid) ? 
const0_rtx : + GEN_INT (m->fs.sp_offset - frame.hfp_save_offset); pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); + dest, + offset, style, !using_drap); ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); } } @@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file, fprintf (file, "\n"); /* Switching back to text section. */ - switch_to_section (function_section (current_function_decl)); + switch_to_section (current_function_section ()); return true; } @@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) addr = XEXP (addr, 0); if (CONST_INT_P (addr)) return 0; - } + } else if (GET_CODE (addr) == AND && const_32bit_mask (XEXP (addr, 1), DImode)) { @@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && (SYMBOL_REF_LOCAL_P (op0) - || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !(SYMBOL_REF_WEAK (op0) + /* TODO:Temporary fix for weak defined symbols. Weak defined + symbols in an executable cannot be overridden even with + a non-weak symbol in a shared library. + Revert after fix is checked in here: + http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html*/ + && SYMBOL_REF_EXTERNAL_P (op0)) && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; @@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x, (reg:DI 2 cx)) This RTX is rejected from ix86_legitimate_address_p due to - non-strictness of base register 97. Following this rejection, + non-strictness of base register 97. Following this rejection, reload pushes all three components into separate registers, creating invalid memory address RTX. @@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x, rtx base, index; bool something_reloaded = false; - base = XEXP (XEXP (x, 0), 1); + base = XEXP (XEXP (x, 0), 1); if (!REG_OK_FOR_BASE_STRICT_P (base)) { push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL, @@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport) #ifdef SUB_TARGET_RECORD_STUB SUB_TARGET_RECORD_STUB (name); #endif - } + } rtl = gen_const_mem (Pmode, rtl); set_mem_alias_set (rtl, ix86_GOT_alias_set ()); @@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) return x; } -/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG +/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG is true if we require the result be a register. */ static rtx @@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, if (mode == CCmode) suffix = "b"; else if (mode == CCCmode) - suffix = "c"; + suffix = fp ? "b" : "c"; else gcc_unreachable (); break; @@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, break; case GEU: if (mode == CCmode) - suffix = fp ? "nb" : "ae"; + suffix = "nb"; else if (mode == CCCmode) - suffix = "nc"; + suffix = fp ? "nb" : "nc"; else gcc_unreachable (); break; @@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 2: putc ('w', file); break; - + case 4: putc ('l', file); break; @@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn) } /* Check if a 256bit AVX register is referenced in stores. 
*/ - + static void ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) { @@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) bool *used = (bool *) data; *used = true; } - } + } /* Calculate mode of upper 128bit AVX registers after the insn. */ @@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) t = gen_reg_rtx (V4SFmode); else t = op0; - + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) emit_move_insn (t, CONST0_RTX (V4SFmode)); else @@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode, op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); } @@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[]) if (TARGET_XOP) { - /* The XOP VPPERM insn supports three inputs. By ignoring the + /* The XOP VPPERM insn supports three inputs. By ignoring the one_operand_shuffle special case, we avoid creating another set of constant vectors in memory. */ one_operand_shuffle = false; @@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, DONE_LABEL is a label after the whole copying sequence. The label is created on demand if *DONE_LABEL is NULL. MIN_SIZE is minimal size of block copied. This value gets adjusted for new - bounds after the initial copies. + bounds after the initial copies. DESTMEM/SRCMEM are memory expressions pointing to the copies block, DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether @@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, return dst; } -/* Return true if ALG can be used in current context. +/* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool alg_usable_p (enum stringop_alg alg, bool memset) @@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *noalign = alg_noalign; return alg; } - break; + else if (!any_alg_usable_p) + break; } else if (alg_usable_p (candidate, memset)) { @@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; + else + gcc_assert (alg != libcall); return alg; } return (alg_usable_p (algs->unknown_size, memset) @@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, with specified algorithm. 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). + handled by main body (or up to size guarded by prologue guard). Misaligned move sequence @@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, /* Do the cheap promotion to allow better CSE across the main loop and epilogue (ie one load of the big constant in the - front of all code. + front of all code. For now the misaligned move sequences do not have fast path without broadcasting. 
*/ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) @@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } else { - /* Static functions and indirect calls don't need the pic register. */ + /* Static functions and indirect calls don't need the pic register. Also, + check if PLT was explicitly avoided via no-plt or "noplt" attribute, making + it an indirect call. */ if (flag_pic && (!TARGET_64BIT || (ix86_cmodel == CM_LARGE_PIC && DEFAULT_ABI != MS_ABI)) && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) + && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)) + && flag_plt + && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE + || !lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0)))))) use_reg (&use, pic_offset_table_rtx); } @@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } +/* Return true if the function being called was marked with attribute "noplt" + or using -fno-plt and we are compiling for non-PIC and x86_64. We need to + handle the non-PIC case in the backend because there is no easy interface + for the front-end to force non-PLT calls to use the GOT. This is currently + used only with 64-bit ELF targets to call the function marked "noplt" + indirectly. */ + +static bool +ix86_nopic_noplt_attribute_p (rtx call_op) +{ + if (flag_pic || ix86_cmodel == CM_LARGE + || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF + || SYMBOL_REF_LOCAL_P (call_op)) + return false; + + tree symbol_decl = SYMBOL_REF_DECL (call_op); + + if (!flag_plt + || (symbol_decl != NULL_TREE + && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) + return true; + + return false; +} + /* Output the assembly for a call instruction. */ const char * @@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) if (SIBLING_CALL_P (insn)) { - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "jmp\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "jmp\t%P0"; /* SEH epilogue detection requires the indirect branch case to include REX.W. */ @@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) seh_nop_p = true; } - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "call\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "call\t%P0"; else xasm = "call\t%A0"; @@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) using topological ordering in the region. */ if (rgn == CONTAINING_RGN (e->src->index) && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) - add_dependee_for_func_arg (first_arg, e->src); + add_dependee_for_func_arg (first_arg, e->src); } } insn = first_arg; @@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode, other unit can not rely on the alignment. Exclude va_list type. It is the common case of local array where - we can not benefit from the alignment. + we can not benefit from the alignment. TODO: Probably one should optimize for size only when var is not escaping. 
*/ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) @@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, convert_expr = build1 (CONVERT_EXPR, ptr_type_node, build_fold_addr_expr (version_decl)); result_var = create_tmp_var (ptr_type_node, NULL); - convert_stmt = gimple_build_assign (result_var, convert_expr); + convert_stmt = gimple_build_assign (result_var, convert_expr); return_stmt = gimple_build_return (result_var); if (predicate_chain == NULL_TREE) @@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_seq_add_stmt (&gseq, call_cond_stmt); predicate_chain = TREE_CHAIN (predicate_chain); - + if (and_expr_var == NULL) and_expr_var = cond_var; else @@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_set_bb (return_stmt, bb2); bb3 = e23->dest; - make_edge (bb1, bb3, EDGE_FALSE_VALUE); + make_edge (bb1, bb3, EDGE_FALSE_VALUE); remove_edge (e23); make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_FMA4, P_XOP, P_PROC_XOP, - P_FMA, + P_FMA, P_PROC_FMA, P_AVX2, P_PROC_AVX2 @@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) cl_target_option_save (&cur_target, &global_options); target_node = ix86_valid_target_attribute_tree (attrs, &global_options, &global_options_set); - + gcc_assert (target_node); new_target = TREE_TARGET_OPTION (target_node); gcc_assert (new_target); - + if (new_target->arch_specified && new_target->arch > 0) { switch (new_target->arch) @@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "bdver4"; priority = P_PROC_AVX2; break; - } - } - + } + } + cl_target_option_restore (&global_options, &cur_target); - + if (predicate_list && arg_str == NULL) { error_at (DECL_SOURCE_LOCATION (decl), "No dispatcher found for the versioning attributes"); return 0; } - + if (predicate_list) { predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; @@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) *predicate_list = predicate_chain; } - return priority; + return priority; } /* This compares the priority of target features in function DECL1 @@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2) /* V1 and V2 point to function versions with different priorities based on the target ISA. This function compares their priorities. */ - + static int feature_compare (const void *v1, const void *v2) { @@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2) result = true; XDELETEVEC (target1); - XDELETEVEC (target2); - + XDELETEVEC (target2); + return result; } -static tree +static tree ix86_mangle_decl_assembler_name (tree decl, tree id) { /* For function version, add the target suffix to the assembler name. */ @@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl) fn_type = TREE_TYPE (decl); func_type = build_function_type (TREE_TYPE (fn_type), TYPE_ARG_TYPES (fn_type)); - + func_decl = build_fn_decl (func_name, func_type); XDELETEVEC (func_name); TREE_USED (func_decl) = 1; @@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl) /* This will be of type IFUNCs have to be externally visible. 
*/ TREE_PUBLIC (func_decl) = 1; - return func_decl; + return func_decl; } #endif @@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl) tree dispatch_decl = NULL; struct cgraph_function_version_info *default_version_info = NULL; - + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); node = cgraph_get_node (fn); @@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl) node_v = get_cgraph_node_version (node); gcc_assert (node_v != NULL); - + if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; @@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl, gcc_assert (dispatch_decl != NULL); /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */ - DECL_ATTRIBUTES (dispatch_decl) + DECL_ATTRIBUTES (dispatch_decl) = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); /* Create the alias for dispatch to resolver here. */ @@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl, provide the code to dispatch the right function at run-time. NODE points to the dispatcher decl whose body will be created. */ -static tree +static tree ix86_generate_version_dispatcher_body (void *node_p) { tree resolver_decl; @@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - rebuild_cgraph_edges (); + rebuild_cgraph_edges (); pop_cfun (); return resolver_decl; } @@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM15H, M_INTEL_SILVERMONT, M_AMD_BTVER1, - M_AMD_BTVER2, + M_AMD_BTVER2, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args) {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, - {"btver1", M_AMD_BTVER1}, + {"btver1", M_AMD_BTVER1}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, {"bdver4", M_AMDFAM15H_BDVER4}, - {"btver2", M_AMD_BTVER2}, + {"btver2", M_AMD_BTVER2}, }; static struct _isa_names_table @@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, { /* Make it call __cpu_indicator_init in libgcc. */ tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); + type = build_function_type_list (integer_type_node, NULL_TREE); fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); + call_expr = build_call_expr (fndecl, 0); return expand_expr (call_expr, target, mode, EXPAND_NORMAL); } case IX86_BUILTIN_CPU_IS: @@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first) { default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) + || TREE_CODE(decl) == STRING_CST) && ix86_in_large_data_p (decl)) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; } @@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) op0 = gen_lowpart (V4DImode, d->op0); op1 = gen_lowpart (V4DImode, d->op1); rperm[0] - = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0) - || ((d->perm[nelt / 2] & (nelt / 2)) ? 
2 : 0)); + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); @@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_PROFILE_BEFORE_PROLOGUE #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue +#undef TARGET_SET_FP_INSN +#define TARGET_SET_FP_INSN ix86_set_fp_insn + #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name @@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required +#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER +#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE ix86_can_eliminate @@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p +#undef TARGET_STRICT_ALIGN +#define TARGET_STRICT_ALIGN true struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 39d395875..2369e4b40 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -12159,18 +12159,52 @@ (set_attr "mode" "")]) ;; BMI2 instructions. -(define_insn "bmi2_bzhi_3" +(define_expand "bmi2_bzhi_3" + [(parallel + [(set (match_operand:SWI48 0 "register_operand") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand") + (const_int 255)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_BMI2" + "operands[3] = GEN_INT ( * BITS_PER_UNIT);") + +(define_insn "*bmi2_bzhi_3" [(set (match_operand:SWI48 0 "register_operand" "=r") - (and:SWI48 (lshiftrt:SWI48 (const_int -1) - (match_operand:SWI48 2 "register_operand" "r")) - (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand" "r") + (const_int 255)) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) (clobber (reg:CC FLAGS_REG))] - "TARGET_BMI2" + "TARGET_BMI2 && INTVAL (operands[3]) == * BITS_PER_UNIT" "bzhi\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "bitmanip") (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_mode_attr k [(SI "k") (DI "q")]) +(define_insn "*bmi2_bzhi_3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r")) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && INTVAL (operands[3]) == * BITS_PER_UNIT" + "bzhi\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_insn "bmi2_pdep_3" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r") @@ -13518,7 +13552,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem" 
[(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13527,7 +13562,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13550,7 +13586,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); @@ -13589,7 +13626,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem1" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13598,7 +13636,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13621,7 +13660,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt index 1e00b660e..f64a9e1eb 100644 --- a/gcc-4.9/gcc/config/i386/i386.opt +++ b/gcc-4.9/gcc/config/i386/i386.opt @@ -108,10 +108,6 @@ int x_ix86_dump_tunes TargetSave int x_ix86_force_align_arg_pointer -;; -mcopyrelocs= -TargetSave -int x_ix86_copyrelocs - ;; -mforce-drap= TargetSave int x_ix86_force_drap @@ -295,10 +291,6 @@ mfancy-math-387 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU -mcopyrelocs -Target Report Var(ix86_pie_copyrelocs) Init(0) -Assume copy relocations support for pie builds. - mforce-drap Target Report Var(ix86_force_drap) Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h index 27d68b5db..bfc7746bc 100644 --- a/gcc-4.9/gcc/config/i386/linux.h +++ b/gcc-4.9/gcc/config/i386/linux.h @@ -24,18 +24,3 @@ along with GCC; see the file COPYING3. If not see #define RUNTIME_ROOT_PREFIX "" #endif #define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" - -/* These may be provided by config/linux-grtev*.h. */ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#ifndef SUBTARGET_EXTRA_SPECS_STR -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS -#else -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS \ - SUBTARGET_EXTRA_SPECS_STR -#endif diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h index 5124a341b..b71616fea 100644 --- a/gcc-4.9/gcc/config/i386/linux64.h +++ b/gcc-4.9/gcc/config/i386/linux64.h @@ -34,12 +34,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2" #define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2" -/* These may be provided by config/linux-grtev*.h. 
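The flag_finite_math_only condition added to the fprem/fprem1 patterns above reserves the inline x87 expansion of fmod/remainder for -ffinite-math-only compilations; otherwise the libm call is kept, presumably so the library handles the Inf/NaN corner cases. Illustrative only:

/* fmod-expand.c -- gcc -m32 -O2 -ffinite-math-only -S fmod-expand.c
   may expand this inline via fprem (TARGET_USE_FANCY_MATH_387);
   without -ffinite-math-only it stays "call fmod".  */
#include <math.h>

double
wrap (double x, double y)
{
  return fmod (x, y);
}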
*/ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS - diff --git a/gcc-4.9/gcc/config/i386/mmx.md b/gcc-4.9/gcc/config/i386/mmx.md index 214acde23..a7d2a7eec 100644 --- a/gcc-4.9/gcc/config/i386/mmx.md +++ b/gcc-4.9/gcc/config/i386/mmx.md @@ -600,20 +600,25 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2sf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o") + (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - unpckhps\t%0, %0 + %vmovshdup\t{%1, %0|%0, %1} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # # #" - [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov") - (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")]) + [(set_attr "isa" "*,sse3,noavx,*,*,*,*") + (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") + (set_attr "length_immediate" "*,*,1,*,*,*,*") + (set_attr "prefix_rep" "*,1,*,*,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig") + (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")]) (define_split [(set (match_operand:SF 0 "register_operand") @@ -1288,26 +1293,23 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,x,y,x,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,x,0,o,o,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - punpckhdq\t%0, %0 - pshufd\t{$85, %1, %0|%0, %1, 85} - unpckhps\t%0, %0 + %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # #" - [(set (attr "isa") - (if_then_else (eq_attr "alternative" "1,2") - (const_string "sse2") - (const_string "*"))) - (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov") - (set_attr "length_immediate" "*,*,1,*,*,*,*") - (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) + [(set_attr "isa" "*,sse2,noavx,*,*,*") + (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + (set_attr "length_immediate" "*,1,1,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig") + (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand") diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def index ddf1d21c9..215c71f4d 100644 --- a/gcc-4.9/gcc/config/i386/x86-tune.def +++ b/gcc-4.9/gcc/config/i386/x86-tune.def @@ -97,25 +97,25 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", conditional jump instruction for 32 bit TARGET. FIXME: revisit for generic. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", - m_CORE_ALL | m_BDVER) + m_GENERIC | m_CORE_ALL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent conditional jump instruction for TARGET_64BIT. FIXME: revisit for generic. 
*/ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a subsequent conditional jump instruction when the condition jump check sign flag (SF) or overflow flag (OF). */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by the conditional jump instruction. */ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", - m_SANDYBRIDGE | m_HASWELL) + m_GENERIC | m_SANDYBRIDGE | m_HASWELL) /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations during reassociation of integer computation. */ diff --git a/gcc-4.9/gcc/config/ia64/ia64.c b/gcc-4.9/gcc/config/ia64/ia64.c index 41adc4adc..4ec3e3abe 100644 --- a/gcc-4.9/gcc/config/ia64/ia64.c +++ b/gcc-4.9/gcc/config/ia64/ia64.c @@ -602,11 +602,6 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p -/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur - in an order different from the specified program order. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p #undef TARGET_LEGITIMATE_ADDRESS_P diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h index 31e8a94ce..e69de29bb 100644 --- a/gcc-4.9/gcc/config/linux-grte.h +++ b/gcc-4.9/gcc/config/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Overrides LIB_SPEC from gnu-user.h. */ -#undef LIB_SPEC -#define LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. 
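The m_GENERIC additions above turn on cmp/test-plus-branch fusion tuning for -mtune=generic. A toy case where that tuning matters, with the expected pairing described only loosely:

/* fuse.c -- gcc -O2 -mtune=generic -S fuse.c
   The tuning keeps the loop's cmp and its conditional jump adjacent
   so macro-fusion-capable cores can issue them as one uop.  */
int
find (const int *p, int n, int key)
{
  int i;
  for (i = 0; i < n; i++)
    if (p[i] == key)
      return i;
  return -1;
}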
*/ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c index 7c3c5a461..cdb2b5bdd 100644 --- a/gcc-4.9/gcc/config/linux.c +++ b/gcc-4.9/gcc/config/linux.c @@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "linux-protos.h" -/* Android does not support GNU indirect functions. */ - bool linux_has_ifunc_p (void) { diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md index 5e890eced..3f29d6d62 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.md +++ b/gcc-4.9/gcc/config/msp430/msp430.md @@ -559,7 +559,7 @@ [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (subreg:PSI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) 0))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #4, %0" + "RLAM.A #4, %0 { RRAM.A #4, %0" ) ;; Look for cases where integer/pointer conversions are suboptimal due @@ -587,7 +587,7 @@ (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 1)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #3, %0" + "RLAM.A #4, %0 { RRAM.A #3, %0" ) (define_insn "extend_and_shift2_hipsi2" [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 2)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #2, %0" + "RLAM.A #4, %0 { RRAM.A #2, %0" ) ; Nasty - we are sign-extending a 20-bit PSI value in one register into diff --git a/gcc-4.9/gcc/config/nios2/nios2.c b/gcc-4.9/gcc/config/nios2/nios2.c index cdd2e6bc9..047b615ba 100644 --- a/gcc-4.9/gcc/config/nios2/nios2.c +++ b/gcc-4.9/gcc/config/nios2/nios2.c @@ -2135,6 +2135,18 @@ nios2_output_dwarf_dtprel (FILE *file, int size, rtx x) fprintf (file, ")"); } +/* Implement TARGET_ASM_FILE_END. */ + +static void +nios2_asm_file_end (void) +{ + /* The Nios II Linux stack is mapped non-executable by default, so add a + .note.GNU-stack section for switching to executable stacks only when + trampolines are generated. */ + if (TARGET_LINUX_ABI && trampolines_created) + file_end_indicate_exec_stack (); +} + /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */ static void nios2_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) @@ -3313,6 +3325,9 @@ nios2_merge_decl_attributes (tree olddecl, tree newdecl) #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nios2_output_addr_const_extra +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END nios2_asm_file_end + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nios2_option_override diff --git a/gcc-4.9/gcc/config/pa/pa.c b/gcc-4.9/gcc/config/pa/pa.c index 5a7598ca7..801982068 100644 --- a/gcc-4.9/gcc/config/pa/pa.c +++ b/gcc-4.9/gcc/config/pa/pa.c @@ -3235,7 +3235,12 @@ pa_assemble_integer (rtx x, unsigned int size, int aligned_p) && aligned_p && function_label_operand (x, VOIDmode)) { - fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file); + fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); + + /* We don't want an OPD when generating fast indirect calls. */ + if (!TARGET_FAST_INDIRECT_CALLS) + fputs ("P%", asm_out_file); + output_addr_const (asm_out_file, x); fputc ('\n', asm_out_file); return true; @@ -4203,9 +4208,12 @@ pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { last_address = extra_nop ?
4 : 0; insn = get_last_nonnote_insn (); - last_address += INSN_ADDRESSES (INSN_UID (insn)); - if (INSN_P (insn)) - last_address += insn_default_length (insn); + if (insn) + { + last_address += INSN_ADDRESSES (INSN_UID (insn)); + if (INSN_P (insn)) + last_address += insn_default_length (insn); + } last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); } @@ -9308,6 +9316,12 @@ pa_function_value (const_tree valtype, || TREE_CODE (valtype) == COMPLEX_TYPE || TREE_CODE (valtype) == VECTOR_TYPE) { + HOST_WIDE_INT valsize = int_size_in_bytes (valtype); + + /* Handle aggregates that fit exactly in a word or double word. */ + if ((valsize & (UNITS_PER_WORD - 1)) == 0) + return gen_rtx_REG (TYPE_MODE (valtype), 28); + if (TARGET_64BIT) { /* Aggregates with a size less than or equal to 128 bits are @@ -9316,7 +9330,7 @@ pa_function_value (const_tree valtype, memory. */ rtx loc[2]; int i, offset = 0; - int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2; + int ub = valsize <= UNITS_PER_WORD ? 1 : 2; for (i = 0; i < ub; i++) { @@ -9328,7 +9342,7 @@ pa_function_value (const_tree valtype, return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); } - else if (int_size_in_bytes (valtype) > UNITS_PER_WORD) + else if (valsize > UNITS_PER_WORD) { /* Aggregates 5 to 8 bytes in size are returned in general registers r28-r29 in the same manner as other non diff --git a/gcc-4.9/gcc/config/pa/pa.md b/gcc-4.9/gcc/config/pa/pa.md index a9421ac2e..43b909e35 100644 --- a/gcc-4.9/gcc/config/pa/pa.md +++ b/gcc-4.9/gcc/config/pa/pa.md @@ -123,7 +123,7 @@ ;; type "binary" insns have two input operands (1,2) and one output (0) (define_attr "type" - "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload" + "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload,trap" (const_string "binary")) (define_attr "pa_combine_type" @@ -166,7 +166,7 @@ ;; For conditional branches. Frame related instructions are not allowed ;; because they confuse the unwind support. (define_attr "in_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -175,7 +175,7 @@ ;; Disallow instructions which use the FPU since they will tie up the FPU ;; even if the instruction is nullified. 
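The pa_function_value change above short-circuits aggregates whose size is an exact word multiple into %r28 with their natural machine mode. A case it covers, assuming 32-bit PA where UNITS_PER_WORD is 4 (file name illustrative):

/* pa-retval.c -- this 4-byte struct is now returned directly in
   %r28 (SImode) rather than through the BLKmode/PARALLEL paths.  */
struct pair { short a, b; };

struct pair
make_pair (short a, short b)
{
  struct pair p;
  p.a = a;
  p.b = b;
  return p;
}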
(define_attr "in_nullified_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -184,7 +184,7 @@ ;; For calls and millicode calls. Allow unconditional branches in the ;; delay slot. (define_attr "in_call_delay" "false,true" - (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -5331,6 +5331,15 @@ [(set_attr "type" "binary,binary") (set_attr "length" "4,4")]) +;; Trap instructions. + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "{addit|addi,tc},<> 1,%%r0,%%r0" + [(set_attr "type" "trap") + (set_attr "length" "4")]) + ;; Clobbering a "register_operand" instead of a match_scratch ;; in operand3 of millicode calls avoids spilling %r1 and ;; produces better code. @@ -8926,14 +8935,14 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" ;; strength reduction is used. It is actually created when the instruction ;; combination phase combines the special loop test. Since this insn ;; is both a jump insn and has an output, it must deal with its own -;; reloads, hence the `m' constraints. The `!' constraints direct reload +;; reloads, hence the `Q' constraints. The `!' constraints direct reload ;; to not choose the register alternatives in the event a reload is needed. (define_insn "decrement_and_branch_until_zero" [(set (pc) (if_then_else (match_operator 2 "comparison_operator" [(plus:SI - (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m") + (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*Q") (match_operand:SI 1 "int5_operand" "L,L,L")) (const_int 0)]) (label_ref (match_operand 3 "" "")) @@ -9022,7 +9031,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (label_ref (match_operand 3 "" "")) (pc))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 0); " @@ -9094,7 +9103,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (pc) (label_ref (match_operand 3 "" "")))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 1); " diff --git a/gcc-4.9/gcc/config/pa/predicates.md b/gcc-4.9/gcc/config/pa/predicates.md index 8dcfce0e9..405cf7f63 100644 --- a/gcc-4.9/gcc/config/pa/predicates.md +++ b/gcc-4.9/gcc/config/pa/predicates.md @@ -528,20 +528,29 @@ ;; This predicate is used for branch patterns that internally handle ;; register reloading. 
We need to accept non-symbolic memory operands ;; after reload to ensure that the pattern is still valid if reload -;; didn't find a hard register for the operand. +;; didn't find a hard register for the operand. We also reject index +;; and lo_sum DLT address as these are invalid for move destinations. (define_predicate "reg_before_reload_operand" (match_code "reg,mem") { + rtx op0; + if (register_operand (op, mode)) return true; - if (reload_completed - && memory_operand (op, mode) - && !symbolic_memory_operand (op, mode)) - return true; + if (!reload_in_progress && !reload_completed) + return false; - return false; + if (! MEM_P (op)) + return false; + + op0 = XEXP (op, 0); + + return (memory_address_p (mode, op0) + && !IS_INDEX_ADDR_P (op0) + && !IS_LO_SUM_DLT_ADDR_P (op0) + && !symbolic_memory_operand (op, mode)); }) ;; True iff OP is a register or const_0 operand for MODE. diff --git a/gcc-4.9/gcc/config/rs6000/altivec.h b/gcc-4.9/gcc/config/rs6000/altivec.h index 129cf6fa1..9ee0ae5ec 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.h +++ b/gcc-4.9/gcc/config/rs6000/altivec.h @@ -124,6 +124,7 @@ #define vec_vcfux __builtin_vec_vcfux #define vec_cts __builtin_vec_cts #define vec_ctu __builtin_vec_ctu +#define vec_cpsgn __builtin_vec_copysign #define vec_expte __builtin_vec_expte #define vec_floor __builtin_vec_floor #define vec_loge __builtin_vec_loge @@ -214,8 +215,10 @@ #define vec_lvsl __builtin_vec_lvsl #define vec_lvsr __builtin_vec_lvsr #define vec_max __builtin_vec_max +#define vec_mergee __builtin_vec_vmrgew #define vec_mergeh __builtin_vec_mergeh #define vec_mergel __builtin_vec_mergel +#define vec_mergeo __builtin_vec_vmrgow #define vec_min __builtin_vec_min #define vec_mladd __builtin_vec_mladd #define vec_msum __builtin_vec_msum @@ -319,6 +322,8 @@ #define vec_sqrt __builtin_vec_sqrt #define vec_vsx_ld __builtin_vec_vsx_ld #define vec_vsx_st __builtin_vec_vsx_st +#define vec_xl __builtin_vec_vsx_ld +#define vec_xst __builtin_vec_vsx_st /* Note, xxsldi and xxpermdi were added as __builtin_vsx_ functions instead of __builtin_vec_ */ @@ -336,6 +341,7 @@ #define vec_vadduqm __builtin_vec_vadduqm #define vec_vbpermq __builtin_vec_vbpermq #define vec_vclz __builtin_vec_vclz +#define vec_cntlz __builtin_vec_vclz #define vec_vclzb __builtin_vec_vclzb #define vec_vclzd __builtin_vec_vclzd #define vec_vclzh __builtin_vec_vclzh diff --git a/gcc-4.9/gcc/config/rs6000/altivec.md b/gcc-4.9/gcc/config/rs6000/altivec.md index a8cfcb739..02ea14237 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.md +++ b/gcc-4.9/gcc/config/rs6000/altivec.md @@ -67,7 +67,7 @@ UNSPEC_VCTSXS UNSPEC_VLOGEFP UNSPEC_VEXPTEFP - UNSPEC_VLSDOI + UNSPEC_VSLDOI UNSPEC_VUNPACK_HI_SIGN UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUNPACK_HI_SIGN_DIRECT @@ -2077,7 +2077,7 @@ (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:QI 3 "immediate_operand" "i")] - UNSPEC_VLSDOI))] + UNSPEC_VSLDOI))] "TARGET_ALTIVEC" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) @@ -2297,7 +2297,31 @@ "dststt %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_lvsl" +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for (i = 0; i < 
16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSL))] @@ -2305,7 +2329,31 @@ "lvsl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvsr" +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSR))] diff --git a/gcc-4.9/gcc/config/rs6000/darwin.h b/gcc-4.9/gcc/config/rs6000/darwin.h index 0329f3f62..dfd181e43 100644 --- a/gcc-4.9/gcc/config/rs6000/darwin.h +++ b/gcc-4.9/gcc/config/rs6000/darwin.h @@ -206,7 +206,11 @@ extern int darwin_emit_branch_islands; "vrsave", "vscr", \ "spe_acc", "spefscr", \ "sfp", \ - "tfhar", "tfiar", "texasr" \ + "tfhar", "tfiar", "texasr", \ + "rh0", "rh1", "rh2", "rh3", "rh4", "rh5", "rh6", "rh7", \ + "rh8", "rh9", "rh10", "rh11", "rh12", "rh13", "rh14", "rh15", \ + "rh16", "rh17", "rh18", "rh19", "rh20", "rh21", "rh22", "rh23", \ + "rh24", "rh25", "rh26", "rh27", "rh28", "rh29", "rh30", "rh31" \ } /* This outputs NAME to FILE. */ diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h index 53997f027..e69de29bb 100644 --- a/gcc-4.9/gcc/config/rs6000/linux-grte.h +++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Overrides LIB_LINUX_SPEC from sysv4.h. 
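The altivec_lvsl/altivec_lvsr expanders above insert an extra vperm on little-endian so the classic unaligned-load idiom keeps selecting the right bytes. That idiom, for reference (pre-VSX style, file name illustrative):

/* lvsl-load.c -- misaligned vector load via lvsl/vperm.
   When VECTOR_ELT_ORDER_BIG the direct lvsl path is taken; otherwise
   the raw lvsl mask is corrected with the vperm generated above.  */
#include <altivec.h>

vector unsigned char
load_unaligned (const unsigned char *p)
{
  vector unsigned char mask = vec_lvsl (0, p);
  vector unsigned char lo   = vec_ld (0, p);
  vector unsigned char hi   = vec_ld (15, p);
  return vec_perm (lo, hi, mask);
}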
*/ -#undef LIB_LINUX_SPEC -#define LIB_LINUX_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/rs6000/predicates.md b/gcc-4.9/gcc/config/rs6000/predicates.md index 8c384b380..2f4046215 100644 --- a/gcc-4.9/gcc/config/rs6000/predicates.md +++ b/gcc-4.9/gcc/config/rs6000/predicates.md @@ -1783,7 +1783,7 @@ (define_predicate "fusion_gpr_mem_load" (match_code "mem,sign_extend,zero_extend") { - rtx addr; + rtx addr, base, offset; /* Handle sign/zero extend. */ if (GET_CODE (op) == ZERO_EXTEND @@ -1813,24 +1813,79 @@ } addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); - return (base_reg_operand (base, GET_MODE (base)) - && satisfies_constraint_I (offset)); + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); } - else if (GET_CODE (addr) == LO_SUM) + return 0; +}) + +;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the +;; memory field with both the addis and the memory offset. Sign extension +;; is not handled here, since lha and lwa are not fused. +(define_predicate "fusion_gpr_mem_combo" + (match_code "mem,zero_extend") +{ + rtx addr, base, offset; + + /* Handle zero extend. 
*/ + if (GET_CODE (op) == ZERO_EXTEND) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; - if (!base_reg_operand (base, GET_MODE (base))) + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) return 0; + break; - else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!fusion_gpr_addis (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) return small_toc_ref (offset, GET_MODE (offset)); else if (TARGET_ELF && !TARGET_POWERPC64) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def index 220d1e970..9bb870394 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def @@ -1258,6 +1258,16 @@ BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df) BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di) BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df) BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di) +BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df) +BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di) +BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di) +BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di) +BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di) + +BU_VSX_2 (XVCVSXDDP_SCALE, "xvcvsxddp_scale", CONST, vsx_xvcvsxddp_scale) +BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", CONST, vsx_xvcvuxddp_scale) +BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale) +BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale) /* VSX abs builtin functions. 
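The BU_VSX_2 entries above register 64-bit-element divide, multiply, splat and scaled-conversion builtins; the rs6000-c.c overload table that follows wires them into the generic vec_* intrinsics. A sketch of two of them, assuming -mvsx on POWER7 or later and the new overloads below (file name illustrative):

/* vsx-v2di.c -- vec_div resolves to VSX_BUILTIN_DIV_V2DI here, and
   vec_ctf to XVCVSXDDP_SCALE (convert, then divide by 2**3).  */
#include <altivec.h>

vector double
scaled_quotient (vector signed long long num, vector signed long long den)
{
  vector signed long long q = vec_div (num, den);
  return vec_ctf (q, 3);
}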
*/ BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-c.c b/gcc-4.9/gcc/config/rs6000/rs6000-c.c index 46c4a9d8c..8dedeec26 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-c.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000-c.c @@ -597,6 +597,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ROUND, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF, @@ -876,6 +878,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, @@ -930,6 +944,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, @@ -1118,18 +1144,30 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVSXDDP_SCALE, + RS6000_BTI_V2DF, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0}, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVUXDDP_SCALE, + RS6000_BTI_V2DF, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0}, { ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS, RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTS, VSX_BUILTIN_XVCVDPSXDS_SCALE, + RS6000_BTI_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTU, VSX_BUILTIN_XVCVDPUXDS_SCALE, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI, @@ -1595,6 +1633,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, @@ -1643,6 +1691,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, 
@@ -1771,6 +1829,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, @@ -1811,6 +1873,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, @@ -1841,6 +1915,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, @@ -1945,6 +2031,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, 
ALTIVEC_BUILTIN_VPKSHUS, @@ -2127,6 +2215,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, @@ -2518,6 +2614,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -2778,6 +2886,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, @@ -2818,6 +2928,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, @@ -3267,6 +3383,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, @@ -3321,6 +3439,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, @@ -3431,6 +3551,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, @@ -3889,12 +4021,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, @@ -4128,7 +4264,8 @@ altivec_build_resolved_builtin (tree *args, int n, argument) is reversed. Patch the arguments here before building the resolved CALL_EXPR. 
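A user-level sketch of what the overload rows added above enable, assuming a -mcpu=power8 -mvsx target (the function names here are illustrative, not part of the patch):

#include <altivec.h>

/* Each intrinsic resolves through one of the new table rows; before
   this change the doubleword (V2DI) forms were rejected during
   overload resolution.  */
vector unsigned long long
v2di_ops (vector unsigned long long a, vector unsigned long long b)
{
  vector unsigned long long h = vec_mergeh (a, b); /* VSX_BUILTIN_VEC_MERGEH_V2DI */
  vector unsigned long long q = vec_div (h, b);    /* VSX_BUILTIN_UDIV_V2DI */
  return vec_xor (q, a);                           /* ALTIVEC_BUILTIN_VXOR, unsigned V2DI */
}

/* The new ~RS6000_BTI_double rows likewise let vec_vsx_ld/vec_vsx_st
   accept a plain 'double *' rather than only 'vector double *'.  */
vector double
load_two (const double *p)
{
  return vec_vsx_ld (0, p);  /* VSX_BUILTIN_LXVD2X_V2DF */
}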
*/ if (desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P - && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P) + && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P + && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P) { tree t; t = args[2], args[2] = args[1], args[1] = t; @@ -4186,6 +4323,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */ + if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsl is deprecated for little endian; use \ +assignment for unaligned loads and stores"); + else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ +assignment for unaligned loads and stores"); /* For now treat vec_splats and vec_promote as the same. */ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h index 067a74aa6..aa8e76249 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h @@ -65,6 +65,7 @@ extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); +extern void rs6000_scale_v2df (rtx, rtx, int); extern void build_mask64_2_operands (rtx, rtx *); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); @@ -79,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); -extern bool fusion_gpr_load_p (rtx *, bool); +extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx); extern void expand_fusion_gpr_load (rtx *); -extern const char *emit_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c index 28ccf86df..730e6c8a6 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000.c @@ -79,6 +79,9 @@ #include "dumpfile.h" #include "cgraph.h" #include "target-globals.h" +#include "real.h" +#include "context.h" +#include "tree-pass.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif @@ -1171,6 +1174,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type, enum machine_mode, secondary_reload_info *, bool); +rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); /* Hash table stuff for keeping track of TOC entries. */ @@ -1541,17 +1545,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail #endif -/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors - The PowerPC architecture requires only weak consistency among - processors--that is, memory accesses between processors need not be - sequentially consistent and memory accesses among processors can occur - in any order. 
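The new -Wdeprecated diagnostics above target the classic lvsl/vperm unaligned-access idiom; a sketch of code that now warns when built for a little-endian target:

#include <altivec.h>

/* With VECTOR_ELT_ORDER_BIG false, this now produces:
   "vec_lvsl is deprecated for little endian; use assignment for
   unaligned loads and stores" (and similarly for vec_lvsr).  */
vector unsigned char
realign_mask (const unsigned char *p)
{
  return vec_lvsl (0, p);
}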
The ability to order memory accesses weakly provides - opportunities for more efficient use of the system bus. Unless a - dependency exists, the 604e allows read operations to precede store - operations. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #ifdef HAVE_AS_TLS #undef TARGET_ASM_OUTPUT_DWARF_DTPREL #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel @@ -4084,6 +4077,15 @@ static void rs6000_option_override (void) { (void) rs6000_option_override_internal (true); + + /* Register machine-specific passes. This needs to be done at start-up. + It's convenient to do it here (like i386 does). */ + opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g); + + static struct register_pass_info analyze_swaps_info + = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE }; + + register_pass (&analyze_swaps_info); } @@ -6896,24 +6898,6 @@ rs6000_delegitimize_address (rtx orig_x) if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL) { -#ifdef ENABLE_CHECKING - if (REG_P (XVECEXP (y, 0, 1)) - && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER) - { - /* All good. */ - } - else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR) - { - /* Weirdness alert. df_note_compute can replace r2 with a - debug_expr when this unspec is in a debug_insn. - Seen in gcc.dg/pr51957-1.c */ - } - else - { - debug_rtx (orig_x); - abort (); - } -#endif y = XVECEXP (y, 0, 0); #ifdef HAVE_AS_TLS @@ -13842,8 +13826,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case ALTIVEC_BUILTIN_MASK_FOR_LOAD: case ALTIVEC_BUILTIN_MASK_FOR_STORE: { - int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr - : (int) CODE_FOR_altivec_lvsl); + int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + : (int) CODE_FOR_altivec_lvsl_direct); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode = insn_data[icode].operand[1].mode; tree arg; @@ -13871,7 +13855,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); - /*pat = gen_altivec_lvsr (target, op);*/ pat = GEN_FCN (icode) (target, op); if (!pat) return 0; @@ -16654,10 +16637,13 @@ rs6000_secondary_reload (bool in_p, : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ && (offset & 3) != 0)) { + /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ if (in_p) - sri->icode = CODE_FOR_reload_di_load; + sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load + : CODE_FOR_reload_di_load); else - sri->icode = CODE_FOR_reload_di_store; + sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store + : CODE_FOR_reload_di_store); sri->extra_cost = 2; ret = NO_REGS; } @@ -30923,6 +30909,23 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); } +/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ +void +rs6000_scale_v2df (rtx tgt, rtx src, int scale) +{ + HOST_WIDE_INT hwi_scale (scale); + REAL_VALUE_TYPE r_pow; + rtvec v = rtvec_alloc (2); + rtx elt; + rtx scale_vec = gen_reg_rtx (V2DFmode); + (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); + elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode); + RTVEC_ELT (v, 0) = elt; + RTVEC_ELT (v, 1) = elt; + rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); + emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); +} + /* Return an RTX representing where to find the function value of a function returning MODE. */ static rtx @@ -32551,6 +32554,14 @@ rs6000_split_logical_inner (rtx dest, if (complement_op2_p) op2 = gen_rtx_NOT (mode, op2); + /* For canonical RTL, if only one arm is inverted it is the first. */ + if (!complement_op1_p && complement_op2_p) + { + rtx temp = op1; + op1 = op2; + op2 = temp; + } + bool_rtx = ((code == NOT) ? gen_rtx_NOT (mode, op1) : gen_rtx_fmt_ee (code, mode, op1, op2)); @@ -32755,25 +32766,14 @@ rs6000_split_logical (rtx operands[3], /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on - a power8. - - The operands are: - operands[0] register set with addis - operands[1] value set via addis - operands[2] target register being loaded - operands[3] D-form memory reference using operands[0]. - - In addition, we are passed a boolean that is true if this is a peephole2, - and we can use see if the addis_reg is dead after the insn and can be - replaced by the target register. */ + a power8. */ bool -fusion_gpr_load_p (rtx *operands, bool peep2_p) +fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ + rtx addis_value, /* addis value. */ + rtx target, /* target register that is loaded. */ + rtx mem) /* bottom part of the memory addr. */ { - rtx addis_reg = operands[0]; - rtx addis_value = operands[1]; - rtx target = operands[2]; - rtx mem = operands[3]; rtx addr; rtx base_reg; @@ -32787,9 +32787,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p) if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) return false; - if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) - return false; - /* Allow sign/zero extension. */ if (GET_CODE (mem) == ZERO_EXTEND || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) @@ -32798,22 +32795,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p) if (!MEM_P (mem)) return false; + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) return false; /* Validate that the register used to load the high value is either the - register being loaded, or we can safely replace its use in a peephole2. + register being loaded, or we can safely replace its use. - If this is a peephole2, we assume that there are 2 instructions in the - peephole (addis and load), so we want to check if the target register was - not used in the memory address and the register to hold the addis result - is dead after the peephole. */ + This function is only called from the peephole2 pass and we assume that + there are 2 instructions in the peephole (addis and load), so we want to + check if the target register was not used in the memory address and the + register to hold the addis result is dead after the peephole. 
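For readers unfamiliar with real_powi, the semantics of rs6000_scale_v2df above can be modeled in plain C; this is a sketch only, with ldexp standing in for the 2**scale constant that the real code splats into a vector register and feeds to gen_mulv2df3:

#include <math.h>

/* Each lane of the V2DF source is scaled by 2**scale; the emitted
   sequence is a constant splat plus one vector multiply (xvmuldp).  */
static void
scale_v2df_model (double tgt[2], const double src[2], int scale)
{
  double f = ldexp (1.0, scale);  /* 2**scale */
  tgt[0] = src[0] * f;
  tgt[1] = src[1] * f;
}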
*/ if (REGNO (addis_reg) != REGNO (target)) { - if (!peep2_p) - return false; - if (reg_mentioned_p (target, mem)) return false; @@ -32854,9 +32851,6 @@ expand_fusion_gpr_load (rtx *operands) enum machine_mode extend_mode = target_mode; enum machine_mode ptr_mode = Pmode; enum rtx_code extend = UNKNOWN; - rtx addis_reg = ((ptr_mode == target_mode) - ? target - : simplify_subreg (ptr_mode, target, target_mode, 0)); if (GET_CODE (orig_mem) == ZERO_EXTEND || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) @@ -32873,13 +32867,14 @@ expand_fusion_gpr_load (rtx *operands) gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); offset = XEXP (orig_addr, 1); - new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset); - new_mem = change_address (orig_mem, target_mode, new_addr); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); + new_mem = replace_equiv_address_nv (orig_mem, new_addr); if (extend != UNKNOWN) new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); - emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value)); + new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), + UNSPEC_FUSION_GPR); emit_insn (gen_rtx_SET (VOIDmode, target, new_mem)); if (extend == SIGN_EXTEND) @@ -32898,55 +32893,40 @@ expand_fusion_gpr_load (rtx *operands) } /* Return a string to fuse an addis instruction with a gpr load to the same - register that we loaded up the addis instruction. The code is complicated, - so we call output_asm_insn directly, and just return "". + register that we loaded up the addis instruction. The address that is used + is the logical address that was formed during peephole2: + (lo_sum (high) (low-part)) - The operands are: - operands[0] register set with addis (must be same reg as target). - operands[1] value set via addis - operands[2] target register being loaded - operands[3] D-form memory reference using operands[0]. */ + The code is complicated, so we call output_asm_insn directly, and just + return "". */ const char * -emit_fusion_gpr_load (rtx *operands) +emit_fusion_gpr_load (rtx target, rtx mem) { - rtx addis_reg = operands[0]; - rtx addis_value = operands[1]; - rtx target = operands[2]; - rtx mem = operands[3]; + rtx addis_value; rtx fuse_ops[10]; rtx addr; rtx load_offset; const char *addis_str = NULL; const char *load_str = NULL; - const char *extend_insn = NULL; const char *mode_name = NULL; char insn_template[80]; enum machine_mode mode; const char *comment_str = ASM_COMMENT_START; - bool sign_p = false; - gcc_assert (REG_P (addis_reg) && REG_P (target)); - gcc_assert (REGNO (addis_reg) == REGNO (target)); - - if (*comment_str == ' ') - comment_str++; - - /* Allow sign/zero extension. */ if (GET_CODE (mem) == ZERO_EXTEND) mem = XEXP (mem, 0); - else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN) - { - sign_p = true; - mem = XEXP (mem, 0); - } + gcc_assert (REG_P (target) && MEM_P (mem)); + + if (*comment_str == ' ') + comment_str++; - gcc_assert (MEM_P (mem)); addr = XEXP (mem, 0); if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) gcc_unreachable (); + addis_value = XEXP (addr, 0); load_offset = XEXP (addr, 1); /* Now emit the load instruction to the same register. 
*/ @@ -32956,29 +32936,22 @@ emit_fusion_gpr_load (rtx *operands) case QImode: mode_name = "char"; load_str = "lbz"; - extend_insn = "extsb %0,%0"; break; case HImode: mode_name = "short"; load_str = "lhz"; - extend_insn = "extsh %0,%0"; break; case SImode: mode_name = "int"; load_str = "lwz"; - extend_insn = "extsw %0,%0"; break; case DImode: - if (TARGET_POWERPC64) - { - mode_name = "long"; - load_str = "ld"; - } - else - gcc_unreachable (); + gcc_assert (TARGET_POWERPC64); + mode_name = "long"; + load_str = "ld"; break; default: @@ -33122,17 +33095,1191 @@ emit_fusion_gpr_load (rtx *operands) else fatal_insn ("Unable to generate load offset for fusion", load_offset); - /* Handle sign extension. The peephole2 pass generates this as a separate - insn, but we handle it just in case it got reattached. */ - if (sign_p) + return ""; +} + +/* Analyze vector computations and remove unnecessary doubleword + swaps (xxswapdi instructions). This pass is performed only + for little-endian VSX code generation. + + For this specific case, loads and stores of 4x32 and 2x64 vectors + are inefficient. These are implemented using the lvx2dx and + stvx2dx instructions, which invert the order of doublewords in + a vector register. Thus the code generation inserts an xxswapdi + after each such load, and prior to each such store. (For spill + code after register assignment, an additional xxswapdi is inserted + following each store in order to return a hard register to its + unpermuted value.) + + The extra xxswapdi instructions reduce performance. This can be + particularly bad for vectorized code. The purpose of this pass + is to reduce the number of xxswapdi instructions required for + correctness. + + The primary insight is that much code that operates on vectors + does not care about the relative order of elements in a register, + so long as the correct memory order is preserved. If we have + a computation where all input values are provided by lvxd2x/xxswapdi + sequences, all outputs are stored using xxswapdi/stvxd2x sequences, + and all intermediate computations are pure SIMD (independent of + element order), then all the xxswapdi's associated with the loads + and stores may be removed. + + This pass uses some of the infrastructure and logical ideas from + the "web" pass in web.c. We create maximal webs of computations + fitting the description above using union-find. Each such web is + then optimized by removing its unnecessary xxswapdi instructions. + + The pass is placed prior to global optimization so that we can + perform the optimization in the safest and simplest way possible; + that is, by replacing each xxswapdi insn with a register copy insn. + Subsequent forward propagation will remove copies where possible. + + There are some operations sensitive to element order for which we + can still allow the operation, provided we modify those operations. + These include CONST_VECTORs, for which we must swap the first and + second halves of the constant vector; and SUBREGs, for which we + must adjust the byte offset to account for the swapped doublewords. + A remaining opportunity would be non-immediate-form splats, for + which we should adjust the selected lane of the input. We should + also make code generation adjustments for sum-across operations, + since this is a common vectorizer reduction. + + Because we run prior to the first split, we can see loads and stores + here that match *vsx_le_perm_{load,store}_. 
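A concrete instance of the webs described above, as a sketch assuming -O2 -mcpu=power8 on little endian. Before this pass, each vector load is followed by an xxswapdi and each vector store preceded by one; because element-wise addition is order-independent, the whole web reduces to permuting loads and stores with no swaps at all:

#include <altivec.h>

/* All inputs arrive via vector loads, all outputs leave via vector
   stores, and the intermediate computation is pure SIMD, so every
   xxswapdi in the loop body is removable.  */
void
vadd (vector int *restrict a, const vector int *restrict b,
      const vector int *restrict c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] + c[i];
}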
These are vanilla + vector loads and stores that have not yet been split into a permuting + load/store and a swap. (One way this can happen is with a builtin + call to vec_vsx_{ld,st}.) We can handle these as well, but rather + than deleting a swap, we convert the load/store into a permuting + load/store (which effectively removes the swap). */ + +/* Notes on Permutes + + We do not currently handle computations that contain permutes. There + is a general transformation that can be performed correctly, but it + may introduce more expensive code than it replaces. To handle these + would require a cost model to determine when to perform the optimization. + This commentary records how this could be done if desired. + + The most general permute is something like this (example for V16QI): + + (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) + (parallel [(const_int a0) (const_int a1) + ... + (const_int a14) (const_int a15)])) + + where a0,...,a15 are in [0,31] and select elements from op1 and op2 + to produce in the result. + + Regardless of mode, we can convert the PARALLEL to a mask of 16 + byte-element selectors. Let's call this M, with M[i] representing + the ith byte-element selector value. Then if we swap doublewords + throughout the computation, we can get correct behavior by replacing + M with M' as follows: + + { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23] + M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31] + { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23] + { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31] + + This seems promising at first, since we are just replacing one mask + with another. But certain masks are preferable to others. If M + is a mask that matches a vmrghh pattern, for example, M' certainly + will not. Instead of a single vmrghh, we would generate a load of + M' and a vperm. So we would need to know how many xxswapd's we can + remove as a result of this transformation to determine if it's + profitable; and preferably the logic would need to be aware of all + the special preferable masks. + + Another form of permute is an UNSPEC_VPERM, in which the mask is + already in a register. In some cases, this mask may be a constant + that we can discover with ud-chains, in which case the above + transformation is ok. However, the common usage here is for the + mask to be produced by an UNSPEC_LVSL, in which case the mask + cannot be known at compile time. In such a case we would have to + generate several instructions to compute M' as above at run time, + and a cost model is needed again. */ + +/* This is based on the union-find logic in web.c. web_entry_base is + defined in df.h. */ +class swap_web_entry : public web_entry_base +{ + public: + /* Pointer to the insn. */ + rtx insn; + /* Set if insn contains a mention of a vector register. All other + fields are undefined if this field is unset. */ + unsigned int is_relevant : 1; + /* Set if insn is a load. */ + unsigned int is_load : 1; + /* Set if insn is a store. */ + unsigned int is_store : 1; + /* Set if insn is a doubleword swap. This can either be a register swap + or a permuting load or store (test is_load and is_store for this). */ + unsigned int is_swap : 1; + /* Set if the insn has a live-in use of a parameter register. */ + unsigned int is_live_in : 1; + /* Set if the insn has a live-out def of a return register. */ + unsigned int is_live_out : 1; + /* Set if the insn contains a subreg reference of a vector register. 
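The M-to-M' rewrite quoted above is mechanical; isolated here as a C sketch for reference (not part of the patch):

/* Remap one byte selector of a V16QI permute so the same bytes are
   chosen once both inputs and the output have swapped doublewords.
   Selectors 0-7 and 16-23 name a high doubleword, 8-15 and 24-31 a
   low one, so (s & 8) distinguishes the two cases in the formula.  */
static unsigned char
swap_mask_elt (const unsigned char m[16], int i)
{
  unsigned char s = (i < 8) ? m[i + 8] : m[i - 8];
  return (s & 8) == 0 ? s + 8 : s - 8;
}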
*/ + unsigned int contains_subreg : 1; + /* Set if the insn contains a 128-bit integer operand. */ + unsigned int is_128_int : 1; + /* Set if this is a call-insn. */ + unsigned int is_call : 1; + /* Set if this insn does not perform a vector operation for which + element order matters, or if we know how to fix it up if it does. + Undefined if is_swap is set. */ + unsigned int is_swappable : 1; + /* A nonzero value indicates what kind of special handling for this + insn is required if doublewords are swapped. Undefined if + is_swappable is not set. */ + unsigned int special_handling : 3; + /* Set if the web represented by this entry cannot be optimized. */ + unsigned int web_not_optimizable : 1; + /* Set if this insn should be deleted. */ + unsigned int will_delete : 1; +}; + +enum special_handling_values { + SH_NONE = 0, + SH_CONST_VECTOR, + SH_SUBREG, + SH_NOSWAP_LD, + SH_NOSWAP_ST, + SH_EXTRACT, + SH_SPLAT +}; + +/* Union INSN with all insns containing definitions that reach USE. + Detect whether USE is live-in to the current function. */ +static void +union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) +{ + struct df_link *link = DF_REF_CHAIN (use); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + while (link) { - gcc_assert (extend_insn != NULL); - output_asm_insn (extend_insn, fuse_ops); + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx def_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (def_insn)); + } + + link = link->next; } +} - return ""; +/* Union INSN with all insns containing uses reached from DEF. + Detect whether DEF is live-out from the current function. */ +static void +union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) +{ + struct df_link *link = DF_REF_CHAIN (def); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + while (link) + { + /* This could be an eh use or some other artificial use; + we treat these all the same (killing the optimization). */ + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx use_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (use_insn)); + } + + link = link->next; + } +} + +/* Return 1 iff INSN is a load insn, including permuting loads that + represent an lvxd2x instruction; else return 0. */ +static unsigned int +insn_is_load_p (rtx insn) +{ + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + if (GET_CODE (SET_SRC (body)) == MEM) + return 1; + + if (GET_CODE (SET_SRC (body)) == VEC_SELECT + && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM) + return 1; + + return 0; + } + + if (GET_CODE (body) != PARALLEL) + return 0; + + rtx set = XVECEXP (body, 0, 0); + + if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM) + return 1; + + return 0; +} + +/* Return 1 iff INSN is a store insn, including permuting stores that + represent an stvxd2x instruction; else return 0. */ +static unsigned int +insn_is_store_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM) + return 1; + if (GET_CODE (body) != PARALLEL) + return 0; + rtx set = XVECEXP (body, 0, 0); + if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM) + return 1; + return 0; } +/* Return 1 iff INSN swaps doublewords. 
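insn_is_swap_p below accepts exactly the selector pattern a doubleword swap produces at any element width; restated on a plain index array as a sketch:

/* True iff sel[] is the doubleword-swap permutation for len
   elements: the second half followed by the first half, e.g.
   {1,0} for V2DI or {2,3,0,1} for V4SI.  */
static int
is_swap_selector (const int *sel, int len)
{
  int i;
  for (i = 0; i < len / 2; i++)
    if (sel[i] != len / 2 + i || sel[len / 2 + i] != i)
      return 0;
  return 1;
}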
This may be a reg-reg swap, + a permuting load, or a permuting store. */ +static unsigned int +insn_is_swap_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) != SET) + return 0; + rtx rhs = SET_SRC (body); + if (GET_CODE (rhs) != VEC_SELECT) + return 0; + rtx parallel = XEXP (rhs, 1); + if (GET_CODE (parallel) != PARALLEL) + return 0; + unsigned int len = XVECLEN (parallel, 0); + if (len != 2 && len != 4 && len != 8 && len != 16) + return 0; + for (unsigned int i = 0; i < len / 2; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i) + return 0; + } + for (unsigned int i = len / 2; i < len; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2) + return 0; + } + return 1; +} + +/* Return 1 iff OP is an operand that will not be affected by having + vector doublewords swapped in memory. */ +static unsigned int +rtx_is_swappable_p (rtx op, unsigned int *special) +{ + enum rtx_code code = GET_CODE (op); + int i, j; + rtx parallel; + + switch (code) + { + case LABEL_REF: + case SYMBOL_REF: + case CLOBBER: + case REG: + return 1; + + case VEC_CONCAT: + case ASM_INPUT: + case ASM_OPERANDS: + return 0; + + case CONST_VECTOR: + { + *special = SH_CONST_VECTOR; + return 1; + } + + case VEC_DUPLICATE: + /* Opportunity: If XEXP (op, 0) has the same mode as the result, + and XEXP (op, 1) is a PARALLEL with a single QImode const int, + it represents a vector splat for which we can do special + handling. */ + if (GET_CODE (XEXP (op, 0)) == CONST_INT) + return 1; + else if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) + /* This catches V2DF and V2DI splat, at a minimum. */ + return 1; + else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) + /* If the duplicated item is from a select, defer to the select + processing to see if we can change the lane for the splat. */ + return rtx_is_swappable_p (XEXP (op, 0), special); + else + return 0; + + case VEC_SELECT: + /* A vec_extract operation is ok if we change the lane. */ + if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 1 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT) + { + *special = SH_EXTRACT; + return 1; + } + else + return 0; + + case UNSPEC: + { + /* Various operations are unsafe for this optimization, at least + without significant additional work. Permutes are obviously + problematic, as both the permute control vector and the ordering + of the target values are invalidated by doubleword swapping. + Vector pack and unpack modify the number of vector lanes. + Merge-high/low will not operate correctly on swapped operands. + Vector shifts across element boundaries are clearly uncool, + as are vector select and concatenate operations. Vector + sum-across instructions define one operand with a specific + order-dependent element, so additional fixup code would be + needed to make those work. Vector set and non-immediate-form + vector splat are element-order sensitive. A few of these + cases might be workable with special handling if required. 
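The distinction drawn by the UNSPEC list below is visible at source level; a sketch (the intrinsic choices are illustrative):

#include <altivec.h>

/* Order-independent: lane i of the result depends only on lane i of
   the inputs, so swapped doublewords remain mutually consistent.  */
vector int
web_safe (vector int a, vector int b)
{
  return vec_add (a, b);
}

/* Order-sensitive: the control vector indexes absolute byte
   positions (UNSPEC_VPERM), so a swapped register layout would
   select the wrong bytes; this makes the web unswappable.  */
vector int
web_unsafe (vector int a, vector int b, vector unsigned char sel)
{
  return vec_perm (a, b, sel);
}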
*/ + int val = XINT (op, 1); + switch (val) + { + default: + break; + case UNSPEC_VMRGH_DIRECT: + case UNSPEC_VMRGL_DIRECT: + case UNSPEC_VPACK_SIGN_SIGN_SAT: + case UNSPEC_VPACK_SIGN_UNS_SAT: + case UNSPEC_VPACK_UNS_UNS_MOD: + case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT: + case UNSPEC_VPACK_UNS_UNS_SAT: + case UNSPEC_VPERM: + case UNSPEC_VPERM_UNS: + case UNSPEC_VPERMHI: + case UNSPEC_VPERMSI: + case UNSPEC_VPKPX: + case UNSPEC_VSLDOI: + case UNSPEC_VSLO: + case UNSPEC_VSRO: + case UNSPEC_VSUM2SWS: + case UNSPEC_VSUM4S: + case UNSPEC_VSUM4UBS: + case UNSPEC_VSUMSWS: + case UNSPEC_VSUMSWS_DIRECT: + case UNSPEC_VSX_CONCAT: + case UNSPEC_VSX_SET: + case UNSPEC_VSX_SLDWI: + case UNSPEC_VUNPACK_HI_SIGN: + case UNSPEC_VUNPACK_HI_SIGN_DIRECT: + case UNSPEC_VUNPACK_LO_SIGN: + case UNSPEC_VUNPACK_LO_SIGN_DIRECT: + case UNSPEC_VUPKHPX: + case UNSPEC_VUPKHS_V4SF: + case UNSPEC_VUPKHU_V4SF: + case UNSPEC_VUPKLPX: + case UNSPEC_VUPKLS_V4SF: + case UNSPEC_VUPKLU_V4SF: + /* The following could be handled as an idiom with XXSPLTW. + These place a scalar in BE element zero, but the XXSPLTW + will currently expect it in BE element 2 in a swapped + region. When one of these feeds an XXSPLTW with no other + defs/uses either way, we can avoid the lane change for + XXSPLTW and things will be correct. TBD. */ + case UNSPEC_VSX_CVDPSPN: + case UNSPEC_VSX_CVSPDP: + case UNSPEC_VSX_CVSPDPN: + return 0; + case UNSPEC_VSPLT_DIRECT: + *special = SH_SPLAT; + return 1; + } + } + + default: + break; + } + + const char *fmt = GET_RTX_FORMAT (code); + int ok = 1; + + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XEXP (op, i), &special_op); + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && special_op != SH_NONE + && *special != special_op) + return 0; + *special = special_op; + } + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op); + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && special_op != SH_NONE + && *special != special_op) + return 0; + *special = special_op; + } + + return ok; +} + +/* Return 1 iff INSN is an operand that will not be affected by + having vector doublewords swapped in memory (in which case + *SPECIAL is unchanged), or that can be modified to be correct + if vector doublewords are swapped in memory (in which case + *SPECIAL is changed to a value indicating how). */ +static unsigned int +insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, + unsigned int *special) +{ + /* Calls are always bad. */ + if (GET_CODE (insn) == CALL_INSN) + return 0; + + /* Loads and stores seen here are not permuting, but we can still + fix them up by converting them to permuting ones. Exceptions: + UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL + body instead of a SET; and UNSPEC_STVE, which has an UNSPEC + for the SET source. */ + rtx body = PATTERN (insn); + int i = INSN_UID (insn); + + if (insn_entry[i].is_load) + { + if (GET_CODE (body) == SET) + { + *special = SH_NOSWAP_LD; + return 1; + } + else + return 0; + } + + if (insn_entry[i].is_store) + { + if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC) + { + *special = SH_NOSWAP_ST; + return 1; + } + else + return 0; + } + + /* Otherwise check the operands for vector lane violations. 
*/ + return rtx_is_swappable_p (body, special); +} + +enum chain_purpose { FOR_LOADS, FOR_STORES }; + +/* Return true if the UD or DU chain headed by LINK is non-empty, + and every entry on the chain references an insn that is a + register swap. Furthermore, if PURPOSE is FOR_LOADS, each such + register swap must have only permuting loads as reaching defs. + If PURPOSE is FOR_STORES, each such register swap must have only + register swaps or permuting stores as reached uses. */ +static bool +chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, + enum chain_purpose purpose) +{ + if (!link) + return false; + + for (; link; link = link->next) + { + if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref)))) + continue; + + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx reached_insn = DF_REF_INSN (link->ref); + unsigned uid = INSN_UID (reached_insn); + + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load + || insn_entry[uid].is_store) + return false; + + if (purpose == FOR_LOADS) + { + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + struct df_link *swap_link = DF_REF_CHAIN (use); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_def_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_def_insn); + + /* Only permuting loads are allowed. */ + if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + else if (purpose == FOR_STORES) + { + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *swap_link = DF_REF_CHAIN (def); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_use_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_use_insn); + + /* Permuting stores or register swaps are allowed. */ + if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + } + + return true; +} + +/* Mark the xxswapdi instructions associated with permuting loads and + stores for removal. Note that we only flag them for deletion here, + as there is a possibility of a swap being reached from multiple + loads, etc. */ +static void +mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) +{ + rtx insn = insn_entry[i].insn; + unsigned uid = INSN_UID (insn); + + if (insn_entry[i].is_load) + { + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *link = DF_REF_CHAIN (def); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx use_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (use_insn)].will_delete = 1; + link = link->next; + } + } + } + else if (insn_entry[i].is_store) + { + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + /* Ignore uses for addressability. */ + machine_mode mode = GET_MODE (DF_REF_REG (use)); + if (!VECTOR_MODE_P (mode)) + continue; + + struct df_link *link = DF_REF_CHAIN (use); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx def_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (def_insn)].will_delete = 1; + link = link->next; + } + } + } +} + +/* OP is either a CONST_VECTOR or an expression containing one. 
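The constant-vector fixup below is a straight half-swap; modeled on a plain array for clarity (a sketch, not GCC API):

/* {0,1,2,3} -> {2,3,0,1}: the halves trade places but order within
   each half is preserved, matching what xxswapdi does to a register
   image.  */
static void
swap_halves_model (int v[], int nunits)
{
  int i, t;
  for (i = 0; i < nunits / 2; i++)
    {
      t = v[i];
      v[i] = v[i + nunits / 2];
      v[i + nunits / 2] = t;
    }
}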
+ Swap the first half of the vector with the second in the first + case. Recurse to find it in the second. */ +static void +swap_const_vector_halves (rtx op) +{ + int i; + enum rtx_code code = GET_CODE (op); + if (GET_CODE (op) == CONST_VECTOR) + { + int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2; + for (i = 0; i < half_units; ++i) + { + rtx temp = CONST_VECTOR_ELT (op, i); + CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units); + CONST_VECTOR_ELT (op, i + half_units) = temp; + } + } + else + { + int j; + const char *fmt = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + swap_const_vector_halves (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + swap_const_vector_halves (XVECEXP (op, i, j)); + } +} + +/* Find all subregs of a vector expression that perform a narrowing, + and adjust the subreg index to account for doubleword swapping. */ +static void +adjust_subreg_index (rtx op) +{ + enum rtx_code code = GET_CODE (op); + if (code == SUBREG + && (GET_MODE_SIZE (GET_MODE (op)) + < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) + { + unsigned int index = SUBREG_BYTE (op); + if (index < 8) + index += 8; + else + index -= 8; + SUBREG_BYTE (op) = index; + } + + const char *fmt = GET_RTX_FORMAT (code); + int i,j; + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + adjust_subreg_index (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + adjust_subreg_index (XVECEXP (op, i, j)); +} + +/* Convert the non-permuting load INSN to a permuting one. */ +static void +permute_load (rtx insn) +{ + rtx body = PATTERN (insn); + rtx mem_op = SET_SRC (body); + rtx tgt_reg = SET_DEST (body); + machine_mode mode = GET_MODE (tgt_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing load %d with permuted load\n", + INSN_UID (insn)); +} + +/* Convert the non-permuting store INSN to a permuting one. */ +static void +permute_store (rtx insn) +{ + rtx body = PATTERN (insn); + rtx src_reg = SET_SRC (body); + machine_mode mode = GET_MODE (src_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing store %d with permuted store\n", + INSN_UID (insn)); +} + +/* Given OP that contains a vector extract operation, adjust the index + of the extracted lane to account for the doubleword swap. */ +static void +adjust_extract (rtx insn) +{ + rtx src = SET_SRC (PATTERN (insn)); + /* The vec_select may be wrapped in a vec_duplicate for a splat, so + account for that. */ + rtx sel = GET_CODE (src) == VEC_DUPLICATE ? 
XEXP (src, 0) : src; + rtx par = XEXP (sel, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; + int lane = INTVAL (XVECEXP (par, 0, 0)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (par, 0, 0) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a vector direct-splat operation, adjust the index + of the source lane to account for the doubleword swap. */ +static void +adjust_splat (rtx insn) +{ + rtx body = PATTERN (insn); + rtx unspec = XEXP (body, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; + int lane = INTVAL (XVECEXP (unspec, 0, 1)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (unspec, 0, 1) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); +} + +/* The insn described by INSN_ENTRY[I] can be swapped, but only + with special handling. Take care of that here. */ +static void +handle_special_swappables (swap_web_entry *insn_entry, unsigned i) +{ + rtx insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + + switch (insn_entry[i].special_handling) + { + default: + gcc_unreachable (); + case SH_CONST_VECTOR: + { + /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */ + gcc_assert (GET_CODE (body) == SET); + rtx rhs = SET_SRC (body); + swap_const_vector_halves (rhs); + if (dump_file) + fprintf (dump_file, "Swapping constant halves in insn %d\n", i); + break; + } + case SH_SUBREG: + /* A subreg of the same size is already safe. For subregs that + select a smaller portion of a reg, adjust the index for + swapped doublewords. */ + adjust_subreg_index (body); + if (dump_file) + fprintf (dump_file, "Adjusting subreg in insn %d\n", i); + break; + case SH_NOSWAP_LD: + /* Convert a non-permuting load to a permuting one. */ + permute_load (insn); + break; + case SH_NOSWAP_ST: + /* Convert a non-permuting store to a permuting one. */ + permute_store (insn); + break; + case SH_EXTRACT: + /* Change the lane on an extract operation. */ + adjust_extract (insn); + break; + case SH_SPLAT: + /* Change the lane on a direct-splat operation. */ + adjust_splat (insn); + break; + } +} + +/* Find the insn from the Ith table entry, which is known to be a + register swap Y = SWAP(X). Replace it with a copy Y = X. */ +static void +replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) +{ + rtx insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + rtx src_reg = XEXP (SET_SRC (body), 0); + rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg); + rtx new_insn = emit_insn_before (copy, insn); + set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); + df_insn_rescan (new_insn); + + if (dump_file) + { + unsigned int new_uid = INSN_UID (new_insn); + fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); + } + + df_insn_delete (insn); + remove_insn (insn); + INSN_DELETED_P (insn) = 1; +} + +/* Dump the swap table to DUMP_FILE. 
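adjust_extract and adjust_splat above share a single lane remapping, isolated here as a sketch:

/* A lane in the high half of a register maps to the low half and
   vice versa once doublewords are swapped; for V4SI (half_elts == 2)
   this exchanges lanes 0<->2 and 1<->3.  */
static int
remap_lane (int lane, int half_elts)
{
  return lane >= half_elts ? lane - half_elts : lane + half_elts;
}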
*/ +static void +dump_swap_insn_table (swap_web_entry *insn_entry) +{ + int e = get_max_uid (); + fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); + + for (int i = 0; i < e; ++i) + if (insn_entry[i].is_relevant) + { + swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); + fprintf (dump_file, "%6d %6d ", i, + pred_entry && pred_entry->insn + ? INSN_UID (pred_entry->insn) : 0); + if (insn_entry[i].is_load) + fputs ("load ", dump_file); + if (insn_entry[i].is_store) + fputs ("store ", dump_file); + if (insn_entry[i].is_swap) + fputs ("swap ", dump_file); + if (insn_entry[i].is_live_in) + fputs ("live-in ", dump_file); + if (insn_entry[i].is_live_out) + fputs ("live-out ", dump_file); + if (insn_entry[i].contains_subreg) + fputs ("subreg ", dump_file); + if (insn_entry[i].is_128_int) + fputs ("int128 ", dump_file); + if (insn_entry[i].is_call) + fputs ("call ", dump_file); + if (insn_entry[i].is_swappable) + { + fputs ("swappable ", dump_file); + if (insn_entry[i].special_handling == SH_CONST_VECTOR) + fputs ("special:constvec ", dump_file); + else if (insn_entry[i].special_handling == SH_SUBREG) + fputs ("special:subreg ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_LD) + fputs ("special:load ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_ST) + fputs ("special:store ", dump_file); + else if (insn_entry[i].special_handling == SH_EXTRACT) + fputs ("special:extract ", dump_file); + else if (insn_entry[i].special_handling == SH_SPLAT) + fputs ("special:splat ", dump_file); + } + if (insn_entry[i].web_not_optimizable) + fputs ("unoptimizable ", dump_file); + if (insn_entry[i].will_delete) + fputs ("delete ", dump_file); + fputs ("\n", dump_file); + } + fputs ("\n", dump_file); +} + +/* Main entry point for this pass. */ +unsigned int +rs6000_analyze_swaps (function *fun) +{ + swap_web_entry *insn_entry; + basic_block bb; + rtx insn; + + /* Dataflow analysis for use-def chains. */ + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); + df_analyze (); + df_set_flags (DF_DEFER_INSN_RESCAN); + + /* Allocate structure to represent webs of insns. */ + insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); + + /* Walk the insns to gather basic data. */ + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS (bb, insn) + { + unsigned int uid = INSN_UID (insn); + if (NONDEBUG_INSN_P (insn)) + { + insn_entry[uid].insn = insn; + + if (GET_CODE (insn) == CALL_INSN) + insn_entry[uid].is_call = 1; + + /* Walk the uses and defs to see if we mention vector regs. + Record any constraints on optimization of such mentions. */ + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref mention = *use_rec; + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If a use gets its value from a call insn, it will be + a hard register and will look like (reg:V4SI 3 3). + The df analysis creates two mentions for GPR3 and GPR4, + both DImode. We must recognize this and treat it as a + vector mention to ensure the call is unioned with this + use. */ + if (mode == DImode && DF_REF_INSN_INFO (mention)) + { + rtx feeder = DF_REF_INSN (mention); + /* FIXME: It is pretty hard to get from the df mention + to the mode of the use in the insn. We arbitrarily + pick a vector mode here, even though the use might + be a real DImode. 
We can be too conservative + (create a web larger than necessary) because of + this, so consider eventually fixing this. */ + if (GET_CODE (feeder) == CALL_INSN) + mode = V4SImode; + } + + if (VECTOR_MODE_P (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + union_defs (insn_entry, insn, mention); + } + } + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref mention = *def_rec; + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If we're loading up a hard vector register for a call, + it looks like (set (reg:V4SI 9 9) (...)). The df + analysis creates two mentions for GPR9 and GPR10, both + DImode. So relying on the mode from the mentions + isn't sufficient to ensure we union the call into the + web with the parameter setup code. */ + if (mode == DImode && GET_CODE (insn) == SET + && VECTOR_MODE_P (GET_MODE (SET_DEST (insn)))) + mode = GET_MODE (SET_DEST (insn)); + + if (VECTOR_MODE_P (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + /* REG_FUNCTION_VALUE_P is not valid for subregs. */ + else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention))) + insn_entry[uid].is_live_out = 1; + union_uses (insn_entry, insn, mention); + } + } + + if (insn_entry[uid].is_relevant) + { + /* Determine if this is a load or store. */ + insn_entry[uid].is_load = insn_is_load_p (insn); + insn_entry[uid].is_store = insn_is_store_p (insn); + + /* Determine if this is a doubleword swap. If not, + determine whether it can legally be swapped. */ + if (insn_is_swap_p (insn)) + insn_entry[uid].is_swap = 1; + else + { + unsigned int special = SH_NONE; + insn_entry[uid].is_swappable + = insn_is_swappable_p (insn_entry, insn, &special); + if (special != SH_NONE && insn_entry[uid].contains_subreg) + insn_entry[uid].is_swappable = 0; + else if (special != SH_NONE) + insn_entry[uid].special_handling = special; + else if (insn_entry[uid].contains_subreg) + insn_entry[uid].special_handling = SH_SUBREG; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table when first built\n"); + dump_swap_insn_table (insn_entry); + } + + /* Record unoptimizable webs. */ + unsigned e = get_max_uid (), i; + for (i = 0; i < e; ++i) + { + if (!insn_entry[i].is_relevant) + continue; + + swap_web_entry *root + = (swap_web_entry*)(&insn_entry[i])->unionfind_root (); + unsigned uid = INSN_UID (insn_entry[i].insn); + + if (insn_entry[i].is_live_in || insn_entry[i].is_live_out + || (insn_entry[i].contains_subreg + && insn_entry[i].special_handling != SH_SUBREG) + || insn_entry[i].is_128_int || insn_entry[i].is_call + || !(insn_entry[i].is_swappable || insn_entry[i].is_swap)) + root->web_not_optimizable = 1; + + /* If we have loads or stores that aren't permuting then the + optimization isn't appropriate. 
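The live-in/live-out restrictions recorded above exist because values crossing the function boundary sit in registers with architectural (unswapped) element order; a sketch of a web the pass must therefore reject:

#include <altivec.h>

/* Both parameters are live-in (defined by the caller) and the result
   is live-out, so the containing web is marked web_not_optimizable
   even though vec_add itself is order-independent.  */
vector int
callee (vector int a, vector int b)
{
  return vec_add (a, b);
}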
*/ + else if ((insn_entry[i].is_load || insn_entry[i].is_store) + && !insn_entry[i].is_swap && !insn_entry[i].is_swappable) + root->web_not_optimizable = 1; + + /* If we have permuting loads or stores that are not accompanied + by a register swap, the optimization isn't appropriate. */ + else if (insn_entry[i].is_load && insn_entry[i].is_swap) + { + df_ref *def_rec; + + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *link = DF_REF_CHAIN (def); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS)) + { + root->web_not_optimizable = 1; + break; + } + } + } + else if (insn_entry[i].is_store && insn_entry[i].is_swap) + { + df_ref *use_rec; + + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + struct df_link *link = DF_REF_CHAIN (use); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES)) + { + root->web_not_optimizable = 1; + break; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table after web analysis\n"); + dump_swap_insn_table (insn_entry); + } + + /* For each load and store in an optimizable web (which implies + the loads and stores are permuting), find the associated + register swaps and mark them for removal. Due to various + optimizations we may mark the same swap more than once. Also + perform special handling for swappable insns that require it. */ + for (i = 0; i < e; ++i) + if ((insn_entry[i].is_load || insn_entry[i].is_store) + && insn_entry[i].is_swap) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + mark_swaps_for_removal (insn_entry, i); + } + else if (insn_entry[i].is_swappable && insn_entry[i].special_handling) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + handle_special_swappables (insn_entry, i); + } + + /* Now delete the swaps marked for removal. */ + for (i = 0; i < e; ++i) + if (insn_entry[i].will_delete) + replace_swap_with_copy (insn_entry, i); + + /* Clean up. 
*/ + free (insn_entry); + return 0; +} + +const pass_data pass_data_analyze_swaps = +{ + RTL_PASS, /* type */ + "swaps", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_analyze_swaps : public rtl_opt_pass +{ +public: + pass_analyze_swaps(gcc::context *ctxt) + : rtl_opt_pass(pass_data_analyze_swaps, ctxt) + {} + + /* opt_pass methods: */ + bool gate () + { + return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX + && rs6000_optimize_swaps); + } + + unsigned int execute () + { + return rs6000_analyze_swaps (cfun); + } + +}; // class pass_analyze_swaps + +rtl_opt_pass * +make_pass_analyze_swaps (gcc::context *ctxt) +{ + return new pass_analyze_swaps (ctxt); +} struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md index d078491e1..f77754aa1 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.md +++ b/gcc-4.9/gcc/config/rs6000/rs6000.md @@ -137,6 +137,7 @@ UNSPEC_UNPACK_128BIT UNSPEC_PACK_128BIT UNSPEC_LSQ + UNSPEC_FUSION_GPR ]) ;; @@ -328,8 +329,25 @@ (define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")]) ; Definitions for 32-bit fpr direct move +; At present, the decimal modes are not allowed in the traditional altivec +; registers, so restrict the constraints to just the traditional FPRs. (define_mode_attr f32_dm [(SF "wn") (SD "wh")]) +; Definitions for 32-bit VSX +(define_mode_attr f32_vsx [(SF "ww") (SD "wn")]) + +; Definitions for 32-bit use of altivec registers +(define_mode_attr f32_av [(SF "wu") (SD "wn")]) + +; Definitions for 64-bit VSX +(define_mode_attr f64_vsx [(DF "ws") (DD "wn")]) + +; Definitions for 64-bit direct move +(define_mode_attr f64_dm [(DF "wk") (DD "wh")]) + +; Definitions for 64-bit use of altivec registers +(define_mode_attr f64_av [(DF "wv") (DD "wn")]) + ; These modes do not fit in integer registers in 32-bit mode. ; but on e500v2, the gpr are 64 bit registers (define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD]) @@ -435,7 +453,7 @@ ;; either. ;; Mode attribute for boolean operation register constraints for output -(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v") +(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wt,v") (PTI "&r,r,r") (V16QI "wa,v,&?r,?r,?r") (V8HI "wa,v,&?r,?r,?r") @@ -446,7 +464,7 @@ (V1TI "wa,v,&?r,?r,?r")]) ;; Mode attribute for boolean operation register constraints for operand1 -(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v") +(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wt,v") (PTI "r,0,r") (V16QI "wa,v,r,0,r") (V8HI "wa,v,r,0,r") @@ -457,7 +475,7 @@ (V1TI "wa,v,r,0,r")]) ;; Mode attribute for boolean operation register constraints for operand2 -(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v") +(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wt,v") (PTI "r,r,0") (V16QI "wa,v,r,r,0") (V8HI "wa,v,r,r,0") @@ -470,7 +488,7 @@ ;; Mode attribute for boolean operation register constraints for operand1 ;; for one_cmpl. 
To simplify things, we repeat the constraint where 0 ;; is used for operand1 or operand2 -(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v") +(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wt,v") (PTI "r,0,0") (V16QI "wa,v,r,0,0") (V8HI "wa,v,r,0,0") @@ -8582,8 +8600,8 @@ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=") (match_operator:BOOL_128 3 "boolean_operator" [(not:BOOL_128 - (match_operand:BOOL_128 2 "vlogical_operand" "")) - (match_operand:BOOL_128 1 "vlogical_operand" "")]))] + (match_operand:BOOL_128 2 "vlogical_operand" "")) + (match_operand:BOOL_128 1 "vlogical_operand" "")]))] "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)" { if (TARGET_VSX && vsx_register_operand (operands[0], mode)) @@ -8598,7 +8616,7 @@ && reload_completed && int_reg_operand (operands[0], mode)" [(const_int 0)] { - rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false, + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true, NULL_RTX); DONE; } @@ -8620,14 +8638,14 @@ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r") (match_operator:TI2 3 "boolean_operator" [(not:TI2 - (match_operand:TI2 1 "int_reg_operand" "r,0,r")) - (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))] + (match_operand:TI2 2 "int_reg_operand" "r,0,r")) + (match_operand:TI2 1 "int_reg_operand" "r,r,0")]))] "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" "#" "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" [(const_int 0)] { - rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false, + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true, NULL_RTX); DONE; } @@ -9188,8 +9206,8 @@ }") (define_insn "mov_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,,,wu,Z,?,?r,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,,,Z,wu,r,,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,,,,,,Z,?,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,,j,,,Z,,r,,r, h, 0, G,Fn"))] "(gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -9390,8 +9408,8 @@ ;; reloading. (define_insn "*mov_hardfloat32" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,,Z,,,Y,r,!r,!r,!r,!r") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,,,j,r,Y,r,G,H,F"))] "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -9459,8 +9477,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wk") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wk,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,,Z,,,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,,,j,r,Y,r,r,h,0,G,H,F,wg,r,,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -15714,22 +15732,9 @@ ;; a GPR. 
The addis instruction must be adjacent to the load, and use the same ;; register that is being loaded. The fused ops must be physically adjacent. -;; We use define_peephole for the actual addis/load, and the register used to -;; hold the addis value must be the same as the register being loaded. We use -;; define_peephole2 to change the register used for addis to be the register -;; being loaded, since we can look at whether it is dead after the load insn. - -(define_peephole - [(set (match_operand:P 0 "base_reg_operand" "") - (match_operand:P 1 "fusion_gpr_addis" "")) - (set (match_operand:INT1 2 "base_reg_operand" "") - (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] - "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)" -{ - return emit_fusion_gpr_load (operands); -} - [(set_attr "type" "load") - (set_attr "length" "8")]) +;; Find cases where the addis that feeds into a load instruction is either used +;; once or is the same as the target register, and replace it with the fusion +;; insn (define_peephole2 [(set (match_operand:P 0 "base_reg_operand" "") @@ -15737,15 +15742,28 @@ (set (match_operand:INT1 2 "base_reg_operand" "") (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] "TARGET_P8_FUSION - && (REGNO (operands[0]) != REGNO (operands[2]) - || GET_CODE (operands[3]) == SIGN_EXTEND) - && fusion_gpr_load_p (operands, true)" + && fusion_gpr_load_p (operands[0], operands[1], operands[2], + operands[3])" [(const_int 0)] { expand_fusion_gpr_load (operands); DONE; }) +;; Fusion insn, created by the define_peephole2 above (and eventually by +;; reload) + +(define_insn "fusion_gpr_load_" + [(set (match_operand:INT1 0 "base_reg_operand" "=&b") + (unspec:INT1 [(match_operand:INT1 1 "fusion_gpr_mem_combo" "")] + UNSPEC_FUSION_GPR))] + "TARGET_P8_FUSION" +{ + return emit_fusion_gpr_load (operands[0], operands[1]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + ;; Miscellaneous ISA 2.06 (power7) instructions (define_insn "addg6s" diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.opt b/gcc-4.9/gcc/config/rs6000/rs6000.opt index 4c1a02a52..4d0d5e73d 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.opt +++ b/gcc-4.9/gcc/config/rs6000/rs6000.opt @@ -588,3 +588,7 @@ Allow double variables in upper registers with -mcpu=power7 or -mvsx mupper-regs-sf Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) Allow float variables in upper registers with -mcpu=power8 or -mp8-vector + +moptimize-swaps +Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save +Analyze and remove doubleword swaps from VSX computations. diff --git a/gcc-4.9/gcc/config/rs6000/rtems.h b/gcc-4.9/gcc/config/rs6000/rtems.h index 2402d5336..046488034 100644 --- a/gcc-4.9/gcc/config/rs6000/rtems.h +++ b/gcc-4.9/gcc/config/rs6000/rtems.h @@ -52,7 +52,8 @@ %{mcpu=750: %{!Dppc*: %{!Dmpc*: -Dmpc750} } } \ %{mcpu=821: %{!Dppc*: %{!Dmpc*: -Dmpc821} } } \ %{mcpu=860: %{!Dppc*: %{!Dmpc*: -Dmpc860} } } \ -%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } }" +%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } } \ +%{mcpu=e6500: -D__PPC_CPU_E6500__}" #undef SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS \ diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h index 7cc543319..9d456ddec 100644 --- a/gcc-4.9/gcc/config/rs6000/sysv4.h +++ b/gcc-4.9/gcc/config/rs6000/sysv4.h @@ -846,11 +846,6 @@ ncrtn.o%s" #define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}" #endif -/* These may be provided by rs6000/linux-grtev2.h. 
*/ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - /* Define any extra SPECS that the compiler needs to generate. */ /* Override rs6000.h definition. */ #undef SUBTARGET_EXTRA_SPECS @@ -916,7 +911,6 @@ ncrtn.o%s" { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \ { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \ - LINUX_GRTE_EXTRA_SPECS \ SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/rs6000/t-rtems b/gcc-4.9/gcc/config/rs6000/t-rtems index 426f75ac5..eadda0d20 100644 --- a/gcc-4.9/gcc/config/rs6000/t-rtems +++ b/gcc-4.9/gcc/config/rs6000/t-rtems @@ -18,16 +18,24 @@ # along with GCC; see the file COPYING3. If not see # . -MULTILIB_OPTIONS = \ -mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540 \ -msoft-float/mfloat-gprs=double +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_MATCHES = +MULTILIB_EXCEPTIONS = +MULTILIB_REQUIRED = + +MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500 +MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500 + +MULTILIB_OPTIONS += m32 +MULTILIB_DIRNAMES += m32 -MULTILIB_DIRNAMES = \ -m403 m505 m603e m604 m860 m7400 m8540 \ -nof gprsdouble +MULTILIB_OPTIONS += msoft-float/mfloat-gprs=double +MULTILIB_DIRNAMES += nof gprsdouble + +MULTILIB_OPTIONS += mno-spe/mno-altivec +MULTILIB_DIRNAMES += nospe noaltivec -# MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} -MULTILIB_MATCHES = MULTILIB_MATCHES += ${MULTILIB_MATCHES_ENDIAN} MULTILIB_MATCHES += ${MULTILIB_MATCHES_SYSV} # Map 405 to 403 @@ -52,37 +60,20 @@ MULTILIB_MATCHES += mcpu?8540=mcpu?8548 # (mfloat-gprs=single is implicit default) MULTILIB_MATCHES += mcpu?8540=mcpu?8540/mfloat-gprs?single -# Soft-float only, default implies msoft-float -# NOTE: Must match with MULTILIB_MATCHES_FLOAT and MULTILIB_MATCHES -MULTILIB_SOFTFLOAT_ONLY = \ -*mcpu=401/*msoft-float* \ -*mcpu=403/*msoft-float* \ -*mcpu=405/*msoft-float* \ -*mcpu=801/*msoft-float* \ -*mcpu=821/*msoft-float* \ -*mcpu=823/*msoft-float* \ -*mcpu=860/*msoft-float* - -# Hard-float only, take out msoft-float -MULTILIB_HARDFLOAT_ONLY = \ -*mcpu=505/*msoft-float* - -# Targets which do not support gprs -MULTILIB_NOGPRS = \ -mfloat-gprs=* \ -*mcpu=403/*mfloat-gprs=* \ -*mcpu=505/*mfloat-gprs=* \ -*mcpu=603e/*mfloat-gprs=* \ -*mcpu=604/*mfloat-gprs=* \ -*mcpu=860/*mfloat-gprs=* \ -*mcpu=7400/*mfloat-gprs=* - -MULTILIB_EXCEPTIONS = - -# Disallow -Dppc and -Dmpc without other options -MULTILIB_EXCEPTIONS += Dppc* Dmpc* +# Enumeration of multilibs -MULTILIB_EXCEPTIONS += \ -${MULTILIB_SOFTFLOAT_ONLY} \ -${MULTILIB_HARDFLOAT_ONLY} \ -${MULTILIB_NOGPRS} +MULTILIB_REQUIRED += msoft-float +MULTILIB_REQUIRED += mcpu=403 +MULTILIB_REQUIRED += mcpu=505 +MULTILIB_REQUIRED += mcpu=603e +MULTILIB_REQUIRED += mcpu=603e/msoft-float +MULTILIB_REQUIRED += mcpu=604 +MULTILIB_REQUIRED += mcpu=604/msoft-float +MULTILIB_REQUIRED += mcpu=7400 +MULTILIB_REQUIRED += mcpu=7400/msoft-float +MULTILIB_REQUIRED += mcpu=8540 +MULTILIB_REQUIRED += mcpu=8540/msoft-float/mno-spe +MULTILIB_REQUIRED += mcpu=8540/mfloat-gprs=double +MULTILIB_REQUIRED += mcpu=860 +MULTILIB_REQUIRED += mcpu=e6500/m32 +MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md index 2cf5e7a94..9aaf06428 100644 --- a/gcc-4.9/gcc/config/rs6000/vsx.md +++ b/gcc-4.9/gcc/config/rs6000/vsx.md @@ -260,6 +260,14 @@ UNSPEC_VSX_ROUND_IC UNSPEC_VSX_SLDWI UNSPEC_VSX_XXSPLTW + 
UNSPEC_VSX_XXSPLTD + UNSPEC_VSX_DIVSD + UNSPEC_VSX_DIVUD + UNSPEC_VSX_MULSD + UNSPEC_VSX_XVCVSXDDP + UNSPEC_VSX_XVCVUXDDP + UNSPEC_VSX_XVCVDPSXDS + UNSPEC_VSX_XVCVDPUXDS ]) ;; VSX moves @@ -746,6 +754,34 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) +; Emulate vector with scalar for vec_mul in V2DImode +(define_insn_and_split "vsx_mul_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_MULSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_muldi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_muldi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdouble")]) + (define_insn "*vsx_div3" [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?") (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",") @@ -755,6 +791,61 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) +; Emulate vector with scalar for vec_div in V2DImode +(define_insn_and_split "vsx_div_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_divdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_divdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdiv")]) + +(define_insn_and_split "vsx_udiv_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVUD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_udivdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_udivdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdiv")]) + ;; *tdiv* instruction returning the FG flag 
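The vsx_mul_v2di, vsx_div_v2di and vsx_udiv_v2di splitters above all follow the same shape: the machine has no V2DI multiply or divide instruction, so each doubleword lane is pulled out with vsx_extract_v2di, computed with the scalar DImode pattern, and the two results are glued back together with vsx_concat_v2di. A minimal C sketch of that extract/compute/concat idea, assuming a plain two-lane struct (v2di and v2di_mul are illustrative names, not GCC's):

    #include <stdint.h>

    typedef struct { int64_t lane[2]; } v2di;

    /* Emulate a 2 x 64-bit vector multiply with two scalar multiplies,
       mirroring the vsx_extract_v2di -> muldi3 -> vsx_concat_v2di
       sequence emitted by the splitter.  */
    static v2di
    v2di_mul (v2di a, v2di b)
    {
      v2di r;
      r.lane[0] = a.lane[0] * b.lane[0];  /* extract lane 0 of each, muldi3 */
      r.lane[1] = a.lane[1] * b.lane[1];  /* extract lane 1 of each, muldi3 */
      return r;
    }

The signed and unsigned divide splitters differ only in calling gen_divdi3 or gen_udivdi3 for the per-lane operation.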
(define_expand "vsx_tdiv3_fg" [(set (match_dup 3) @@ -904,11 +995,11 @@ ;; multiply. (define_insn "*vsx_fmav4sf4" - [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v") + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") (fma:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v") - (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v") - (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))] + (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") + (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))] "VECTOR_UNIT_VSX_P (V4SFmode)" "@ xvmaddasp %x0,%x1,%x2 @@ -919,11 +1010,11 @@ [(set_attr "type" "vecfloat")]) (define_insn "*vsx_fmav2df4" - [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa") + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") (fma:V2DF - (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa") - (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0") - (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))] + (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") + (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))] "VECTOR_UNIT_VSX_P (V2DFmode)" "@ xvmaddadp %x0,%x1,%x2 @@ -1268,6 +1359,102 @@ "xscvspdpn %x0,%x1" [(set_attr "type" "fp")]) +;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) + +(define_expand "vsx_xvcvsxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvsxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvsxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVSXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvuxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvuxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvuxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVUXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpsxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = gen_reg_rtx (V2DFmode); + int scale = INTVAL(operands[2]); + if (scale != 0) + rs6000_scale_v2df (tmp, op1, scale); + emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpsxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + 
UNSPEC_VSX_XVCVDPSXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpuxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = gen_reg_rtx (V2DFmode); + int scale = INTVAL(operands[2]); + if (scale != 0) + rs6000_scale_v2df (tmp, op1, scale); + emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpuxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVDPUXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpuxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1359,8 +1546,8 @@ (define_insn "vsx_concat_" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=,?") (vec_concat:VSX_D - (match_operand: 1 "vsx_register_operand" "ws,") - (match_operand: 2 "vsx_register_operand" "ws,")))] + (match_operand: 1 "vsx_register_operand" ",") + (match_operand: 2 "vsx_register_operand" ",")))] "VECTOR_MEM_VSX_P (mode)" { if (BYTES_BIG_ENDIAN) @@ -1647,7 +1834,7 @@ [(set (match_operand: 0 "register_operand" "=d,wv,wr") (vec_select: (match_operand:VSX_D 1 "memory_operand" "m,Z,m") - (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))] + (parallel [(const_int 0)])))] "VECTOR_MEM_VSX_P (mode)" "@ lfd%U1%X1 %0,%1 @@ -1921,6 +2108,22 @@ "xxspltw %x0,%x1,%2" [(set_attr "type" "vecperm")]) +;; V2DF/V2DI splat for use by vec_splat builtin +(define_insn "vsx_xxspltd_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSX_XXSPLTD))] + "VECTOR_MEM_VSX_P (mode)" +{ + if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0) + || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1)) + return "xxpermdi %x0,%x1,%x1,0"; + else + return "xxpermdi %x0,%x1,%x1,3"; +} + [(set_attr "type" "vecperm")]) + ;; V4SF/V4SI interleave (define_insn "vsx_xxmrghw_" [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?") @@ -2041,7 +2244,7 @@ ;; to the top element of the V2DF array without doing an extract. 
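The vsx_xxspltd_ pattern a little earlier in this hunk encodes the endian twist behind vec_splat on V2DF/V2DI: replicating element 0 needs xxpermdi immediate 0 under big-endian element ordering but immediate 3 under little-endian ordering, and vice versa for element 1. A hedged C model of just that immediate selection (function and parameter names are invented for illustration):

    #include <stdbool.h>

    /* Return the xxpermdi immediate that splats element ELT (0 or 1) of
       a V2DF/V2DI register: immediate 0 replicates the register's first
       doubleword, immediate 3 its second.  With little-endian element
       ordering, element 0 lives in the register's second doubleword,
       hence the swap.  */
    static int
    xxspltd_immediate (int elt, bool big_endian_element_order)
    {
      return (big_endian_element_order ? elt == 0 : elt == 1) ? 0 : 3;
    }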
(define_insn_and_split "*vsx_reduc__v2df_scalar" - [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa") + [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws") (vec_select:DF (VEC_reduc:V2DF (vec_concat:V2DF diff --git a/gcc-4.9/gcc/config/rs6000/xcoff.h b/gcc-4.9/gcc/config/rs6000/xcoff.h index f2b7bd07a..10123313f 100644 --- a/gcc-4.9/gcc/config/rs6000/xcoff.h +++ b/gcc-4.9/gcc/config/rs6000/xcoff.h @@ -304,14 +304,15 @@ do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ if ((ALIGN) > 32) \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,%u\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s%u_,%u\n", \ (SIZE), xcoff_bss_section_name, \ + floor_log2 ((ALIGN) / BITS_PER_UNIT), \ floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ else if ((SIZE) > 4) \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,3\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s3_,3\n", \ (SIZE), xcoff_bss_section_name); \ else \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,2\n", \ (SIZE), xcoff_bss_section_name); \ } while (0) #endif diff --git a/gcc-4.9/gcc/config/rx/rx.c b/gcc-4.9/gcc/config/rx/rx.c index 4242c1a97..11a825cfa 100644 --- a/gcc-4.9/gcc/config/rx/rx.c +++ b/gcc-4.9/gcc/config/rx/rx.c @@ -733,7 +733,7 @@ rx_print_operand (FILE * file, rtx op, int letter) break; case 'R': - gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4); + gcc_assert (GET_MODE_SIZE (GET_MODE (op)) <= 4); unsigned_load = true; /* Fall through. */ case 'Q': diff --git a/gcc-4.9/gcc/config/rx/rx.h b/gcc-4.9/gcc/config/rx/rx.h index d99b19ad2..06a0ae850 100644 --- a/gcc-4.9/gcc/config/rx/rx.h +++ b/gcc-4.9/gcc/config/rx/rx.h @@ -433,9 +433,9 @@ typedef unsigned int CUMULATIVE_ARGS; /* Compute the alignment needed for label X in various situations. If the user has specified an alignment then honour that, otherwise use rx_align_for_label. */ -#define JUMP_ALIGN(x) (align_jumps ? align_jumps : rx_align_for_label (x, 0)) -#define LABEL_ALIGN(x) (align_labels ? align_labels : rx_align_for_label (x, 3)) -#define LOOP_ALIGN(x) (align_loops ? align_loops : rx_align_for_label (x, 2)) +#define JUMP_ALIGN(x) (align_jumps > 1 ? align_jumps_log : rx_align_for_label (x, 0)) +#define LABEL_ALIGN(x) (align_labels > 1 ? align_labels_log : rx_align_for_label (x, 3)) +#define LOOP_ALIGN(x) (align_loops > 1 ? align_loops_log : rx_align_for_label (x, 2)) #define LABEL_ALIGN_AFTER_BARRIER(x) rx_align_for_label (x, 0) #define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP) \ diff --git a/gcc-4.9/gcc/config/s390/s390.c b/gcc-4.9/gcc/config/s390/s390.c index aac8de848..866de858c 100644 --- a/gcc-4.9/gcc/config/s390/s390.c +++ b/gcc-4.9/gcc/config/s390/s390.c @@ -9130,11 +9130,14 @@ s390_emit_epilogue (bool sibcall) if (! sibcall) { /* Fetch return address from stack before load multiple, - this will do good for scheduling. */ - - if (cfun_frame_layout.save_return_addr_p - || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM - && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM)) + this will do good for scheduling. + + Only do this if we already decided that r14 needs to be + saved to a stack slot. (And not just because r14 happens to + be in between two GPRs which need saving.) Otherwise it + would be difficult to take that decision back in + s390_optimize_prologue. 
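The check below therefore asks whether r14 already has a stack save slot (cfun_gpr_save_slot (RETURN_REGNUM) == -1) instead of consulting the frame-layout flags.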
*/ + if (cfun_gpr_save_slot (RETURN_REGNUM) == -1) { int return_regnum = find_unused_clobbered_reg(); if (!return_regnum) @@ -9149,6 +9152,12 @@ s390_emit_epilogue (bool sibcall) addr = gen_rtx_MEM (Pmode, addr); set_mem_alias_set (addr, get_frame_alias_set ()); emit_move_insn (return_reg, addr); + + /* Once we did that optimization we have to make sure + s390_optimize_prologue does not try to remove the + store of r14 since we will not be able to find the + load issued here. */ + cfun_frame_layout.save_return_addr_p = true; } } diff --git a/gcc-4.9/gcc/config/s390/s390.md b/gcc-4.9/gcc/config/s390/s390.md index b17c1fac8..10d7a5a6d 100644 --- a/gcc-4.9/gcc/config/s390/s390.md +++ b/gcc-4.9/gcc/config/s390/s390.md @@ -460,7 +460,7 @@ ;; This iterator and attribute allow to combine most atomic operations. (define_code_iterator ATOMIC [and ior xor plus minus mult]) (define_code_iterator ATOMIC_Z196 [and ior xor plus]) -(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") +(define_code_attr atomic [(and "and") (ior "or") (xor "xor") (plus "add") (minus "sub") (mult "nand")]) (define_code_attr noxa [(and "n") (ior "o") (xor "x") (plus "a")]) diff --git a/gcc-4.9/gcc/config/sh/predicates.md b/gcc-4.9/gcc/config/sh/predicates.md index 73bb880d6..3af1f8a14 100644 --- a/gcc-4.9/gcc/config/sh/predicates.md +++ b/gcc-4.9/gcc/config/sh/predicates.md @@ -398,7 +398,7 @@ (define_predicate "general_extend_operand" (match_code "subreg,reg,mem,truncate") { - if (GET_CODE (op) == TRUNCATE) + if (reload_completed && GET_CODE (op) == TRUNCATE) return arith_operand (op, mode); if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))) diff --git a/gcc-4.9/gcc/config/sh/sh-mem.cc b/gcc-4.9/gcc/config/sh/sh-mem.cc index 45af23acb..e5ef165cf 100644 --- a/gcc-4.9/gcc/config/sh/sh-mem.cc +++ b/gcc-4.9/gcc/config/sh/sh-mem.cc @@ -1,5 +1,5 @@ /* Helper routines for memory move and comparison insns. - Copyright (C) 2013-2014 Free Software Foundation, Inc. + Copyright (C) 2013-2015 Free Software Foundation, Inc. This file is part of GCC. @@ -226,7 +226,7 @@ sh_expand_cmpstr (rtx *operands) emit_move_insn (tmp3, addr2); emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); - /*start long loop. */ + /* start long loop. */ emit_label (L_loop_long); emit_move_insn (tmp2, tmp3); @@ -335,7 +335,7 @@ sh_expand_cmpnstr (rtx *operands) rtx len = force_reg (SImode, operands[3]); int constp = CONST_INT_P (operands[3]); - /* Loop on a register count. */ + /* Loop on a register count. */ if (constp) { rtx tmp0 = gen_reg_rtx (SImode); @@ -364,7 +364,7 @@ sh_expand_cmpnstr (rtx *operands) add_int_reg_note (jump, REG_BR_PROB, prob_likely); } - /* word count. Do we have iterations ? */ + /* word count. Do we have iterations ? */ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); /*start long loop. */ @@ -407,6 +407,7 @@ sh_expand_cmpnstr (rtx *operands) /* end loop. Reached max iterations. */ if (! 
sbytes) { + emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); jump = emit_jump_insn (gen_jump_compact (L_return)); emit_barrier_after (jump); } @@ -482,6 +483,13 @@ sh_expand_cmpnstr (rtx *operands) jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); emit_barrier_after (jump); } + else + { + emit_insn (gen_cmpeqsi_t (len, const0_rtx)); + emit_move_insn (operands[0], const0_rtx); + jump = emit_jump_insn (gen_branch_true (L_return)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + } addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); @@ -522,14 +530,14 @@ sh_expand_cmpnstr (rtx *operands) emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); - emit_label (L_return); - emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); + emit_label (L_return); + return true; } -/* Emit code to perform a strlen +/* Emit code to perform a strlen. OPERANDS[0] is the destination. OPERANDS[1] is the string. @@ -568,7 +576,7 @@ sh_expand_strlen (rtx *operands) addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); - /*start long loop. */ + /* start long loop. */ emit_label (L_loop_long); /* tmp1 is aligned, OK to load. */ diff --git a/gcc-4.9/gcc/config/sh/sh.c b/gcc-4.9/gcc/config/sh/sh.c index 3d4553a6e..06798181a 100644 --- a/gcc-4.9/gcc/config/sh/sh.c +++ b/gcc-4.9/gcc/config/sh/sh.c @@ -2957,7 +2957,7 @@ enum struct ashl_lshr_sequence { char insn_count; - char amount[6]; + signed char amount[6]; char clobbers_t; }; diff --git a/gcc-4.9/gcc/config/sh/sh.md b/gcc-4.9/gcc/config/sh/sh.md index ab1f0a51c..d957e5540 100644 --- a/gcc-4.9/gcc/config/sh/sh.md +++ b/gcc-4.9/gcc/config/sh/sh.md @@ -868,9 +868,9 @@ (define_insn "*cmp_div0s_0" [(set (reg:SI T_REG) - (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand" "%r") (const_int 31)) - (ge:SI (match_operand:SI 1 "arith_reg_operand") + (ge:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0))))] "TARGET_SH1" "div0s %0,%1" @@ -4563,6 +4563,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4803,6 +4809,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4896,6 +4908,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4995,6 +5013,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -5069,6 +5093,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P 
(operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -5263,6 +5293,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -6295,10 +6331,9 @@ label: }) (define_expand "extendqihi2" - [(set (match_operand:HI 0 "arith_reg_dest" "") - (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))] - "" - "") + [(set (match_operand:HI 0 "arith_reg_dest") + (sign_extend:HI (match_operand:QI 1 "arith_reg_operand")))] + "TARGET_SH1") (define_insn "*extendqihi2_compact_reg" [(set (match_operand:HI 0 "arith_reg_dest" "=r") diff --git a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc index 313e5b5f4..3791cc76c 100644 --- a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc +++ b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc @@ -111,7 +111,7 @@ private: // Given a start insn and its basic block, recursively determine all // possible ccreg values in all basic block paths that can lead to the // start insn. - void find_last_ccreg_values (rtx start_insn, basic_block bb, + bool find_last_ccreg_values (rtx start_insn, basic_block bb, std::vector& values_out, std::vector& prev_visited_bb) const; @@ -226,8 +226,8 @@ sh_optimize_sett_clrt::execute (void) ccreg_values.clear (); visited_bbs.clear (); - find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values, - visited_bbs); + bool ok = find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values, + visited_bbs); log_msg ("number of ccreg values collected: %u\n", (unsigned int)ccreg_values.size ()); @@ -235,7 +235,7 @@ sh_optimize_sett_clrt::execute (void) // If all the collected values are equal and are equal to the // constant value of the setcc insn, the setcc insn can be // removed. 
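The sett/clrt pass changes above give find_last_ccreg_values a bool result so the walk can bail out instead of silently collecting a partial set: only when every predecessor path is analyzable (the EDGE_COMPLEX abort added below) and every collected value matches the setcc constant may the insn be removed. A simplified stand-alone sketch of that all-paths check, assuming an invented block structure (cycle handling via the visited-block list is omitted here):

    #include <stdbool.h>
    #include <stddef.h>

    struct block
    {
      struct block **preds;  /* predecessor blocks */
      size_t n_preds;
      int known_cc;          /* 0 or 1 if set in this block, else -1 */
      bool complex_edge;     /* reached through an abnormal edge */
    };

    /* Return true only if every path into BB is analyzable and leaves
       the condition register equal to WANT -- the shape of
       find_last_ccreg_values combined with the all-values-equal test.  */
    static bool
    all_paths_set_cc (const struct block *bb, int want)
    {
      if (bb->complex_edge)
        return false;                 /* like the EDGE_COMPLEX log_return */
      if (bb->known_cc != -1)
        return bb->known_cc == want;  /* value determined in this block */
      if (bb->n_preds == 0)
        return false;                 /* function entry: value unknown */
      for (size_t i = 0; i < bb->n_preds; i++)
        if (!all_paths_set_cc (bb->preds[i], want))
          return false;
      return true;
    }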
- if (all_ccreg_values_equal (ccreg_values) + if (ok && all_ccreg_values_equal (ccreg_values) && rtx_equal_p (ccreg_values.front ().value, setcc_val)) { log_msg ("all values are "); @@ -309,7 +309,7 @@ sh_optimize_sett_clrt gcc_unreachable (); } -void +bool sh_optimize_sett_clrt ::find_last_ccreg_values (rtx start_insn, basic_block bb, std::vector& values_out, @@ -348,7 +348,7 @@ sh_optimize_sett_clrt log_msg ("\n"); values_out.push_back (v); - return; + return true; } if (any_condjump_p (i) && onlyjump_p (i) && !prev_visited_bb.empty ()) @@ -372,7 +372,7 @@ sh_optimize_sett_clrt log_msg ("\n"); values_out.push_back (v); - return; + return true; } } @@ -393,10 +393,14 @@ sh_optimize_sett_clrt for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei); ei_next (&ei)) { + if (ei_edge (ei)->flags & EDGE_COMPLEX) + log_return (false, "aborting due to complex edge\n"); + basic_block pred_bb = ei_edge (ei)->src; pred_bb_count += 1; - find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out, - prev_visited_bb); + if (!find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out, + prev_visited_bb)) + return false; } prev_visited_bb.pop_back (); @@ -419,6 +423,8 @@ sh_optimize_sett_clrt values_out.push_back (v); } + + return true; } bool diff --git a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc index e73604022..38e28038d 100644 --- a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc +++ b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc @@ -78,14 +78,17 @@ Example 1) In [bb 4] elimination of the comparison would require inversion of the branch condition and compensation of other BBs. -Instead an inverting reg-move can be used: +Instead the comparison in [bb 3] can be replaced with the comparison in [bb 5] +by using a reg-reg move. In [bb 4] a logical not is used to compensate the +inverted condition. [bb 3] (set (reg:SI 167) (reg:SI 173)) -> bb 5 [BB 4] -(set (reg:SI 167) (not:SI (reg:SI 177))) +(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0))) +(set (reg:SI 167) (reg:SI 147 t)) -> bb 5 [bb 5] @@ -214,9 +217,9 @@ In order to handle cases such as above the RTL pass does the following: and replace the comparisons in the BBs with reg-reg copies to get the operands in place (create new pseudo regs). - - If the cstores differ, try to apply the special case - (eq (reg) (const_int 0)) -> inverted = (not (reg)). - for the subordinate cstore types and eliminate the dominating ones. + - If the cstores differ and the comparison is a test against zero, + use reg-reg copies for the dominating cstores and logical not cstores + for the subordinate cstores. - If the comparison types in the BBs are not the same, or the first approach doesn't work out for some reason, try to eliminate the comparison before the @@ -558,7 +561,8 @@ private: bool can_extend_ccreg_usage (const bb_entry& e, const cbranch_trace& trace) const; - // Create an insn rtx that is a negating reg move (not operation). + // Create an insn rtx that performs a logical not (test != 0) on the src_reg + // and stores the result in dst_reg. rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const; // Create an insn rtx that inverts the ccreg. @@ -892,12 +896,32 @@ sh_treg_combine::can_remove_comparison (const bb_entry& e, rtx sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const { - // This will to go through expanders and may output multiple insns - // for multi-word regs. + // On SH we can do only SImode and DImode comparisons. + if (! 
(GET_MODE (src_reg) == SImode || GET_MODE (src_reg) == DImode)) + return NULL; + + // On SH we can store the ccreg into an SImode or DImode reg only. + if (! (GET_MODE (dst_reg) == SImode || GET_MODE (dst_reg) == DImode)) + return NULL; + start_sequence (); - expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0); + + emit_insn (gen_rtx_SET (VOIDmode, m_ccreg, + gen_rtx_fmt_ee (EQ, SImode, src_reg, const0_rtx))); + + if (GET_MODE (dst_reg) == SImode) + emit_move_insn (dst_reg, m_ccreg); + else if (GET_MODE (dst_reg) == DImode) + { + emit_move_insn (gen_lowpart (SImode, dst_reg), m_ccreg); + emit_move_insn (gen_highpart (SImode, dst_reg), const0_rtx); + } + else + gcc_unreachable (); + rtx i = get_insns (); end_sequence (); + return i; } @@ -1080,7 +1104,12 @@ sh_treg_combine::try_combine_comparisons (cbranch_trace& trace, // There is one special case though, where an integer comparison // (eq (reg) (const_int 0)) // can be inverted with a sequence - // (eq (not (reg)) (const_int 0)) + // (set (t) (eq (reg) (const_int 0)) + // (set (reg) (t)) + // (eq (reg) (const_int 0)) + // + // FIXME: On SH2A it might be better to use the nott insn in this case, + // i.e. do the try_eliminate_cstores approach instead. if (inv_cstore_count != 0 && cstore_count != 0) { if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX) diff --git a/gcc-4.9/gcc/config/sh/sync.md b/gcc-4.9/gcc/config/sh/sync.md index a0a22a1f5..a3acaac56 100644 --- a/gcc-4.9/gcc/config/sh/sync.md +++ b/gcc-4.9/gcc/config/sh/sync.md @@ -466,6 +466,7 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(match_operand:SI 2 "arith_operand" "rI08")] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -484,6 +485,7 @@ (set (mem:QIHI (match_dup 1)) (unspec:QIHI [(match_operand:QIHI 2 "register_operand" "r")] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -617,6 +619,7 @@ [(FETCHOP:SI (mem:SI (match_dup 1)) (match_operand:SI 2 "" ""))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -637,6 +640,7 @@ [(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_operand:QIHI 2 "" ""))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -784,6 +788,7 @@ [(not:SI (and:SI (mem:SI (match_dup 1)) (match_operand:SI 2 "logical_operand" "rK08")))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -805,6 +810,7 @@ [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_operand:QIHI 2 "logical_operand" "rK08")))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -903,7 +909,7 @@ " and %0,%3" "\n" " not %3,%3" "\n" " mov. 
%3,@%1" "\n" - " stc %4,sr"; + " ldc %4,sr"; } [(set_attr "length" "20")]) @@ -960,7 +966,8 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2))] - UNSPEC_ATOMIC))] + UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" { @@ -980,6 +987,7 @@ (unspec:QIHI [(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_dup 2))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -1124,7 +1132,8 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(not:SI (and:SI (mem:SI (match_dup 1)) (match_dup 2)))] - UNSPEC_ATOMIC))] + UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" { @@ -1145,6 +1154,7 @@ (unspec:QIHI [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_dup 2)))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -1353,7 +1363,7 @@ " ldc r0,sr" "\n" " mov.b @%0,r0" "\n" " mov.b %1,@%0" "\n" - " stc %2,sr" "\n" + " ldc %2,sr" "\n" " tst r0,r0"; } [(set_attr "length" "16")]) diff --git a/gcc-4.9/gcc/config/sparc/leon.md b/gcc-4.9/gcc/config/sparc/leon.md index 82b6a0d96..ad22e3b59 100644 --- a/gcc-4.9/gcc/config/sparc/leon.md +++ b/gcc-4.9/gcc/config/sparc/leon.md @@ -29,11 +29,11 @@ ;; Use a double reservation to work around the load pipeline hazard on UT699. (define_insn_reservation "leon3_load" 1 - (and (eq_attr "cpu" "leon3") (eq_attr "type" "load,sload")) + (and (eq_attr "cpu" "leon3,leon3v7") (eq_attr "type" "load,sload")) "leon_memory*2") (define_insn_reservation "leon_store" 2 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "store")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "store")) "leon_memory*2") ;; This describes Gaisler Research's FPU @@ -44,21 +44,21 @@ (define_cpu_unit "grfpu_ds" "grfpu") (define_insn_reservation "leon_fp_alu" 4 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fp,fpcmp,fpmul")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fp,fpcmp,fpmul")) "grfpu_alu, nothing*3") (define_insn_reservation "leon_fp_divs" 16 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivs")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivs")) "grfpu_ds*14, nothing*2") (define_insn_reservation "leon_fp_divd" 17 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivd")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivd")) "grfpu_ds*15, nothing*2") (define_insn_reservation "leon_fp_sqrts" 24 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrts")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrts")) "grfpu_ds*22, nothing*2") (define_insn_reservation "leon_fp_sqrtd" 25 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrtd")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrtd")) "grfpu_ds*23, nothing*2") diff --git a/gcc-4.9/gcc/config/sparc/linux.h b/gcc-4.9/gcc/config/sparc/linux.h index c54ba2cb5..c40bb0b78 100644 --- a/gcc-4.9/gcc/config/sparc/linux.h +++ b/gcc-4.9/gcc/config/sparc/linux.h @@ -147,12 +147,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. 
But they reserve the right to - change their minds. */ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/sparc/linux64.h b/gcc-4.9/gcc/config/sparc/linux64.h index f00fb42ff..12bb3780b 100644 --- a/gcc-4.9/gcc/config/sparc/linux64.h +++ b/gcc-4.9/gcc/config/sparc/linux64.h @@ -261,12 +261,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. But they reserve the right to - change their minds. */ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/sparc/sparc-opts.h b/gcc-4.9/gcc/config/sparc/sparc-opts.h index 13b375ae1..26017edc0 100644 --- a/gcc-4.9/gcc/config/sparc/sparc-opts.h +++ b/gcc-4.9/gcc/config/sparc/sparc-opts.h @@ -31,6 +31,7 @@ enum processor_type { PROCESSOR_HYPERSPARC, PROCESSOR_LEON, PROCESSOR_LEON3, + PROCESSOR_LEON3V7, PROCESSOR_SPARCLITE, PROCESSOR_F930, PROCESSOR_F934, diff --git a/gcc-4.9/gcc/config/sparc/sparc.c b/gcc-4.9/gcc/config/sparc/sparc.c index 5b00cca47..f7fc957b4 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.c +++ b/gcc-4.9/gcc/config/sparc/sparc.c @@ -786,9 +786,6 @@ char sparc_hard_reg_printed[8]; #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table #endif -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE sparc_option_override @@ -1246,6 +1243,7 @@ sparc_option_override (void) { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, { TARGET_CPU_leon, PROCESSOR_LEON }, { TARGET_CPU_leon3, PROCESSOR_LEON3 }, + { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, { TARGET_CPU_sparclite, PROCESSOR_F930 }, { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, { TARGET_CPU_sparclet, PROCESSOR_TSC701 }, @@ -1274,6 +1272,7 @@ sparc_option_override (void) { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU }, { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU }, { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU }, + { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU }, { "sparclite", MASK_ISA, MASK_SPARCLITE }, /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, @@ -1526,6 +1525,7 @@ sparc_option_override (void) sparc_costs = &leon_costs; break; case PROCESSOR_LEON3: + case PROCESSOR_LEON3V7: sparc_costs = &leon3_costs; break; case PROCESSOR_SPARCLET: @@ -6801,28 +6801,30 @@ function_arg_union_value (int size, enum machine_mode mode, int slotno, } /* Used by function_arg and sparc_function_value_1 to implement the conventions - for passing and returning large (BLKmode) vectors. + for passing and returning BLKmode vectors. Return an expression valid as a return value for the FUNCTION_ARG and TARGET_FUNCTION_VALUE. - SIZE is the size in bytes of the vector (at least 8 bytes). + SIZE is the size in bytes of the vector. REGNO is the FP hard register the vector will be passed in. 
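A vector smaller than 8 bytes now occupies a single SImode register at offset 0; anything larger is split into DImode registers at consecutive 8-byte offsets, as before.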
*/ static rtx function_arg_vector_value (int size, int regno) { - int i, nregs = size / 8; - rtx regs; - - regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); + const int nregs = MAX (1, size / 8); + rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); - for (i = 0; i < nregs; i++) - { + if (size < 8) + XVECEXP (regs, 0, 0) + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SImode, regno), + const0_rtx); + else + for (int i = 0; i < nregs; i++) XVECEXP (regs, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, regno + 2*i), GEN_INT (i*8)); - } return regs; } @@ -6868,10 +6870,9 @@ sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode, || (TARGET_ARCH64 && size <= 16)); if (mode == BLKmode) - return function_arg_vector_value (size, - SPARC_FP_ARG_FIRST + 2*slotno); - else - mclass = MODE_FLOAT; + return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno); + + mclass = MODE_FLOAT; } if (TARGET_ARCH32) @@ -7315,10 +7316,9 @@ sparc_function_value_1 (const_tree type, enum machine_mode mode, || (TARGET_ARCH64 && size <= 32)); if (mode == BLKmode) - return function_arg_vector_value (size, - SPARC_FP_ARG_FIRST); - else - mclass = MODE_FLOAT; + return function_arg_vector_value (size, SPARC_FP_ARG_FIRST); + + mclass = MODE_FLOAT; } if (TARGET_ARCH64 && type) diff --git a/gcc-4.9/gcc/config/sparc/sparc.h b/gcc-4.9/gcc/config/sparc/sparc.h index dd2b5ad9c..87f1d82d6 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.h +++ b/gcc-4.9/gcc/config/sparc/sparc.h @@ -106,17 +106,6 @@ extern enum cmodel sparc_cmodel; #define SPARC_DEFAULT_CMODEL CM_32 -/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO) - which requires the following macro to be true if enabled. Prior to V9, - there are no instructions to even talk about memory synchronization. - Note that the UltraSPARC III processors don't implement RMO, unlike the - UltraSPARC II processors. Niagara, Niagara-2, and Niagara-3 do not - implement RMO either. - - Default to false; for example, Solaris never enables RMO, only ever uses - total memory ordering (TMO). */ -#define SPARC_RELAXED_ORDERING false - /* Do not use the .note.GNU-stack convention by default. 
*/ #define NEED_INDICATE_EXEC_STACK 0 @@ -137,21 +126,22 @@ extern enum cmodel sparc_cmodel; #define TARGET_CPU_hypersparc 3 #define TARGET_CPU_leon 4 #define TARGET_CPU_leon3 5 -#define TARGET_CPU_sparclite 6 -#define TARGET_CPU_f930 6 /* alias */ -#define TARGET_CPU_f934 6 /* alias */ -#define TARGET_CPU_sparclite86x 7 -#define TARGET_CPU_sparclet 8 -#define TARGET_CPU_tsc701 8 /* alias */ -#define TARGET_CPU_v9 9 /* generic v9 implementation */ -#define TARGET_CPU_sparcv9 9 /* alias */ -#define TARGET_CPU_sparc64 9 /* alias */ -#define TARGET_CPU_ultrasparc 10 -#define TARGET_CPU_ultrasparc3 11 -#define TARGET_CPU_niagara 12 -#define TARGET_CPU_niagara2 13 -#define TARGET_CPU_niagara3 14 -#define TARGET_CPU_niagara4 15 +#define TARGET_CPU_leon3v7 6 +#define TARGET_CPU_sparclite 7 +#define TARGET_CPU_f930 7 /* alias */ +#define TARGET_CPU_f934 7 /* alias */ +#define TARGET_CPU_sparclite86x 8 +#define TARGET_CPU_sparclet 9 +#define TARGET_CPU_tsc701 9 /* alias */ +#define TARGET_CPU_v9 10 /* generic v9 implementation */ +#define TARGET_CPU_sparcv9 10 /* alias */ +#define TARGET_CPU_sparc64 10 /* alias */ +#define TARGET_CPU_ultrasparc 11 +#define TARGET_CPU_ultrasparc3 12 +#define TARGET_CPU_niagara 13 +#define TARGET_CPU_niagara2 14 +#define TARGET_CPU_niagara3 15 +#define TARGET_CPU_niagara4 16 #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \ @@ -239,6 +229,11 @@ extern enum cmodel sparc_cmodel; #define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG #endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_leon3v7 +#define CPP_CPU32_DEFAULT_SPEC "-D__leon__" +#define ASM_CPU32_DEFAULT_SPEC AS_LEONV7_FLAG +#endif + #endif #if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC) @@ -285,6 +280,7 @@ extern enum cmodel sparc_cmodel; %{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \ %{mcpu=leon:-D__leon__ -D__sparc_v8__} \ %{mcpu=leon3:-D__leon__ -D__sparc_v8__} \ +%{mcpu=leon3v7:-D__leon__} \ %{mcpu=v9:-D__sparc_v9__} \ %{mcpu=ultrasparc:-D__sparc_v9__} \ %{mcpu=ultrasparc3:-D__sparc_v9__} \ @@ -334,6 +330,7 @@ extern enum cmodel sparc_cmodel; %{mcpu=hypersparc:-Av8} \ %{mcpu=leon:" AS_LEON_FLAG "} \ %{mcpu=leon3:" AS_LEON_FLAG "} \ +%{mcpu=leon3v7:" AS_LEONV7_FLAG "} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ @@ -1760,8 +1757,10 @@ extern int sparc_indent_opcode; #ifdef HAVE_AS_LEON #define AS_LEON_FLAG "-Aleon" +#define AS_LEONV7_FLAG "-Aleon" #else #define AS_LEON_FLAG "-Av8" +#define AS_LEONV7_FLAG "-Av7" #endif /* We use gcc _mcount for profiling. 
*/ diff --git a/gcc-4.9/gcc/config/sparc/sparc.md b/gcc-4.9/gcc/config/sparc/sparc.md index 76c331597..954c297fd 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.md +++ b/gcc-4.9/gcc/config/sparc/sparc.md @@ -221,6 +221,7 @@ hypersparc, leon, leon3, + leon3v7, sparclite, f930, f934, diff --git a/gcc-4.9/gcc/config/sparc/sparc.opt b/gcc-4.9/gcc/config/sparc/sparc.opt index 64e40955a..3cd2b603a 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.opt +++ b/gcc-4.9/gcc/config/sparc/sparc.opt @@ -152,6 +152,9 @@ Enum(sparc_processor_type) String(leon) Value(PROCESSOR_LEON) EnumValue Enum(sparc_processor_type) String(leon3) Value(PROCESSOR_LEON3) +EnumValue +Enum(sparc_processor_type) String(leon3v7) Value(PROCESSOR_LEON3V7) + EnumValue Enum(sparc_processor_type) String(sparclite) Value(PROCESSOR_SPARCLITE) diff --git a/gcc-4.9/gcc/config/sparc/t-rtems b/gcc-4.9/gcc/config/sparc/t-rtems index 86a230261..ae7a33ab4 100644 --- a/gcc-4.9/gcc/config/sparc/t-rtems +++ b/gcc-4.9/gcc/config/sparc/t-rtems @@ -17,6 +17,15 @@ # . # -MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3 -MULTILIB_DIRNAMES = soft v8 leon3 +MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3/mcpu=leon3v7 muser-mode +MULTILIB_DIRNAMES = soft v8 leon3 leon3v7 user-mode MULTILIB_MATCHES = msoft-float=mno-fpu + +MULTILIB_EXCEPTIONS = muser-mode +MULTILIB_EXCEPTIONS += mcpu=leon3 +MULTILIB_EXCEPTIONS += mcpu=leon3v7 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3v7 +MULTILIB_EXCEPTIONS += msoft-float/muser-mode +MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/muser-mode +MULTILIB_EXCEPTIONS += mcpu=v8/muser-mode