From 38a8aecfb882072900434499696b5c32a2274515 Mon Sep 17 00:00:00 2001
From: Rong Xu
Date: Mon, 21 Jul 2014 16:47:22 -0700
Subject: [4.9] Switch gcc-4.9 to use google/gcc-4_9 branch.

This source drop uses svn revision r212828 of the google/gcc-4.9 branch.
We also cherry-picked r213062, r213063, and r213064 to fix Windows build
issues.

All gcc-4.9 patches before July 3rd have been ported to google/gcc-4.9.
The following prior commits have not yet been merged to the google branch
(they are included in this commit):
e7af147f979e657fe2df00808e5b4319b0e088c6,
baf87df3cb2683649ba7e9872362a7e721117c23, and
c231900e5dcc14d8296bd9f62b45997a49d4d5e7.

Change-Id: I4bea3ea470387ff751c2be4cb0d4a12059b9299b
---
 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h |  59 ++
 gcc-4.9/gcc/config/aarch64/aarch64-linux.h         |  35 +-
 gcc-4.9/gcc/config/aarch64/aarch64-modes.def       |   1 +
 gcc-4.9/gcc/config/aarch64/aarch64-simd.md         | 461 ++++++++++++++--
 gcc-4.9/gcc/config/aarch64/aarch64.c               |  20 +-
 gcc-4.9/gcc/config/aarch64/aarch64.md              |  28 +-
 gcc-4.9/gcc/config/aarch64/arm_neon.h              |  50 +-
 gcc-4.9/gcc/config/aarch64/iterators.md            |  16 +-
 gcc-4.9/gcc/config/alpha/alpha.c                   |   6 +
 gcc-4.9/gcc/config/arm/aout.h                      |   9 +-
 gcc-4.9/gcc/config/arm/arm-cores.def               |   9 +-
 gcc-4.9/gcc/config/arm/arm-opts.h                  |   9 +-
 gcc-4.9/gcc/config/arm/arm.c                       |  13 +-
 gcc-4.9/gcc/config/arm/arm.h                       |   9 +-
 gcc-4.9/gcc/config/arm/arm.md                      |  13 +-
 gcc-4.9/gcc/config/arm/arm_neon.h                  | 608 +++++++++++++++++----
 gcc-4.9/gcc/config/arm/bpabi.h                     |   9 +-
 gcc-4.9/gcc/config/arm/elf.h                       |   9 +-
 gcc-4.9/gcc/config/arm/linux-eabi.h                |   5 +-
 gcc-4.9/gcc/config/arm/linux-elf.h                 |  11 +-
 gcc-4.9/gcc/config/arm/linux-gas.h                 |   9 +-
 gcc-4.9/gcc/config/arm/linux-grte.h                |  27 +
 gcc-4.9/gcc/config/arm/neon-docgen.ml              | 424 --------------
 gcc-4.9/gcc/config/arm/neon-gen.ml                 | 520 ------------------
 gcc-4.9/gcc/config/arm/netbsd-elf.h                |   9 +-
 gcc-4.9/gcc/config/arm/uclinux-eabi.h              |   9 +-
 gcc-4.9/gcc/config/arm/uclinux-elf.h               |   9 +-
 gcc-4.9/gcc/config/arm/unspecs.md                  |   9 +-
 gcc-4.9/gcc/config/arm/vxworks.h                   |   9 +-
 gcc-4.9/gcc/config/avr/avr-fixed.md                |   4 +-
 gcc-4.9/gcc/config/avr/avr.h                       |  14 +-
 gcc-4.9/gcc/config/avr/avr.md                      |   9 +
 gcc-4.9/gcc/config/dbx.h                           |   9 +-
 gcc-4.9/gcc/config/i386/driver-i386.c              |   5 +
 gcc-4.9/gcc/config/i386/gnu-user.h                 |   6 +-
 gcc-4.9/gcc/config/i386/i386-protos.h              |  11 +
 gcc-4.9/gcc/config/i386/i386.c                     | 362 +++++++++++-
 gcc-4.9/gcc/config/i386/i386.md                    |  97 ++--
 gcc-4.9/gcc/config/i386/i386.opt                   |  20 +
 gcc-4.9/gcc/config/i386/linux.h                    |  20 +-
 gcc-4.9/gcc/config/i386/linux64.h                  |  19 +-
 gcc-4.9/gcc/config/i386/sse.md                     |  64 +++
 gcc-4.9/gcc/config/initfini-array.h                |   9 +-
 gcc-4.9/gcc/config/linux-grte.h                    |  41 ++
 gcc-4.9/gcc/config/linux.c                         |   4 +-
 gcc-4.9/gcc/config/linux.h                         |  17 +-
 gcc-4.9/gcc/config/msp430/msp430-opts.h            |  32 ++
 gcc-4.9/gcc/config/msp430/msp430.c                 | 117 +++-
 gcc-4.9/gcc/config/msp430/msp430.h                 |   4 +-
 gcc-4.9/gcc/config/msp430/msp430.md                |  32 +-
 gcc-4.9/gcc/config/msp430/predicates.md            |   4 +
 gcc-4.9/gcc/config/newlib-stdint.h                 |   9 +-
 gcc-4.9/gcc/config/rs6000/htm.md                   |   2 +-
 gcc-4.9/gcc/config/rs6000/htmxlintrin.h            |   9 +-
 gcc-4.9/gcc/config/rs6000/linux-grte.h             |  41 ++
 gcc-4.9/gcc/config/rs6000/linux64.h                |   6 +-
 gcc-4.9/gcc/config/rs6000/rs6000-builtin.def       |  21 +-
 gcc-4.9/gcc/config/rs6000/rs6000-protos.h          |   2 +-
 gcc-4.9/gcc/config/rs6000/rs6000.c                 |  71 ++-
 gcc-4.9/gcc/config/rs6000/rs6000.h                 |  17 +-
 gcc-4.9/gcc/config/rs6000/rs6000.md                |  24 +-
 gcc-4.9/gcc/config/rs6000/sysv4.h                  |  11 +-
 gcc-4.9/gcc/config/rs6000/vsx.md                   |  43 +-
 gcc-4.9/gcc/config/rtems.h                         |   9 +-
 gcc-4.9/gcc/config/sol2-clearcap.map               |   2 +
 gcc-4.9/gcc/config/sol2-clearcapv2.map             |   7 +
 gcc-4.9/gcc/config/sol2.h                          |  11 +-
 gcc-4.9/gcc/config/sol2.opt                        |   4 +
 gcc-4.9/gcc/config/t-sol2                          |   7 +
 gcc-4.9/gcc/config/v850/rtems.h                    |   9 +-
 gcc-4.9/gcc/config/v850/v850-opts.h                |   9 +-
 gcc-4.9/gcc/config/v850/v850.h                     |   9 +-
 72 files changed, 2183 insertions(+), 1445 deletions(-)
 create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
 create mode 100644 gcc-4.9/gcc/config/arm/linux-grte.h
 delete mode 100644 gcc-4.9/gcc/config/arm/neon-docgen.ml
 delete mode 100644 gcc-4.9/gcc/config/arm/neon-gen.ml
 create mode 100644 gcc-4.9/gcc/config/linux-grte.h
 create mode 100644 gcc-4.9/gcc/config/msp430/msp430-opts.h
 create mode 100644 gcc-4.9/gcc/config/rs6000/linux-grte.h
 create mode 100644 gcc-4.9/gcc/config/sol2-clearcap.map
 create mode 100644 gcc-4.9/gcc/config/sol2-clearcapv2.map

diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
new file mode 100644
index 000000000..91d235ff1
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
@@ -0,0 +1,59 @@
+/* Machine description for AArch64 architecture.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_AARCH64_LINUX_ANDROID_H
+#define GCC_AARCH64_LINUX_ANDROID_H
+
+
+#undef  TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS()		\
+  do						\
+    {						\
+      GNU_USER_TARGET_OS_CPP_BUILTINS();	\
+      ANDROID_TARGET_OS_CPP_BUILTINS();		\
+    }						\
+  while (0)
+
+#undef  LINK_SPEC
+#define LINK_SPEC					\
+  LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC,		\
+		       LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
+
+#undef  CC1_SPEC
+#define CC1_SPEC						\
+  LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC,		\
+		       GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
+
+#define CC1PLUS_SPEC \
+  LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
+
+#undef  LIB_SPEC
+#define LIB_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC,		\
+		       GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
+
+#undef  STARTFILE_SPEC
+#define STARTFILE_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC						\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+#endif /* GCC_AARCH64_LINUX_ANDROID_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
index f32d19f16..f8a97c899 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
@@ -21,8 +21,10 @@
 #ifndef GCC_AARCH64_LINUX_H
 #define GCC_AARCH64_LINUX_H
 
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
-#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
 
 #define CPP_SPEC "%{pthread:-D_REENTRANT}"
 
@@ -36,38 +38,13 @@
    %{mbig-endian:-EB} %{mlittle-endian:-EL} \
    -maarch64linux%{mbig-endian:b}"
 
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+
 #define TARGET_OS_CPP_BUILTINS()		\
   do						\
     {						\
       GNU_USER_TARGET_OS_CPP_BUILTINS();	\
-      ANDROID_TARGET_OS_CPP_BUILTINS();		\
     }						\
   while (0)
 
-#undef LINK_SPEC
-#define LINK_SPEC \
-  LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC,		\
-		       LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
-
-#undef CC1_SPEC
-#define CC1_SPEC						\
-  LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC,		\
-		       GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
-
-#define CC1PLUS_SPEC \
-  LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
-
-#undef LIB_SPEC
-#define LIB_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC,		\
-		       GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC						\
-  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
-
 #endif /* GCC_AARCH64_LINUX_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
index 1d2cc7679..f9c436948 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
@@ -31,6 +31,7 @@
 VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI.  */
 VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI.  */
 VECTOR_MODES (FLOAT, 8);      /*            V2SF.  */
 VECTOR_MODES (FLOAT, 16);     /*       V4SF V2DF.  */
+VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
 
 /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments.
*/ INT_MODE (OI, 32); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 73aee2c3d..1f827b57d 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,14 +934,22 @@ [(set_attr "type" "neon_minmax")] ) -;; Move into low-half clearing high half to 0. +;; vec_concat gives a new vector with the low elements from operand 1, and +;; the high elements from operand 2. That is to say, given op1 = { a, b } +;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. +;; What that means, is that the RTL descriptions of the below patterns +;; need to change depending on endianness. -(define_insn "move_lo_quad_" +;; Move to the low architectural bits of the register. +;; On little-endian this is { operand, zeroes } +;; On big-endian this is { zeroes, operand } + +(define_insn "move_lo_quad_internal_" [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ (match_operand: 1 "register_operand" "w,r,r") (vec_duplicate: (const_int 0))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 @@ -952,7 +960,39 @@ (set_attr "length" "4")] ) -;; Move into high-half. +(define_insn "move_lo_quad_internal_be_" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (vec_duplicate: (const_int 0)) + (match_operand: 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup,f_mcr,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +(define_expand "move_lo_quad_" + [(match_operand:VQ 0 "register_operand") + (match_operand:VQ 1 "register_operand")] + "TARGET_SIMD" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_move_lo_quad_internal_be_ (operands[0], operands[1])); + else + emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1])); + DONE; +} +) + +;; Move operand1 to the high architectural bits of the register, keeping +;; the low architectural bits of operand2. 
+;; For little-endian this is { operand2, operand1 } +;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_" [(set (match_operand:VQ 0 "register_operand" "+w,w") @@ -961,12 +1001,25 @@ (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) (match_operand: 1 "register_operand" "w,r")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins") - (set_attr "length" "4")] + [(set_attr "type" "neon_ins")] +) + +(define_insn "aarch64_simd_move_hi_quad_be_" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (match_operand: 1 "register_operand" "w,r") + (vec_select: + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins")] ) (define_expand "move_hi_quad_" @@ -974,9 +1027,13 @@ (match_operand: 1 "register_operand" "")] "TARGET_SIMD" { - rtx p = aarch64_simd_vect_par_cnst_half (mode, false); - emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], - operands[1], p)); + rtx p = aarch64_simd_vect_par_cnst_half (mode, BYTES_BIG_ENDIAN); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_simd_move_hi_quad_be_ (operands[0], + operands[1], p)); + else + emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], + operands[1], p)); DONE; }) @@ -2321,12 +2378,44 @@ (vec_concat: (match_operand:VDIC 1 "register_operand" "w") (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "mov\\t%0.8b, %1.8b" [(set_attr "type" "neon_move")] ) -(define_insn_and_split "aarch64_combine" +(define_insn "*aarch64_combinez_be" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:VDIC 1 "register_operand" "w")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move")] +) + +(define_expand "aarch64_combine" + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] + "TARGET_SIMD" +{ + rtx op1, op2; + if (BYTES_BIG_ENDIAN) + { + op1 = operands[2]; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = operands[2]; + } + emit_insn (gen_aarch64_combine_internal (operands[0], op1, op2)); + DONE; +} +) + +(define_insn_and_split "aarch64_combine_internal" [(set (match_operand: 0 "register_operand" "=&w") (vec_concat: (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] @@ -2335,16 +2424,19 @@ "&& reload_completed" [(const_int 0)] { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + if (BYTES_BIG_ENDIAN) + aarch64_split_simd_combine (operands[0], operands[2], operands[1]); + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } [(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine" - [(set (match_operand: 0 "register_operand" "=&w") - (vec_concat: (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] + [(match_operand: 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); @@ -2633,7 +2725,41 @@ ;; sqdmulh_lane -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + 
(match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2649,7 +2775,41 @@ [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_laneq" +(define_expand "aarch64_sqdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_laneq_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2659,24 +2819,56 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0., %1., %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT 
(ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0, %1, %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) @@ -2712,7 +2904,31 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) )) (const_int 1))))] @@ -2735,7 +2951,30 @@ (match_operand:SD_HSI 2 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) ) (const_int 1))))] @@ -2752,11 +2991,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2767,12 +3007,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn 
(gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2782,11 +3023,12 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2797,12 +3039,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2890,7 +3133,33 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll2\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) )))) (const_int 1))))] @@ -2907,12 +3176,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2923,13 +3193,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" 
"i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -2939,12 +3210,13 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2955,13 +3227,14 @@ [(match_operand: 0 "register_operand" "=w") (match_operand: 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand: 3 "register_operand" "") + (match_operand: 3 "register_operand" "") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -3041,7 +3314,28 @@ (sign_extend: (vec_duplicate:VD_HSI (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3061,7 +3355,27 @@ (match_operand:SD_HSI 1 "register_operand" "w")) (sign_extend: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")])) )) (const_int 1)))] @@ -3076,11 
+3390,12 @@ (define_expand "aarch64_sqdmull_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VSD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3089,12 +3404,13 @@ (define_expand "aarch64_sqdmull_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VD_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - emit_insn (gen_aarch64_sqdmull_lane_internal + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull_laneq_internal (operands[0], operands[1], operands[2], operands[3])); DONE; }) @@ -3143,7 +3459,7 @@ (define_expand "aarch64_sqdmull2" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "w")] + (match_operand:VQ_HSI 2 "register_operand" "w")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); @@ -3165,7 +3481,30 @@ (sign_extend: (vec_duplicate: (vec_select: - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull2\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull2_laneq_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 2 "register_operand" "") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3180,12 +3519,13 @@ (define_expand "aarch64_sqdmull2_lane" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3195,13 +3535,14 @@ (define_expand "aarch64_sqdmull2_laneq" [(match_operand: 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand: 2 "register_operand" "") + (match_operand: 2 "register_operand" "") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); - 
emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull2_laneq_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 7b6c2b38e..bf35031ec 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); int ncrn, nvrn, nregs; bool allocate_ncrn, allocate_nvrn; + HOST_WIDE_INT size; /* We need to do this once per argument. */ if (pcum->aapcs_arg_processed) @@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_arg_processed = true; + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ + size + = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), + UNITS_PER_WORD); + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, @@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, } ncrn = pcum->aapcs_ncrn; - nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - + nregs = size / UNITS_PER_WORD; /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits @@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_nextncrn = NUM_ARG_REGS; /* The argument is passed on stack; record the needed number of words for - this argument (we can re-use NREGS) and align the total size if - necessary. */ + this argument and align the total size if necessary. 
*/ on_stack: - pcum->aapcs_stack_words = nregs; + pcum->aapcs_stack_words = size / UNITS_PER_WORD; if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, - 16 / UNITS_PER_WORD) + 1; + 16 / UNITS_PER_WORD); return; } @@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) || mode == V16QImode || mode == V2DImode || mode == V2SImode || mode == V4HImode || mode == V8QImode || mode == V2SFmode - || mode == V4SFmode || mode == V2DFmode)) + || mode == V4SFmode || mode == V2DFmode + || mode == V1DFmode)) return true; return false; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index c86a29d8e..df81045e9 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -2823,17 +2823,18 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + (match_operand:GPI 1 "register_operand" "w,w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] "" "@ sshr\t%0, %1, %2 # + # asr\t%0, %1, %2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] + [(set_attr "simd" "yes,yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] ) (define_split @@ -2842,11 +2843,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_split @@ -2855,11 +2858,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_insn "*aarch64_sisd_ushl" @@ -3608,6 +3613,7 @@ (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index b03d11422..c01669b2c 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); @@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) 
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); @@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } @@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); @@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); @@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline 
int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } @@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } @@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); } @@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); } @@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ 
-21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } @@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } @@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md index f1339b8cc..e76e3ef10 100644 --- a/gcc-4.9/gcc/config/aarch64/iterators.md +++ b/gcc-4.9/gcc/config/aarch64/iterators.md @@ -396,14 +396,15 @@ (SI "SI") (HI "HI") (QI "QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI") +;; 64-bit container modes the inner or scalar source mode. +(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI") + (V4HI "V4HI") (V8HI "V4HI") (V2SI "V2SI") (V4SI "V2SI") (DI "DI") (V2DI "DI") (V2SF "V2SF") (V4SF "V2SF") (V2DF "DF")]) -;; Define container mode for lane selection. +;; 128-bit container modes the inner or scalar source mode. (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") (V4HI "V8HI") (V8HI "V8HI") (V2SI "V4SI") (V4SI "V4SI") @@ -412,15 +413,6 @@ (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI") - (V4HI "V8HI") (V8HI "V8HI") - (V2SI "V4SI") (V4SI "V4SI") - (DI "V2DI") (V2DI "V2DI") - (V2SF "V4SF") (V4SF "V4SF") - (V2DF "V2DF") (SI "V4SI") - (HI "V8HI") (QI "V16QI")]) - ;; Half modes of all vector modes. (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") (V4HI "V2HI") (V8HI "V4HI") diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index dc07a02c0..d5c7908be 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -8715,6 +8715,11 @@ alpha_handle_trap_shadows (void) } break; + case BARRIER: + /* __builtin_unreachable can expand to no code at all, + leaving (barrier) RTXes in the instruction stream. 
*/ + goto close_shadow_notrapb; + case JUMP_INSN: case CALL_INSN: case CODE_LABEL: @@ -8730,6 +8735,7 @@ alpha_handle_trap_shadows (void) n = emit_insn_before (gen_trapb (), i); PUT_MODE (n, TImode); PUT_MODE (i, TImode); + close_shadow_notrapb: trap_pending = 0; shadow.used.i = 0; shadow.used.fp = 0; diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h index 51d32a9d4..c8f4e45c6 100644 --- a/gcc-4.9/gcc/config/arm/aout.h +++ b/gcc-4.9/gcc/config/arm/aout.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef ASM_APP_ON diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def index 42f00b463..56041ec8b 100644 --- a/gcc-4.9/gcc/config/arm/arm-cores.def +++ b/gcc-4.9/gcc/config/arm/arm-cores.def @@ -14,8 +14,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Before using #include to read this file, define a macro: diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h index a8393975a..21902940e 100644 --- a/gcc-4.9/gcc/config/arm/arm-opts.h +++ b/gcc-4.9/gcc/config/arm/arm-opts.h @@ -13,8 +13,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef ARM_OPTS_H diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 83763555c..3c237cb6d 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -16739,11 +16739,12 @@ thumb1_reorg (void) rtx prev, insn = BB_END (bb); bool insn_clobbered = false; - while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn)) insn = PREV_INSN (insn); /* Find the last cbranchsi4_insn in basic block BB. 
*/ - if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + if (insn == BB_HEAD (bb) + || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) continue; /* Get the register with which we are comparing. */ @@ -28210,9 +28211,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, fputs (":\n", file); if (flag_pic) { - /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */ rtx tem = XEXP (DECL_RTL (function), 0); - tem = plus_constant (GET_MODE (tem), tem, -7); + /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC + pipeline offset is four rather than eight. Adjust the offset + accordingly. */ + tem = plus_constant (GET_MODE (tem), tem, + TARGET_THUMB1_ONLY ? -3 : -7); tem = gen_rtx_MINUS (GET_MODE (tem), tem, gen_rtx_SYMBOL_REF (Pmode, diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index 4d9121436..ab5167a8b 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -17,8 +17,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef GCC_ARM_H diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 662465f2a..467f9ce4e 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -75,6 +75,8 @@ ] ) +;; UNSPEC_VOLATILE Usage: + ;;--------------------------------------------------------------------------- ;; Attributes @@ -8349,8 +8351,8 @@ (define_insn_and_split "*arm_cmpdi_unsigned" [(set (reg:CC_CZ CC_REGNUM) - (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") - (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,Di,rDi")))] "TARGET_32BIT" "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" @@ -8370,9 +8372,9 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "set") - (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "arch" "t2,t2,*") - (set_attr "length" "6,6,8") + (set_attr "enabled_for_depr_it" "yes,yes,no,*") + (set_attr "arch" "t2,t2,t2,a") + (set_attr "length" "6,6,10,8") (set_attr "type" "multiple")] ) @@ -9860,6 +9862,7 @@ "TARGET_32BIT" "%i1%?\\t%0, %2, %4%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "4") (set_attr "arch" "a,t2,t2,a") ;; Thumb2 doesn't allow the stack pointer to be used for diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h index 37a6e611b..95735433d 100644 --- a/gcc-4.9/gcc/config/arm/arm_neon.h +++ b/gcc-4.9/gcc/config/arm/arm_neon.h @@ -1,5 +1,4 @@ -/* ARM NEON intrinsics include file. This file is generated automatically - using neon-gen.ml. Please do not edit manually. +/* ARM NEON intrinsics include file. Copyright (C) 2006-2014 Free Software Foundation, Inc. Contributed by CodeSourcery. 
@@ -7707,12 +7706,32 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } +/* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as + follows. (nelt = the number of elements within a vector.) + + Firstly, a value of N within a mask, becomes (N ^ (nelt - 1)), as gcc vector + extension's indexing scheme is reversed *within each vector* (relative to the + neon intrinsics view), but without changing which of the two vectors. + + Secondly, the elements within each mask are reversed, as the mask is itself a + vector, and will itself be loaded in reverse order (again, relative to the + neon intrinsics view, i.e. that would result from a "vld1" instruction). */ + __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vtrn_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7720,8 +7739,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vtrn_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7729,8 +7753,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vtrn_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7738,8 +7771,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vtrn_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = 
__builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7747,8 +7785,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vtrn_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7756,8 +7803,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vtrn_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7765,8 +7817,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vtrn_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7774,8 +7831,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vtrn_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7783,8 +7845,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vtrn_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7792,8 +7859,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vtrnq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7801,8 +7877,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vtrnq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7810,8 +7895,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vtrnq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7819,8 +7909,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vtrnq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7828,8 +7923,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = 
(uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7837,8 +7941,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7846,8 +7959,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7855,8 +7973,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7864,8 +7991,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + 
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7873,8 +8009,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vzip_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7882,8 +8027,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vzip_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7891,8 +8041,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vzip_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7900,8 +8059,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vzip_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7909,8 +8073,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vzip_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, 
(uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7918,8 +8091,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vzip_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7927,8 +8105,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vzip_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7936,8 +8119,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vzip_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7945,8 +8133,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vzip_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7954,8 +8147,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vzipq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = 
(int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -7963,8 +8165,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vzipq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7972,8 +8183,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vzipq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7981,8 +8197,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vzipq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7990,8 +8211,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vzipq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 
23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -7999,8 +8229,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vzipq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -8008,8 +8247,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vzipq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -8017,8 +8261,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vzipq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -8026,8 +8279,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vzipq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return 
__rv; } @@ -8035,8 +8297,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vuzp_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8044,8 +8315,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vuzp_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8053,8 +8329,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vuzp_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8062,8 +8343,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vuzp_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8071,8 +8357,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vuzp_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); 
+#endif return __rv; } @@ -8080,8 +8375,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vuzp_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8089,8 +8389,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vuzp_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8098,8 +8403,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vuzp_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8107,8 +8421,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vuzp_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8116,8 +8435,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vuzpq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 
}); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8125,8 +8453,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vuzpq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8134,8 +8471,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vuzpq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8143,8 +8485,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vuzpq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8152,8 +8499,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8161,8 +8517,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) 
 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8170,8 +8535,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8179,8 +8549,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
-  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
   return __rv;
 }
 
@@ -8188,8 +8567,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h
index bc223f8e3..7a576ac46 100644
--- a/gcc-4.9/gcc/config/arm/bpabi.h
+++ b/gcc-4.9/gcc/config/arm/bpabi.h
@@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
- You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Use the AAPCS ABI by default. */ diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h index 2edf520de..15a32fb8a 100644 --- a/gcc-4.9/gcc/config/arm/elf.h +++ b/gcc-4.9/gcc/config/arm/elf.h @@ -16,8 +16,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef OBJECT_FORMAT_ELF diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h index 4d42cbfc8..350639f32 100644 --- a/gcc-4.9/gcc/config/arm/linux-eabi.h +++ b/gcc-4.9/gcc/config/arm/linux-eabi.h @@ -68,8 +68,9 @@ GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ #undef GLIBC_DYNAMIC_LINKER -#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" -#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" + +#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.3" +#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3" #define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT #define GLIBC_DYNAMIC_LINKER \ diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h index 5dc3328e8..e825ae48c 100644 --- a/gcc-4.9/gcc/config/arm/linux-elf.h +++ b/gcc-4.9/gcc/config/arm/linux-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* elfos.h should have already been included. Now just override @@ -57,7 +62,7 @@ #define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" #define LINUX_TARGET_LINK_SPEC "%{h*} \ %{static:-Bstatic} \ diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h index 52a739c26..1dd043782 100644 --- a/gcc-4.9/gcc/config/arm/linux-gas.h +++ b/gcc-4.9/gcc/config/arm/linux-gas.h @@ -15,8 +15,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* This is how we tell the assembler that a symbol is weak. diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h new file mode 100644 index 000000000..7ee5806b7 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -0,0 +1,27 @@ +/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Chris Demetriou. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml deleted file mode 100644 index 5788a533e..000000000 --- a/gcc-4.9/gcc/config/arm/neon-docgen.ml +++ /dev/null @@ -1,424 +0,0 @@ -(* ARM NEON documentation generator. - - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - . - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. - - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-docgen neon.cmo neon-docgen.ml - - Run with: - /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi -*) - -open Neon - -(* The combined "ops" and "reinterp" table. *) -let ops_reinterp = reinterp @ ops - -(* Helper functions for extracting things from the "ops" table. 
*) -let single_opcode desired_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - if opcode = desired_opcode then row :: got_so_far - else got_so_far - ) [] ops_reinterp - -let multiple_opcodes desired_opcodes () = - List.fold_left (fun got_so_far -> - fun desired_opcode -> - (single_opcode desired_opcode ()) @ got_so_far) - [] desired_opcodes - -let ldx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vldx n | Vldx_lane n | Vldx_dup n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let stx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vstx n | Vstx_lane n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbl_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbl _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbx_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbx _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -(* The groups of intrinsics. *) -let intrinsic_groups = - [ "Addition", single_opcode Vadd; - "Multiplication", single_opcode Vmul; - "Multiply-accumulate", single_opcode Vmla; - "Multiply-subtract", single_opcode Vmls; - "Fused-multiply-accumulate", single_opcode Vfma; - "Fused-multiply-subtract", single_opcode Vfms; - "Round to integral (to nearest, ties to even)", single_opcode Vrintn; - "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta; - "Round to integral (towards +Inf)", single_opcode Vrintp; - "Round to integral (towards -Inf)", single_opcode Vrintm; - "Round to integral (towards 0)", single_opcode Vrintz; - "Subtraction", single_opcode Vsub; - "Comparison (equal-to)", single_opcode Vceq; - "Comparison (greater-than-or-equal-to)", single_opcode Vcge; - "Comparison (less-than-or-equal-to)", single_opcode Vcle; - "Comparison (greater-than)", single_opcode Vcgt; - "Comparison (less-than)", single_opcode Vclt; - "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; - "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; - "Comparison (absolute greater-than)", single_opcode Vcagt; - "Comparison (absolute less-than)", single_opcode Vcalt; - "Test bits", single_opcode Vtst; - "Absolute difference", single_opcode Vabd; - "Absolute difference and accumulate", single_opcode Vaba; - "Maximum", single_opcode Vmax; - "Minimum", single_opcode Vmin; - "Pairwise add", single_opcode Vpadd; - "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; - "Folding maximum", single_opcode Vpmax; - "Folding minimum", single_opcode Vpmin; - "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; - "Vector shift left", single_opcode Vshl; - "Vector shift left by constant", single_opcode Vshl_n; - "Vector shift right by constant", single_opcode Vshr_n; - "Vector shift right by constant and accumulate", single_opcode Vsra_n; - "Vector shift right and insert", single_opcode Vsri; - "Vector shift left and insert", single_opcode Vsli; - "Absolute value", single_opcode Vabs; - "Negation", single_opcode Vneg; - "Bitwise not", single_opcode Vmvn; - "Count leading sign bits", single_opcode Vcls; - "Count leading zeros", single_opcode Vclz; - 
"Count number of set bits", single_opcode Vcnt; - "Reciprocal estimate", single_opcode Vrecpe; - "Reciprocal square-root estimate", single_opcode Vrsqrte; - "Get lanes from a vector", single_opcode Vget_lane; - "Set lanes in a vector", single_opcode Vset_lane; - "Create vector from literal bit pattern", single_opcode Vcreate; - "Set all lanes to the same value", - multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; - "Combining vectors", single_opcode Vcombine; - "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; - "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; - "Move, single_opcode narrowing", single_opcode Vmovn; - "Move, single_opcode long", single_opcode Vmovl; - "Table lookup", tbl_opcode; - "Extended table lookup", tbx_opcode; - "Multiply, lane", single_opcode Vmul_lane; - "Long multiply, lane", single_opcode Vmull_lane; - "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; - "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; - "Multiply-accumulate, lane", single_opcode Vmla_lane; - "Multiply-subtract, lane", single_opcode Vmls_lane; - "Vector multiply by scalar", single_opcode Vmul_n; - "Vector long multiply by scalar", single_opcode Vmull_n; - "Vector saturating doubling long multiply by scalar", - single_opcode Vqdmull_n; - "Vector saturating doubling multiply high by scalar", - single_opcode Vqdmulh_n; - "Vector multiply-accumulate by scalar", single_opcode Vmla_n; - "Vector multiply-subtract by scalar", single_opcode Vmls_n; - "Vector extract", single_opcode Vext; - "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; - "Bit selection", single_opcode Vbsl; - "Transpose elements", single_opcode Vtrn; - "Zip elements", single_opcode Vzip; - "Unzip elements", single_opcode Vuzp; - "Element/structure loads, VLD1 variants", ldx_opcode 1; - "Element/structure stores, VST1 variants", stx_opcode 1; - "Element/structure loads, VLD2 variants", ldx_opcode 2; - "Element/structure stores, VST2 variants", stx_opcode 2; - "Element/structure loads, VLD3 variants", ldx_opcode 3; - "Element/structure stores, VST3 variants", stx_opcode 3; - "Element/structure loads, VLD4 variants", ldx_opcode 4; - "Element/structure stores, VST4 variants", stx_opcode 4; - "Logical operations (AND)", single_opcode Vand; - "Logical operations (OR)", single_opcode Vorr; - "Logical operations (exclusive OR)", single_opcode Veor; - "Logical operations (AND-NOT)", single_opcode Vbic; - "Logical operations (OR-NOT)", single_opcode Vorn; - "Reinterpret casts", single_opcode Vreinterp ] - -(* Given an intrinsic shape, produce a string to document the corresponding - operand shapes. 
*) -let rec analyze_shape shape = - let rec n_things n thing = - match n with - 0 -> [] - | n -> thing :: (n_things (n - 1) thing) - in - let rec analyze_shape_elt reg_no elt = - match elt with - Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" - | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" - | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" - | Immed -> "#@var{0}" - | VecArray (1, elt) -> - let elt_regexp = analyze_shape_elt 0 elt in - "@{" ^ elt_regexp ^ "@}" - | VecArray (n, elt) -> - let rec f m = - match m with - 0 -> [] - | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) - in - let ops = List.rev (f n) in - "@{" ^ (commas (fun x -> x) ops "") ^ "@}" - | (PtrTo elt | CstPtrTo elt) -> - "[" ^ (analyze_shape_elt reg_no elt) ^ "]" - | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" - | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" - | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" - | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) - in - match shape with - All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" - | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ - (analyze_shape_elt 0 elt) - | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Wide_noreg elt -> analyze_shape (Long_noreg elt) - | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Qreg) - | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" - | By_scalar Dreg -> - analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) - | By_scalar Qreg -> - analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) - | By_scalar _ -> assert false - | Wide_lane -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Wide_scalar -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Pair_result elt -> - let elt_regexp = analyze_shape_elt 0 elt in - let elt_regexp' = analyze_shape_elt 1 elt in - elt_regexp ^ ", " ^ elt_regexp' - | Unary_scalar _ -> "FIXME Unary_scalar" - | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) - | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) - | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) - -(* Document a single intrinsic. *) -let describe_intrinsic first chan - (elt_ty, (_, features, shape, name, munge, _)) = - let c_arity, new_elt_ty = munge shape elt_ty in - let c_types = strings_of_arity c_arity in - Printf.fprintf chan "@itemize @bullet\n"; - let item_code = if first then "@item" else "@itemx" in - Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) - (intrinsic_name name) (string_of_elt elt_ty); - Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); - if not (List.exists (fun feature -> feature = No_op) features) then - begin - let print_one_insn name = - Printf.fprintf chan "@code{"; - let no_suffix = (new_elt_ty = NoElts) in - let name_with_suffix = - if no_suffix then name - else name ^ "." ^ (string_of_elt_dots new_elt_ty) - in - let possible_operands = analyze_all_shapes features shape - analyze_shape - in - let rec print_one_possible_operand op = - Printf.fprintf chan "%s %s}" name_with_suffix op - in - (* If the intrinsic expands to multiple instructions, we assume - they are all of the same form. 
*) - print_one_possible_operand (List.hd possible_operands) - in - let rec print_insns names = - match names with - [] -> () - | [name] -> print_one_insn name - | name::names -> (print_one_insn name; - Printf.fprintf chan " @emph{or} "; - print_insns names) - in - let insn_names = get_insn_names features name in - Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; - print_insns insn_names; - Printf.fprintf chan "\n" - end; - Printf.fprintf chan "@end itemize\n"; - Printf.fprintf chan "\n\n" - -(* Document a group of intrinsics. *) -let document_group chan (group_title, group_extractor) = - (* Extract the rows in question from the ops table and then turn them - into a list of intrinsics. *) - let intrinsics = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (_, _, _, _, _, elt_tys) -> - List.fold_left (fun got_so_far' -> - fun elt_ty -> - (elt_ty, row) :: got_so_far') - got_so_far elt_tys - ) [] (group_extractor ()) - in - (* Emit the title for this group. *) - Printf.fprintf chan "@subsubsection %s\n\n" group_title; - (* Emit a description of each intrinsic. *) - List.iter (describe_intrinsic true chan) intrinsics; - (* Close this group. *) - Printf.fprintf chan "\n\n" - -let gnu_header chan = - List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ - "@c Copyright (C) 2006-2014 Free Software Foundation, Inc."; - "@c This is part of the GCC manual."; - "@c For copying conditions, see the file gcc.texi."; - ""; - "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; - "@c Please do not edit manually."] - -let crypto_doc = -" -@itemize @bullet -@item poly128_t vldrq_p128(poly128_t const *) -@end itemize - -@itemize @bullet -@item void vstrq_p128(poly128_t *, poly128_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint32_t vsha1h_u32 (uint32_t) -@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su0.32 
@var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_p64 (poly64_t a, poly64_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize -" - -(* Program entry point. *) -let _ = - if Array.length Sys.argv <> 2 then - failwith "Usage: neon-docgen " - else - let file = Sys.argv.(1) in - try - let chan = open_out file in - gnu_header chan; - List.iter (document_group chan) intrinsic_groups; - Printf.fprintf chan "%s\n" crypto_doc; - close_out chan - with Sys_error sys -> - failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml deleted file mode 100644 index f3dd86b0a..000000000 --- a/gcc-4.9/gcc/config/arm/neon-gen.ml +++ /dev/null @@ -1,520 +0,0 @@ -(* Auto-generate ARM Neon intrinsics header file. - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - . - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. - - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-gen neon.cmo neon-gen.ml - - Run with: - ./neon-gen > arm_neon.h -*) - -open Neon - -(* The format codes used in the following functions are documented at: - http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ - #6_printflikefunctionsforprettyprinting - (one line, remove the backslash.) -*) - -(* Following functions can be used to approximate GNU indentation style. 
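   As a concrete target for that style, each printed intrinsic is expected
   to come out roughly like this (an illustrative sketch based on the
   printing code below; vadd_s8 and its builtin call are assumed examples,
   the trailing 1 being the extra information word appended for All shapes):

     __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
     vadd_s8 (int8x8_t __a, int8x8_t __b)
     {
       return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1);
     }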
*) -let start_function () = - Format.printf "@["; - ref 0 - -let end_function nesting = - match !nesting with - 0 -> Format.printf "@;@;@]" - | _ -> failwith ("Bad nesting (ending function at level " - ^ (string_of_int !nesting) ^ ")") - -let open_braceblock nesting = - begin match !nesting with - 0 -> Format.printf "@,@<0>{@[@," - | _ -> Format.printf "@,@[ @<0>{@[@," - end; - incr nesting - -let close_braceblock nesting = - decr nesting; - match !nesting with - 0 -> Format.printf "@]@,@<0>}" - | _ -> Format.printf "@]@,@<0>}@]" - -let print_function arity fnname body = - let ffmt = start_function () in - Format.printf "__extension__ static __inline "; - let inl = "__attribute__ ((__always_inline__))" in - begin match arity with - Arity0 ret -> - Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname - | Arity1 (ret, arg0) -> - Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname - (string_of_vectype arg0) - | Arity2 (ret, arg0, arg1) -> - Format.printf "%s %s@,%s (%s __a, %s __b)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) - | Arity3 (ret, arg0, arg1, arg2) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - | Arity4 (ret, arg0, arg1, arg2, arg3) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - (string_of_vectype arg3) - end; - open_braceblock ffmt; - let rec print_lines = function - [] -> () - | "" :: lines -> print_lines lines - | [line] -> Format.printf "%s" line - | line::lines -> Format.printf "%s@," line ; print_lines lines in - print_lines body; - close_braceblock ffmt; - end_function ffmt - -let union_string num elts base = - let itype = inttype_for_array num elts in - let iname = string_of_inttype itype - and sname = string_of_vectype (T_arrayof (num, elts)) in - Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base - -let rec signed_ctype = function - T_uint8x8 | T_poly8x8 -> T_int8x8 - | T_uint8x16 | T_poly8x16 -> T_int8x16 - | T_uint16x4 | T_poly16x4 -> T_int16x4 - | T_uint16x8 | T_poly16x8 -> T_int16x8 - | T_uint32x2 -> T_int32x2 - | T_uint32x4 -> T_int32x4 - | T_uint64x1 -> T_int64x1 - | T_uint64x2 -> T_int64x2 - | T_poly64x2 -> T_int64x2 - (* Cast to types defined by mode in arm.c, not random types pulled in from - the header in use. This fixes incompatible pointer errors when - compiling with C++. *) - | T_uint8 | T_int8 -> T_intQI - | T_uint16 | T_int16 -> T_intHI - | T_uint32 | T_int32 -> T_intSI - | T_uint64 | T_int64 -> T_intDI - | T_float16 -> T_floatHF - | T_float32 -> T_floatSF - | T_poly8 -> T_intQI - | T_poly16 -> T_intHI - | T_poly64 -> T_intDI - | T_poly128 -> T_intTI - | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) - | T_ptrto elt -> T_ptrto (signed_ctype elt) - | T_const elt -> T_const (signed_ctype elt) - | x -> x - -let add_cast ctype cval = - let stype = signed_ctype ctype in - if ctype <> stype then - Printf.sprintf "(%s) %s" (string_of_vectype stype) cval - else - cval - -let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" - -(* Return a tuple of a list of declarations to go at the start of the function, - and a list of statements needed to return THING. 
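   For example, when the return type is an array type such as int8x8x2_t,
   the generated statements use the union built by union_string to convert
   from the builtin's opaque integer mode (an illustrative sketch; the vld2
   builtin call is an assumed example):

     union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
     __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
     return __rv.__i;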
*) -let return arity thing = - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let uname = union_string num vec "__rv" in - [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] - | T_void -> - [], [thing ^ ";"] - | _ -> - [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] - end - -let mask_shape_for_shuffle = function - All (num, reg) -> All (num, reg) - | Pair_result reg -> All (2, reg) - | _ -> failwith "mask_for_shuffle" - -let mask_elems shuffle shape elttype part = - let elem_size = elt_width elttype in - let num_elems = - match regmap shape 0 with - Dreg -> 64 / elem_size - | Qreg -> 128 / elem_size - | _ -> failwith "mask_elems" in - shuffle elem_size num_elems part - -(* Return a tuple of a list of declarations 0and a list of statements needed - to implement an intrinsic using __builtin_shuffle. SHUFFLE is a function - which returns a list of elements suitable for using as a mask. *) - -let shuffle_fn shuffle shape arity elttype = - let mshape = mask_shape_for_shuffle shape in - let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in - let masktype_str = string_of_vectype masktype in - let shuffle_res = type_for_elt mshape elttype 0 in - let shuffle_res_str = string_of_vectype shuffle_res in - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let elems1 = mask_elems shuffle mshape elttype `lo - and elems2 = mask_elems shuffle mshape elttype `hi in - let mask1 = (String.concat ", " (List.map string_of_int elems1)) - and mask2 = (String.concat ", " (List.map string_of_int elems2)) in - let shuf1 = Printf.sprintf - "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask1 - and shuf2 = Printf.sprintf - "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask2 in - [Printf.sprintf "%s __rv;" (string_of_vectype ret);], - [shuf1; shuf2; "return __rv;"] - | _ -> - let elems = mask_elems shuffle mshape elttype `lo in - let mask = (String.concat ", " (List.map string_of_int elems)) in - let shuf = Printf.sprintf - "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in - [""], - [shuf] - end - -let rec element_type ctype = - match ctype with - T_arrayof (_, v) -> element_type v - | _ -> ctype - -let params ps = - let pdecls = ref [] in - let ptype t p = - match t with - T_arrayof (num, elts) -> - let uname = union_string num elts (p ^ "u") in - let decl = Printf.sprintf "%s = { %s };" uname p in - pdecls := decl :: !pdecls; - p ^ "u.__o" - | _ -> add_cast t p in - let plist = match ps with - Arity0 _ -> [] - | Arity1 (_, t1) -> [ptype t1 "__a"] - | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] - | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] - | Arity4 (_, t1, t2, t3, t4) -> - [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in - !pdecls, plist - -let modify_params features plist = - let is_flipped = - List.exists (function Flipped _ -> true | _ -> false) features in - if is_flipped then - match plist with - [ a; b ] -> [ b; a ] - | _ -> - failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) - else - plist - -(* !!! Decide whether to add an extra information word based on the shape - form. 
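   When used, the word computed by infoword_value below is passed as a
   trailing integer argument to the builtin.  For instance (an assumed
   illustration), the unsigned variant vadd_u8 would be emitted as:

     return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);

   where the final 0 encodes "unsigned, no rounding" per the bit layout
   documented just below.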
*) -let extra_word shape features paramlist bits = - let use_word = - match shape with - All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow - | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm - | Narrow_imm -> true - | _ -> List.mem InfoWord features - in - if use_word then - paramlist @ [string_of_int bits] - else - paramlist - -(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). - Bit 1 represents floats & polynomials (1), or ordinary integers (0). - Bit 2 represents rounding (1) vs none (0). *) -let infoword_value elttype features = - let bits01 = - match elt_class elttype with - Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 - | Poly -> 0b010 - | Float -> 0b011 - | _ -> 0b000 - and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in - bits01 lor rounding_bit - -(* "Cast" type operations will throw an exception in mode_of_elt (actually in - elt_width, called from there). Deal with that here, and generate a suffix - with multiple modes (). *) -let rec mode_suffix elttype shape = - try - let mode = mode_of_elt elttype shape in - string_of_mode mode - with MixedMode (dst, src) -> - let dstmode = mode_of_elt ~argpos:0 dst shape - and srcmode = mode_of_elt ~argpos:1 src shape in - string_of_mode dstmode ^ string_of_mode srcmode - -let get_shuffle features = - try - match List.find (function Use_shuffle _ -> true | _ -> false) features with - Use_shuffle fn -> Some fn - | _ -> None - with Not_found -> None - -let print_feature_test_start features = - try - match List.find (fun feature -> - match feature with Requires_feature _ -> true - | Requires_arch _ -> true - | Requires_FP_bit _ -> true - | _ -> false) - features with - Requires_feature feature -> - Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature - | Requires_arch arch -> - Format.printf "#if __ARM_ARCH >= %d@\n" arch - | Requires_FP_bit bit -> - Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" - (1 lsl bit) - | _ -> assert false - with Not_found -> assert true - -let print_feature_test_end features = - let feature = - List.exists (function Requires_feature _ -> true - | Requires_arch _ -> true - | Requires_FP_bit _ -> true - | _ -> false) features in - if feature then Format.printf "#endif@\n" - - -let print_variant opcode features shape name (ctype, asmtype, elttype) = - let bits = infoword_value elttype features in - let modesuf = mode_suffix elttype shape in - let pdecls, paramlist = params ctype in - let rdecls, stmts = - match get_shuffle features with - Some shuffle -> shuffle_fn shuffle shape ctype elttype - | None -> - let paramlist' = modify_params features paramlist in - let paramlist'' = extra_word shape features paramlist' bits in - let parstr = String.concat ", " paramlist'' in - let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" - (builtin_name features name) modesuf parstr in - return ctype builtin in - let body = pdecls @ rdecls @ stmts - and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in - begin - print_feature_test_start features; - print_function ctype fnname body; - print_feature_test_end features; - end - -(* When this function processes the element types in the ops table, it rewrites - them in a list of tuples (a,b,c): - a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) - b : Asm type : a single, processed element type, e.g. P16. This is the - type which should be attached to the asm opcode. - c : Variant type : the unprocessed type for this variant (e.g. 
in add - instructions which don't care about the sign, b might be i16 and c - might be s16.) -*) - -let print_op (opcode, features, shape, name, munge, types) = - let sorted_types = List.sort compare types in - let munged_types = List.map - (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in - List.iter - (fun variant -> print_variant opcode features shape name variant) - munged_types - -let print_ops ops = - List.iter print_op ops - -(* Output type definitions. Table entries are: - cbase : "C" name for the type. - abase : "ARM" base name for the type (i.e. int in int8x8_t). - esize : element size. - enum : element count. - alevel: architecture level at which available. -*) - -type fpulevel = CRYPTO | ALL - -let deftypes () = - let typeinfo = [ - (* Doubleword vector types. *) - "__builtin_neon_qi", "int", 8, 8, ALL; - "__builtin_neon_hi", "int", 16, 4, ALL; - "__builtin_neon_si", "int", 32, 2, ALL; - "__builtin_neon_di", "int", 64, 1, ALL; - "__builtin_neon_hf", "float", 16, 4, ALL; - "__builtin_neon_sf", "float", 32, 2, ALL; - "__builtin_neon_poly8", "poly", 8, 8, ALL; - "__builtin_neon_poly16", "poly", 16, 4, ALL; - "__builtin_neon_poly64", "poly", 64, 1, CRYPTO; - "__builtin_neon_uqi", "uint", 8, 8, ALL; - "__builtin_neon_uhi", "uint", 16, 4, ALL; - "__builtin_neon_usi", "uint", 32, 2, ALL; - "__builtin_neon_udi", "uint", 64, 1, ALL; - - (* Quadword vector types. *) - "__builtin_neon_qi", "int", 8, 16, ALL; - "__builtin_neon_hi", "int", 16, 8, ALL; - "__builtin_neon_si", "int", 32, 4, ALL; - "__builtin_neon_di", "int", 64, 2, ALL; - "__builtin_neon_sf", "float", 32, 4, ALL; - "__builtin_neon_poly8", "poly", 8, 16, ALL; - "__builtin_neon_poly16", "poly", 16, 8, ALL; - "__builtin_neon_poly64", "poly", 64, 2, CRYPTO; - "__builtin_neon_uqi", "uint", 8, 16, ALL; - "__builtin_neon_uhi", "uint", 16, 8, ALL; - "__builtin_neon_usi", "uint", 32, 4, ALL; - "__builtin_neon_udi", "uint", 64, 2, ALL - ] in - List.iter - (fun (cbase, abase, esize, enum, fpulevel) -> - let attr = - match enum with - 1 -> "" - | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" - (esize * enum / 8) in - if fpulevel == CRYPTO then - Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; - Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr; - if fpulevel == CRYPTO then - Format.printf "#endif\n";) - typeinfo; - Format.print_newline (); - (* Extra types not in . *) - Format.printf "typedef float float32_t;\n"; - Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; - Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"; - Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; - Format.printf "typedef __builtin_neon_poly64 poly64_t;\n"; - Format.printf "typedef __builtin_neon_poly128 poly128_t;\n"; - Format.printf "#endif\n" - -(* Output structs containing arrays, for load & store instructions etc. - poly128_t is deliberately not included here because it has no array types - defined for it. 
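   Each such struct wraps a plain array of the element vector type; for
   example, the first doubleword entry below with an array size of 2
   expands to:

     typedef struct int8x8x2_t
     {
       int8x8_t val[2];
     } int8x8x2_t;

   with the poly64 rows additionally wrapped in #ifdef __ARM_FEATURE_CRYPTO.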
*) - -let arrtypes () = - let typeinfo = [ - "int", 8, ALL; "int", 16, ALL; - "int", 32, ALL; "int", 64, ALL; - "uint", 8, ALL; "uint", 16, ALL; - "uint", 32, ALL; "uint", 64, ALL; - "float", 32, ALL; "poly", 8, ALL; - "poly", 16, ALL; "poly", 64, CRYPTO - ] in - let writestruct elname elsize regsize arrsize fpulevel = - let elnum = regsize / elsize in - let structname = - Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in - let sfmt = start_function () in - Format.printf "%stypedef struct %s" - (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname; - open_braceblock sfmt; - Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; - close_braceblock sfmt; - Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else ""); - end_function sfmt; - in - for n = 2 to 4 do - List.iter - (fun (elname, elsize, alevel) -> - writestruct elname elsize 64 n alevel; - writestruct elname elsize 128 n alevel) - typeinfo - done - -let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) - -(* Do it. *) - -let _ = - print_lines [ -"/* ARM NEON intrinsics include file. This file is generated automatically"; -" using neon-gen.ml. Please do not edit manually."; -""; -" Copyright (C) 2006-2014 Free Software Foundation, Inc."; -" Contributed by CodeSourcery."; -""; -" This file is part of GCC."; -""; -" GCC is free software; you can redistribute it and/or modify it"; -" under the terms of the GNU General Public License as published"; -" by the Free Software Foundation; either version 3, or (at your"; -" option) any later version."; -""; -" GCC is distributed in the hope that it will be useful, but WITHOUT"; -" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; -" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; -" License for more details."; -""; -" Under Section 7 of GPL version 3, you are granted additional"; -" permissions described in the GCC Runtime Library Exception, version"; -" 3.1, as published by the Free Software Foundation."; -""; -" You should have received a copy of the GNU General Public License and"; -" a copy of the GCC Runtime Library Exception along with this program;"; -" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; -" . */"; -""; -"#ifndef _GCC_ARM_NEON_H"; -"#define _GCC_ARM_NEON_H 1"; -""; -"#ifndef __ARM_NEON__"; -"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; -"#else"; -""; -"#ifdef __cplusplus"; -"extern \"C\" {"; -"#endif"; -""; -"#include "; -""]; - deftypes (); - arrtypes (); - Format.print_newline (); - print_ops ops; - Format.print_newline (); - print_ops reinterp; - print_ops reinterpq; - Format.printf "%s" crypto_intrinsics; - print_lines [ -"#ifdef __cplusplus"; -"}"; -"#endif"; -"#endif"; -"#endif"] diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h index 9deda9679..645551fdf 100644 --- a/gcc-4.9/gcc/config/arm/netbsd-elf.h +++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Run-time Target Specification. */ diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h index b5055ce40..4d57d1388 100644 --- a/gcc-4.9/gcc/config/arm/uclinux-eabi.h +++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Override settings that are different to the uclinux-elf or diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h index 5cd4fe527..43edba70c 100644 --- a/gcc-4.9/gcc/config/arm/uclinux-elf.h +++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* We don't want a PLT. */ diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md index 87edf18e2..db3fea5f5 100644 --- a/gcc-4.9/gcc/config/arm/unspecs.md +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -58,6 +58,7 @@ ; instruction stream. UNSPEC_PIC_OFFSET ; A symbolic 12-bit OFFSET that has been treated ; correctly for PIC usage. + UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol. UNSPEC_GOTSYM_OFF ; The offset of the start of the GOT from a ; a given symbolic address. UNSPEC_THUMB1_CASESI ; A Thumb1 compressed dispatch-table call. @@ -70,6 +71,11 @@ ; that. UNSPEC_UNALIGNED_STORE ; Same for str/strh. UNSPEC_PIC_UNIFIED ; Create a common pic addressing form. + UNSPEC_PROLOGUE_USE ; As USE insns are not meaningful after reload, + ; this unspec is used to prevent the deletion of + ; instructions setting registers for EH handling + ; and stack frame generation. Operand 0 is the + ; register to "use". UNSPEC_LL ; Represent an unpaired load-register-exclusive. UNSPEC_VRINTZ ; Represent a float to integral float rounding ; towards zero. @@ -83,11 +89,12 @@ ; FPSCR rounding mode and signal inexactness. UNSPEC_VRINTA ; Represent a float to integral float rounding ; towards nearest, ties away from zero. - UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol ]) (define_c_enum "unspec" [ UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction. + UNSPEC_WMADDS ; Used by the intrinsic form of the iWMMXt WMADDS instruction. 
+ UNSPEC_WMADDU ; Used by the intrinsic form of the iWMMXt WMADDU instruction. UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction. UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction. UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction. diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h index 8bef16bc4..faf84724d 100644 --- a/gcc-4.9/gcc/config/arm/vxworks.h +++ b/gcc-4.9/gcc/config/arm/vxworks.h @@ -17,8 +17,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md index 1652415b1..9c8489edd 100644 --- a/gcc-4.9/gcc/config/avr/avr-fixed.md +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -430,8 +430,8 @@ } // Input and output of the libgcc function - const unsigned int regno_in[] = { -1, 22, 22, -1, 18 }; - const unsigned int regno_out[] = { -1, 24, 24, -1, 22 }; + const unsigned int regno_in[] = { -1U, 22, 22, -1U, 18 }; + const unsigned int regno_out[] = { -1U, 24, 24, -1U, 22 }; operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); diff --git a/gcc-4.9/gcc/config/avr/avr.h b/gcc-4.9/gcc/config/avr/avr.h index 78434ec5e..9d34983e4 100644 --- a/gcc-4.9/gcc/config/avr/avr.h +++ b/gcc-4.9/gcc/config/avr/avr.h @@ -251,18 +251,18 @@ enum reg_class { #define REG_CLASS_CONTENTS { \ {0x00000000,0x00000000}, /* NO_REGS */ \ {0x00000001,0x00000000}, /* R0_REG */ \ - {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ - {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ - {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ + {3u << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ + {3u << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ + {3u << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ {0x00000000,0x00000003}, /* STACK_REG, STACK */ \ - {(3 << REG_Y) | (3 << REG_Z), \ + {(3u << REG_Y) | (3u << REG_Z), \ 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \ - {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \ + {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z), \ 0x00000000}, /* POINTER_REGS, r26 - r31 */ \ - {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \ + {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z) | (3u << REG_W), \ 0x00000000}, /* ADDW_REGS, r24 - r31 */ \ {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \ - {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \ + {(3u << REG_X)|(3u << REG_Y)|(3u << REG_Z)|(3u << REG_W)|(0xffu << 16),\ 0x00000000}, /* LD_REGS, r16 - r31 */ \ {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \ {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - r31 */ \ diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md index 2c59bf3f9..3bb2a914a 100644 --- 
a/gcc-4.9/gcc/config/avr/avr.md +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -368,6 +368,15 @@ "" { int i; + + // Avoid (subreg (mem)) for non-generic address spaces below. Because + // of the poor addressing capabilities of these spaces it's better to + // load them in one chunk. And it avoids PR61443. + + if (MEM_P (operands[0]) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[0]))) + operands[0] = copy_to_mode_reg (mode, operands[0]); + for (i = GET_MODE_SIZE (mode) - 1; i >= 0; --i) { rtx part = simplify_gen_subreg (QImode, operands[0], mode, i); diff --git a/gcc-4.9/gcc/config/dbx.h b/gcc-4.9/gcc/config/dbx.h index 1b68bcd9a..8173bb06c 100644 --- a/gcc-4.9/gcc/config/dbx.h +++ b/gcc-4.9/gcc/config/dbx.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* This file causes gcc to prefer using DBX (stabs) debugging diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c index 1f5a11c9c..80f6a0879 100644 --- a/gcc-4.9/gcc/config/i386/driver-i386.c +++ b/gcc-4.9/gcc/config/i386/driver-i386.c @@ -739,6 +739,11 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Core 2. */ cpu = "core2"; } + else if (has_longmode) + /* Perhaps some emulator? Assume x86-64, otherwise gcc + -march=native would be unusable for 64-bit compilations, + as all the CPUs below are 32-bit only. */ + cpu = "x86-64"; else if (has_sse3) /* It is Core Duo. */ cpu = "pentium-m"; diff --git a/gcc-4.9/gcc/config/i386/gnu-user.h b/gcc-4.9/gcc/config/i386/gnu-user.h index d9e3fa434..21b9e9692 100644 --- a/gcc-4.9/gcc/config/i386/gnu-user.h +++ b/gcc-4.9/gcc/config/i386/gnu-user.h @@ -70,10 +70,12 @@ along with GCC; see the file COPYING3. If not see "--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}} " \ LINUX_OR_ANDROID_CC ("", ANDROID_ASM_SPEC) -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ +#undef SUBTARGET_EXTRA_SPECS_STR +#define SUBTARGET_EXTRA_SPECS_STR \ { "link_emulation", GNU_USER_LINK_EMULATION },\ { "dynamic_linker", GNU_USER_DYNAMIC_LINKER } +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS SUBTARGET_EXTRA_SPECS_STR #define GNU_USER_TARGET_LINK_SPEC "-m %(link_emulation) %{shared:-shared} \ %{!shared: \ diff --git a/gcc-4.9/gcc/config/i386/i386-protos.h b/gcc-4.9/gcc/config/i386/i386-protos.h index 6e3297880..fc0eb53f8 100644 --- a/gcc-4.9/gcc/config/i386/i386-protos.h +++ b/gcc-4.9/gcc/config/i386/i386-protos.h @@ -28,6 +28,16 @@ extern bool ix86_target_stack_probe (void); extern bool ix86_can_use_return_insn_p (void); extern void ix86_setup_frame_addresses (void); +/* Section names for function patch prologue and epilogue section. See + ix86_output_function_nops_prologue_epilogue() in i386.c for details. 
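   (As an aside: on a binary built with -mpatch-functions-for-instrumentation,
   the entry points recorded in these sections can be inspected with, e.g.,
   "objdump -s -j _function_patch_prologue"; the objdump invocation is an
   illustrative suggestion, not something this patch provides.)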
*/
+#define FUNCTION_PATCH_PROLOGUE_SECTION "_function_patch_prologue"
+#define FUNCTION_PATCH_EPILOGUE_SECTION "_function_patch_epilogue"
+
+extern bool ix86_output_function_nops_prologue_epilogue (FILE *,
+                                                         const char *,
+                                                         const char *,
+                                                         int);
+
 extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
 extern void ix86_expand_prologue (void);
 extern void ix86_maybe_emit_epilogue_vzeroupper (void);
@@ -312,6 +322,7 @@ extern enum attr_cpu ix86_schedule;
 #endif
 extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
+extern bool adjacent_mem_locations (rtx mem1, rtx mem2);
 
 #ifdef RTX_CODE
 /* Target data for multipass lookahead scheduling.
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index d7c592f48..df504335e 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -78,6 +78,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic.h"
 #include "dumpfile.h"
 #include "tree-pass.h"
+#include "cfgloop.h"
 #include "context.h"
 #include "pass_manager.h"
 #include "target-globals.h"
@@ -5017,8 +5018,11 @@ ix86_in_large_data_p (tree exp)
       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
 
       /* If this is an incomplete type with size 0, then we can't put it
-	 in data because it might be too big when completed.  */
-      if (!size || size > ix86_section_threshold)
+	 in data because it might be too big when completed.  Also,
+	 int_size_in_bytes returns -1 if the size can vary or is larger than
+	 an integer, in which case it is also safer to assume that it goes in
+	 large data.  */
+      if (size <= 0 || size > ix86_section_threshold)
 	return true;
     }
 
@@ -11730,6 +11734,246 @@ ix86_expand_epilogue (int style)
   m->fs = frame_state_save;
 }
 
+
+/* True if the current function should be patched with nops at prologue and
+   returns.  */
+static bool patch_current_function_p = false;
+
+static inline bool
+has_attribute (const char* attribute_name)
+{
+  return lookup_attribute (attribute_name,
+                           DECL_ATTRIBUTES (current_function_decl)) != NULL;
+}
+
+/* Return true if we patch the current function.  By default a function
+   is patched if it has loops or if the number of insns is greater than
+   patch_functions_min_instructions (number of insns roughly translates
+   to number of instructions).  */
+
+static bool
+check_should_patch_current_function (void)
+{
+  int num_insns = 0;
+  rtx insn;
+  const char *func_name = NULL;
+  struct loops *loops;
+  int num_loops = 0;
+  int min_functions_instructions;
+
+  /* If a function has an attribute forcing patching on or off, do as it
+     indicates.  */
+  if (has_attribute ("always_patch_for_instrumentation"))
+    return true;
+  else if (has_attribute ("never_patch_for_instrumentation"))
+    return false;
+
+  /* Patch the function if it has at least one loop.  */
+  if (!patch_functions_ignore_loops)
+    {
+      if (DECL_STRUCT_FUNCTION (current_function_decl)->cfg)
+	{
+	  loops = flow_loops_find (NULL);
+	  num_loops = loops->larray->length();
+	  /* FIXME - Deallocating the loop causes a seg-fault.  */
+#if 0
+	  flow_loops_free (loops);
+#endif
+	  /* We are not concerned with the function body as a loop.  */
+	  if (num_loops > 1)
+	    return true;
+	}
+    }
+
+  /* Otherwise, check if the function has more than
+     patch_functions_min_instructions.  */
+
+  /* Borrowed this code from rest_of_handle_final() in final.c.
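     As a usage note for the attribute checks above, patching can be forced
     or suppressed per function from user code (a hypothetical example; the
     function names are made up):

       void hot_path (void) __attribute__ ((always_patch_for_instrumentation));
       void tiny_stub (void) __attribute__ ((never_patch_for_instrumentation));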
*/ + func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + if (!patch_functions_dont_always_patch_main && + func_name && + strcmp("main", func_name) == 0) + return true; + + min_functions_instructions = + PARAM_VALUE (PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS); + if (min_functions_instructions > 0) + { + /* Calculate the number of instructions in this function and only emit + function patch for instrumentation if it is greater than + patch_functions_min_instructions. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (NONDEBUG_INSN_P (insn)) + ++num_insns; + } + if (num_insns < min_functions_instructions) + return false; + } + + return true; +} + +/* Emit the 11-byte patch space for the function prologue for functions that + qualify. */ + +static void +ix86_output_function_prologue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + /* Only for 64-bit target. */ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + patch_current_function_p = check_should_patch_current_function(); + /* Emit the instruction 'jmp 09' followed by 9 bytes to make it 11-bytes + of nop. */ + ix86_output_function_nops_prologue_epilogue ( + file, + FUNCTION_PATCH_PROLOGUE_SECTION, + ASM_BYTE"0xeb,0x09", + 9); + } +} + +/* Emit the nop bytes at function prologue or return (including tail call + jumps). The number of nop bytes generated is at least 8. + Also emits a section named SECTION_NAME, which is a backpointer section + holding the addresses of the nop bytes in the text section. + SECTION_NAME is either '_function_patch_prologue' or + '_function_patch_epilogue'. The backpointer section can be used to navigate + through all the function entry and exit points which are patched with nops. + PRE_INSTRUCTIONS are the instructions, if any, at the start of the nop byte + sequence. NUM_REMAINING_NOPS are the number of nop bytes to fill, + excluding the number of bytes in PRE_INSTRUCTIONS. + Returns true if the function was patched, false otherwise. */ + +bool +ix86_output_function_nops_prologue_epilogue (FILE *file, + const char *section_name, + const char *pre_instructions, + int num_remaining_nops) +{ + static int labelno = 0; + char label[32], section_label[32]; + section *section = NULL; + int num_actual_nops = num_remaining_nops - sizeof(void *); + unsigned int section_flags = SECTION_RELRO; + char *section_name_comdat = NULL; + const char *decl_section_name = NULL; + const char *func_name = NULL; + char *section_name_function_sections = NULL; + size_t len; + + gcc_assert (num_remaining_nops >= 0); + + if (!patch_current_function_p) + return false; + + ASM_GENERATE_INTERNAL_LABEL (label, "LFPEL", labelno); + ASM_GENERATE_INTERNAL_LABEL (section_label, "LFPESL", labelno++); + + /* Align the start of nops to 2-byte boundary so that the 2-byte jump + instruction can be patched atomically at run time. */ + ASM_OUTPUT_ALIGN (file, 1); + + /* Emit nop bytes. They look like the following: + $LFPEL0: + + 0x90 (repeated num_actual_nops times) + .quad $LFPESL0 - . + followed by section 'section_name' which contains the address + of instruction at 'label'. + */ + ASM_OUTPUT_INTERNAL_LABEL (file, label); + if (pre_instructions) + fprintf (file, "%s\n", pre_instructions); + + while (num_actual_nops-- > 0) + asm_fprintf (file, ASM_BYTE"0x90\n"); + + fprintf (file, ASM_QUAD); + /* Output "section_label - ." for the relative address of the entry in + the section 'section_name'. 
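     Altogether, for a prologue patch (pre_instructions = the two-byte jmp,
     num_remaining_nops = 9) the emitted text is expected to look roughly
     like this on LP64, where sizeof(void *) == 8 leaves one 0x90 filler
     byte (a sketch; exact label spelling depends on the target macros):

       .p2align 1
     .LFPEL0:
       .byte 0xeb,0x09    # two-byte jmp over the patch area
       .byte 0x90         # nop filler
       .quad .LFPESL0 - .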
*/
+  assemble_name_raw (file, section_label);
+  fprintf (file, " - .");
+  fprintf (file, "\n");
+
+  /* Emit the backpointer section.  For functions belonging to comdat group,
+     we emit a different section named 'SECTION_NAME.foo' where 'foo' is
+     the name of the comdat section.  This section is later renamed to
+     'SECTION_NAME' by ix86_elf_asm_named_section().
+     We emit a unique section name for the back pointer section for comdat
+     functions because otherwise the 'get_section' call may return an existing
+     non-comdat section with the same name, leading to references from a
+     non-comdat section to comdat functions.
+  */
+  if (current_function_decl != NULL_TREE &&
+      DECL_ONE_ONLY (current_function_decl) &&
+      HAVE_COMDAT_GROUP)
+    {
+      decl_section_name =
+        TREE_STRING_POINTER (DECL_SECTION_NAME (current_function_decl));
+      len = strlen (decl_section_name) + strlen (section_name) + 2;
+      section_name_comdat = (char *) alloca (len);
+      sprintf (section_name_comdat, "%s.%s", section_name, decl_section_name);
+      section_name = section_name_comdat;
+      section_flags |= SECTION_LINKONCE;
+    }
+  else if (flag_function_sections)
+    {
+      func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+      if (func_name)
+        {
+          len = strlen (func_name) + strlen (section_name) + 2;
+          section_name_function_sections = (char *) alloca (len);
+          sprintf (section_name_function_sections, "%s.%s", section_name,
+                   func_name);
+          section_name = section_name_function_sections;
+        }
+    }
+  section = get_section (section_name, section_flags, current_function_decl);
+  switch_to_section (section);
+  /* Align the section to 8-byte boundary.  */
+  ASM_OUTPUT_ALIGN (file, 3);
+
+  /* Emit address of the start of nop bytes in the section:
+       $LFPESL0:
+         .quad $LFPEL0
+  */
+  ASM_OUTPUT_INTERNAL_LABEL (file, section_label);
+  fprintf(file, ASM_QUAD);
+  assemble_name_raw (file, label);
+  fprintf (file, "\n");
+
+  /* Switching back to text section.  */
+  switch_to_section (function_section (current_function_decl));
+  return true;
+}
+
+/* Strips the characters after '_function_patch_prologue' or
+   '_function_patch_epilogue' and emits the section.  */
+
+static void
+ix86_elf_asm_named_section (const char *name, unsigned int flags,
+                            tree decl)
+{
+  const char *section_name = name;
+  if (!flag_function_sections && HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
+    {
+      const int prologue_section_name_length =
+        sizeof(FUNCTION_PATCH_PROLOGUE_SECTION) - 1;
+      const int epilogue_section_name_length =
+        sizeof(FUNCTION_PATCH_EPILOGUE_SECTION) - 1;
+
+      if (strncmp (name, FUNCTION_PATCH_PROLOGUE_SECTION,
+                   prologue_section_name_length) == 0)
+        section_name = FUNCTION_PATCH_PROLOGUE_SECTION;
+      else if (strncmp (name, FUNCTION_PATCH_EPILOGUE_SECTION,
+                        epilogue_section_name_length) == 0)
+        section_name = FUNCTION_PATCH_EPILOGUE_SECTION;
+    }
+  default_elf_asm_named_section (section_name, flags, decl);
+}
+
 /* Reset from the function's potential modifications.
*/ static void @@ -12659,7 +12903,9 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && SYMBOL_REF_LOCAL_P (op0) + && (SYMBOL_REF_LOCAL_P (op0) + || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; break; @@ -21507,7 +21753,7 @@ ix86_expand_vec_perm (rtx operands[]) t1 = gen_reg_rtx (V32QImode); t2 = gen_reg_rtx (V32QImode); t3 = gen_reg_rtx (V32QImode); - vt2 = GEN_INT (128); + vt2 = GEN_INT (-128); for (i = 0; i < 32; i++) vec[i] = vt2; vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); @@ -23794,7 +24040,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, { const struct stringop_algs * algs; bool optimize_for_speed; - int max = -1; + int max = 0; const struct processor_costs *cost; int i; bool any_alg_usable_p = false; @@ -23832,7 +24078,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If expected size is not known but max size is small enough so inline version is a win, set expected size into the range. */ - if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size + if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) && expected_size == -1) expected_size = min_size / 2 + max_size / 2; @@ -23921,7 +24167,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - if (max == -1) + if (max <= 0) max = 4096; alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); @@ -24945,6 +25191,15 @@ ix86_output_call_insn (rtx insn, rtx call_op) else xasm = "jmp\t%A0"; + /* Just before the sibling call, add 11-bytes of nops to patch function + exit: 2 bytes for 'jmp 09' and remaining 9 bytes. */ + if (TARGET_64BIT && patch_functions_for_instrumentation) + ix86_output_function_nops_prologue_epilogue ( + asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + ASM_BYTE"0xeb, 0x09", + 9); + output_asm_insn (xasm, &call_op); return ""; } @@ -26238,13 +26493,17 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) { edge e; edge_iterator ei; - /* Assume that region is SCC, i.e. all immediate predecessors - of non-head block are in the same region. */ + + /* Regions are SCCs with the exception of selective + scheduling with pipelining of outer blocks enabled. + So also check that immediate predecessors of a non-head + block are in the same region. */ FOR_EACH_EDGE (e, ei, bb->preds) { /* Avoid creating of loop-carried dependencies through - using topological odering in region. */ - if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) + using topological ordering in the region. 
*/ + if (rgn == CONTAINING_RGN (e->src->index) + && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) add_dependee_for_func_arg (first_arg, e->src); } } @@ -28789,7 +29048,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name, ix86_builtins_isa[(int) code].isa = mask; mask &= ~OPTION_MASK_ISA_64BIT; - if (mask == 0 + if (flag_dyn_ipa + || mask == 0 || (mask & ix86_isa_flags) != 0 || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) @@ -37802,10 +38062,10 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) *total = 2; else if (flag_pic && SYMBOLIC_CONST (x) - && (!TARGET_64BIT - || (!GET_CODE (x) != LABEL_REF - && (GET_CODE (x) != SYMBOL_REF - || !SYMBOL_REF_LOCAL_P (x))))) + && !(TARGET_64BIT + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_LOCAL_P (x))))) *total = 1; else *total = 0; @@ -46745,6 +47005,70 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) atomic_feraiseexcept_call); } +/* Try to determine BASE/OFFSET/SIZE parts of the given MEM. + Return true if successful, false if all the values couldn't + be determined. + + This function only looks for REG/SYMBOL or REG/SYMBOL+CONST + address forms. */ + +static bool +get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, + HOST_WIDE_INT *size) +{ + rtx addr_rtx; + if MEM_SIZE_KNOWN_P (mem) + *size = MEM_SIZE (mem); + else + return false; + + if (GET_CODE (XEXP (mem, 0)) == CONST) + addr_rtx = XEXP (XEXP (mem, 0), 0); + else + addr_rtx = (XEXP (mem, 0)); + + if (GET_CODE (addr_rtx) == REG + || GET_CODE (addr_rtx) == SYMBOL_REF) + { + *base = addr_rtx; + *offset = 0; + } + else if (GET_CODE (addr_rtx) == PLUS + && CONST_INT_P (XEXP (addr_rtx, 1))) + { + *base = XEXP (addr_rtx, 0); + *offset = INTVAL (XEXP (addr_rtx, 1)); + } + else + return false; + + return true; +} + +/* If MEM1 is adjacent to MEM2 and MEM1 has lower address, + return true. */ + +extern bool +adjacent_mem_locations (rtx mem1, rtx mem2) +{ + rtx base1, base2; + HOST_WIDE_INT off1, size1, off2, size2; + + if (get_memref_parts (mem1, &base1, &off1, &size1) + && get_memref_parts (mem2, &base2, &off2, &size2)) + { + if (GET_CODE (base1) == SYMBOL_REF + && GET_CODE (base2) == SYMBOL_REF + && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2)) + return (off1 + size1 == off2); + else if (REG_P (base1) + && REG_P (base2) + && REGNO (base1) == REGNO (base2)) + return (off1 + size1 == off2); + } + return false; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -46787,9 +47111,15 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) #undef TARGET_BUILTIN_RECIPROCAL #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue + #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION ix86_elf_asm_named_section + #undef TARGET_ENCODE_SECTION_INFO #ifndef SUBTARGET_ENCODE_SECTION_INFO #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 9f103cf30..058702904 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -3201,7 +3201,7 @@ (const_string "1") (const_string "*"))) (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10,13,14,15") + (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15") (const_string "SI") (eq_attr "alternative" "11") (const_string "DI") @@ -4933,66 +4933,37 @@ ;; Avoid store forwarding (partial memory) stall penalty by extending ;; SImode value to DImode through XMM register instead of pushing two -;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC -;; targets benefit from this optimization. Also note that fild -;; loads from memory only. +;; SImode values to stack. Also note that fild loads from memory only. -(define_insn "*floatunssi2_1" - [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") +(define_insn_and_split "*floatunssi2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") (unsigned_float:X87MODEF - (match_operand:SI 1 "nonimmediate_operand" "x,m"))) - (clobber (match_operand:DI 2 "memory_operand" "=m,m")) - (clobber (match_scratch:SI 3 "=X,x"))] + (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:DI 3 "=x")) + (clobber (match_operand:DI 2 "memory_operand" "=m"))] "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE" + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" "#" - [(set_attr "type" "multi") - (set_attr "mode" "")]) - -(define_split - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "register_operand"))) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch:SI 3))] - "!TARGET_64BIT - && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE - && reload_completed" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) - (float:X87MODEF (match_dup 2)))] - "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") - -(define_split - [(set (match_operand:X87MODEF 0 "register_operand") - (unsigned_float:X87MODEF - (match_operand:SI 1 "memory_operand"))) - (clobber (match_operand:DI 2 "memory_operand")) - (clobber (match_scratch:SI 3))] - "!TARGET_64BIT - && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE - && reload_completed" - [(set (match_dup 2) (match_dup 3)) + "&& reload_completed" + [(set (match_dup 3) (zero_extend:DI (match_dup 1))) + (set (match_dup 2) (match_dup 3)) (set (match_dup 0) (float:X87MODEF (match_dup 2)))] -{ - emit_move_insn (operands[3], operands[1]); - operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); -}) + "" + [(set_attr "type" "multi") + (set_attr "mode" "")]) (define_expand "floatunssi2" [(parallel [(set 
(match_operand:X87MODEF 0 "register_operand") (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand"))) - (clobber (match_dup 2)) - (clobber (match_scratch:SI 3))])] + (clobber (match_scratch:DI 3)) + (clobber (match_dup 2))])] "!TARGET_64BIT && ((TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) - && TARGET_SSE) + && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" { if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) @@ -9627,7 +9598,7 @@ (define_insn "x86_64_shrd" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") - (ior:DI (ashiftrt:DI (match_dup 0) + (ior:DI (lshiftrt:DI (match_dup 0) (match_operand:QI 2 "nonmemory_operand" "Jc")) (ashift:DI (match_operand:DI 1 "register_operand" "r") (minus:QI (const_int 64) (match_dup 2))))) @@ -9643,7 +9614,7 @@ (define_insn "x86_shrd" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") - (ior:SI (ashiftrt:SI (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 0) (match_operand:QI 2 "nonmemory_operand" "Ic")) (ashift:SI (match_operand:SI 1 "register_operand" "r") (minus:QI (const_int 32) (match_dup 2))))) @@ -10095,13 +10066,13 @@ [(set (match_dup 3) (match_dup 4)) (parallel [(set (match_dup 4) - (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2)) + (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2)) (ashift:DWIH (match_dup 5) (minus:QI (match_dup 6) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))]) (parallel [(set (match_dup 5) - (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2)) + (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2)) (ashift:DWIH (match_dup 3) (minus:QI (match_dup 6) (match_dup 2))))) (clobber (reg:CC FLAGS_REG))])] @@ -11611,7 +11582,18 @@ (define_insn "simple_return_internal" [(simple_return)] "reload_completed" - "ret" +{ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + /* Emit 10 nop bytes after ret. */ + if (ix86_output_function_nops_prologue_epilogue (asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + "\tret", + 10)) + return ""; + } + return "ret"; +} [(set_attr "length" "1") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") @@ -11624,7 +11606,18 @@ [(simple_return) (unspec [(const_int 0)] UNSPEC_REP)] "reload_completed" - "rep%; ret" +{ + if (TARGET_64BIT && patch_functions_for_instrumentation) + { + /* Emit 9 nop bytes after rep;ret. */ + if (ix86_output_function_nops_prologue_epilogue (asm_out_file, + FUNCTION_PATCH_EPILOGUE_SECTION, + "\trep\;ret", + 9)) + return ""; + } + return "rep\;ret"; +} [(set_attr "length" "2") (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt index 0f463a238..1e00b660e 100644 --- a/gcc-4.9/gcc/config/i386/i386.opt +++ b/gcc-4.9/gcc/config/i386/i386.opt @@ -108,6 +108,10 @@ int x_ix86_dump_tunes TargetSave int x_ix86_force_align_arg_pointer +;; -mcopyrelocs= +TargetSave +int x_ix86_copyrelocs + ;; -mforce-drap= TargetSave int x_ix86_force_drap @@ -291,6 +295,10 @@ mfancy-math-387 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU +mcopyrelocs +Target Report Var(ix86_pie_copyrelocs) Init(0) +Assume copy relocations support for pie builds. 
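;; For illustration (an assumed example, not part of the option definition):
;; with -fpie -mcopyrelocs on x86-64, a read of an undefined global such as
;;   extern int counter;
;;   int get_counter (void) { return counter; }
;; may be compiled to a direct "movl counter(%rip), %eax" instead of a load
;; through the GOT, relying on the linker to materialize a copy relocation;
;; see the legitimate_pic_address_disp_p change in i386.c.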
+ mforce-drap Target Report Var(ix86_force_drap) Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack @@ -781,6 +789,18 @@ mrtm Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save Support RTM built-in functions and code generation +mpatch-functions-for-instrumentation +Target RejectNegative Report Var(patch_functions_for_instrumentation) Save +Patch function prologue and epilogue with custom NOPs for dynamic instrumentation. By default, functions with loops (controlled by -mpatch-functions-without-loop) or functions having instructions more than -mpatch-functions-min-instructions are patched. + +mpatch-functions-ignore-loops +Target RejectNegative Report Var(patch_functions_ignore_loops) Save +Ignore loops when deciding whether to patch a function for instrumentation (for use with -mpatch-functions-for-instrumentation). + +mno-patch-functions-main-always +Target Report RejectNegative Var(patch_functions_dont_always_patch_main) Save +Treat 'main' as any other function and only patch it if it meets the criteria for loops and minimum number of instructions (for use with -mpatch-functions-for-instrumentation). + mstack-protector-guard= Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS) Use given stack-protector guard diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h index 1fb1e0321..27d68b5db 100644 --- a/gcc-4.9/gcc/config/i386/linux.h +++ b/gcc-4.9/gcc/config/i386/linux.h @@ -20,4 +20,22 @@ along with GCC; see the file COPYING3. If not see . */ #define GNU_USER_LINK_EMULATION "elf_i386" -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" + +/* These may be provided by config/linux-grtev*.h. */ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + +#undef SUBTARGET_EXTRA_SPECS +#ifndef SUBTARGET_EXTRA_SPECS_STR +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS +#else +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS \ + SUBTARGET_EXTRA_SPECS_STR +#endif diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h index a90171e8c..5124a341b 100644 --- a/gcc-4.9/gcc/config/i386/linux64.h +++ b/gcc-4.9/gcc/config/i386/linux64.h @@ -27,6 +27,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define GNU_USER_LINK_EMULATION64 "elf_x86_64" #define GNU_USER_LINK_EMULATIONX32 "elf32_x86_64" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2" -#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux-x86-64.so.2" -#define GLIBC_DYNAMIC_LINKERX32 "/libx32/ld-linux-x32.so.2" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2" +#define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2" + +/* These may be provided by config/linux-grtev*.h. 
*/ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + LINUX_GRTE_EXTRA_SPECS + diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md index 72a4d6d07..27ade1964 100644 --- a/gcc-4.9/gcc/config/i386/sse.md +++ b/gcc-4.9/gcc/config/i386/sse.md @@ -8255,6 +8255,36 @@ DONE; }) +(define_expand "usadv16qi" + [(match_operand:V4SI 0 "register_operand") + (match_operand:V16QI 1 "register_operand") + (match_operand:V16QI 2 "nonimmediate_operand") + (match_operand:V4SI 3 "nonimmediate_operand")] + "TARGET_SSE2" +{ + rtx t1 = gen_reg_rtx (V2DImode); + rtx t2 = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2])); + convert_move (t2, t1, 0); + emit_insn (gen_addv4si3 (operands[0], t2, operands[3])); + DONE; +}) + +(define_expand "usadv32qi" + [(match_operand:V8SI 0 "register_operand") + (match_operand:V32QI 1 "register_operand") + (match_operand:V32QI 2 "nonimmediate_operand") + (match_operand:V8SI 3 "nonimmediate_operand")] + "TARGET_AVX2" +{ + rtx t1 = gen_reg_rtx (V4DImode); + rtx t2 = gen_reg_rtx (V8SImode); + emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2])); + convert_move (t2, t1, 0); + emit_insn (gen_addv8si3 (operands[0], t2, operands[3])); + DONE; +}) + (define_insn "ashr3" [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") (ashiftrt:VI24_AVX2 @@ -15606,3 +15636,37 @@ [(set_attr "type" "sselog1") (set_attr "length_immediate" "1") (set_attr "mode" "TI")]) + +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL +;; is true. +(define_peephole2 + [(set (match_operand:DF 0 "register_operand") + (match_operand:DF 1 "memory_operand")) + (set (match_operand:V2DF 2 "register_operand") + (vec_concat:V2DF (match_dup 0) + (match_operand:DF 3 "memory_operand")))] + "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + && REGNO (operands[0]) == REGNO (operands[2]) + && adjacent_mem_locations (operands[1], operands[3])" + [(set (match_dup 2) + (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] +{ + operands[4] = gen_rtx_MEM (V2DFmode, XEXP(operands[1], 0)); +}) + +;; merge movsd/movhpd to movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL +;; is true. +(define_peephole2 + [(set (match_operand:DF 0 "memory_operand") + (vec_select:DF (match_operand:V2DF 1 "register_operand") + (parallel [(const_int 0)]))) + (set (match_operand:DF 2 "memory_operand") + (vec_select:DF (match_dup 1) + (parallel [(const_int 1)])))] + "TARGET_SSE_UNALIGNED_STORE_OPTIMAL + && adjacent_mem_locations (operands[0], operands[2])" + [(set (match_dup 3) + (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] +{ + operands[3] = gen_rtx_MEM (V2DFmode, XEXP(operands[0], 0)); +}) diff --git a/gcc-4.9/gcc/config/initfini-array.h b/gcc-4.9/gcc/config/initfini-array.h index f7ae836e6..67e66f6f2 100644 --- a/gcc-4.9/gcc/config/initfini-array.h +++ b/gcc-4.9/gcc/config/initfini-array.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
*/ #ifdef HAVE_INITFINI_ARRAY_SUPPORT diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h new file mode 100644 index 000000000..31e8a94ce --- /dev/null +++ b/gcc-4.9/gcc/config/linux-grte.h @@ -0,0 +1,41 @@ +/* Definitions for Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. + Contributed by Chris Demetriou and Ollie Wild. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Overrides LIB_SPEC from gnu-user.h. */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" + +/* When GRTE links statically, it needs its NSS and resolver libraries + linked in as well. Note that when linking statically, these are + enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */ +#undef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS \ + { "libc", "%{static:%(libc_static);:-lc}" }, \ + { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ + { "libc_static", "-lc -lresolv" }, \ + { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c index 6162675d8..2f1cd8e63 100644 --- a/gcc-4.9/gcc/config/linux.c +++ b/gcc-4.9/gcc/config/linux.c @@ -38,7 +38,9 @@ linux_libc_has_function (enum function_class fn_class) return true; if (OPTION_BIONIC) if (fn_class == function_c94 - || fn_class == function_c99_misc) + || fn_class == function_c99_misc + || (fn_class == function_sincos && !TARGET_ANDROID) + ) return true; return false; diff --git a/gcc-4.9/gcc/config/linux.h b/gcc-4.9/gcc/config/linux.h index d38ef81e3..7c2a8e2c0 100644 --- a/gcc-4.9/gcc/config/linux.h +++ b/gcc-4.9/gcc/config/linux.h @@ -73,13 +73,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see GLIBC_DYNAMIC_LINKER must be defined for each target using them, or GLIBC_DYNAMIC_LINKER32 and GLIBC_DYNAMIC_LINKER64 for targets supporting both 32-bit and 64-bit compilation. 
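With function_sincos reported as available by linux_libc_has_function above, GCC's cse_sincos pass can combine a sin/cos pair on the same argument into one sincos (via cexpi) call. A hedged example of code that can benefit; whether the fusion actually fires depends on the optimization level and target libc:

#include <math.h>

/* sin and cos of the same angle: candidates for a single sincos call
   when the libc hook says sincos exists.  */
void
polar_to_cart (double r, double phi, double *x, double *y)
{
  *x = r * cos (phi);
  *y = r * sin (phi);
}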
*/ -#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" -#define UCLIBC_DYNAMIC_LINKERX32 "/lib/ldx32-uClibc.so.0" -#define BIONIC_DYNAMIC_LINKER "/system/bin/linker" -#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker" -#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define UCLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib/ld64-uClibc.so.0" +#define UCLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/lib/ldx32-uClibc.so.0" +#define BIONIC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/system/bin/linker" +#define BIONIC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/system/bin/linker" +#define BIONIC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/system/bin/linker64" #define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32" #define GNU_USER_DYNAMIC_LINKER \ diff --git a/gcc-4.9/gcc/config/msp430/msp430-opts.h b/gcc-4.9/gcc/config/msp430/msp430-opts.h new file mode 100644 index 000000000..119cfcb7f --- /dev/null +++ b/gcc-4.9/gcc/config/msp430/msp430-opts.h @@ -0,0 +1,32 @@ +/* GCC option-handling definitions for the TI MSP430 + Copyright (C) 2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef MSP430_OPTS_H +#define MSP430_OPTS_H + +enum msp430_hwmult_types +{ + NONE, + AUTO, + SMALL, + LARGE, + F5SERIES +}; + +#endif diff --git a/gcc-4.9/gcc/config/msp430/msp430.c b/gcc-4.9/gcc/config/msp430/msp430.c index c844aa2a1..9eb140974 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.c +++ b/gcc-4.9/gcc/config/msp430/msp430.c @@ -730,6 +730,97 @@ msp430_get_raw_result_mode (int regno ATTRIBUTE_UNUSED) { return Pmode; } + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR msp430_gimplify_va_arg_expr + +#include "gimplify.h" +#include "gimple-expr.h" + +static tree +msp430_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree addr, t, type_size, rounded_size, valist_tmp; + unsigned HOST_WIDE_INT align, boundary; + bool indirect; + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type (type); + + align = PARM_BOUNDARY / BITS_PER_UNIT; + boundary = targetm.calls.function_arg_boundary (TYPE_MODE (type), type); + + /* When we align parameter on stack for caller, if the parameter + alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be + aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee + here with caller. */ + if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT) + boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + + boundary /= BITS_PER_UNIT; + + /* Hoist the valist value into a temporary for the moment. 
*/ + valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL); + + /* va_list pointer is aligned to PARM_BOUNDARY. If argument actually + requires greater alignment, we must perform dynamic alignment. */ + if (boundary > align + && !integer_zerop (TYPE_SIZE (type))) + { + /* FIXME: This is where this function diverts from targhooks.c: + std_gimplify_va_arg_expr(). It works, but I do not know why... */ + if (! POINTER_TYPE_P (type)) + { + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_build_pointer_plus_hwi (valist_tmp, boundary - 1)); + gimplify_and_add (t, pre_p); + + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp, + fold_build2 (BIT_AND_EXPR, TREE_TYPE (valist), + valist_tmp, + build_int_cst (TREE_TYPE (valist), -boundary))); + gimplify_and_add (t, pre_p); + } + } + else + boundary = align; + + /* If the actual alignment is less than the alignment of the type, + adjust the type accordingly so that we don't assume strict alignment + when dereferencing the pointer. */ + boundary *= BITS_PER_UNIT; + if (boundary < TYPE_ALIGN (type)) + { + type = build_variant_type_copy (type); + TYPE_ALIGN (type) = boundary; + } + + /* Compute the rounded size of the type. */ + type_size = size_in_bytes (type); + rounded_size = round_up (type_size, align); + + /* Reduce rounded_size so it's sharable with the postqueue. */ + gimplify_expr (&rounded_size, pre_p, post_p, is_gimple_val, fb_rvalue); + + /* Get AP. */ + addr = valist_tmp; + + /* Compute new value for AP. */ + t = fold_build_pointer_plus (valist_tmp, rounded_size); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + gimplify_and_add (t, pre_p); + + addr = fold_convert (build_pointer_type (type), addr); + + if (indirect) + addr = build_va_arg_indirect_ref (addr); + + addr = build_va_arg_indirect_ref (addr); + + return addr; +} /* Addressing Modes */ @@ -2185,8 +2276,32 @@ msp430_print_operand (FILE * file, rtx op, int letter) msp430_print_operand_addr (file, addr); break; - case CONST_INT: case CONST: + if (GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT) + { + op = XEXP (op, 0); + switch (INTVAL (XEXP (op, 2))) + { + case 0: + fprintf (file, "#lo ("); + msp430_print_operand_raw (file, XEXP (op, 0)); + fprintf (file, ")"); + break; + + case 16: + fprintf (file, "#hi ("); + msp430_print_operand_raw (file, XEXP (op, 0)); + fprintf (file, ")"); + break; + + default: + output_operand_lossage ("invalid zero extract"); + break; + } + break; + } + /* Fall through. */ + case CONST_INT: case SYMBOL_REF: case LABEL_REF: if (letter == 0) diff --git a/gcc-4.9/gcc/config/msp430/msp430.h b/gcc-4.9/gcc/config/msp430/msp430.h index 65d6ad66d..044e558a3 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.h +++ b/gcc-4.9/gcc/config/msp430/msp430.h @@ -55,8 +55,8 @@ extern bool msp430x; "%{mcpu=*:-mcpu=%*}%{!mcpu=*:%{mmcu=*:-mmcu=%*}} " /* Pass the CPU type on to the assembler. */ \ "%{mrelax=-mQ} " /* Pass the relax option on to the assembler. */ \ "%{mlarge:-ml} " /* Tell the assembler if we are building for the LARGE pointer model. */ \ - "%{!msim:-md} %{msim:%{mlarge:-md}}" /* Copy data from ROM to RAM if necessary. */ \ - "%{ffunction-sections:-gdwarf-sections}" /* If function sections are being created then create DWARF line number sections as well. */ + "%{!msim:-md} %{msim:%{mlarge:-md}} " /* Copy data from ROM to RAM if necessary. */ \ + "%{ffunction-sections:-gdwarf-sections} " /* If function sections are being created then create DWARF line number sections as well. 
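The ASM_SPEC hunk above works because adjacent C string literals concatenate with no implicit separator; without the added trailing spaces the -md fragment and the %{ffunction-sections:...} fragment would run together into one unparsable spec. A minimal illustration of the failure mode:

#include <stdio.h>

int
main (void)
{
  /* No space at the end of the first literal: the two spec fragments
     fuse into "...%{mlarge:-md}}%{ffunction-sections:...".  */
  const char *fused = "%{!msim:-md} %{msim:%{mlarge:-md}}"
                      "%{ffunction-sections:-gdwarf-sections}";
  puts (fused);
  return 0;
}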
*/ /* Enable linker section garbage collection by default, unless we are creating a relocatable binary (gc does not work) or debugging diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md index 74a98b480..5e890eced 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.md +++ b/gcc-4.9/gcc/config/msp430/msp430.md @@ -362,8 +362,8 @@ ; halves. (define_split [(set (match_operand:SI 0 "msp430_nonsubreg_operand") - (plus:SI (match_operand:SI 1 "nonimmediate_operand") - (match_operand:SI 2 "general_operand"))) + (plus:SI (match_operand:SI 1 "msp430_nonsubreg_operand") + (match_operand:SI 2 "msp430_nonsubreg_or_imm_operand"))) ] "" [(parallel [(set (match_operand:HI 3 "nonimmediate_operand" "=&rm") @@ -609,9 +609,15 @@ ; when the PSI value is negative.. ; ; Note: using PUSHM.A #1 is two bytes smaller than using PUSHX.A.... +; +; Note: We use a + constraint on operand 0 as otherwise GCC gets confused +; about extending a single PSI mode register into a pair of SImode registers +; with the same starting register. It thinks that the upper register of +; the pair is unused and so it can clobber it. Try compiling 20050826-2.c +; at -O2 to see this. (define_insn "zero_extendpsisi2" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "+r") (zero_extend:SI (match_operand:PSI 1 "register_operand" "r")))] "" "* @@ -1311,9 +1317,9 @@ "" "* if (REGNO (operands[0]) != REGNO (operands[1])) - return \"MOV.W\t%1, %0 { SUB.W\t#0, %0 { AND.W\t%2, %0\"; + return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; else - return \"SUB.W\t#0, %0 { AND.W\t%2, %0\"; + return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; " ) @@ -1324,9 +1330,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; " ) @@ -1337,9 +1343,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\"; " ) @@ -1350,9 +1356,9 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { 
MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; " ) @@ -1363,8 +1369,8 @@ "optimize > 2 && msp430_hwmult_type != NONE" "* if (msp430_use_f5_series_hwmult ()) - return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\"; else - return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; + return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\"; " ) diff --git a/gcc-4.9/gcc/config/msp430/predicates.md b/gcc-4.9/gcc/config/msp430/predicates.md index 9a8e2da0a..94a628cd5 100644 --- a/gcc-4.9/gcc/config/msp430/predicates.md +++ b/gcc-4.9/gcc/config/msp430/predicates.md @@ -73,6 +73,10 @@ (define_predicate "msp430_nonsubreg_operand" (match_code "reg,mem")) +(define_predicate "msp430_nonsubreg_or_imm_operand" + (ior (match_operand 0 "msp430_nonsubreg_operand") + (match_operand 0 "immediate_operand"))) + ; TRUE for constants which are bit positions for zero_extract (define_predicate "msp430_bitpos" (and (match_code "const_int") diff --git a/gcc-4.9/gcc/config/newlib-stdint.h b/gcc-4.9/gcc/config/newlib-stdint.h index f4a78a544..47445e42d 100644 --- a/gcc-4.9/gcc/config/newlib-stdint.h +++ b/gcc-4.9/gcc/config/newlib-stdint.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . 
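The NOP inserted after each DINT in the hardware-multiply sequences above reflects that on MSP430 the effect of DINT is delayed by one instruction, so the instruction immediately following it can still be interrupted. A sketch of the same idiom in C with inline assembly; this helper is illustrative and not from the patch:

/* Disable interrupts and ensure the disable has taken effect before
   the next instruction executes.  */
static inline void
disable_interrupts (void)
{
  __asm__ __volatile__ ("dint\n\tnop" ::: "memory");
}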
*/ /* newlib uses 32-bit long in certain cases for all non-SPU diff --git a/gcc-4.9/gcc/config/rs6000/htm.md b/gcc-4.9/gcc/config/rs6000/htm.md index 9dbb499ce..ca7f7fdf4 100644 --- a/gcc-4.9/gcc/config/rs6000/htm.md +++ b/gcc-4.9/gcc/config/rs6000/htm.md @@ -179,7 +179,7 @@ (const_int 0)] UNSPECV_HTM_TABORTWCI)) (set (subreg:CC (match_dup 2) 0) (match_dup 1)) - (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24))) + (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 28))) (parallel [(set (match_operand:SI 0 "int_reg_operand" "") (and:SI (match_dup 3) (const_int 15))) (clobber (scratch:CC))])] diff --git a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h index 38dc066d3..bf7fe3a75 100644 --- a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h +++ b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h @@ -46,12 +46,17 @@ extern "C" { typedef char TM_buff_type[16]; +/* Compatibility macro with s390. This macro can be used to determine + whether a transaction was successfully started from the __TM_begin() + and __TM_simple_begin() intrinsic functions below. */ +#define _HTM_TBEGIN_STARTED 1 + extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_simple_begin (void) { if (__builtin_expect (__builtin_tbegin (0), 1)) - return 1; + return _HTM_TBEGIN_STARTED; return 0; } @@ -61,7 +66,7 @@ __TM_begin (void* const TM_buff) { *_TEXASRL_PTR (TM_buff) = 0; if (__builtin_expect (__builtin_tbegin (0), 1)) - return 1; + return _HTM_TBEGIN_STARTED; #ifdef __powerpc64__ *_TEXASR_PTR (TM_buff) = __builtin_get_texasr (); #else diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h new file mode 100644 index 000000000..53997f027 --- /dev/null +++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h @@ -0,0 +1,41 @@ +/* Definitions for Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. + Contributed by Chris Demetriou and Ollie Wild. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Overrides LIB_LINUX_SPEC from sysv4.h. */ +#undef LIB_LINUX_SPEC +#define LIB_LINUX_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" + +/* When GRTE links statically, it needs its NSS and resolver libraries + linked in as well. Note that when linking statically, these are + enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. 
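The new _HTM_TBEGIN_STARTED macro above gives __TM_begin and __TM_simple_begin an s390-compatible success value. A hedged usage sketch, assuming a POWER8 target compiled with -mhtm:

#include <htmxlintrin.h>

long
try_transactional_incr (long *counter)
{
  if (__TM_simple_begin () == _HTM_TBEGIN_STARTED)
    {
      ++*counter;      /* runs transactionally */
      __TM_end ();
      return 1;
    }
  return 0;            /* transaction failed to start; caller falls back */
}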
*/ +#undef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS \ + { "libc", "%{static:%(libc_static);:-lc}" }, \ + { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ + { "libc_static", "-lc -lresolv" }, \ + { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/rs6000/linux64.h b/gcc-4.9/gcc/config/rs6000/linux64.h index dbc9a527e..52c233b7d 100644 --- a/gcc-4.9/gcc/config/rs6000/linux64.h +++ b/gcc-4.9/gcc/config/rs6000/linux64.h @@ -367,11 +367,11 @@ extern int dot_symbols; #undef LINK_OS_DEFAULT_SPEC #define LINK_OS_DEFAULT_SPEC "%(link_os_linux)" -#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1" +#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld.so.1" #ifdef LINUX64_DEFAULT_ABI_ELFv2 -#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}" +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2}" #else -#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}" +#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1}" #endif #define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0" #define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0" diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def index 8e15bdf16..220d1e970 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def @@ -622,19 +622,12 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ -/* Miscellaneous builtins. */ -#define BU_MISC_1(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_HARD_FLOAT, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_UNARY), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_MISC_2(ENUM, NAME, ATTR, ICODE) \ +/* 128-bit long double floating point builtins. 
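The BU_LDBL128_2 macro defined above gates __builtin_pack_longdouble and __builtin_unpack_longdouble on both hard float and 128-bit long double, matching the new RS6000_BTM_LDBL128 mask. For reference, a sketch of their use, assuming -mhard-float -mlong-double-128 and the IBM double-double format:

/* IBM long double is a pair of doubles; pack combines the two halves,
   unpack extracts doubleword 0 (high) or 1 (low).  */
long double
make_ld (double hi, double lo)
{
  return __builtin_pack_longdouble (hi, lo);
}

double
high_part (long double x)
{
  return __builtin_unpack_longdouble (x, 0);
}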
*/ +#define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_HARD_FLOAT, /* MASK */ \ + (RS6000_BTM_HARD_FLOAT /* MASK */ \ + | RS6000_BTM_LDBL128), \ (RS6000_BTC_ ## ATTR /* ATTR */ \ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ @@ -1593,10 +1586,8 @@ BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered) BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd) BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd) -BU_MISC_2 (PACK_TF, "pack_longdouble", CONST, packtf) -BU_MISC_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) -BU_MISC_1 (UNPACK_TF_0, "longdouble_dw0", CONST, unpacktf_0) -BU_MISC_1 (UNPACK_TF_1, "longdouble_dw1", CONST, unpacktf_1) +BU_LDBL128_2 (PACK_TF, "pack_longdouble", CONST, packtf) +BU_LDBL128_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf) BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti) BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h index 69bb26331..785f6ce1b 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h @@ -163,7 +163,7 @@ extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *); extern rtx rs6000_libcall_value (enum machine_mode); extern rtx rs6000_va_arg (tree, tree); extern int function_ok_for_sibcall (tree); -extern int rs6000_reg_parm_stack_space (tree); +extern int rs6000_reg_parm_stack_space (tree, bool); extern void rs6000_elf_declare_function_name (FILE *, const char *, tree); extern bool rs6000_elf_in_small_data_p (const_tree); #ifdef ARGS_SIZE_RTX diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c index 4f1a399a2..bf67e7298 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000.c @@ -3037,7 +3037,8 @@ rs6000_builtin_mask_calculate (void) | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) - | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)); + | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) + | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)); } /* Override command line options. Mostly we process the processor type and @@ -6118,7 +6119,8 @@ mem_operand_gpr (rtx op, enum machine_mode mode) return false; extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; - gcc_assert (extra >= 0); + if (extra < 0) + extra = 0; if (GET_CODE (addr) == LO_SUM) /* For lo_sum addresses, we must allow any offset except one that @@ -10477,35 +10479,65 @@ rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type) list, or passes any parameter in memory. */ static bool -rs6000_function_parms_need_stack (tree fun) +rs6000_function_parms_need_stack (tree fun, bool incoming) { - function_args_iterator args_iter; - tree arg_type; + tree fntype, result; CUMULATIVE_ARGS args_so_far_v; cumulative_args_t args_so_far; if (!fun) /* Must be a libcall, all of which only use reg parms. */ return false; + + fntype = fun; if (!TYPE_P (fun)) - fun = TREE_TYPE (fun); + fntype = TREE_TYPE (fun); /* Varargs functions need the parameter save area. 
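As the comment above notes, a variadic callee always needs the parameter save area: the registers carrying the anonymous arguments must be spilled to memory so va_arg can walk them. A minimal example of such a callee:

#include <stdarg.h>

/* On ELFv2 the int arguments arrive in registers but must be stored
   to the parameter save area before va_arg can index them.  */
int
sum_ints (int n, ...)
{
  va_list ap;
  int s = 0;
  va_start (ap, n);
  while (n-- > 0)
    s += va_arg (ap, int);
  va_end (ap);
  return s;
}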
*/ - if (!prototype_p (fun) || stdarg_p (fun)) + if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype)) return true; - INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX); + INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX); args_so_far = pack_cumulative_args (&args_so_far_v); - if (aggregate_value_p (TREE_TYPE (fun), fun)) + /* When incoming, we will have been passed the function decl. + It is necessary to use the decl to handle K&R style functions, + where TYPE_ARG_TYPES may not be available. */ + if (incoming) { - tree type = build_pointer_type (TREE_TYPE (fun)); - rs6000_parm_needs_stack (args_so_far, type); + gcc_assert (DECL_P (fun)); + result = DECL_RESULT (fun); } + else + result = TREE_TYPE (fntype); - FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter) - if (rs6000_parm_needs_stack (args_so_far, arg_type)) - return true; + if (result && aggregate_value_p (result, fntype)) + { + if (!TYPE_P (result)) + result = TREE_TYPE (result); + result = build_pointer_type (result); + rs6000_parm_needs_stack (args_so_far, result); + } + + if (incoming) + { + tree parm; + + for (parm = DECL_ARGUMENTS (fun); + parm && parm != void_list_node; + parm = TREE_CHAIN (parm)) + if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm))) + return true; + } + else + { + function_args_iterator args_iter; + tree arg_type; + + FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) + if (rs6000_parm_needs_stack (args_so_far, arg_type)) + return true; + } return false; } @@ -10517,7 +10549,7 @@ rs6000_function_parms_need_stack (tree fun) all parameters in registers. */ int -rs6000_reg_parm_stack_space (tree fun) +rs6000_reg_parm_stack_space (tree fun, bool incoming) { int reg_parm_stack_space; @@ -10535,7 +10567,7 @@ rs6000_reg_parm_stack_space (tree fun) case ABI_ELFv2: /* ??? Recomputing this every time is a bit expensive. Is there a place to cache this information? */ - if (rs6000_function_parms_need_stack (fun)) + if (rs6000_function_parms_need_stack (fun, incoming)) reg_parm_stack_space = TARGET_64BIT ? 
64 : 32; else reg_parm_stack_space = 0; @@ -13559,11 +13591,15 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode) else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) error ("Builtin function %s requires the -mhard-dfp and" - "-mpower8-vector options", name); + " -mpower8-vector options", name); else if ((fnmask & RS6000_BTM_DFP) != 0) error ("Builtin function %s requires the -mhard-dfp option", name); else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) error ("Builtin function %s requires the -mpower8-vector option", name); + else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) + error ("Builtin function %s requires the -mhard-float and" + " -mlong-double-128 options", name); else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) error ("Builtin function %s requires the -mhard-float option", name); else @@ -31313,6 +31349,7 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = { "htm", RS6000_BTM_HTM, false, false }, { "hard-dfp", RS6000_BTM_DFP, false, false }, { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, + { "long-double-128", RS6000_BTM_LDBL128, false, false }, }; /* Option variables that we want to support inside attribute((target)) and diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.h b/gcc-4.9/gcc/config/rs6000/rs6000.h index 21330dc65..2b5d033f8 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000.h @@ -1602,7 +1602,14 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* Define this if stack space is still allocated for a parameter passed in a register. The value is the number of bytes allocated to this area. */ -#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL)) +#define REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), false) + +/* Define this macro if space guaranteed when compiling a function body + is different to space required when making a call, a situation that + can arise with K&R style function definitions. */ +#define INCOMING_REG_PARM_STACK_SPACE(FNDECL) \ + rs6000_reg_parm_stack_space ((FNDECL), true) /* Define this if the above stack space is to be considered part of the space allocated by the caller. */ @@ -2501,8 +2508,8 @@ extern int frame_pointer_needed; #define RS6000_BTC_SAT RS6000_BTC_MISC /* saturate sets VSCR. */ /* Builtin targets. For now, we reuse the masks for those options that are in - target flags, and pick two random bits for SPE and paired which aren't in - target_flags. */ + target flags, and pick three random bits for SPE, paired and ldbl128 which + aren't in target_flags. */ #define RS6000_BTM_ALWAYS 0 /* Always enabled. */ #define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */ #define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */ @@ -2519,6 +2526,7 @@ extern int frame_pointer_needed; #define RS6000_BTM_CELL MASK_FPRND /* Target is cell powerpc. */ #define RS6000_BTM_DFP MASK_DFP /* Decimal floating point. */ #define RS6000_BTM_HARD_FLOAT MASK_SOFT_FLOAT /* Hardware floating point. */ +#define RS6000_BTM_LDBL128 MASK_MULTIPLE /* 128-bit long double. */ #define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \ | RS6000_BTM_VSX \ @@ -2532,7 +2540,8 @@ extern int frame_pointer_needed; | RS6000_BTM_POPCNTD \ | RS6000_BTM_CELL \ | RS6000_BTM_DFP \ - | RS6000_BTM_HARD_FLOAT) + | RS6000_BTM_HARD_FLOAT \ + | RS6000_BTM_LDBL128) /* Define builtin enum index. 
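The new INCOMING_REG_PARM_STACK_SPACE hook above exists because of K&R definitions, where the function type carries no parameter list, so the incoming (callee) side must be judged from the decl rather than from TYPE_ARG_TYPES. An example of the problematic style; names are illustrative:

/* K&R style: the declaration gives no argument types, so the callee
   must be analyzed from the definition's DECL_ARGUMENTS.  */
int knr_scale ();

int
knr_scale (a, f)
     int a;
     double f;
{
  return (int) (a * f);
}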
*/ diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md index e853bc4f9..26d0d1530 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.md +++ b/gcc-4.9/gcc/config/rs6000/rs6000.md @@ -746,7 +746,7 @@ (define_insn "*extendsidi2_lfiwax" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu") - (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r,r,Z,Z")))] "TARGET_POWERPC64 && TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 @@ -769,7 +769,7 @@ (define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") - (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r")))] "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 @@ -15806,26 +15806,6 @@ "" "") -;; The Advance Toolchain 7.0-3 added private builtins: __builtin_longdouble_dw0 -;; and __builtin_longdouble_dw1 to optimize glibc. Add support for these -;; builtins here. - -(define_expand "unpacktf_0" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (unspec:DF [(match_operand:TF 1 "register_operand" "") - (const_int 0)] - UNSPEC_UNPACK_128BIT))] - "" - "") - -(define_expand "unpacktf_1" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (unspec:DF [(match_operand:TF 1 "register_operand" "") - (const_int 1)] - UNSPEC_UNPACK_128BIT))] - "" - "") - (define_insn_and_split "unpack_dm" [(set (match_operand: 0 "nonimmediate_operand" "=d,m,d,r,m") (unspec: diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h index 3fb6bd59f..afbd2892e 100644 --- a/gcc-4.9/gcc/config/rs6000/sysv4.h +++ b/gcc-4.9/gcc/config/rs6000/sysv4.h @@ -761,7 +761,10 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) #define LINK_START_LINUX_SPEC "" -#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld.so.1" #define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" #if DEFAULT_LIBC == LIBC_UCLIBC #define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" @@ -843,6 +846,11 @@ ncrtn.o%s" #define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}" #endif +/* These may be provided by rs6000/linux-grtev2.h. */ +#ifndef LINUX_GRTE_EXTRA_SPECS +#define LINUX_GRTE_EXTRA_SPECS +#endif + /* Define any extra SPECS that the compiler needs to generate. */ /* Override rs6000.h definition. */ #undef SUBTARGET_EXTRA_SPECS @@ -908,6 +916,7 @@ ncrtn.o%s" { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \ { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \ + LINUX_GRTE_EXTRA_SPECS \ SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md index 23d85ab06..6d20eab11 100644 --- a/gcc-4.9/gcc/config/rs6000/vsx.md +++ b/gcc-4.9/gcc/config/rs6000/vsx.md @@ -24,6 +24,13 @@ ;; Iterator for the 2 64-bit vector types (define_mode_iterator VSX_D [V2DF V2DI]) +;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with +;; lxvd2x to properly handle swapping words on little endian +(define_mode_iterator VSX_LE [V2DF + V2DI + V1TI + (TI "VECTOR_MEM_VSX_P (TImode)")]) + ;; Iterator for the 2 32-bit vector types (define_mode_iterator VSX_W [V4SF V4SI]) @@ -228,8 +235,8 @@ ;; The patterns for LE permuted loads and stores come before the general ;; VSX moves so they match first. 
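The VSX_LE iterator above extends the little-endian load/store fixups to V1TI and TI. On powerpc64le, a plain 128-bit vector access can be emitted as lxvd2x plus xxpermdi to undo the doubleword swap. A sketch of source code that exercises these patterns; actual code generation depends on -mcpu and optimization flags:

/* Compiled for powerpc64le with VSX, this load may become lxvd2x
   followed by xxpermdi to restore element order.  */
typedef long long v2di __attribute__ ((vector_size (16)));

v2di
load_v2di (const v2di *p)
{
  return *p;
}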
(define_insn_and_split "*vsx_le_perm_load_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (match_operand:VSX_D 1 "memory_operand" "Z"))] + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (match_operand:VSX_LE 1 "memory_operand" "Z"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" "!BYTES_BIG_ENDIAN && TARGET_VSX" @@ -342,16 +349,16 @@ (set_attr "length" "8")]) (define_insn "*vsx_le_perm_store_" - [(set (match_operand:VSX_D 0 "memory_operand" "=Z") - (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] + [(set (match_operand:VSX_LE 0 "memory_operand" "=Z") + (match_operand:VSX_LE 1 "vsx_register_operand" "+wa"))] "!BYTES_BIG_ENDIAN && TARGET_VSX" "#" [(set_attr "type" "vecstore") (set_attr "length" "12")]) (define_split - [(set (match_operand:VSX_D 0 "memory_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" ""))] + [(set (match_operand:VSX_LE 0 "memory_operand" "") + (match_operand:VSX_LE 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" [(set (match_dup 2) (vec_select: @@ -369,8 +376,8 @@ ;; The post-reload split requires that we re-permute the source ;; register in case it is still live. (define_split - [(set (match_operand:VSX_D 0 "memory_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" ""))] + [(set (match_operand:VSX_LE 0 "memory_operand" "") + (match_operand:VSX_LE 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" [(set (match_dup 1) (vec_select: @@ -1352,9 +1359,9 @@ ;; xxpermdi for little endian loads and stores. We need several of ;; these since the form of the PARALLEL differs by mode. (define_insn "*vsx_xxpermdi2_le_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (vec_select:VSX_D - (match_operand:VSX_D 1 "vsx_register_operand" "wa") + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "vsx_register_operand" "wa") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "xxpermdi %x0,%x1,%x1,2" @@ -1401,9 +1408,9 @@ ;; lxvd2x for little endian loads. We need several of ;; these since the form of the PARALLEL differs by mode. (define_insn "*vsx_lxvd2x2_le_" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") - (vec_select:VSX_D - (match_operand:VSX_D 1 "memory_operand" "Z") + [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "memory_operand" "Z") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "lxvd2x %x0,%y1" @@ -1450,9 +1457,9 @@ ;; stxvd2x for little endian stores. We need several of ;; these since the form of the PARALLEL differs by mode. 
(define_insn "*vsx_stxvd2x2_le_" - [(set (match_operand:VSX_D 0 "memory_operand" "=Z") - (vec_select:VSX_D - (match_operand:VSX_D 1 "vsx_register_operand" "wa") + [(set (match_operand:VSX_LE 0 "memory_operand" "=Z") + (vec_select:VSX_LE + (match_operand:VSX_LE 1 "vsx_register_operand" "wa") (parallel [(const_int 1) (const_int 0)])))] "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode)" "stxvd2x %x1,%y0" @@ -1683,7 +1690,7 @@ { if (GET_CODE (op3) == SCRATCH) op3 = gen_reg_rtx (V4SFmode); - emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2)); + emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); tmp = op3; } emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); diff --git a/gcc-4.9/gcc/config/rtems.h b/gcc-4.9/gcc/config/rtems.h index 3da27c57e..f14aed36a 100644 --- a/gcc-4.9/gcc/config/rtems.h +++ b/gcc-4.9/gcc/config/rtems.h @@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* The system headers under RTEMS are C++-aware. */ diff --git a/gcc-4.9/gcc/config/sol2-clearcap.map b/gcc-4.9/gcc/config/sol2-clearcap.map new file mode 100644 index 000000000..2d880c983 --- /dev/null +++ b/gcc-4.9/gcc/config/sol2-clearcap.map @@ -0,0 +1,2 @@ +# Clear all hardware capabilities emitted by Sun as. +hwcap_1 = V0x0 OVERRIDE; diff --git a/gcc-4.9/gcc/config/sol2-clearcapv2.map b/gcc-4.9/gcc/config/sol2-clearcapv2.map new file mode 100644 index 000000000..3c0cacedb --- /dev/null +++ b/gcc-4.9/gcc/config/sol2-clearcapv2.map @@ -0,0 +1,7 @@ +# Clear all hardware capabilities emitted by Sun as. +# +# Uses mapfile v2 syntax which is the only way to clear AT_SUN_CAP_HW2 flags. +$mapfile_version 2 +CAPABILITY { + HW = ; +}; diff --git a/gcc-4.9/gcc/config/sol2.h b/gcc-4.9/gcc/config/sol2.h index eb8e0b91f..db7844b29 100644 --- a/gcc-4.9/gcc/config/sol2.h +++ b/gcc-4.9/gcc/config/sol2.h @@ -183,12 +183,21 @@ along with GCC; see the file COPYING3. If not see #define LINK_LIBGCC_MAPFILE_SPEC "" #endif +/* Clear hardware capabilities, either explicitly or with OpenMP: + #pragma openmp declare simd creates clones for SSE2, AVX, and AVX2. 
*/ +#ifdef HAVE_LD_CLEARCAP +#define LINK_CLEARCAP_SPEC " %{mclear-hwcap|fopenmp*:-M %sclearcap.map}" +#else +#define LINK_CLEARCAP_SPEC "" +#endif + #undef LINK_SPEC #define LINK_SPEC \ "%{h*} %{v:-V} \ %{!shared:%{!static:%{rdynamic: " RDYNAMIC_SPEC "}}} \ %{static:-dn -Bstatic} \ - %{shared:-G -dy %{!mimpure-text:-z text}} " LINK_LIBGCC_MAPFILE_SPEC " \ + %{shared:-G -dy %{!mimpure-text:-z text}} " \ + LINK_LIBGCC_MAPFILE_SPEC LINK_CLEARCAP_SPEC " \ %{symbolic:-Bsymbolic -G -dy -z text} \ %(link_arch) \ %{Qy:} %{!Qn:-Qy}" diff --git a/gcc-4.9/gcc/config/sol2.opt b/gcc-4.9/gcc/config/sol2.opt index a5ae7c510..16a3e5f58 100644 --- a/gcc-4.9/gcc/config/sol2.opt +++ b/gcc-4.9/gcc/config/sol2.opt @@ -27,6 +27,10 @@ Driver Joined Ym, Driver Joined +mclear-hwcap +Target Report +Clear hardware capabilities when linking + mimpure-text Target Report Pass -z text to linker diff --git a/gcc-4.9/gcc/config/t-sol2 b/gcc-4.9/gcc/config/t-sol2 index a4c4af4ad..25feb0486 100644 --- a/gcc-4.9/gcc/config/t-sol2 +++ b/gcc-4.9/gcc/config/t-sol2 @@ -35,3 +35,10 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c sol2.o: $(srcdir)/config/sol2.c $(COMPILE) $< $(POSTCOMPILE) + +# Install clearcap.map if present. +install: install-clearcap-map + +# Ignore failures: file only exists if linker supports it. +install-clearcap-map: + -$(INSTALL_DATA) clearcap.map $(DESTDIR)$(libdir) diff --git a/gcc-4.9/gcc/config/v850/rtems.h b/gcc-4.9/gcc/config/v850/rtems.h index 01dff3e52..42ebb9fc0 100644 --- a/gcc-4.9/gcc/config/v850/rtems.h +++ b/gcc-4.9/gcc/config/v850/rtems.h @@ -13,8 +13,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ /* Specify predefined symbols in preprocessor. */ diff --git a/gcc-4.9/gcc/config/v850/v850-opts.h b/gcc-4.9/gcc/config/v850/v850-opts.h index a91b1b059..c3d9b7aac 100644 --- a/gcc-4.9/gcc/config/v850/v850-opts.h +++ b/gcc-4.9/gcc/config/v850/v850-opts.h @@ -13,8 +13,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef V850_OPTS_H diff --git a/gcc-4.9/gcc/config/v850/v850.h b/gcc-4.9/gcc/config/v850/v850.h index 92db20a44..6ccdc5d65 100644 --- a/gcc-4.9/gcc/config/v850/v850.h +++ b/gcc-4.9/gcc/config/v850/v850.h @@ -14,8 +14,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ #ifndef GCC_V850_H