20 files changed, 638 insertions, 1091 deletions
diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h
index 51d32a9d4..c8f4e45c6 100644
--- a/gcc-4.9/gcc/config/arm/aout.h
+++ b/gcc-4.9/gcc/config/arm/aout.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef ASM_APP_ON
diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def
index 42f00b463..56041ec8b 100644
--- a/gcc-4.9/gcc/config/arm/arm-cores.def
+++ b/gcc-4.9/gcc/config/arm/arm-cores.def
@@ -14,8 +14,13 @@
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* Before using #include to read this file, define a macro:
diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h
index a8393975a..21902940e 100644
--- a/gcc-4.9/gcc/config/arm/arm-opts.h
+++ b/gcc-4.9/gcc/config/arm/arm-opts.h
@@ -13,8 +13,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef ARM_OPTS_H
diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c
index 83763555c..3c237cb6d 100644
--- a/gcc-4.9/gcc/config/arm/arm.c
+++ b/gcc-4.9/gcc/config/arm/arm.c
@@ -16739,11 +16739,12 @@ thumb1_reorg (void)
       rtx prev, insn = BB_END (bb);
       bool insn_clobbered = false;
 
-      while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
+      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
 	insn = PREV_INSN (insn);
 
       /* Find the last cbranchsi4_insn in basic block BB.  */
-      if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
+      if (insn == BB_HEAD (bb)
+	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
 	continue;
 
       /* Get the register with which we are comparing.  */
@@ -28210,9 +28211,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
       fputs (":\n", file);
       if (flag_pic)
 	{
-	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
+	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
 	  rtx tem = XEXP (DECL_RTL (function), 0);
-	  tem = plus_constant (GET_MODE (tem), tem, -7);
+	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
+	     pipeline offset is four rather than eight.  Adjust the offset
+	     accordingly.  */
+	  tem = plus_constant (GET_MODE (tem), tem,
+			       TARGET_THUMB1_ONLY ? -3 : -7);
 	  tem = gen_rtx_MINUS (GET_MODE (tem),
 			       tem,
 			       gen_rtx_SYMBOL_REF (Pmode,
diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h
index 4d9121436..ab5167a8b 100644
--- a/gcc-4.9/gcc/config/arm/arm.h
+++ b/gcc-4.9/gcc/config/arm/arm.h
@@ -17,8 +17,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef GCC_ARM_H
diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md
index 662465f2a..467f9ce4e 100644
--- a/gcc-4.9/gcc/config/arm/arm.md
+++ b/gcc-4.9/gcc/config/arm/arm.md
@@ -75,6 +75,8 @@
   ]
 )
 
+;; UNSPEC_VOLATILE Usage:
+
 
 ;;---------------------------------------------------------------------------
 ;; Attributes
@@ -8349,8 +8351,8 @@
 
 (define_insn_and_split "*arm_cmpdi_unsigned"
   [(set (reg:CC_CZ CC_REGNUM)
-        (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r")
-                       (match_operand:DI 1 "arm_di_operand"     "Py,r,rDi")))]
+        (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r,r")
+                       (match_operand:DI 1 "arm_di_operand"     "Py,r,Di,rDi")))]
 
   "TARGET_32BIT"
   "#"   ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
@@ -8370,9 +8372,9 @@
     operands[1] = gen_lowpart (SImode, operands[1]);
   }
   [(set_attr "conds" "set")
-   (set_attr "enabled_for_depr_it" "yes,yes,no")
-   (set_attr "arch" "t2,t2,*")
-   (set_attr "length" "6,6,8")
+   (set_attr "enabled_for_depr_it" "yes,yes,no,*")
+   (set_attr "arch" "t2,t2,t2,a")
+   (set_attr "length" "6,6,10,8")
    (set_attr "type" "multiple")]
 )
 
@@ -9860,6 +9862,7 @@
   "TARGET_32BIT"
   "%i1%?\\t%0, %2, %4%S3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "shift" "4")
    (set_attr "arch" "a,t2,t2,a")
    ;; Thumb2 doesn't allow the stack pointer to be used for 
diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h
index 37a6e611b..95735433d 100644
--- a/gcc-4.9/gcc/config/arm/arm_neon.h
+++ b/gcc-4.9/gcc/config/arm/arm_neon.h
@@ -1,5 +1,4 @@
-/* ARM NEON intrinsics include file. This file is generated automatically
-   using neon-gen.ml.  Please do not edit manually.
+/* ARM NEON intrinsics include file.
 
    Copyright (C) 2006-2014 Free Software Foundation, Inc.
    Contributed by CodeSourcery.
@@ -7707,12 +7706,32 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
   return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
 }
 
+/* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as
+   follows. (nelt = the number of elements within a vector.)
+
+   Firstly, a value of N within a mask, becomes (N ^ (nelt - 1)), as gcc vector
+   extension's indexing scheme is reversed *within each vector* (relative to the
+   neon intrinsics view), but without changing which of the two vectors.
+
+   Secondly, the elements within each mask are reversed, as the mask is itself a
+   vector, and will itself be loaded in reverse order (again, relative to the
+   neon intrinsics view, i.e. that would result from a "vld1" instruction).  */
+
 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vtrn_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7720,8 +7739,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vtrn_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7729,8 +7753,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7738,8 +7771,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7747,8 +7785,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7756,8 +7803,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7765,8 +7817,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vtrn_s32 (int32x2_t __a, int32x2_t __b)
 {
   int32x2x2_t __rv;
-  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7774,8 +7831,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vtrn_f32 (float32x2_t __a, float32x2_t __b)
 {
   float32x2x2_t __rv;
-  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7783,8 +7845,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
 {
   uint32x2x2_t __rv;
-  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7792,8 +7859,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
-  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -7801,8 +7877,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7810,8 +7895,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7819,8 +7909,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7828,8 +7923,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
-  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -7837,8 +7941,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7846,8 +7959,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
-  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7855,8 +7973,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
-  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -7864,8 +7991,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
-  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 1, 11, 3, 13, 5, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 2, 10, 4, 12, 6, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7873,8 +8009,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vzip_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7882,8 +8027,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vzip_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7891,8 +8041,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vzip_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7900,8 +8059,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vzip_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7909,8 +8073,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vzip_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7918,8 +8091,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vzip_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7927,8 +8105,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vzip_s32 (int32x2_t __a, int32x2_t __b)
 {
   int32x2x2_t __rv;
-  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7936,8 +8119,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vzip_f32 (float32x2_t __a, float32x2_t __b)
 {
   float32x2x2_t __rv;
-  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7945,8 +8133,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vzip_u32 (uint32x2_t __a, uint32x2_t __b)
 {
   uint32x2x2_t __rv;
-  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -7954,8 +8147,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vzipq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
-  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -7963,8 +8165,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vzipq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -7972,8 +8183,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vzipq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7981,8 +8197,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vzipq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -7990,8 +8211,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
-  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -7999,8 +8229,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -8008,8 +8247,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
-  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
   return __rv;
 }
 
@@ -8017,8 +8261,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
-  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
   return __rv;
 }
 
@@ -8026,8 +8279,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
-  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 12, 4, 13, 5, 14, 6, 15, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 8, 1, 9, 2, 10, 3, 11 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
   return __rv;
 }
 
@@ -8035,8 +8297,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
 vuzp_s8 (int8x8_t __a, int8x8_t __b)
 {
   int8x8x2_t __rv;
-  __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8044,8 +8315,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
 vuzp_s16 (int16x4_t __a, int16x4_t __b)
 {
   int16x4x2_t __rv;
-  __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8053,8 +8329,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
 vuzp_s32 (int32x2_t __a, int32x2_t __b)
 {
   int32x2x2_t __rv;
-  __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -8062,8 +8343,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
 vuzp_f32 (float32x2_t __a, float32x2_t __b)
 {
   float32x2x2_t __rv;
-  __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -8071,8 +8357,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
 vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
 {
   uint8x8x2_t __rv;
-  __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8080,8 +8375,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
 vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
 {
   uint16x4x2_t __rv;
-  __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8089,8 +8389,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
 vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
 {
   uint32x2x2_t __rv;
-  __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
-  __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
   return __rv;
 }
 
@@ -8098,8 +8403,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
 vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
 {
   poly8x8x2_t __rv;
-  __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8107,8 +8421,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
 vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
 {
   poly16x4x2_t __rv;
-  __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8116,8 +8435,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_s8 (int8x16_t __a, int8x16_t __b)
 {
   int8x16x2_t __rv;
-  __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
-  __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
   return __rv;
 }
 
@@ -8125,8 +8453,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_s16 (int16x8_t __a, int16x8_t __b)
 {
   int16x8x2_t __rv;
-  __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8134,8 +8471,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
-  __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8143,8 +8485,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
-  __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8152,8 +8499,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
   uint8x16x2_t __rv;
-  __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
-  __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
   return __rv;
 }
 
@@ -8161,8 +8517,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
   uint16x8x2_t __rv;
-  __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
@@ -8170,8 +8535,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
 vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
-  __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
-  __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
   return __rv;
 }
 
@@ -8179,8 +8549,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
 vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
 {
   poly8x16x2_t __rv;
-  __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
-  __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
   return __rv;
 }
 
@@ -8188,8 +8567,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
 vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
 {
   poly16x8x2_t __rv;
-  __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
-  __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 9, 11, 13, 15, 1, 3, 5, 7 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 0, 2, 4, 6, 8, 10, 12, 14 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+      { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
   return __rv;
 }
 
diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h
index bc223f8e3..7a576ac46 100644
--- a/gcc-4.9/gcc/config/arm/bpabi.h
+++ b/gcc-4.9/gcc/config/arm/bpabi.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* Use the AAPCS ABI by default.  */
diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h
index 2edf520de..15a32fb8a 100644
--- a/gcc-4.9/gcc/config/arm/elf.h
+++ b/gcc-4.9/gcc/config/arm/elf.h
@@ -16,8 +16,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 #ifndef OBJECT_FORMAT_ELF
diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h
index 4d42cbfc8..350639f32 100644
--- a/gcc-4.9/gcc/config/arm/linux-eabi.h
+++ b/gcc-4.9/gcc/config/arm/linux-eabi.h
@@ -68,8 +68,9 @@
    GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI.  */
 
 #undef  GLIBC_DYNAMIC_LINKER
-#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3"
-#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3"
+
+#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.3"
+#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3"
 #define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT
 
 #define GLIBC_DYNAMIC_LINKER \
diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h
index 5dc3328e8..e825ae48c 100644
--- a/gcc-4.9/gcc/config/arm/linux-elf.h
+++ b/gcc-4.9/gcc/config/arm/linux-elf.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* elfos.h should have already been included.  Now just override
@@ -57,7 +62,7 @@
 
 #define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc"
 
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
 
 #define LINUX_TARGET_LINK_SPEC  "%{h*} \
    %{static:-Bstatic} \
diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h
index 52a739c26..1dd043782 100644
--- a/gcc-4.9/gcc/config/arm/linux-gas.h
+++ b/gcc-4.9/gcc/config/arm/linux-gas.h
@@ -15,8 +15,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* This is how we tell the assembler that a symbol is weak.
diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h
new file mode 100644
index 000000000..7ee5806b7
--- /dev/null
+++ b/gcc-4.9/gcc/config/arm/linux-grte.h
@@ -0,0 +1,27 @@
+/* Definitions for ARM Linux-based GRTE (Google RunTime Environment).
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Chris Demetriou.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#undef SUBSUBTARGET_EXTRA_SPECS
+#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS
diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml
deleted file mode 100644
index 5788a533e..000000000
--- a/gcc-4.9/gcc/config/arm/neon-docgen.ml
+++ /dev/null
@@ -1,424 +0,0 @@
-(* ARM NEON documentation generator.
-
-   Copyright (C) 2006-2014 Free Software Foundation, Inc.
-   Contributed by CodeSourcery.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it under
-   the terms of the GNU General Public License as published by the Free
-   Software Foundation; either version 3, or (at your option) any later
-   version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or
-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
-   <http://www.gnu.org/licenses/>.
-
-   This is an O'Caml program.  The O'Caml compiler is available from:
-
-     http://caml.inria.fr/
-
-   Or from your favourite OS's friendly packaging system. Tested with version
-   3.09.2, though other versions will probably work too.
-
-   Compile with:
-     ocamlc -c neon.ml
-     ocamlc -o neon-docgen neon.cmo neon-docgen.ml
-
-   Run with:
-     /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
-*)
-
-open Neon
-
-(* The combined "ops" and "reinterp" table.  *)
-let ops_reinterp = reinterp @ ops
-
-(* Helper functions for extracting things from the "ops" table.  *)
-let single_opcode desired_opcode () =
-  List.fold_left (fun got_so_far ->
-                  fun row ->
-                    match row with
-                      (opcode, _, _, _, _, _) ->
-                        if opcode = desired_opcode then row :: got_so_far
-                                                   else got_so_far
-                 ) [] ops_reinterp
-
-let multiple_opcodes desired_opcodes () =
-  List.fold_left (fun got_so_far ->
-                  fun desired_opcode ->
-                    (single_opcode desired_opcode ()) @ got_so_far)
-                 [] desired_opcodes
-
-let ldx_opcode number () =
-  List.fold_left (fun got_so_far ->
-                  fun row ->
-                    match row with
-                      (opcode, _, _, _, _, _) ->
-                        match opcode with
-                          Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
-                            row :: got_so_far
-                          | _ -> got_so_far
-                 ) [] ops_reinterp
-
-let stx_opcode number () =
-  List.fold_left (fun got_so_far ->
-                  fun row ->
-                    match row with
-                      (opcode, _, _, _, _, _) ->
-                        match opcode with
-                          Vstx n | Vstx_lane n when n = number ->
-                            row :: got_so_far
-                          | _ -> got_so_far
-                 ) [] ops_reinterp
-
-let tbl_opcode () =
-  List.fold_left (fun got_so_far ->
-                  fun row ->
-                    match row with
-                      (opcode, _, _, _, _, _) ->
-                        match opcode with
-                          Vtbl _ -> row :: got_so_far
-                          | _ -> got_so_far
-                 ) [] ops_reinterp
-
-let tbx_opcode () =
-  List.fold_left (fun got_so_far ->
-                  fun row ->
-                    match row with
-                      (opcode, _, _, _, _, _) ->
-                        match opcode with
-                          Vtbx _ -> row :: got_so_far
-                          | _ -> got_so_far
-                 ) [] ops_reinterp
-
-(* The groups of intrinsics.  *)
-let intrinsic_groups =
-  [ "Addition", single_opcode Vadd;
-    "Multiplication", single_opcode Vmul;
-    "Multiply-accumulate", single_opcode Vmla;
-    "Multiply-subtract", single_opcode Vmls;
-    "Fused-multiply-accumulate", single_opcode Vfma;
-    "Fused-multiply-subtract", single_opcode Vfms;
-    "Round to integral (to nearest, ties to even)", single_opcode Vrintn;
-    "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta;
-    "Round to integral (towards +Inf)", single_opcode Vrintp;
-    "Round to integral (towards -Inf)", single_opcode Vrintm;
-    "Round to integral (towards 0)", single_opcode Vrintz;
-    "Subtraction", single_opcode Vsub;
-    "Comparison (equal-to)", single_opcode Vceq;
-    "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
-    "Comparison (less-than-or-equal-to)", single_opcode Vcle;
-    "Comparison (greater-than)", single_opcode Vcgt;
-    "Comparison (less-than)", single_opcode Vclt;
-    "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
-    "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
-    "Comparison (absolute greater-than)", single_opcode Vcagt;
-    "Comparison (absolute less-than)", single_opcode Vcalt;
-    "Test bits", single_opcode Vtst;
-    "Absolute difference", single_opcode Vabd;
-    "Absolute difference and accumulate", single_opcode Vaba;
-    "Maximum", single_opcode Vmax;
-    "Minimum", single_opcode Vmin;
-    "Pairwise add", single_opcode Vpadd;
-    "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
-    "Folding maximum", single_opcode Vpmax;
-    "Folding minimum", single_opcode Vpmin;
-    "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
-    "Vector shift left", single_opcode Vshl;
-    "Vector shift left by constant", single_opcode Vshl_n;
-    "Vector shift right by constant", single_opcode Vshr_n;
-    "Vector shift right by constant and accumulate", single_opcode Vsra_n;
-    "Vector shift right and insert", single_opcode Vsri;
-    "Vector shift left and insert", single_opcode Vsli;
-    "Absolute value", single_opcode Vabs;
-    "Negation", single_opcode Vneg;
-    "Bitwise not", single_opcode Vmvn;
-    "Count leading sign bits", single_opcode Vcls;
-    "Count leading zeros", single_opcode Vclz;
-    "Count number of set bits", single_opcode Vcnt;
-    "Reciprocal estimate", single_opcode Vrecpe;
-    "Reciprocal square-root estimate", single_opcode Vrsqrte;
-    "Get lanes from a vector", single_opcode Vget_lane;
-    "Set lanes in a vector", single_opcode Vset_lane;
-    "Create vector from literal bit pattern", single_opcode Vcreate;
-    "Set all lanes to the same value",
-      multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
-    "Combining vectors", single_opcode Vcombine;
-    "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
-    "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
-    "Move, single_opcode narrowing", single_opcode Vmovn;
-    "Move, single_opcode long", single_opcode Vmovl;
-    "Table lookup", tbl_opcode;
-    "Extended table lookup", tbx_opcode;
-    "Multiply, lane", single_opcode Vmul_lane;
-    "Long multiply, lane", single_opcode Vmull_lane;
-    "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
-    "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
-    "Multiply-accumulate, lane", single_opcode Vmla_lane;
-    "Multiply-subtract, lane", single_opcode Vmls_lane;
-    "Vector multiply by scalar", single_opcode Vmul_n;
-    "Vector long multiply by scalar", single_opcode Vmull_n;
-    "Vector saturating doubling long multiply by scalar",
-      single_opcode Vqdmull_n;
-    "Vector saturating doubling multiply high by scalar",
-      single_opcode Vqdmulh_n;
-    "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
-    "Vector multiply-subtract by scalar", single_opcode Vmls_n;
-    "Vector extract", single_opcode Vext;
-    "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
-    "Bit selection", single_opcode Vbsl;
-    "Transpose elements", single_opcode Vtrn;
-    "Zip elements", single_opcode Vzip;
-    "Unzip elements", single_opcode Vuzp;
-    "Element/structure loads, VLD1 variants", ldx_opcode 1;
-    "Element/structure stores, VST1 variants", stx_opcode 1;
-    "Element/structure loads, VLD2 variants", ldx_opcode 2;
-    "Element/structure stores, VST2 variants", stx_opcode 2;
-    "Element/structure loads, VLD3 variants", ldx_opcode 3;
-    "Element/structure stores, VST3 variants", stx_opcode 3;
-    "Element/structure loads, VLD4 variants", ldx_opcode 4;
-    "Element/structure stores, VST4 variants", stx_opcode 4;
-    "Logical operations (AND)", single_opcode Vand;
-    "Logical operations (OR)", single_opcode Vorr;
-    "Logical operations (exclusive OR)", single_opcode Veor;
-    "Logical operations (AND-NOT)", single_opcode Vbic;
-    "Logical operations (OR-NOT)", single_opcode Vorn;
-    "Reinterpret casts", single_opcode Vreinterp ]
-
-(* Given an intrinsic shape, produce a string to document the corresponding
-   operand shapes.  *)
-let rec analyze_shape shape =
-  let rec n_things n thing =
-    match n with
-      0 -> []
-    | n -> thing :: (n_things (n - 1) thing)
-  in
-  let rec analyze_shape_elt reg_no elt =
-    match elt with
-      Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
-    | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
-    | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
-    | Immed -> "#@var{0}"
-    | VecArray (1, elt) ->
-        let elt_regexp = analyze_shape_elt 0 elt in
-          "@{" ^ elt_regexp ^ "@}"
-    | VecArray (n, elt) ->
-      let rec f m =
-        match m with
-          0 -> []
-        | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
-      in
-      let ops = List.rev (f n) in
-        "@{" ^ (commas (fun x -> x) ops "") ^ "@}"
-    | (PtrTo elt | CstPtrTo elt) ->
-      "[" ^ (analyze_shape_elt reg_no elt) ^ "]"
-    | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
-    | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
-    | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
-    | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts))
-  in
-    match shape with
-      All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
-    | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
-              ", " ^ (analyze_shape_elt 0 Dreg)
-    | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
-              (analyze_shape_elt 0 elt)
-    | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
-              ", " ^ (analyze_shape_elt 0 Dreg)
-    | Wide_noreg elt -> analyze_shape (Long_noreg elt)
-    | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
-                ", " ^ (analyze_shape_elt 0 Qreg)
-    | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
-    | By_scalar Dreg ->
-        analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
-    | By_scalar Qreg ->
-        analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
-    | By_scalar _ -> assert false
-    | Wide_lane ->
-        analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
-    | Wide_scalar ->
-        analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
-    | Pair_result elt ->
-      let elt_regexp = analyze_shape_elt 0 elt in
-      let elt_regexp' = analyze_shape_elt 1 elt in
-        elt_regexp ^ ", " ^ elt_regexp'
-    | Unary_scalar _ -> "FIXME Unary_scalar"
-    | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
-    | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
-    | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
-
-(* Document a single intrinsic.  *)
-let describe_intrinsic first chan
-                       (elt_ty, (_, features, shape, name, munge, _)) =
-  let c_arity, new_elt_ty = munge shape elt_ty in
-  let c_types = strings_of_arity c_arity in
-  Printf.fprintf chan "@itemize @bullet\n";
-  let item_code = if first then "@item" else "@itemx" in
-    Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
-                   (intrinsic_name name) (string_of_elt elt_ty);
-    Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
-    if not (List.exists (fun feature -> feature = No_op) features) then
-    begin
-      let print_one_insn name =
-        Printf.fprintf chan "@code{";
-        let no_suffix = (new_elt_ty = NoElts) in
-        let name_with_suffix =
-          if no_suffix then name
-          else name ^ "." ^ (string_of_elt_dots new_elt_ty)
-        in
-        let possible_operands = analyze_all_shapes features shape
-                                                   analyze_shape
-        in
-	let rec print_one_possible_operand op =
-	  Printf.fprintf chan "%s %s}" name_with_suffix op
-        in
-          (* If the intrinsic expands to multiple instructions, we assume
-             they are all of the same form.  *)
-          print_one_possible_operand (List.hd possible_operands)
-      in
-      let rec print_insns names =
-        match names with
-          [] -> ()
-        | [name] -> print_one_insn name
-        | name::names -> (print_one_insn name;
-                          Printf.fprintf chan " @emph{or} ";
-                          print_insns names)
-      in
-      let insn_names = get_insn_names features name in
-        Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
-        print_insns insn_names;
-        Printf.fprintf chan "\n"
-    end;
-    Printf.fprintf chan "@end itemize\n";
-    Printf.fprintf chan "\n\n"
-
-(* Document a group of intrinsics.  *)
-let document_group chan (group_title, group_extractor) =
-  (* Extract the rows in question from the ops table and then turn them
-     into a list of intrinsics.  *)
-  let intrinsics =
-    List.fold_left (fun got_so_far ->
-                    fun row ->
-                      match row with
-                        (_, _, _, _, _, elt_tys) ->
-                          List.fold_left (fun got_so_far' ->
-                                          fun elt_ty ->
-                                            (elt_ty, row) :: got_so_far')
-                                         got_so_far elt_tys
-                   ) [] (group_extractor ())
-  in
-    (* Emit the title for this group.  *)
-    Printf.fprintf chan "@subsubsection %s\n\n" group_title;
-    (* Emit a description of each intrinsic.  *)
-    List.iter (describe_intrinsic true chan) intrinsics;
-    (* Close this group.  *)
-    Printf.fprintf chan "\n\n"
-
-let gnu_header chan =
-  List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
-  "@c Copyright (C) 2006-2014 Free Software Foundation, Inc.";
-  "@c This is part of the GCC manual.";
-  "@c For copying conditions, see the file gcc.texi.";
-  "";
-  "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
-  "@c Please do not edit manually."]
-
-let crypto_doc =
-"
-@itemize @bullet
-@item poly128_t vldrq_p128(poly128_t const *)
-@end itemize
-
-@itemize @bullet
-@item void vstrq_p128(poly128_t *, poly128_t)
-@end itemize
-
-@itemize @bullet
-@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
-@end itemize
-
-@itemize @bullet
-@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
-@end itemize
-
-@itemize @bullet
-@item uint32_t vsha1h_u32 (uint32_t)
-@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
- 
-@itemize @bullet
-@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
- 
-@itemize @bullet
-@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}}
-@end itemize
- 
-@itemize @bullet
-@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item poly128_t vmull_p64 (poly64_t a, poly64_t b)
-@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
-@end itemize
-
-@itemize @bullet
-@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b)
-@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
-@end itemize
-"
-
-(* Program entry point.  *)
-let _ =
-  if Array.length Sys.argv <> 2 then
-    failwith "Usage: neon-docgen <output filename>"
-  else
-  let file = Sys.argv.(1) in
-    try
-      let chan = open_out file in
-        gnu_header chan;
-        List.iter (document_group chan) intrinsic_groups;
-        Printf.fprintf chan "%s\n" crypto_doc;
-        close_out chan
-    with Sys_error sys ->
-      failwith ("Could not create output file " ^ file ^ ": " ^ sys)
diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml
deleted file mode 100644
index f3dd86b0a..000000000
--- a/gcc-4.9/gcc/config/arm/neon-gen.ml
+++ /dev/null
@@ -1,520 +0,0 @@
-(* Auto-generate ARM Neon intrinsics header file.
-   Copyright (C) 2006-2014 Free Software Foundation, Inc.
-   Contributed by CodeSourcery.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it under
-   the terms of the GNU General Public License as published by the Free
-   Software Foundation; either version 3, or (at your option) any later
-   version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or
-   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-   for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
-   <http://www.gnu.org/licenses/>.
-
-   This is an O'Caml program.  The O'Caml compiler is available from:
-
-     http://caml.inria.fr/
-
-   Or from your favourite OS's friendly packaging system. Tested with version
-   3.09.2, though other versions will probably work too.
-
-   Compile with:
-     ocamlc -c neon.ml
-     ocamlc -o neon-gen neon.cmo neon-gen.ml
-
-   Run with:
-     ./neon-gen > arm_neon.h
-*)
-
-open Neon
-
-(* The format codes used in the following functions are documented at:
-     http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
-     #6_printflikefunctionsforprettyprinting
-   (one line, remove the backslash.)
-*)
-
-(* Following functions can be used to approximate GNU indentation style.  *)
-let start_function () =
-  Format.printf "@[<v 0>";
-  ref 0
-
-let end_function nesting =
-  match !nesting with
-    0 -> Format.printf "@;@;@]"
-  | _ -> failwith ("Bad nesting (ending function at level "
-                   ^ (string_of_int !nesting) ^ ")")
-
-let open_braceblock nesting =
-  begin match !nesting with
-    0 -> Format.printf "@,@<0>{@[<v 2>@,"
-  | _ -> Format.printf "@,@[<v 2>  @<0>{@[<v 2>@,"
-  end;
-  incr nesting
-
-let close_braceblock nesting =
-  decr nesting;
-  match !nesting with
-    0 -> Format.printf "@]@,@<0>}"
-  | _ -> Format.printf "@]@,@<0>}@]"
-
-let print_function arity fnname body =
-  let ffmt = start_function () in
-  Format.printf "__extension__ static __inline ";
-  let inl = "__attribute__ ((__always_inline__))" in
-  begin match arity with
-    Arity0 ret ->
-      Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname
-  | Arity1 (ret, arg0) ->
-      Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname
-                                        (string_of_vectype arg0)
-  | Arity2 (ret, arg0, arg1) ->
-      Format.printf "%s %s@,%s (%s __a, %s __b)"
-        (string_of_vectype ret) inl fnname (string_of_vectype arg0)
-	(string_of_vectype arg1)
-  | Arity3 (ret, arg0, arg1, arg2) ->
-      Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)"
-        (string_of_vectype ret) inl fnname (string_of_vectype arg0)
-	(string_of_vectype arg1) (string_of_vectype arg2)
-  | Arity4 (ret, arg0, arg1, arg2, arg3) ->
-      Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
-        (string_of_vectype ret) inl fnname (string_of_vectype arg0)
-	(string_of_vectype arg1) (string_of_vectype arg2)
-        (string_of_vectype arg3)
-  end;
-  open_braceblock ffmt;
-  let rec print_lines = function
-    []       -> ()
-  | "" :: lines -> print_lines lines
-  | [line] -> Format.printf "%s" line
-  | line::lines -> Format.printf "%s@," line ; print_lines lines in
-  print_lines body;
-  close_braceblock ffmt;
-  end_function ffmt
-
-let union_string num elts base =
-  let itype = inttype_for_array num elts in
-  let iname = string_of_inttype itype
-  and sname = string_of_vectype (T_arrayof (num, elts)) in
-  Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base
-
-let rec signed_ctype = function
-    T_uint8x8 | T_poly8x8 -> T_int8x8
-  | T_uint8x16 | T_poly8x16 -> T_int8x16
-  | T_uint16x4 | T_poly16x4 -> T_int16x4
-  | T_uint16x8 | T_poly16x8 -> T_int16x8
-  | T_uint32x2 -> T_int32x2
-  | T_uint32x4 -> T_int32x4
-  | T_uint64x1 -> T_int64x1
-  | T_uint64x2 -> T_int64x2
-  | T_poly64x2 -> T_int64x2
-  (* Cast to types defined by mode in arm.c, not random types pulled in from
-     the <stdint.h> header in use. This fixes incompatible pointer errors when
-     compiling with C++.  *)
-  | T_uint8 | T_int8 -> T_intQI
-  | T_uint16 | T_int16 -> T_intHI
-  | T_uint32 | T_int32 -> T_intSI
-  | T_uint64 | T_int64 -> T_intDI
-  | T_float16 -> T_floatHF
-  | T_float32 -> T_floatSF
-  | T_poly8 -> T_intQI
-  | T_poly16 -> T_intHI
-  | T_poly64 -> T_intDI
-  | T_poly128 -> T_intTI
-  | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
-  | T_ptrto elt -> T_ptrto (signed_ctype elt)
-  | T_const elt -> T_const (signed_ctype elt)
-  | x -> x
-
-let add_cast ctype cval =
-  let stype = signed_ctype ctype in
-  if ctype <> stype then
-    Printf.sprintf "(%s) %s" (string_of_vectype stype) cval
-  else
-    cval
-
-let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")"
-
-(* Return a tuple of a list of declarations to go at the start of the function,
-   and a list of statements needed to return THING.  *)
-let return arity thing =
-  match arity with
-    Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
-  | Arity4 (ret, _, _, _, _) ->
-      begin match ret with
-	T_arrayof (num, vec) ->
-          let uname = union_string num vec "__rv" in
-          [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
-      | T_void ->
-	  [], [thing ^ ";"]
-      | _ ->
-	  [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
-      end
-
-let mask_shape_for_shuffle = function
-    All (num, reg) -> All (num, reg)
-  | Pair_result reg -> All (2, reg)
-  | _ -> failwith "mask_for_shuffle"
-
-let mask_elems shuffle shape elttype part =
-  let elem_size = elt_width elttype in
-  let num_elems =
-    match regmap shape 0 with
-      Dreg -> 64 / elem_size
-    | Qreg -> 128 / elem_size
-    | _ -> failwith "mask_elems" in
-  shuffle elem_size num_elems part
-
-(* Return a tuple of a list of declarations 0and a list of statements needed
-   to implement an intrinsic using __builtin_shuffle.  SHUFFLE is a function
-   which returns a list of elements suitable for using as a mask.  *)
-
-let shuffle_fn shuffle shape arity elttype =
-  let mshape = mask_shape_for_shuffle shape in
-  let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in
-  let masktype_str = string_of_vectype masktype in
-  let shuffle_res = type_for_elt mshape elttype 0 in
-  let shuffle_res_str = string_of_vectype shuffle_res in
-  match arity with
-    Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
-  | Arity4 (ret, _, _, _, _) ->
-      begin match ret with
-        T_arrayof (num, vec) ->
-	  let elems1 = mask_elems shuffle mshape elttype `lo
-	  and elems2 = mask_elems shuffle mshape elttype `hi in
-	  let mask1 = (String.concat ", " (List.map string_of_int elems1))
-	  and mask2 = (String.concat ", " (List.map string_of_int elems2)) in
-	  let shuf1 = Printf.sprintf
-	    "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
-	    shuffle_res_str masktype_str mask1
-	  and shuf2 = Printf.sprintf
-	    "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
-	    shuffle_res_str masktype_str mask2 in
-	  [Printf.sprintf "%s __rv;" (string_of_vectype ret);],
-	  [shuf1; shuf2; "return __rv;"]
-      | _ ->
-          let elems = mask_elems shuffle mshape elttype `lo in
-          let mask =  (String.concat ", " (List.map string_of_int elems)) in
-	  let shuf = Printf.sprintf
-	    "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in
-	  [""],
-	  [shuf]
-      end
-
-let rec element_type ctype =
-  match ctype with
-    T_arrayof (_, v) -> element_type v
-  | _ -> ctype
-
-let params ps =
-  let pdecls = ref [] in
-  let ptype t p =
-    match t with
-      T_arrayof (num, elts) ->
-        let uname = union_string num elts (p ^ "u") in
-        let decl = Printf.sprintf "%s = { %s };" uname p in
-        pdecls := decl :: !pdecls;
-        p ^ "u.__o"
-    | _ -> add_cast t p in
-  let plist = match ps with
-    Arity0 _ -> []
-  | Arity1 (_, t1) -> [ptype t1 "__a"]
-  | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"]
-  | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"]
-  | Arity4 (_, t1, t2, t3, t4) ->
-      [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in
-  !pdecls, plist
-
-let modify_params features plist =
-  let is_flipped =
-    List.exists (function Flipped _ -> true | _ -> false) features in
-  if is_flipped then
-    match plist with
-      [ a; b ] -> [ b; a ]
-    | _ ->
-      failwith ("Don't know how to flip args " ^ (String.concat ", " plist))
-  else
-    plist
-
-(* !!! Decide whether to add an extra information word based on the shape
-   form.  *)
-let extra_word shape features paramlist bits =
-  let use_word =
-    match shape with
-      All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow
-    | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm
-    | Narrow_imm -> true
-    | _ -> List.mem InfoWord features
-  in
-    if use_word then
-      paramlist @ [string_of_int bits]
-    else
-      paramlist
-
-(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
-   Bit 1 represents floats & polynomials (1), or ordinary integers (0).
-   Bit 2 represents rounding (1) vs none (0).  *)
-let infoword_value elttype features =
-  let bits01 =
-    match elt_class elttype with
-      Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001
-    | Poly -> 0b010
-    | Float -> 0b011
-    | _ -> 0b000
-  and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in
-  bits01 lor rounding_bit
-
-(* "Cast" type operations will throw an exception in mode_of_elt (actually in
-   elt_width, called from there). Deal with that here, and generate a suffix
-   with multiple modes (<to><from>).  *)
-let rec mode_suffix elttype shape =
-  try
-    let mode = mode_of_elt elttype shape in
-    string_of_mode mode
-  with MixedMode (dst, src) ->
-    let dstmode = mode_of_elt ~argpos:0 dst shape
-    and srcmode = mode_of_elt ~argpos:1 src shape in
-    string_of_mode dstmode ^ string_of_mode srcmode
-
-let get_shuffle features =
-  try
-    match List.find (function Use_shuffle _ -> true | _ -> false) features with
-      Use_shuffle fn -> Some fn
-    | _ -> None
-  with Not_found -> None
-
-let print_feature_test_start features =
-  try
-    match List.find (fun feature ->
-                       match feature with Requires_feature _ -> true
-                                        | Requires_arch _ -> true
-                                        | Requires_FP_bit _ -> true
-                                        | _ -> false)
-                     features with
-      Requires_feature feature ->
-        Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
-    | Requires_arch arch ->
-        Format.printf "#if __ARM_ARCH >= %d@\n" arch
-    | Requires_FP_bit bit ->
-        Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
-                      (1 lsl bit)
-    | _ -> assert false
-  with Not_found -> assert true
-
-let print_feature_test_end features =
-  let feature =
-    List.exists (function Requires_feature _ -> true
-                          | Requires_arch _ -> true
-                          | Requires_FP_bit _ -> true
-                          |  _ -> false) features in
-  if feature then Format.printf "#endif@\n"
-
-
-let print_variant opcode features shape name (ctype, asmtype, elttype) =
-  let bits = infoword_value elttype features in
-  let modesuf = mode_suffix elttype shape in
-  let pdecls, paramlist = params ctype in
-  let rdecls, stmts =
-    match get_shuffle features with
-      Some shuffle -> shuffle_fn shuffle shape ctype elttype
-    | None ->
-	let paramlist' = modify_params features paramlist in
-	let paramlist'' = extra_word shape features paramlist' bits in
-	let parstr = String.concat ", " paramlist'' in
-	let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
-                	(builtin_name features name) modesuf parstr in
-	return ctype builtin in
-  let body = pdecls @ rdecls @ stmts
-  and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
-  begin
-    print_feature_test_start features;
-    print_function ctype fnname body;
-    print_feature_test_end features;
-  end
-
-(* When this function processes the element types in the ops table, it rewrites
-   them in a list of tuples (a,b,c):
-     a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
-     b : Asm type : a single, processed element type, e.g. P16. This is the
-         type which should be attached to the asm opcode.
-     c : Variant type : the unprocessed type for this variant (e.g. in add
-         instructions which don't care about the sign, b might be i16 and c
-         might be s16.)
-*)
-
-let print_op (opcode, features, shape, name, munge, types) =
-  let sorted_types = List.sort compare types in
-  let munged_types = List.map
-    (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in
-  List.iter
-    (fun variant -> print_variant opcode features shape name variant)
-    munged_types
-
-let print_ops ops =
-  List.iter print_op ops
-
-(* Output type definitions. Table entries are:
-     cbase : "C" name for the type.
-     abase : "ARM" base name for the type (i.e. int in int8x8_t).
-     esize : element size.
-     enum : element count.
-     alevel: architecture level at which available.
-*)
-
-type fpulevel = CRYPTO | ALL
-
-let deftypes () =
-  let typeinfo = [
-    (* Doubleword vector types.  *)
-    "__builtin_neon_qi", "int", 8, 8, ALL;
-    "__builtin_neon_hi", "int", 16, 4, ALL;
-    "__builtin_neon_si", "int", 32, 2, ALL;
-    "__builtin_neon_di", "int", 64, 1, ALL;
-    "__builtin_neon_hf", "float", 16, 4, ALL;
-    "__builtin_neon_sf", "float", 32, 2, ALL;
-    "__builtin_neon_poly8", "poly", 8, 8, ALL;
-    "__builtin_neon_poly16", "poly", 16, 4, ALL;
-    "__builtin_neon_poly64", "poly", 64, 1, CRYPTO;
-    "__builtin_neon_uqi", "uint", 8, 8, ALL;
-    "__builtin_neon_uhi", "uint", 16, 4, ALL;
-    "__builtin_neon_usi", "uint", 32, 2, ALL;
-    "__builtin_neon_udi", "uint", 64, 1, ALL;
-
-    (* Quadword vector types.  *)
-    "__builtin_neon_qi", "int", 8, 16, ALL;
-    "__builtin_neon_hi", "int", 16, 8, ALL;
-    "__builtin_neon_si", "int", 32, 4, ALL;
-    "__builtin_neon_di", "int", 64, 2, ALL;
-    "__builtin_neon_sf", "float", 32, 4, ALL;
-    "__builtin_neon_poly8", "poly", 8, 16, ALL;
-    "__builtin_neon_poly16", "poly", 16, 8, ALL;
-    "__builtin_neon_poly64", "poly", 64, 2, CRYPTO;
-    "__builtin_neon_uqi", "uint", 8, 16, ALL;
-    "__builtin_neon_uhi", "uint", 16, 8, ALL;
-    "__builtin_neon_usi", "uint", 32, 4, ALL;
-    "__builtin_neon_udi", "uint", 64, 2, ALL
-  ] in
-  List.iter
-    (fun (cbase, abase, esize, enum, fpulevel) ->
-      let attr =
-        match enum with
-          1 -> ""
-        | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))"
-                              (esize * enum / 8) in
-      if fpulevel == CRYPTO then
-        Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n";
-      Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr;
-      if fpulevel == CRYPTO then
-        Format.printf "#endif\n";)
-    typeinfo;
-  Format.print_newline ();
-  (* Extra types not in <stdint.h>.  *)
-  Format.printf "typedef float float32_t;\n";
-  Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
-  Format.printf "typedef __builtin_neon_poly16 poly16_t;\n";
-  Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n";
-  Format.printf "typedef __builtin_neon_poly64 poly64_t;\n";
-  Format.printf "typedef __builtin_neon_poly128 poly128_t;\n";
-  Format.printf "#endif\n"
-
-(* Output structs containing arrays, for load & store instructions etc.
-   poly128_t is deliberately not included here because it has no array types
-   defined for it.  *)
-
-let arrtypes () =
-  let typeinfo = [
-    "int", 8, ALL;    "int", 16, ALL;
-    "int", 32, ALL;   "int", 64, ALL;
-    "uint", 8, ALL;   "uint", 16, ALL;
-    "uint", 32, ALL;  "uint", 64, ALL;
-    "float", 32, ALL; "poly", 8, ALL;
-    "poly", 16, ALL; "poly", 64, CRYPTO
-  ] in
-  let writestruct elname elsize regsize arrsize fpulevel =
-    let elnum = regsize / elsize in
-    let structname =
-      Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in
-    let sfmt = start_function () in
-    Format.printf "%stypedef struct %s"
-      (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname;
-    open_braceblock sfmt;
-    Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize;
-    close_braceblock sfmt;
-    Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else "");
-    end_function sfmt;
-  in
-    for n = 2 to 4 do
-      List.iter
-        (fun (elname, elsize, alevel) ->
-          writestruct elname elsize 64 n alevel;
-          writestruct elname elsize 128 n alevel)
-        typeinfo
-    done
-
-let print_lines = List.iter (fun s -> Format.printf "%s@\n" s)
-
-(* Do it.  *)
-
-let _ =
-  print_lines [
-"/* ARM NEON intrinsics include file. This file is generated automatically";
-"   using neon-gen.ml.  Please do not edit manually.";
-"";
-"   Copyright (C) 2006-2014 Free Software Foundation, Inc.";
-"   Contributed by CodeSourcery.";
-"";
-"   This file is part of GCC.";
-"";
-"   GCC is free software; you can redistribute it and/or modify it";
-"   under the terms of the GNU General Public License as published";
-"   by the Free Software Foundation; either version 3, or (at your";
-"   option) any later version.";
-"";
-"   GCC is distributed in the hope that it will be useful, but WITHOUT";
-"   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
-"   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public";
-"   License for more details.";
-"";
-"   Under Section 7 of GPL version 3, you are granted additional";
-"   permissions described in the GCC Runtime Library Exception, version";
-"   3.1, as published by the Free Software Foundation.";
-"";
-"   You should have received a copy of the GNU General Public License and";
-"   a copy of the GCC Runtime Library Exception along with this program;";
-"   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see";
-"   <http://www.gnu.org/licenses/>.  */";
-"";
-"#ifndef _GCC_ARM_NEON_H";
-"#define _GCC_ARM_NEON_H 1";
-"";
-"#ifndef __ARM_NEON__";
-"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
-"#else";
-"";
-"#ifdef __cplusplus";
-"extern \"C\" {";
-"#endif";
-"";
-"#include <stdint.h>";
-""];
-  deftypes ();
-  arrtypes ();
-  Format.print_newline ();
-  print_ops ops;
-  Format.print_newline ();
-  print_ops reinterp;
-  print_ops reinterpq;
-  Format.printf "%s" crypto_intrinsics;
-  print_lines [
-"#ifdef __cplusplus";
-"}";
-"#endif";
-"#endif";
-"#endif"]
diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h
index 9deda9679..645551fdf 100644
--- a/gcc-4.9/gcc/config/arm/netbsd-elf.h
+++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* Run-time Target Specification.  */
diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h
index b5055ce40..4d57d1388 100644
--- a/gcc-4.9/gcc/config/arm/uclinux-eabi.h
+++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* Override settings that are different to the uclinux-elf or
diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h
index 5cd4fe527..43edba70c 100644
--- a/gcc-4.9/gcc/config/arm/uclinux-elf.h
+++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h
@@ -14,8 +14,13 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
 /* We don't want a PLT.  */
diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md
index 87edf18e2..db3fea5f5 100644
--- a/gcc-4.9/gcc/config/arm/unspecs.md
+++ b/gcc-4.9/gcc/config/arm/unspecs.md
@@ -58,6 +58,7 @@
                         ; instruction stream.
   UNSPEC_PIC_OFFSET     ; A symbolic 12-bit OFFSET that has been treated
                         ; correctly for PIC usage.
+  UNSPEC_GOT_PREL_SYM   ; Specify an R_ARM_GOT_PREL relocation of a symbol.
   UNSPEC_GOTSYM_OFF     ; The offset of the start of the GOT from a
                         ; a given symbolic address.
   UNSPEC_THUMB1_CASESI  ; A Thumb1 compressed dispatch-table call.
@@ -70,6 +71,11 @@
 			; that.
   UNSPEC_UNALIGNED_STORE ; Same for str/strh.
   UNSPEC_PIC_UNIFIED    ; Create a common pic addressing form.
+  UNSPEC_PROLOGUE_USE   ; As USE insns are not meaningful after reload,
+                        ; this unspec is used to prevent the deletion of
+                        ; instructions setting registers for EH handling
+                        ; and stack frame generation.  Operand 0 is the
+                        ; register to "use".
   UNSPEC_LL		; Represent an unpaired load-register-exclusive.
   UNSPEC_VRINTZ         ; Represent a float to integral float rounding
                         ; towards zero.
@@ -83,11 +89,12 @@
                         ; FPSCR rounding mode and signal inexactness.
   UNSPEC_VRINTA         ; Represent a float to integral float rounding
                         ; towards nearest, ties away from zero.
-  UNSPEC_GOT_PREL_SYM   ; Specify an R_ARM_GOT_PREL relocation of a symbol
 ])
 
 (define_c_enum "unspec" [
   UNSPEC_WADDC		; Used by the intrinsic form of the iWMMXt WADDC instruction.
+  UNSPEC_WMADDS         ; Used by the intrinsic form of the iWMMXt WMADDS instruction.
+  UNSPEC_WMADDU         ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
   UNSPEC_WABS		; Used by the intrinsic form of the iWMMXt WABS instruction.
   UNSPEC_WQMULWMR	; Used by the intrinsic form of the iWMMXt WQMULWMR instruction.
   UNSPEC_WQMULMR	; Used by the intrinsic form of the iWMMXt WQMULMR instruction.
diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h
index 8bef16bc4..faf84724d 100644
--- a/gcc-4.9/gcc/config/arm/vxworks.h
+++ b/gcc-4.9/gcc/config/arm/vxworks.h
@@ -17,8 +17,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3.  If not see
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */