Diffstat (limited to 'gcc-4.9/gcc/config')
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h | 59
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 35
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-modes.def | 1
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 461
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.c | 20
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.md | 28
-rw-r--r--  gcc-4.9/gcc/config/aarch64/arm_neon.h | 50
-rw-r--r--  gcc-4.9/gcc/config/aarch64/iterators.md | 16
-rw-r--r--  gcc-4.9/gcc/config/alpha/alpha.c | 6
-rw-r--r--  gcc-4.9/gcc/config/arm/aout.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/arm-cores.def | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/arm-opts.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.c | 13
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.md | 13
-rw-r--r--  gcc-4.9/gcc/config/arm/arm_neon.h | 608
-rw-r--r--  gcc-4.9/gcc/config/arm/bpabi.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/elf.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/linux-eabi.h | 5
-rw-r--r--  gcc-4.9/gcc/config/arm/linux-elf.h | 11
-rw-r--r--  gcc-4.9/gcc/config/arm/linux-gas.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/linux-grte.h | 27
-rw-r--r--  gcc-4.9/gcc/config/arm/neon-docgen.ml | 424
-rw-r--r--  gcc-4.9/gcc/config/arm/neon-gen.ml | 520
-rw-r--r--  gcc-4.9/gcc/config/arm/netbsd-elf.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/uclinux-eabi.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/uclinux-elf.h | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/unspecs.md | 9
-rw-r--r--  gcc-4.9/gcc/config/arm/vxworks.h | 9
-rw-r--r--  gcc-4.9/gcc/config/avr/avr-fixed.md | 4
-rw-r--r--  gcc-4.9/gcc/config/avr/avr.h | 14
-rw-r--r--  gcc-4.9/gcc/config/avr/avr.md | 9
-rw-r--r--  gcc-4.9/gcc/config/dbx.h | 9
-rw-r--r--  gcc-4.9/gcc/config/i386/driver-i386.c | 5
-rw-r--r--  gcc-4.9/gcc/config/i386/gnu-user.h | 6
-rw-r--r--  gcc-4.9/gcc/config/i386/i386-protos.h | 11
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.c | 362
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.md | 97
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.opt | 20
-rw-r--r--  gcc-4.9/gcc/config/i386/linux.h | 20
-rw-r--r--  gcc-4.9/gcc/config/i386/linux64.h | 19
-rw-r--r--  gcc-4.9/gcc/config/i386/sse.md | 64
-rw-r--r--  gcc-4.9/gcc/config/initfini-array.h | 9
-rw-r--r--  gcc-4.9/gcc/config/linux-grte.h | 41
-rw-r--r--  gcc-4.9/gcc/config/linux.c | 4
-rw-r--r--  gcc-4.9/gcc/config/linux.h | 17
-rw-r--r--  gcc-4.9/gcc/config/msp430/msp430-opts.h | 32
-rw-r--r--  gcc-4.9/gcc/config/msp430/msp430.c | 117
-rw-r--r--  gcc-4.9/gcc/config/msp430/msp430.h | 4
-rw-r--r--  gcc-4.9/gcc/config/msp430/msp430.md | 32
-rw-r--r--  gcc-4.9/gcc/config/msp430/predicates.md | 4
-rw-r--r--  gcc-4.9/gcc/config/newlib-stdint.h | 9
-rw-r--r--  gcc-4.9/gcc/config/rs6000/htm.md | 2
-rw-r--r--  gcc-4.9/gcc/config/rs6000/htmxlintrin.h | 9
-rw-r--r--  gcc-4.9/gcc/config/rs6000/linux-grte.h | 41
-rw-r--r--  gcc-4.9/gcc/config/rs6000/linux64.h | 6
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 21
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 2
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.c | 71
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.h | 17
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.md | 24
-rw-r--r--  gcc-4.9/gcc/config/rs6000/sysv4.h | 11
-rw-r--r--  gcc-4.9/gcc/config/rs6000/vsx.md | 43
-rw-r--r--  gcc-4.9/gcc/config/rtems.h | 9
-rw-r--r--  gcc-4.9/gcc/config/sol2-clearcap.map | 2
-rw-r--r--  gcc-4.9/gcc/config/sol2-clearcapv2.map | 7
-rw-r--r--  gcc-4.9/gcc/config/sol2.h | 11
-rw-r--r--  gcc-4.9/gcc/config/sol2.opt | 4
-rw-r--r--  gcc-4.9/gcc/config/t-sol2 | 7
-rw-r--r--  gcc-4.9/gcc/config/v850/rtems.h | 9
-rw-r--r--  gcc-4.9/gcc/config/v850/v850-opts.h | 9
-rw-r--r--  gcc-4.9/gcc/config/v850/v850.h | 9
72 files changed, 2183 insertions(+), 1445 deletions(-)
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
new file mode 100644
index 000000000..91d235ff1
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
@@ -0,0 +1,59 @@
+/* Machine description for AArch64 architecture.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_LINUX_ANDROID_H
+#define GCC_AARCH64_LINUX_ANDROID_H
+
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ ANDROID_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
+ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+ LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
+ GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
+
+#define CC1PLUS_SPEC \
+ LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
+ GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+#endif /* GCC_AARCH64_LINUX_ANDROID_H */
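Note: the LINUX_OR_ANDROID_CC and LINUX_OR_ANDROID_LD macros used above come from gcc/config/linux-android.h, which this patch does not touch. As a rough sketch of the mechanism (an assumption for context, not part of this diff), they wrap the two spec strings in a driver conditional keyed on -mandroid/-mno-android and the tno-android-* switches:

    /* Sketch of gcc/config/linux-android.h (assumption, for context only).  */
    #if ANDROID_DEFAULT
    # define NOANDROID "mno-android"
    #else
    # define NOANDROID "!mandroid"
    #endif

    #define LINUX_OR_ANDROID_CC(LINUX_SPEC, ANDROID_SPEC) \
      "%{" NOANDROID "|tno-android-cc:" LINUX_SPEC ";:" ANDROID_SPEC "}"

    #define LINUX_OR_ANDROID_LD(LINUX_SPEC, ANDROID_SPEC) \
      "%{" NOANDROID "|tno-android-ld:" LINUX_SPEC ";:" ANDROID_SPEC "}"

Each spec in the new header therefore behaves as plain Linux unless the driver targets Android, in which case the Android variant (Bionic dynamic linker, -fpic, Android start/end files) is substituted.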
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
index f32d19f16..f8a97c899 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
@@ -21,8 +21,10 @@
#ifndef GCC_AARCH64_LINUX_H
#define GCC_AARCH64_LINUX_H
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
-#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
#define CPP_SPEC "%{pthread:-D_REENTRANT}"
@@ -36,38 +38,13 @@
%{mbig-endian:-EB} %{mlittle-endian:-EL} \
-maarch64linux%{mbig-endian:b}"
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+
#define TARGET_OS_CPP_BUILTINS() \
do \
{ \
GNU_USER_TARGET_OS_CPP_BUILTINS(); \
- ANDROID_TARGET_OS_CPP_BUILTINS(); \
} \
while (0)
-#undef LINK_SPEC
-#define LINK_SPEC \
- LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
- LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
-
-#undef CC1_SPEC
-#define CC1_SPEC \
- LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
- GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
-
-#define CC1PLUS_SPEC \
- LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
-
-#undef LIB_SPEC
-#define LIB_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
- GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
-
#endif /* GCC_AARCH64_LINUX_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
index 1d2cc7679..f9c436948 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
@@ -31,6 +31,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */
VECTOR_MODES (FLOAT, 8); /* V2SF. */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */
+VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */
INT_MODE (OI, 32);
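The new single-element V1DF mode pairs with the aarch64.c hunk below that adds V1DFmode to aarch64_vector_mode_supported_p; presumably it exists to back a 64-bit one-element double vector type (float64x1_t in the intrinsics view), though the consumer of the mode is not shown in this diff.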
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
index 73aee2c3d..1f827b57d 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
@@ -934,14 +934,22 @@
[(set_attr "type" "neon_minmax<q>")]
)
-;; Move into low-half clearing high half to 0.
+;; vec_concat gives a new vector with the low elements from operand 1, and
+;; the high elements from operand 2. That is to say, given op1 = { a, b }
+;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
+;; What that means is that the RTL descriptions of the patterns below
+;; need to change depending on endianness.
-(define_insn "move_lo_quad_<mode>"
+;; Move to the low architectural bits of the register.
+;; On little-endian this is { operand, zeroes }
+;; On big-endian this is { zeroes, operand }
+
+(define_insn "move_lo_quad_internal_<mode>"
[(set (match_operand:VQ 0 "register_operand" "=w,w,w")
(vec_concat:VQ
(match_operand:<VHALF> 1 "register_operand" "w,r,r")
(vec_duplicate:<VHALF> (const_int 0))))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
dup\\t%d0, %1.d[0]
fmov\\t%d0, %1
@@ -952,7 +960,39 @@
(set_attr "length" "4")]
)
-;; Move into high-half.
+(define_insn "move_lo_quad_internal_be_<mode>"
+ [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
+ (vec_concat:VQ
+ (vec_duplicate:<VHALF> (const_int 0))
+ (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "@
+ dup\\t%d0, %1.d[0]
+ fmov\\t%d0, %1
+ dup\\t%d0, %1"
+ [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
+ (set_attr "simd" "yes,*,yes")
+ (set_attr "fp" "*,yes,*")
+ (set_attr "length" "4")]
+)
+
+(define_expand "move_lo_quad_<mode>"
+ [(match_operand:VQ 0 "register_operand")
+ (match_operand:VQ 1 "register_operand")]
+ "TARGET_SIMD"
+{
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
+ else
+ emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
+ DONE;
+}
+)
+
+;; Move operand1 to the high architectural bits of the register, keeping
+;; the low architectural bits of operand2.
+;; For little-endian this is { operand2, operand1 }
+;; For big-endian this is { operand1, operand2 }
(define_insn "aarch64_simd_move_hi_quad_<mode>"
[(set (match_operand:VQ 0 "register_operand" "+w,w")
@@ -961,12 +1001,25 @@
(match_dup 0)
(match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
(match_operand:<VHALF> 1 "register_operand" "w,r")))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
ins\\t%0.d[1], %1.d[0]
ins\\t%0.d[1], %1"
- [(set_attr "type" "neon_ins")
- (set_attr "length" "4")]
+ [(set_attr "type" "neon_ins")]
+)
+
+(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
+ [(set (match_operand:VQ 0 "register_operand" "+w,w")
+ (vec_concat:VQ
+ (match_operand:<VHALF> 1 "register_operand" "w,r")
+ (vec_select:<VHALF>
+ (match_dup 0)
+ (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "@
+ ins\\t%0.d[1], %1.d[0]
+ ins\\t%0.d[1], %1"
+ [(set_attr "type" "neon_ins")]
)
(define_expand "move_hi_quad_<mode>"
@@ -974,9 +1027,13 @@
(match_operand:<VHALF> 1 "register_operand" "")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
- emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
- operands[1], p));
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
+ operands[1], p));
+ else
+ emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
+ operands[1], p));
DONE;
})
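A concrete reading of the vec_concat comment above: with op1 = { a, b } and zeros = { 0, 0 }, the little-endian insn emits vec_concat (op1, zeros) = { a, b, 0, 0 }, putting a and b in the low 64 architectural bits. On big-endian, element 0 sits at the high end of the register, so the same architectural result requires vec_concat (zeros, op1) = { 0, 0, a, b }. Both variants assemble to identical instructions; only the RTL description differs, which is why each insn is gated on BYTES_BIG_ENDIAN and a common expander picks between them.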
@@ -2321,12 +2378,44 @@
(vec_concat:<VDBL>
(match_operand:VDIC 1 "register_operand" "w")
(match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"mov\\t%0.8b, %1.8b"
[(set_attr "type" "neon_move<q>")]
)
-(define_insn_and_split "aarch64_combine<mode>"
+(define_insn "*aarch64_combinez_be<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ (vec_concat:<VDBL>
+ (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")
+ (match_operand:VDIC 1 "register_operand" "w")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "mov\\t%0.8b, %1.8b"
+ [(set_attr "type" "neon_move<q>")]
+)
+
+(define_expand "aarch64_combine<mode>"
+ [(match_operand:<VDBL> 0 "register_operand")
+ (match_operand:VDC 1 "register_operand")
+ (match_operand:VDC 2 "register_operand")]
+ "TARGET_SIMD"
+{
+ rtx op1, op2;
+ if (BYTES_BIG_ENDIAN)
+ {
+ op1 = operands[2];
+ op2 = operands[1];
+ }
+ else
+ {
+ op1 = operands[1];
+ op2 = operands[2];
+ }
+ emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
+ DONE;
+}
+)
+
+(define_insn_and_split "aarch64_combine_internal<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=&w")
(vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
(match_operand:VDC 2 "register_operand" "w")))]
@@ -2335,16 +2424,19 @@
"&& reload_completed"
[(const_int 0)]
{
- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ if (BYTES_BIG_ENDIAN)
+ aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
+ else
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
DONE;
}
[(set_attr "type" "multiple")]
)
(define_expand "aarch64_simd_combine<mode>"
- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
- (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
- (match_operand:VDC 2 "register_operand" "w")))]
+ [(match_operand:<VDBL> 0 "register_operand")
+ (match_operand:VDC 1 "register_operand")
+ (match_operand:VDC 2 "register_operand")]
"TARGET_SIMD"
{
emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
@@ -2633,7 +2725,41 @@
;; sq<r>dmulh_lane
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2649,7 +2775,41 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_expand "aarch64_sqdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2659,24 +2819,56 @@
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
(vec_select:<VEL>
- (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
@@ -2712,7 +2904,31 @@
(sign_extend:<VWIDE>
(vec_duplicate:VD_HSI
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ ))
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))))]
@@ -2735,7 +2951,30 @@
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ )
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))))]
@@ -2752,11 +2991,12 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2767,12 +3007,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlal_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
DONE;
@@ -2782,11 +3023,12 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2797,12 +3039,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlsl_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
DONE;
@@ -2890,7 +3133,33 @@
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])
+ ))))
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))))]
@@ -2907,12 +3176,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2923,13 +3193,14 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
DONE;
@@ -2939,12 +3210,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2955,13 +3227,14 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
DONE;
@@ -3041,7 +3314,28 @@
(sign_extend:<VWIDE>
(vec_duplicate:VD_HSI
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
@@ -3061,7 +3355,27 @@
(match_operand:SD_HSI 1 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 1 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
))
(const_int 1)))]
@@ -3076,11 +3390,12 @@
(define_expand "aarch64_sqdmull_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VSD_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3]));
DONE;
@@ -3089,12 +3404,13 @@
(define_expand "aarch64_sqdmull_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VD_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmull_laneq<mode>_internal
(operands[0], operands[1], operands[2], operands[3]));
DONE;
})
@@ -3143,7 +3459,7 @@
(define_expand "aarch64_sqdmull2<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "w")]
+ (match_operand:VQ_HSI 2 "register_operand" "w")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
@@ -3165,7 +3481,30 @@
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
@@ -3180,12 +3519,13 @@
(define_expand "aarch64_sqdmull2_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
@@ -3195,13 +3535,14 @@
(define_expand "aarch64_sqdmull2_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
DONE;
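All of the lane-index rewrites above funnel through ENDIAN_LANE_N. Its definition lives in aarch64.h, outside this diff; it is assumed to be essentially:

    /* Assumed definition, shown for context only.  */
    #define ENDIAN_LANE_N(mode, n) \
      (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)

For example, lane 1 of a V4HI operand stays lane 1 on little-endian but becomes lane 4 - 1 - 1 = 2 on big-endian, so the lane printed into the assembly names the element the intrinsic user actually selected. Note also that the bounds check now runs in the expander, against the mode the operand really has (VCOND or VCONQ), before the remapping.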
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c
index 7b6c2b38e..bf35031ec 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.c
@@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
int ncrn, nvrn, nregs;
bool allocate_ncrn, allocate_nvrn;
+ HOST_WIDE_INT size;
/* We need to do this once per argument. */
if (pcum->aapcs_arg_processed)
@@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
pcum->aapcs_arg_processed = true;
+ /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
+ size
+ = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
+ UNITS_PER_WORD);
+
allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
mode,
@@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
}
ncrn = pcum->aapcs_ncrn;
- nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
- + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
+ nregs = size / UNITS_PER_WORD;
/* C6 - C9. though the sign and zero extension semantics are
handled elsewhere. This is the case where the argument fits
@@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
pcum->aapcs_nextncrn = NUM_ARG_REGS;
/* The argument is passed on stack; record the needed number of words for
- this argument (we can re-use NREGS) and align the total size if
- necessary. */
+ this argument and align the total size if necessary. */
on_stack:
- pcum->aapcs_stack_words = nregs;
+ pcum->aapcs_stack_words = size / UNITS_PER_WORD;
if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
- 16 / UNITS_PER_WORD) + 1;
+ 16 / UNITS_PER_WORD);
return;
}
@@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode)
|| mode == V16QImode || mode == V2DImode
|| mode == V2SImode || mode == V4HImode
|| mode == V8QImode || mode == V2SFmode
- || mode == V4SFmode || mode == V2DFmode))
+ || mode == V4SFmode || mode == V2DFmode
+ || mode == V1DFmode))
return true;
return false;
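AARCH64_ROUND_UP is defined in aarch64.h, outside this diff; it is assumed to be the usual power-of-two round-up:

    /* Assumed definition, shown for context only.  */
    #define AARCH64_ROUND_UP(X, ALIGN) \
      (((X) + ((ALIGN) - 1)) & ~((ALIGN) - 1))

So a 12-byte argument yields size = 16 and nregs = 2, whether it lands in registers or on the stack. The dropped "+ 1" in the 16-byte-alignment case is the substantive fix: the old code rounded aapcs_stack_size up to a 16-byte boundary and then added a further word, over-allocating stack for every such argument.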
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md
index c86a29d8e..df81045e9 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.md
@@ -2823,17 +2823,18 @@
;; Arithmetic right shift using SISD or Integer instruction
(define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r")
(ashiftrt:GPI
- (match_operand:GPI 1 "register_operand" "w,w,r")
- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,rUs<cmode>")))]
+ (match_operand:GPI 1 "register_operand" "w,w,w,r")
+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,0,rUs<cmode>")))]
""
"@
sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
#
+ #
asr\t%<w>0, %<w>1, %<w>2"
- [(set_attr "simd" "yes,yes,no")
- (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
+ [(set_attr "simd" "yes,yes,yes,no")
+ (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>,shift_reg")]
)
(define_split
@@ -2842,11 +2843,13 @@
(match_operand:DI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))]
- ""
+ (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))]
+{
+ operands[3] = gen_lowpart (QImode, operands[0]);
+}
)
(define_split
@@ -2855,11 +2858,13 @@
(match_operand:SI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))]
- ""
+ (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))]
+{
+ operands[3] = gen_lowpart (QImode, operands[0]);
+}
)
(define_insn "*aarch64_sisd_ushl"
@@ -3608,6 +3613,7 @@
(unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")]
UNSPEC_TLSDESC))
(clobber (reg:DI LR_REGNUM))
+ (clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:DI 1 "=r"))]
"TARGET_TLS_DESC"
"adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h
index b03d11422..c01669b2c 100644
--- a/gcc-4.9/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h
@@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
@@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
- int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
@@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
- int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}
@@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}
@@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
@@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
- int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
@@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
- int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}
@@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
@@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}
@@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
@@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
+vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}
@@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
- int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
+ return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
+vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}
@@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
- int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
+ return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}
@@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
@@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}
@@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
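The net effect of these prototype changes is that the scalar and 64-bit lane intrinsics now take the 64-bit vector that ACLE specifies, instead of a widened 128-bit one. An illustrative caller (not part of the patch):

    #include <arm_neon.h>

    int32x4_t
    mla_demo (int32x4_t acc, int16x4_t a, int16x4_t v)
    {
      /* __c is now int16x4_t and the lane index is range-checked
         against 0..3 by the expander.  */
      return vqdmlal_lane_s16 (acc, a, v, 1);
    }

Previously the header itself widened v via vcombine with a zero upper half before calling the builtin; with the _internal/_laneq split in aarch64-simd.md that workaround is no longer needed.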
diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md
index f1339b8cc..e76e3ef10 100644
--- a/gcc-4.9/gcc/config/aarch64/iterators.md
+++ b/gcc-4.9/gcc/config/aarch64/iterators.md
@@ -396,14 +396,15 @@
(SI "SI") (HI "HI")
(QI "QI")])
-;; Define container mode for lane selection.
-(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+;; 64-bit container modes for the inner or scalar source mode.
+(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
+ (V4HI "V4HI") (V8HI "V4HI")
(V2SI "V2SI") (V4SI "V2SI")
(DI "DI") (V2DI "DI")
(V2SF "V2SF") (V4SF "V2SF")
(V2DF "DF")])
-;; Define container mode for lane selection.
+;; 128-bit container modes for the inner or scalar source mode.
(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
(V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI")
@@ -412,15 +413,6 @@
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")])
-;; Define container mode for lane selection.
-(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
- (V4HI "V8HI") (V8HI "V8HI")
- (V2SI "V4SI") (V4SI "V4SI")
- (DI "V2DI") (V2DI "V2DI")
- (V2SF "V4SF") (V4SF "V4SF")
- (V2DF "V2DF") (SI "V4SI")
- (HI "V8HI") (QI "V16QI")])
-
;; Half modes of all vector modes.
(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V4HI "V2HI") (V8HI "V4HI")
diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c
index dc07a02c0..d5c7908be 100644
--- a/gcc-4.9/gcc/config/alpha/alpha.c
+++ b/gcc-4.9/gcc/config/alpha/alpha.c
@@ -8715,6 +8715,11 @@ alpha_handle_trap_shadows (void)
}
break;
+ case BARRIER:
+ /* __builtin_unreachable can expand to no code at all,
+ leaving (barrier) RTXes in the instruction stream. */
+ goto close_shadow_notrapb;
+
case JUMP_INSN:
case CALL_INSN:
case CODE_LABEL:
@@ -8730,6 +8735,7 @@ alpha_handle_trap_shadows (void)
n = emit_insn_before (gen_trapb (), i);
PUT_MODE (n, TImode);
PUT_MODE (i, TImode);
+ close_shadow_notrapb:
trap_pending = 0;
shadow.used.i = 0;
shadow.used.fp = 0;
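A minimal case the new BARRIER handling covers (illustrative, assuming an Alpha target where trap shadows are tracked):

    void
    demo (int x)
    {
      if (x)
        __builtin_unreachable ();   /* expands to no insns, leaving a BARRIER */
    }

Since control cannot pass a barrier, the shadow is closed through close_shadow_notrapb, i.e. without emitting the trapb that the JUMP_INSN/CALL_INSN/CODE_LABEL paths insert.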
diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h
index 51d32a9d4..c8f4e45c6 100644
--- a/gcc-4.9/gcc/config/arm/aout.h
+++ b/gcc-4.9/gcc/config/arm/aout.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef ASM_APP_ON
diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def
index 42f00b463..56041ec8b 100644
--- a/gcc-4.9/gcc/config/arm/arm-cores.def
+++ b/gcc-4.9/gcc/config/arm/arm-cores.def
@@ -14,8 +14,13 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Before using #include to read this file, define a macro:
diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h
index a8393975a..21902940e 100644
--- a/gcc-4.9/gcc/config/arm/arm-opts.h
+++ b/gcc-4.9/gcc/config/arm/arm-opts.h
@@ -13,8 +13,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef ARM_OPTS_H
diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c
index 83763555c..3c237cb6d 100644
--- a/gcc-4.9/gcc/config/arm/arm.c
+++ b/gcc-4.9/gcc/config/arm/arm.c
@@ -16739,11 +16739,12 @@ thumb1_reorg (void)
rtx prev, insn = BB_END (bb);
bool insn_clobbered = false;
- while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
+ while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
insn = PREV_INSN (insn);
/* Find the last cbranchsi4_insn in basic block BB. */
- if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
+ if (insn == BB_HEAD (bb)
+ || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
continue;
/* Get the register with which we are comparing. */
@@ -28210,9 +28211,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
fputs (":\n", file);
if (flag_pic)
{
- /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
+ /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
rtx tem = XEXP (DECL_RTL (function), 0);
- tem = plus_constant (GET_MODE (tem), tem, -7);
+ /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
+ pipeline offset is four rather than eight. Adjust the offset
+ accordingly. */
+ tem = plus_constant (GET_MODE (tem), tem,
+ TARGET_THUMB1_ONLY ? -3 : -7);
tem = gen_rtx_MINUS (GET_MODE (tem),
tem,
gen_rtx_SYMBOL_REF (Pmode,
diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h
index 4d9121436..ab5167a8b 100644
--- a/gcc-4.9/gcc/config/arm/arm.h
+++ b/gcc-4.9/gcc/config/arm/arm.h
@@ -17,8 +17,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef GCC_ARM_H
diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md
index 662465f2a..467f9ce4e 100644
--- a/gcc-4.9/gcc/config/arm/arm.md
+++ b/gcc-4.9/gcc/config/arm/arm.md
@@ -75,6 +75,8 @@
]
)
+;; UNSPEC_VOLATILE Usage:
+
;;---------------------------------------------------------------------------
;; Attributes
@@ -8349,8 +8351,8 @@
(define_insn_and_split "*arm_cmpdi_unsigned"
[(set (reg:CC_CZ CC_REGNUM)
- (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r")
- (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))]
+ (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r,r")
+ (match_operand:DI 1 "arm_di_operand" "Py,r,Di,rDi")))]
"TARGET_32BIT"
"#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1"
@@ -8370,9 +8372,9 @@
operands[1] = gen_lowpart (SImode, operands[1]);
}
[(set_attr "conds" "set")
- (set_attr "enabled_for_depr_it" "yes,yes,no")
- (set_attr "arch" "t2,t2,*")
- (set_attr "length" "6,6,8")
+ (set_attr "enabled_for_depr_it" "yes,yes,no,*")
+ (set_attr "arch" "t2,t2,t2,a")
+ (set_attr "length" "6,6,10,8")
(set_attr "type" "multiple")]
)
@@ -9860,6 +9862,7 @@
"TARGET_32BIT"
"%i1%?\\t%0, %2, %4%S3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "shift" "4")
(set_attr "arch" "a,t2,t2,a")
;; Thumb2 doesn't allow the stack pointer to be used for
diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h
index 37a6e611b..95735433d 100644
--- a/gcc-4.9/gcc/config/arm/arm_neon.h
+++ b/gcc-4.9/gcc/config/arm/arm_neon.h
@@ -1,5 +1,4 @@
-/* ARM NEON intrinsics include file. This file is generated automatically
- using neon-gen.ml. Please do not edit manually.
+/* ARM NEON intrinsics include file.
Copyright (C) 2006-2014 Free Software Foundation, Inc.
Contributed by CodeSourcery.
@@ -7707,12 +7706,32 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
}
+/* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as
+ follows. (nelt = the number of elements within a vector.)
+
+ Firstly, a value of N within a mask becomes (N ^ (nelt - 1)), as gcc vector
+ extension's indexing scheme is reversed *within each vector* (relative to the
+ neon intrinsics view), but without changing which of the two vectors is
+ selected.
+
+ Secondly, the elements within each mask are reversed, as the mask is itself a
+ vector, and will itself be loaded in reverse order (again, relative to the
+ neon intrinsics view, i.e. the order that would result from a "vld1"
+ instruction). */
+
__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vtrn_s8 (int8x8_t __a, int8x8_t __b)
{
int8x8x2_t __rv;
- __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
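The two rules above can be checked mechanically. A minimal, self-contained sketch (be_mask is a hypothetical helper; it assumes nelt is a power of two, so the XOR never changes which source vector an index selects) that derives a big-endian mask from its little-endian counterpart:

#include <stdio.h>

/* Derive the big-endian shuffle mask from the little-endian one:
   reverse the mask vector, then flip each index within its source
   vector via N ^ (nelt - 1).  */
static void
be_mask (const unsigned *le, unsigned *be, unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    be[i] = le[nelt - 1 - i] ^ (nelt - 1);
}

int
main (void)
{
  unsigned le[4] = { 0, 4, 2, 6 };  /* vtrn_s16 val[0], little-endian */
  unsigned be[4];
  be_mask (le, be, 4);
  for (unsigned i = 0; i < 4; i++)
    printf ("%u ", be[i]);          /* prints "5 1 7 3" */
  printf ("\n");
  return 0;
}

The printed mask { 5, 1, 7, 3 } is exactly the big-endian val[0] mask that vtrn_s16 uses below.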
@@ -7720,8 +7739,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vtrn_s16 (int16x4_t __a, int16x4_t __b)
{
int16x4x2_t __rv;
- __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7729,8 +7753,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vtrn_u8 (uint8x8_t __a, uint8x8_t __b)
{
uint8x8x2_t __rv;
- __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
@@ -7738,8 +7771,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vtrn_u16 (uint16x4_t __a, uint16x4_t __b)
{
uint16x4x2_t __rv;
- __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7747,8 +7785,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vtrn_p8 (poly8x8_t __a, poly8x8_t __b)
{
poly8x8x2_t __rv;
- __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
@@ -7756,8 +7803,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vtrn_p16 (poly16x4_t __a, poly16x4_t __b)
{
poly16x4x2_t __rv;
- __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7765,8 +7817,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vtrn_s32 (int32x2_t __a, int32x2_t __b)
{
int32x2x2_t __rv;
- __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7774,8 +7831,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vtrn_f32 (float32x2_t __a, float32x2_t __b)
{
float32x2x2_t __rv;
- __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7783,8 +7845,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vtrn_u32 (uint32x2_t __a, uint32x2_t __b)
{
uint32x2x2_t __rv;
- __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7792,8 +7859,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vtrnq_s8 (int8x16_t __a, int8x16_t __b)
{
int8x16x2_t __rv;
- __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
- __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
return __rv;
}
@@ -7801,8 +7877,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vtrnq_s16 (int16x8_t __a, int16x8_t __b)
{
int16x8x2_t __rv;
- __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
@@ -7810,8 +7895,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vtrnq_s32 (int32x4_t __a, int32x4_t __b)
{
int32x4x2_t __rv;
- __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7819,8 +7909,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vtrnq_f32 (float32x4_t __a, float32x4_t __b)
{
float32x4x2_t __rv;
- __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7828,8 +7923,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vtrnq_u8 (uint8x16_t __a, uint8x16_t __b)
{
uint8x16x2_t __rv;
- __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
- __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
return __rv;
}
@@ -7837,8 +7941,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vtrnq_u16 (uint16x8_t __a, uint16x8_t __b)
{
uint16x8x2_t __rv;
- __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
@@ -7846,8 +7959,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vtrnq_u32 (uint32x4_t __a, uint32x4_t __b)
{
uint32x4x2_t __rv;
- __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
- __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 });
+#endif
return __rv;
}
@@ -7855,8 +7973,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vtrnq_p8 (poly8x16_t __a, poly8x16_t __b)
{
poly8x16x2_t __rv;
- __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
- __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 });
+#endif
return __rv;
}
@@ -7864,8 +7991,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vtrnq_p16 (poly16x8_t __a, poly16x8_t __b)
{
poly16x8x2_t __rv;
- __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 });
- __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 1, 11, 3, 13, 5, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 10, 2, 12, 4, 14, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 2, 10, 4, 12, 6, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 9, 3, 11, 5, 13, 7, 15 });
+#endif
return __rv;
}
@@ -7873,8 +8009,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vzip_s8 (int8x8_t __a, int8x8_t __b)
{
int8x8x2_t __rv;
- __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -7882,8 +8027,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vzip_s16 (int16x4_t __a, int16x4_t __b)
{
int16x4x2_t __rv;
- __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
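For reference, a usage sketch of the intrinsic just defined (illustrative values; builds only for an ARM target with NEON enabled):

#include <arm_neon.h>

int16x4x2_t
zip_example (void)
{
  int16x4_t a = { 0, 1, 2, 3 }, b = { 4, 5, 6, 7 };
  /* Little-endian result: val[0] = { 0, 4, 1, 5 },
     val[1] = { 2, 6, 3, 7 } -- the two inputs interleaved.  */
  return vzip_s16 (a, b);
}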
@@ -7891,8 +8041,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vzip_u8 (uint8x8_t __a, uint8x8_t __b)
{
uint8x8x2_t __rv;
- __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -7900,8 +8059,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vzip_u16 (uint16x4_t __a, uint16x4_t __b)
{
uint16x4x2_t __rv;
- __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
@@ -7909,8 +8073,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vzip_p8 (poly8x8_t __a, poly8x8_t __b)
{
poly8x8x2_t __rv;
- __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -7918,8 +8091,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vzip_p16 (poly16x4_t __a, poly16x4_t __b)
{
poly16x4x2_t __rv;
- __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
@@ -7927,8 +8105,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vzip_s32 (int32x2_t __a, int32x2_t __b)
{
int32x2x2_t __rv;
- __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7936,8 +8119,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vzip_f32 (float32x2_t __a, float32x2_t __b)
{
float32x2x2_t __rv;
- __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7945,8 +8133,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vzip_u32 (uint32x2_t __a, uint32x2_t __b)
{
uint32x2x2_t __rv;
- __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -7954,8 +8147,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vzipq_s8 (int8x16_t __a, int8x16_t __b)
{
int8x16x2_t __rv;
- __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
- __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
return __rv;
}
@@ -7963,8 +8165,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vzipq_s16 (int16x8_t __a, int16x8_t __b)
{
int16x8x2_t __rv;
- __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -7972,8 +8183,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vzipq_s32 (int32x4_t __a, int32x4_t __b)
{
int32x4x2_t __rv;
- __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
@@ -7981,8 +8197,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vzipq_f32 (float32x4_t __a, float32x4_t __b)
{
float32x4x2_t __rv;
- __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
@@ -7990,8 +8211,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
{
uint8x16x2_t __rv;
- __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
- __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
return __rv;
}
@@ -7999,8 +8229,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
{
uint16x8x2_t __rv;
- __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -8008,8 +8247,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
{
uint32x4x2_t __rv;
- __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
- __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
+#endif
return __rv;
}
@@ -8017,8 +8261,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
{
poly8x16x2_t __rv;
- __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
- __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+#endif
return __rv;
}
@@ -8026,8 +8279,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
{
poly16x8x2_t __rv;
- __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 });
- __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 12, 4, 13, 5, 14, 6, 15, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 0, 9, 1, 10, 2, 11, 3 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 8, 1, 9, 2, 10, 3, 11 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 4, 12, 5, 13, 6, 14, 7, 15 });
+#endif
return __rv;
}
@@ -8035,8 +8297,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vuzp_s8 (int8x8_t __a, int8x8_t __b)
{
int8x8x2_t __rv;
- __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
@@ -8044,8 +8315,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
vuzp_s16 (int16x4_t __a, int16x4_t __b)
{
int16x4x2_t __rv;
- __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
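And the converse operation, again as an illustrative usage sketch (ARM/NEON only): vuzp_s16 de-interleaves its inputs.

#include <arm_neon.h>

int16x4x2_t
uzp_example (void)
{
  int16x4_t a = { 0, 10, 1, 11 }, b = { 2, 12, 3, 13 };
  /* Little-endian result: val[0] = { 0, 1, 2, 3 } (even lanes),
     val[1] = { 10, 11, 12, 13 } (odd lanes).  */
  return vuzp_s16 (a, b);
}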
@@ -8053,8 +8329,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
vuzp_s32 (int32x2_t __a, int32x2_t __b)
{
int32x2x2_t __rv;
- __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -8062,8 +8343,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
vuzp_f32 (float32x2_t __a, float32x2_t __b)
{
float32x2x2_t __rv;
- __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -8071,8 +8357,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
vuzp_u8 (uint8x8_t __a, uint8x8_t __b)
{
uint8x8x2_t __rv;
- __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
@@ -8080,8 +8375,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
vuzp_u16 (uint16x4_t __a, uint16x4_t __b)
{
uint16x4x2_t __rv;
- __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
@@ -8089,8 +8389,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
vuzp_u32 (uint32x2_t __a, uint32x2_t __b)
{
uint32x2x2_t __rv;
- __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
- __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 });
+#endif
return __rv;
}
@@ -8098,8 +8403,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
vuzp_p8 (poly8x8_t __a, poly8x8_t __b)
{
poly8x8x2_t __rv;
- __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
@@ -8107,8 +8421,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
vuzp_p16 (poly16x4_t __a, poly16x4_t __b)
{
poly16x4x2_t __rv;
- __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
@@ -8116,8 +8435,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
vuzpq_s8 (int8x16_t __a, int8x16_t __b)
{
int8x16x2_t __rv;
- __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
- __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
return __rv;
}
@@ -8125,8 +8453,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
vuzpq_s16 (int16x8_t __a, int16x8_t __b)
{
int16x8x2_t __rv;
- __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
@@ -8134,8 +8471,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
vuzpq_s32 (int32x4_t __a, int32x4_t __b)
{
int32x4x2_t __rv;
- __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
@@ -8143,8 +8485,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
vuzpq_f32 (float32x4_t __a, float32x4_t __b)
{
float32x4x2_t __rv;
- __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
@@ -8152,8 +8499,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
{
uint8x16x2_t __rv;
- __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
- __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
return __rv;
}
@@ -8161,8 +8517,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
{
uint16x8x2_t __rv;
- __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
@@ -8170,8 +8535,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
{
uint32x4x2_t __rv;
- __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
- __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
+#endif
return __rv;
}
@@ -8179,8 +8549,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
{
poly8x16x2_t __rv;
- __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
- __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+#endif
return __rv;
}
@@ -8188,8 +8567,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
{
poly16x8x2_t __rv;
- __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
- __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 });
+#ifdef __ARM_BIG_ENDIAN
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 9, 11, 13, 15, 1, 3, 5, 7 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 8, 10, 12, 14, 0, 2, 4, 6 });
+#else
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 0, 2, 4, 6, 8, 10, 12, 14 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
+ { 1, 3, 5, 7, 9, 11, 13, 15 });
+#endif
return __rv;
}
diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h
index bc223f8e3..7a576ac46 100644
--- a/gcc-4.9/gcc/config/arm/bpabi.h
+++ b/gcc-4.9/gcc/config/arm/bpabi.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Use the AAPCS ABI by default. */
diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h
index 2edf520de..15a32fb8a 100644
--- a/gcc-4.9/gcc/config/arm/elf.h
+++ b/gcc-4.9/gcc/config/arm/elf.h
@@ -16,8 +16,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef OBJECT_FORMAT_ELF
diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h
index 4d42cbfc8..350639f32 100644
--- a/gcc-4.9/gcc/config/arm/linux-eabi.h
+++ b/gcc-4.9/gcc/config/arm/linux-eabi.h
@@ -68,8 +68,9 @@
GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */
#undef GLIBC_DYNAMIC_LINKER
-#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3"
-#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3"
+
+#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.3"
+#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3"
#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT
#define GLIBC_DYNAMIC_LINKER \
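The prefixing works through C string-literal concatenation; a minimal sketch (the value "/grte/v4" is invented for illustration -- the real RUNTIME_ROOT_PREFIX comes from the build configuration):

#define RUNTIME_ROOT_PREFIX "/grte/v4"   /* hypothetical value */
#define DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3"
/* Adjacent string literals concatenate at translation time, so
   DYNAMIC_LINKER expands to "/grte/v4/lib/ld-linux-armhf.so.3".
   With RUNTIME_ROOT_PREFIX defined as the empty string, the original
   paths are unchanged.  */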
diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h
index 5dc3328e8..e825ae48c 100644
--- a/gcc-4.9/gcc/config/arm/linux-elf.h
+++ b/gcc-4.9/gcc/config/arm/linux-elf.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* elfos.h should have already been included. Now just override
@@ -57,7 +62,7 @@
#define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc"
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
#define LINUX_TARGET_LINK_SPEC "%{h*} \
%{static:-Bstatic} \
diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h
index 52a739c26..1dd043782 100644
--- a/gcc-4.9/gcc/config/arm/linux-gas.h
+++ b/gcc-4.9/gcc/config/arm/linux-gas.h
@@ -15,8 +15,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This is how we tell the assembler that a symbol is weak.
diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h
new file mode 100644
index 000000000..7ee5806b7
--- /dev/null
+++ b/gcc-4.9/gcc/config/arm/linux-grte.h
@@ -0,0 +1,27 @@
+/* Definitions for ARM Linux-based GRTE (Google RunTime Environment).
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Chris Demetriou.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef SUBSUBTARGET_EXTRA_SPECS
+#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS
diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml
deleted file mode 100644
index 5788a533e..000000000
--- a/gcc-4.9/gcc/config/arm/neon-docgen.ml
+++ /dev/null
@@ -1,424 +0,0 @@
-(* ARM NEON documentation generator.
-
- Copyright (C) 2006-2014 Free Software Foundation, Inc.
- Contributed by CodeSourcery.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>.
-
- This is an O'Caml program. The O'Caml compiler is available from:
-
- http://caml.inria.fr/
-
- Or from your favourite OS's friendly packaging system. Tested with version
- 3.09.2, though other versions will probably work too.
-
- Compile with:
- ocamlc -c neon.ml
- ocamlc -o neon-docgen neon.cmo neon-docgen.ml
-
- Run with:
- /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
-*)
-
-open Neon
-
-(* The combined "ops" and "reinterp" table. *)
-let ops_reinterp = reinterp @ ops
-
-(* Helper functions for extracting things from the "ops" table. *)
-let single_opcode desired_opcode () =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (opcode, _, _, _, _, _) ->
- if opcode = desired_opcode then row :: got_so_far
- else got_so_far
- ) [] ops_reinterp
-
-let multiple_opcodes desired_opcodes () =
- List.fold_left (fun got_so_far ->
- fun desired_opcode ->
- (single_opcode desired_opcode ()) @ got_so_far)
- [] desired_opcodes
-
-let ldx_opcode number () =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (opcode, _, _, _, _, _) ->
- match opcode with
- Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
- row :: got_so_far
- | _ -> got_so_far
- ) [] ops_reinterp
-
-let stx_opcode number () =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (opcode, _, _, _, _, _) ->
- match opcode with
- Vstx n | Vstx_lane n when n = number ->
- row :: got_so_far
- | _ -> got_so_far
- ) [] ops_reinterp
-
-let tbl_opcode () =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (opcode, _, _, _, _, _) ->
- match opcode with
- Vtbl _ -> row :: got_so_far
- | _ -> got_so_far
- ) [] ops_reinterp
-
-let tbx_opcode () =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (opcode, _, _, _, _, _) ->
- match opcode with
- Vtbx _ -> row :: got_so_far
- | _ -> got_so_far
- ) [] ops_reinterp
-
-(* The groups of intrinsics. *)
-let intrinsic_groups =
- [ "Addition", single_opcode Vadd;
- "Multiplication", single_opcode Vmul;
- "Multiply-accumulate", single_opcode Vmla;
- "Multiply-subtract", single_opcode Vmls;
- "Fused-multiply-accumulate", single_opcode Vfma;
- "Fused-multiply-subtract", single_opcode Vfms;
- "Round to integral (to nearest, ties to even)", single_opcode Vrintn;
- "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta;
- "Round to integral (towards +Inf)", single_opcode Vrintp;
- "Round to integral (towards -Inf)", single_opcode Vrintm;
- "Round to integral (towards 0)", single_opcode Vrintz;
- "Subtraction", single_opcode Vsub;
- "Comparison (equal-to)", single_opcode Vceq;
- "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
- "Comparison (less-than-or-equal-to)", single_opcode Vcle;
- "Comparison (greater-than)", single_opcode Vcgt;
- "Comparison (less-than)", single_opcode Vclt;
- "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
- "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
- "Comparison (absolute greater-than)", single_opcode Vcagt;
- "Comparison (absolute less-than)", single_opcode Vcalt;
- "Test bits", single_opcode Vtst;
- "Absolute difference", single_opcode Vabd;
- "Absolute difference and accumulate", single_opcode Vaba;
- "Maximum", single_opcode Vmax;
- "Minimum", single_opcode Vmin;
- "Pairwise add", single_opcode Vpadd;
- "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
- "Folding maximum", single_opcode Vpmax;
- "Folding minimum", single_opcode Vpmin;
- "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
- "Vector shift left", single_opcode Vshl;
- "Vector shift left by constant", single_opcode Vshl_n;
- "Vector shift right by constant", single_opcode Vshr_n;
- "Vector shift right by constant and accumulate", single_opcode Vsra_n;
- "Vector shift right and insert", single_opcode Vsri;
- "Vector shift left and insert", single_opcode Vsli;
- "Absolute value", single_opcode Vabs;
- "Negation", single_opcode Vneg;
- "Bitwise not", single_opcode Vmvn;
- "Count leading sign bits", single_opcode Vcls;
- "Count leading zeros", single_opcode Vclz;
- "Count number of set bits", single_opcode Vcnt;
- "Reciprocal estimate", single_opcode Vrecpe;
- "Reciprocal square-root estimate", single_opcode Vrsqrte;
- "Get lanes from a vector", single_opcode Vget_lane;
- "Set lanes in a vector", single_opcode Vset_lane;
- "Create vector from literal bit pattern", single_opcode Vcreate;
- "Set all lanes to the same value",
- multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
- "Combining vectors", single_opcode Vcombine;
- "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
- "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
- "Move, single_opcode narrowing", single_opcode Vmovn;
- "Move, single_opcode long", single_opcode Vmovl;
- "Table lookup", tbl_opcode;
- "Extended table lookup", tbx_opcode;
- "Multiply, lane", single_opcode Vmul_lane;
- "Long multiply, lane", single_opcode Vmull_lane;
- "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
- "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
- "Multiply-accumulate, lane", single_opcode Vmla_lane;
- "Multiply-subtract, lane", single_opcode Vmls_lane;
- "Vector multiply by scalar", single_opcode Vmul_n;
- "Vector long multiply by scalar", single_opcode Vmull_n;
- "Vector saturating doubling long multiply by scalar",
- single_opcode Vqdmull_n;
- "Vector saturating doubling multiply high by scalar",
- single_opcode Vqdmulh_n;
- "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
- "Vector multiply-subtract by scalar", single_opcode Vmls_n;
- "Vector extract", single_opcode Vext;
- "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
- "Bit selection", single_opcode Vbsl;
- "Transpose elements", single_opcode Vtrn;
- "Zip elements", single_opcode Vzip;
- "Unzip elements", single_opcode Vuzp;
- "Element/structure loads, VLD1 variants", ldx_opcode 1;
- "Element/structure stores, VST1 variants", stx_opcode 1;
- "Element/structure loads, VLD2 variants", ldx_opcode 2;
- "Element/structure stores, VST2 variants", stx_opcode 2;
- "Element/structure loads, VLD3 variants", ldx_opcode 3;
- "Element/structure stores, VST3 variants", stx_opcode 3;
- "Element/structure loads, VLD4 variants", ldx_opcode 4;
- "Element/structure stores, VST4 variants", stx_opcode 4;
- "Logical operations (AND)", single_opcode Vand;
- "Logical operations (OR)", single_opcode Vorr;
- "Logical operations (exclusive OR)", single_opcode Veor;
- "Logical operations (AND-NOT)", single_opcode Vbic;
- "Logical operations (OR-NOT)", single_opcode Vorn;
- "Reinterpret casts", single_opcode Vreinterp ]
-
-(* Given an intrinsic shape, produce a string to document the corresponding
- operand shapes. *)
-let rec analyze_shape shape =
- let rec n_things n thing =
- match n with
- 0 -> []
- | n -> thing :: (n_things (n - 1) thing)
- in
- let rec analyze_shape_elt reg_no elt =
- match elt with
- Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
- | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
- | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
- | Immed -> "#@var{0}"
- | VecArray (1, elt) ->
- let elt_regexp = analyze_shape_elt 0 elt in
- "@{" ^ elt_regexp ^ "@}"
- | VecArray (n, elt) ->
- let rec f m =
- match m with
- 0 -> []
- | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
- in
- let ops = List.rev (f n) in
- "@{" ^ (commas (fun x -> x) ops "") ^ "@}"
- | (PtrTo elt | CstPtrTo elt) ->
- "[" ^ (analyze_shape_elt reg_no elt) ^ "]"
- | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
- | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
- | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
- | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts))
- in
- match shape with
- All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
- | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
- ", " ^ (analyze_shape_elt 0 Dreg)
- | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
- (analyze_shape_elt 0 elt)
- | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
- ", " ^ (analyze_shape_elt 0 Dreg)
- | Wide_noreg elt -> analyze_shape (Long_noreg elt)
- | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
- ", " ^ (analyze_shape_elt 0 Qreg)
- | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
- | By_scalar Dreg ->
- analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
- | By_scalar Qreg ->
- analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
- | By_scalar _ -> assert false
- | Wide_lane ->
- analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
- | Wide_scalar ->
- analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
- | Pair_result elt ->
- let elt_regexp = analyze_shape_elt 0 elt in
- let elt_regexp' = analyze_shape_elt 1 elt in
- elt_regexp ^ ", " ^ elt_regexp'
- | Unary_scalar _ -> "FIXME Unary_scalar"
- | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
- | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
- | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
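For orientation, analyze_shape turns a shape into the texinfo operand template used in the generated docs. Tracing the code above (these strings are derived by hand, not taken from generator output): Long yields "@var{q0}, @var{d0}, @var{d0}", Narrow yields "@var{d0}, @var{q0}, @var{q0}", and Binary_imm Dreg yields "@var{d0}, @var{d0}, #@var{0}".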
-
-(* Document a single intrinsic. *)
-let describe_intrinsic first chan
- (elt_ty, (_, features, shape, name, munge, _)) =
- let c_arity, new_elt_ty = munge shape elt_ty in
- let c_types = strings_of_arity c_arity in
- Printf.fprintf chan "@itemize @bullet\n";
- let item_code = if first then "@item" else "@itemx" in
- Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
- (intrinsic_name name) (string_of_elt elt_ty);
- Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
- if not (List.exists (fun feature -> feature = No_op) features) then
- begin
- let print_one_insn name =
- Printf.fprintf chan "@code{";
- let no_suffix = (new_elt_ty = NoElts) in
- let name_with_suffix =
- if no_suffix then name
- else name ^ "." ^ (string_of_elt_dots new_elt_ty)
- in
- let possible_operands = analyze_all_shapes features shape
- analyze_shape
- in
- let rec print_one_possible_operand op =
- Printf.fprintf chan "%s %s}" name_with_suffix op
- in
- (* If the intrinsic expands to multiple instructions, we assume
- they are all of the same form. *)
- print_one_possible_operand (List.hd possible_operands)
- in
- let rec print_insns names =
- match names with
- [] -> ()
- | [name] -> print_one_insn name
- | name::names -> (print_one_insn name;
- Printf.fprintf chan " @emph{or} ";
- print_insns names)
- in
- let insn_names = get_insn_names features name in
- Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
- print_insns insn_names;
- Printf.fprintf chan "\n"
- end;
- Printf.fprintf chan "@end itemize\n";
- Printf.fprintf chan "\n\n"
-
-(* Document a group of intrinsics. *)
-let document_group chan (group_title, group_extractor) =
- (* Extract the rows in question from the ops table and then turn them
- into a list of intrinsics. *)
- let intrinsics =
- List.fold_left (fun got_so_far ->
- fun row ->
- match row with
- (_, _, _, _, _, elt_tys) ->
- List.fold_left (fun got_so_far' ->
- fun elt_ty ->
- (elt_ty, row) :: got_so_far')
- got_so_far elt_tys
- ) [] (group_extractor ())
- in
- (* Emit the title for this group. *)
- Printf.fprintf chan "@subsubsection %s\n\n" group_title;
- (* Emit a description of each intrinsic. *)
- List.iter (describe_intrinsic true chan) intrinsics;
- (* Close this group. *)
- Printf.fprintf chan "\n\n"
-
-let gnu_header chan =
- List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
- "@c Copyright (C) 2006-2014 Free Software Foundation, Inc.";
- "@c This is part of the GCC manual.";
- "@c For copying conditions, see the file gcc.texi.";
- "";
- "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
- "@c Please do not edit manually."]
-
-let crypto_doc =
-"
-@itemize @bullet
-@item poly128_t vldrq_p128(poly128_t const *)
-@end itemize
-
-@itemize @bullet
-@item void vstrq_p128(poly128_t *, poly128_t)
-@end itemize
-
-@itemize @bullet
-@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
-@end itemize
-
-@itemize @bullet
-@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
-@end itemize
-
-@itemize @bullet
-@item uint32_t vsha1h_u32 (uint32_t)
-@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}}
-@end itemize
-
-@itemize @bullet
-@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
-@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}}
-@end itemize
-
-@itemize @bullet
-@item poly128_t vmull_p64 (poly64_t a, poly64_t b)
-@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
-@end itemize
-
-@itemize @bullet
-@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b)
-@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
-@end itemize
-"
-
-(* Program entry point. *)
-let _ =
- if Array.length Sys.argv <> 2 then
- failwith "Usage: neon-docgen <output filename>"
- else
- let file = Sys.argv.(1) in
- try
- let chan = open_out file in
- gnu_header chan;
- List.iter (document_group chan) intrinsic_groups;
- Printf.fprintf chan "%s\n" crypto_doc;
- close_out chan
- with Sys_error sys ->
- failwith ("Could not create output file " ^ file ^ ": " ^ sys)
diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml
deleted file mode 100644
index f3dd86b0a..000000000
--- a/gcc-4.9/gcc/config/arm/neon-gen.ml
+++ /dev/null
@@ -1,520 +0,0 @@
-(* Auto-generate ARM Neon intrinsics header file.
- Copyright (C) 2006-2014 Free Software Foundation, Inc.
- Contributed by CodeSourcery.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3, or (at your option) any later
- version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>.
-
- This is an O'Caml program. The O'Caml compiler is available from:
-
- http://caml.inria.fr/
-
- Or from your favourite OS's friendly packaging system. Tested with version
- 3.09.2, though other versions will probably work too.
-
- Compile with:
- ocamlc -c neon.ml
- ocamlc -o neon-gen neon.cmo neon-gen.ml
-
- Run with:
- ./neon-gen > arm_neon.h
-*)
-
-open Neon
-
-(* The format codes used in the following functions are documented at:
- http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
- #6_printflikefunctionsforprettyprinting
- (one line, remove the backslash.)
-*)
-
-(* Following functions can be used to approximate GNU indentation style. *)
-let start_function () =
- Format.printf "@[<v 0>";
- ref 0
-
-let end_function nesting =
- match !nesting with
- 0 -> Format.printf "@;@;@]"
- | _ -> failwith ("Bad nesting (ending function at level "
- ^ (string_of_int !nesting) ^ ")")
-
-let open_braceblock nesting =
- begin match !nesting with
- 0 -> Format.printf "@,@<0>{@[<v 2>@,"
- | _ -> Format.printf "@,@[<v 2> @<0>{@[<v 2>@,"
- end;
- incr nesting
-
-let close_braceblock nesting =
- decr nesting;
- match !nesting with
- 0 -> Format.printf "@]@,@<0>}"
- | _ -> Format.printf "@]@,@<0>}@]"
-
-let print_function arity fnname body =
- let ffmt = start_function () in
- Format.printf "__extension__ static __inline ";
- let inl = "__attribute__ ((__always_inline__))" in
- begin match arity with
- Arity0 ret ->
- Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname
- | Arity1 (ret, arg0) ->
- Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname
- (string_of_vectype arg0)
- | Arity2 (ret, arg0, arg1) ->
- Format.printf "%s %s@,%s (%s __a, %s __b)"
- (string_of_vectype ret) inl fnname (string_of_vectype arg0)
- (string_of_vectype arg1)
- | Arity3 (ret, arg0, arg1, arg2) ->
- Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)"
- (string_of_vectype ret) inl fnname (string_of_vectype arg0)
- (string_of_vectype arg1) (string_of_vectype arg2)
- | Arity4 (ret, arg0, arg1, arg2, arg3) ->
- Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
- (string_of_vectype ret) inl fnname (string_of_vectype arg0)
- (string_of_vectype arg1) (string_of_vectype arg2)
- (string_of_vectype arg3)
- end;
- open_braceblock ffmt;
- let rec print_lines = function
- [] -> ()
- | "" :: lines -> print_lines lines
- | [line] -> Format.printf "%s" line
- | line::lines -> Format.printf "%s@," line ; print_lines lines in
- print_lines body;
- close_braceblock ffmt;
- end_function ffmt
-
-let union_string num elts base =
- let itype = inttype_for_array num elts in
- let iname = string_of_inttype itype
- and sname = string_of_vectype (T_arrayof (num, elts)) in
- Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base
-
-let rec signed_ctype = function
- T_uint8x8 | T_poly8x8 -> T_int8x8
- | T_uint8x16 | T_poly8x16 -> T_int8x16
- | T_uint16x4 | T_poly16x4 -> T_int16x4
- | T_uint16x8 | T_poly16x8 -> T_int16x8
- | T_uint32x2 -> T_int32x2
- | T_uint32x4 -> T_int32x4
- | T_uint64x1 -> T_int64x1
- | T_uint64x2 -> T_int64x2
- | T_poly64x2 -> T_int64x2
- (* Cast to types defined by mode in arm.c, not random types pulled in from
- the <stdint.h> header in use. This fixes incompatible pointer errors when
- compiling with C++. *)
- | T_uint8 | T_int8 -> T_intQI
- | T_uint16 | T_int16 -> T_intHI
- | T_uint32 | T_int32 -> T_intSI
- | T_uint64 | T_int64 -> T_intDI
- | T_float16 -> T_floatHF
- | T_float32 -> T_floatSF
- | T_poly8 -> T_intQI
- | T_poly16 -> T_intHI
- | T_poly64 -> T_intDI
- | T_poly128 -> T_intTI
- | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
- | T_ptrto elt -> T_ptrto (signed_ctype elt)
- | T_const elt -> T_const (signed_ctype elt)
- | x -> x
-
-let add_cast ctype cval =
- let stype = signed_ctype ctype in
- if ctype <> stype then
- Printf.sprintf "(%s) %s" (string_of_vectype stype) cval
- else
- cval
-
-let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")"
-
-(* Return a tuple of a list of declarations to go at the start of the function,
- and a list of statements needed to return THING. *)
-let return arity thing =
- match arity with
- Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
- | Arity4 (ret, _, _, _, _) ->
- begin match ret with
- T_arrayof (num, vec) ->
- let uname = union_string num vec "__rv" in
- [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"]
- | T_void ->
- [], [thing ^ ";"]
- | _ ->
- [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"]
- end
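To make the union trick concrete, this is the general form of wrapper the generator emits into arm_neon.h when the return type is an array of vectors (a hand-reconstructed sketch from union_string and return above; the exact builtin name and the __builtin_neon_ti carrier type vary per intrinsic):

__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
vld2_s8 (const int8_t * __a)
{
  /* The builtin returns one wide integer mode; the union reinterprets
     it as the two-vector struct without a copy through memory.  */
  union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
  __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a);
  return __rv.__i;
}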
-
-let mask_shape_for_shuffle = function
- All (num, reg) -> All (num, reg)
- | Pair_result reg -> All (2, reg)
- | _ -> failwith "mask_for_shuffle"
-
-let mask_elems shuffle shape elttype part =
- let elem_size = elt_width elttype in
- let num_elems =
- match regmap shape 0 with
- Dreg -> 64 / elem_size
- | Qreg -> 128 / elem_size
- | _ -> failwith "mask_elems" in
- shuffle elem_size num_elems part
-
-(* Return a tuple of a list of declarations and a list of statements needed
-   to implement an intrinsic using __builtin_shuffle.  SHUFFLE is a function
-   which returns a list of elements suitable for use as a mask.  *)
-
-let shuffle_fn shuffle shape arity elttype =
- let mshape = mask_shape_for_shuffle shape in
- let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in
- let masktype_str = string_of_vectype masktype in
- let shuffle_res = type_for_elt mshape elttype 0 in
- let shuffle_res_str = string_of_vectype shuffle_res in
- match arity with
- Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _)
- | Arity4 (ret, _, _, _, _) ->
- begin match ret with
- T_arrayof (num, vec) ->
- let elems1 = mask_elems shuffle mshape elttype `lo
- and elems2 = mask_elems shuffle mshape elttype `hi in
- let mask1 = (String.concat ", " (List.map string_of_int elems1))
- and mask2 = (String.concat ", " (List.map string_of_int elems2)) in
- let shuf1 = Printf.sprintf
- "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
- shuffle_res_str masktype_str mask1
- and shuf2 = Printf.sprintf
- "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });"
- shuffle_res_str masktype_str mask2 in
- [Printf.sprintf "%s __rv;" (string_of_vectype ret);],
- [shuf1; shuf2; "return __rv;"]
- | _ ->
- let elems = mask_elems shuffle mshape elttype `lo in
- let mask = (String.concat ", " (List.map string_of_int elems)) in
- let shuf = Printf.sprintf
- "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in
- [""],
- [shuf]
- end
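For the non-array branch, the emitted body is a single __builtin_shuffle call; an element-reversal intrinsic, for instance, comes out roughly as below (a sketch: the mask values are whatever the SHUFFLE callback computes for the element type):

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrev64_s8 (int8x8_t __a)
{
  /* The mask selects the eight bytes of __a in reverse order.  */
  return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
}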
-
-let rec element_type ctype =
- match ctype with
- T_arrayof (_, v) -> element_type v
- | _ -> ctype
-
-let params ps =
- let pdecls = ref [] in
- let ptype t p =
- match t with
- T_arrayof (num, elts) ->
- let uname = union_string num elts (p ^ "u") in
- let decl = Printf.sprintf "%s = { %s };" uname p in
- pdecls := decl :: !pdecls;
- p ^ "u.__o"
- | _ -> add_cast t p in
- let plist = match ps with
- Arity0 _ -> []
- | Arity1 (_, t1) -> [ptype t1 "__a"]
- | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"]
- | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"]
- | Arity4 (_, t1, t2, t3, t4) ->
- [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in
- !pdecls, plist
-
-let modify_params features plist =
- let is_flipped =
- List.exists (function Flipped _ -> true | _ -> false) features in
- if is_flipped then
- match plist with
- [ a; b ] -> [ b; a ]
- | _ ->
- failwith ("Don't know how to flip args " ^ (String.concat ", " plist))
- else
- plist
-
-(* !!! Decide whether to add an extra information word based on the shape
- form. *)
-let extra_word shape features paramlist bits =
- let use_word =
- match shape with
- All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow
- | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm
- | Narrow_imm -> true
- | _ -> List.mem InfoWord features
- in
- if use_word then
- paramlist @ [string_of_int bits]
- else
- paramlist
-
-(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
- Bit 1 represents floats & polynomials (1), or ordinary integers (0).
- Bit 2 represents rounding (1) vs none (0). *)
-let infoword_value elttype features =
- let bits01 =
- match elt_class elttype with
- Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001
- | Poly -> 0b010
- | Float -> 0b011
- | _ -> 0b000
- and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in
- bits01 lor rounding_bit
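For example, a signed rounding operation gets 0b001 | 0b100 = 5, while an unsigned non-rounding one gets 0. A C restatement of the same encoding (a hypothetical helper, for illustration only):

/* Bit 0: signed, or float-vs-poly; bit 1: float/poly; bit 2: rounding.  */
unsigned infoword (int is_signed, int is_float, int is_poly, int rounding)
{
  unsigned bits01 = is_float ? 3u : is_poly ? 2u : is_signed ? 1u : 0u;
  return bits01 | (rounding ? 4u : 0u);
}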
-
-(* "Cast" type operations will throw an exception in mode_of_elt (actually in
- elt_width, called from there). Deal with that here, and generate a suffix
- with multiple modes (<to><from>). *)
-let rec mode_suffix elttype shape =
- try
- let mode = mode_of_elt elttype shape in
- string_of_mode mode
- with MixedMode (dst, src) ->
- let dstmode = mode_of_elt ~argpos:0 dst shape
- and srcmode = mode_of_elt ~argpos:1 src shape in
- string_of_mode dstmode ^ string_of_mode srcmode
-
-let get_shuffle features =
- try
- match List.find (function Use_shuffle _ -> true | _ -> false) features with
- Use_shuffle fn -> Some fn
- | _ -> None
- with Not_found -> None
-
-let print_feature_test_start features =
- try
- match List.find (fun feature ->
- match feature with Requires_feature _ -> true
- | Requires_arch _ -> true
- | Requires_FP_bit _ -> true
- | _ -> false)
- features with
- Requires_feature feature ->
- Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
- | Requires_arch arch ->
- Format.printf "#if __ARM_ARCH >= %d@\n" arch
- | Requires_FP_bit bit ->
- Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
- (1 lsl bit)
- | _ -> assert false
- with Not_found -> assert true
-
-let print_feature_test_end features =
- let feature =
- List.exists (function Requires_feature _ -> true
- | Requires_arch _ -> true
- | Requires_FP_bit _ -> true
- | _ -> false) features in
- if feature then Format.printf "#endif@\n"
-
-
-let print_variant opcode features shape name (ctype, asmtype, elttype) =
- let bits = infoword_value elttype features in
- let modesuf = mode_suffix elttype shape in
- let pdecls, paramlist = params ctype in
- let rdecls, stmts =
- match get_shuffle features with
- Some shuffle -> shuffle_fn shuffle shape ctype elttype
- | None ->
- let paramlist' = modify_params features paramlist in
- let paramlist'' = extra_word shape features paramlist' bits in
- let parstr = String.concat ", " paramlist'' in
- let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)"
- (builtin_name features name) modesuf parstr in
- return ctype builtin in
- let body = pdecls @ rdecls @ stmts
- and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
- begin
- print_feature_test_start features;
- print_function ctype fnname body;
- print_feature_test_end features;
- end
-
-(* When this function processes the element types in the ops table, it rewrites
- them in a list of tuples (a,b,c):
- a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
- b : Asm type : a single, processed element type, e.g. P16. This is the
- type which should be attached to the asm opcode.
- c : Variant type : the unprocessed type for this variant (e.g. in add
- instructions which don't care about the sign, b might be i16 and c
- might be s16.)
-*)
-
-let print_op (opcode, features, shape, name, munge, types) =
- let sorted_types = List.sort compare types in
- let munged_types = List.map
- (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in
- List.iter
- (fun variant -> print_variant opcode features shape name variant)
- munged_types
-
-let print_ops ops =
- List.iter print_op ops
-
-(* Output type definitions. Table entries are:
- cbase : "C" name for the type.
- abase : "ARM" base name for the type (i.e. int in int8x8_t).
- esize : element size.
- enum : element count.
- alevel: architecture level at which available.
-*)
-
-type fpulevel = CRYPTO | ALL
-
-let deftypes () =
- let typeinfo = [
- (* Doubleword vector types. *)
- "__builtin_neon_qi", "int", 8, 8, ALL;
- "__builtin_neon_hi", "int", 16, 4, ALL;
- "__builtin_neon_si", "int", 32, 2, ALL;
- "__builtin_neon_di", "int", 64, 1, ALL;
- "__builtin_neon_hf", "float", 16, 4, ALL;
- "__builtin_neon_sf", "float", 32, 2, ALL;
- "__builtin_neon_poly8", "poly", 8, 8, ALL;
- "__builtin_neon_poly16", "poly", 16, 4, ALL;
- "__builtin_neon_poly64", "poly", 64, 1, CRYPTO;
- "__builtin_neon_uqi", "uint", 8, 8, ALL;
- "__builtin_neon_uhi", "uint", 16, 4, ALL;
- "__builtin_neon_usi", "uint", 32, 2, ALL;
- "__builtin_neon_udi", "uint", 64, 1, ALL;
-
- (* Quadword vector types. *)
- "__builtin_neon_qi", "int", 8, 16, ALL;
- "__builtin_neon_hi", "int", 16, 8, ALL;
- "__builtin_neon_si", "int", 32, 4, ALL;
- "__builtin_neon_di", "int", 64, 2, ALL;
- "__builtin_neon_sf", "float", 32, 4, ALL;
- "__builtin_neon_poly8", "poly", 8, 16, ALL;
- "__builtin_neon_poly16", "poly", 16, 8, ALL;
- "__builtin_neon_poly64", "poly", 64, 2, CRYPTO;
- "__builtin_neon_uqi", "uint", 8, 16, ALL;
- "__builtin_neon_uhi", "uint", 16, 8, ALL;
- "__builtin_neon_usi", "uint", 32, 4, ALL;
- "__builtin_neon_udi", "uint", 64, 2, ALL
- ] in
- List.iter
- (fun (cbase, abase, esize, enum, fpulevel) ->
- let attr =
- match enum with
- 1 -> ""
- | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))"
- (esize * enum / 8) in
- if fpulevel == CRYPTO then
- Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n";
- Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr;
- if fpulevel == CRYPTO then
- Format.printf "#endif\n";)
- typeinfo;
- Format.print_newline ();
- (* Extra types not in <stdint.h>. *)
- Format.printf "typedef float float32_t;\n";
- Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
- Format.printf "typedef __builtin_neon_poly16 poly16_t;\n";
- Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n";
- Format.printf "typedef __builtin_neon_poly64 poly64_t;\n";
- Format.printf "typedef __builtin_neon_poly128 poly128_t;\n";
- Format.printf "#endif\n"
-
-(* Output structs containing arrays, for load & store instructions etc.
- poly128_t is deliberately not included here because it has no array types
- defined for it. *)
-
-let arrtypes () =
- let typeinfo = [
- "int", 8, ALL; "int", 16, ALL;
- "int", 32, ALL; "int", 64, ALL;
- "uint", 8, ALL; "uint", 16, ALL;
- "uint", 32, ALL; "uint", 64, ALL;
- "float", 32, ALL; "poly", 8, ALL;
- "poly", 16, ALL; "poly", 64, CRYPTO
- ] in
- let writestruct elname elsize regsize arrsize fpulevel =
- let elnum = regsize / elsize in
- let structname =
- Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in
- let sfmt = start_function () in
- Format.printf "%stypedef struct %s"
- (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname;
- open_braceblock sfmt;
- Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize;
- close_braceblock sfmt;
- Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else "");
- end_function sfmt;
- in
- for n = 2 to 4 do
- List.iter
- (fun (elname, elsize, alevel) ->
- writestruct elname elsize 64 n alevel;
- writestruct elname elsize 128 n alevel)
- typeinfo
- done
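For instance, writestruct "int" 8 64 2 ALL computes elnum = 64 / 8 = 8 and emits (a sketch of the generated text, in the GNU brace style the brace-block helpers produce):

typedef struct int8x8x2_t
{
  int8x8_t val[2];
} int8x8x2_t;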
-
-let print_lines = List.iter (fun s -> Format.printf "%s@\n" s)
-
-(* Do it. *)
-
-let _ =
- print_lines [
-"/* ARM NEON intrinsics include file. This file is generated automatically";
-" using neon-gen.ml. Please do not edit manually.";
-"";
-" Copyright (C) 2006-2014 Free Software Foundation, Inc.";
-" Contributed by CodeSourcery.";
-"";
-" This file is part of GCC.";
-"";
-" GCC is free software; you can redistribute it and/or modify it";
-" under the terms of the GNU General Public License as published";
-" by the Free Software Foundation; either version 3, or (at your";
-" option) any later version.";
-"";
-" GCC is distributed in the hope that it will be useful, but WITHOUT";
-" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
-" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
-" License for more details.";
-"";
-" Under Section 7 of GPL version 3, you are granted additional";
-" permissions described in the GCC Runtime Library Exception, version";
-" 3.1, as published by the Free Software Foundation.";
-"";
-" You should have received a copy of the GNU General Public License and";
-" a copy of the GCC Runtime Library Exception along with this program;";
-" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
-" <http://www.gnu.org/licenses/>. */";
-"";
-"#ifndef _GCC_ARM_NEON_H";
-"#define _GCC_ARM_NEON_H 1";
-"";
-"#ifndef __ARM_NEON__";
-"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
-"#else";
-"";
-"#ifdef __cplusplus";
-"extern \"C\" {";
-"#endif";
-"";
-"#include <stdint.h>";
-""];
- deftypes ();
- arrtypes ();
- Format.print_newline ();
- print_ops ops;
- Format.print_newline ();
- print_ops reinterp;
- print_ops reinterpq;
- Format.printf "%s" crypto_intrinsics;
- print_lines [
-"#ifdef __cplusplus";
-"}";
-"#endif";
-"#endif";
-"#endif"]
diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h
index 9deda9679..645551fdf 100644
--- a/gcc-4.9/gcc/config/arm/netbsd-elf.h
+++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Run-time Target Specification. */
diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h
index b5055ce40..4d57d1388 100644
--- a/gcc-4.9/gcc/config/arm/uclinux-eabi.h
+++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Override settings that are different to the uclinux-elf or
diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h
index 5cd4fe527..43edba70c 100644
--- a/gcc-4.9/gcc/config/arm/uclinux-elf.h
+++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We don't want a PLT. */
diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md
index 87edf18e2..db3fea5f5 100644
--- a/gcc-4.9/gcc/config/arm/unspecs.md
+++ b/gcc-4.9/gcc/config/arm/unspecs.md
@@ -58,6 +58,7 @@
; instruction stream.
UNSPEC_PIC_OFFSET ; A symbolic 12-bit OFFSET that has been treated
; correctly for PIC usage.
+ UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol.
UNSPEC_GOTSYM_OFF ; The offset of the start of the GOT from a
; a given symbolic address.
UNSPEC_THUMB1_CASESI ; A Thumb1 compressed dispatch-table call.
@@ -70,6 +71,11 @@
; that.
UNSPEC_UNALIGNED_STORE ; Same for str/strh.
UNSPEC_PIC_UNIFIED ; Create a common pic addressing form.
+ UNSPEC_PROLOGUE_USE ; As USE insns are not meaningful after reload,
+ ; this unspec is used to prevent the deletion of
+ ; instructions setting registers for EH handling
+ ; and stack frame generation. Operand 0 is the
+ ; register to "use".
UNSPEC_LL ; Represent an unpaired load-register-exclusive.
UNSPEC_VRINTZ ; Represent a float to integral float rounding
; towards zero.
@@ -83,11 +89,12 @@
; FPSCR rounding mode and signal inexactness.
UNSPEC_VRINTA ; Represent a float to integral float rounding
; towards nearest, ties away from zero.
- UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol
])
(define_c_enum "unspec" [
UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction.
+ UNSPEC_WMADDS ; Used by the intrinsic form of the iWMMXt WMADDS instruction.
+ UNSPEC_WMADDU ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction.
UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction.
UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction.
diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h
index 8bef16bc4..faf84724d 100644
--- a/gcc-4.9/gcc/config/arm/vxworks.h
+++ b/gcc-4.9/gcc/config/arm/vxworks.h
@@ -17,8 +17,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md
index 1652415b1..9c8489edd 100644
--- a/gcc-4.9/gcc/config/avr/avr-fixed.md
+++ b/gcc-4.9/gcc/config/avr/avr-fixed.md
@@ -430,8 +430,8 @@
}
// Input and output of the libgcc function
- const unsigned int regno_in[] = { -1, 22, 22, -1, 18 };
- const unsigned int regno_out[] = { -1, 24, 24, -1, 22 };
+ const unsigned int regno_in[] = { -1U, 22, 22, -1U, 18 };
+ const unsigned int regno_out[] = { -1U, 24, 24, -1U, 22 };
operands[3] = gen_rtx_REG (<MODE>mode, regno_out[(size_t) GET_MODE_SIZE (<MODE>mode)]);
operands[4] = gen_rtx_REG (<MODE>mode, regno_in[(size_t) GET_MODE_SIZE (<MODE>mode)]);
diff --git a/gcc-4.9/gcc/config/avr/avr.h b/gcc-4.9/gcc/config/avr/avr.h
index 78434ec5e..9d34983e4 100644
--- a/gcc-4.9/gcc/config/avr/avr.h
+++ b/gcc-4.9/gcc/config/avr/avr.h
@@ -251,18 +251,18 @@ enum reg_class {
#define REG_CLASS_CONTENTS { \
{0x00000000,0x00000000}, /* NO_REGS */ \
{0x00000001,0x00000000}, /* R0_REG */ \
- {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \
- {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \
- {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \
+ {3u << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \
+ {3u << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \
+ {3u << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \
{0x00000000,0x00000003}, /* STACK_REG, STACK */ \
- {(3 << REG_Y) | (3 << REG_Z), \
+ {(3u << REG_Y) | (3u << REG_Z), \
0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \
- {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \
+ {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z), \
0x00000000}, /* POINTER_REGS, r26 - r31 */ \
- {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \
+ {(3u << REG_X) | (3u << REG_Y) | (3u << REG_Z) | (3u << REG_W), \
0x00000000}, /* ADDW_REGS, r24 - r31 */ \
{0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \
- {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \
+ {(3u << REG_X)|(3u << REG_Y)|(3u << REG_Z)|(3u << REG_W)|(0xffu << 16),\
0x00000000}, /* LD_REGS, r16 - r31 */ \
{0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \
{0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - r31 */ \
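The new u suffixes are not cosmetic: REG_Z is 30 (r30/r31 per the comments), and with 32-bit int the expression 3 << 30 shifts a one into the sign bit, which is signed-overflow undefined behaviour; the unsigned shift is well defined. A minimal illustration (REG_Z value assumed from the comments above):

enum { REG_Z = 30 };
unsigned z_mask_bad (void)  { return 3  << REG_Z; }  /* UB: signed overflow */
unsigned z_mask_good (void) { return 3u << REG_Z; }  /* OK: 0xc0000000 */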
diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md
index 2c59bf3f9..3bb2a914a 100644
--- a/gcc-4.9/gcc/config/avr/avr.md
+++ b/gcc-4.9/gcc/config/avr/avr.md
@@ -368,6 +368,15 @@
""
{
int i;
+
+ // Avoid (subreg (mem)) for non-generic address spaces below. Because
+ // of the poor addressing capabilities of these spaces it's better to
+ // load them in one chunk. And it avoids PR61443.
+
+ if (MEM_P (operands[0])
+ && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[0])))
+ operands[0] = copy_to_mode_reg (<MODE>mode, operands[0]);
+
for (i = GET_MODE_SIZE (<MODE>mode) - 1; i >= 0; --i)
{
rtx part = simplify_gen_subreg (QImode, operands[0], <MODE>mode, i);
diff --git a/gcc-4.9/gcc/config/dbx.h b/gcc-4.9/gcc/config/dbx.h
index 1b68bcd9a..8173bb06c 100644
--- a/gcc-4.9/gcc/config/dbx.h
+++ b/gcc-4.9/gcc/config/dbx.h
@@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file causes gcc to prefer using DBX (stabs) debugging
diff --git a/gcc-4.9/gcc/config/i386/driver-i386.c b/gcc-4.9/gcc/config/i386/driver-i386.c
index 1f5a11c9c..80f6a0879 100644
--- a/gcc-4.9/gcc/config/i386/driver-i386.c
+++ b/gcc-4.9/gcc/config/i386/driver-i386.c
@@ -739,6 +739,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Assume Core 2. */
cpu = "core2";
}
+ else if (has_longmode)
+ /* Perhaps some emulator? Assume x86-64, otherwise gcc
+ -march=native would be unusable for 64-bit compilations,
+ as all the CPUs below are 32-bit only. */
+ cpu = "x86-64";
else if (has_sse3)
/* It is Core Duo. */
cpu = "pentium-m";
diff --git a/gcc-4.9/gcc/config/i386/gnu-user.h b/gcc-4.9/gcc/config/i386/gnu-user.h
index d9e3fa434..21b9e9692 100644
--- a/gcc-4.9/gcc/config/i386/gnu-user.h
+++ b/gcc-4.9/gcc/config/i386/gnu-user.h
@@ -70,10 +70,12 @@ along with GCC; see the file COPYING3. If not see
"--32 %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}} " \
LINUX_OR_ANDROID_CC ("", ANDROID_ASM_SPEC)
-#undef SUBTARGET_EXTRA_SPECS
-#define SUBTARGET_EXTRA_SPECS \
+#undef SUBTARGET_EXTRA_SPECS_STR
+#define SUBTARGET_EXTRA_SPECS_STR \
{ "link_emulation", GNU_USER_LINK_EMULATION },\
{ "dynamic_linker", GNU_USER_DYNAMIC_LINKER }
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS SUBTARGET_EXTRA_SPECS_STR
#define GNU_USER_TARGET_LINK_SPEC "-m %(link_emulation) %{shared:-shared} \
%{!shared: \
diff --git a/gcc-4.9/gcc/config/i386/i386-protos.h b/gcc-4.9/gcc/config/i386/i386-protos.h
index 6e3297880..fc0eb53f8 100644
--- a/gcc-4.9/gcc/config/i386/i386-protos.h
+++ b/gcc-4.9/gcc/config/i386/i386-protos.h
@@ -28,6 +28,16 @@ extern bool ix86_target_stack_probe (void);
extern bool ix86_can_use_return_insn_p (void);
extern void ix86_setup_frame_addresses (void);
+/* Section names for function patch prologue and epilogue section. See
+ ix86_output_function_nops_prologue_epilogue() in i386.c for details. */
+#define FUNCTION_PATCH_PROLOGUE_SECTION "_function_patch_prologue"
+#define FUNCTION_PATCH_EPILOGUE_SECTION "_function_patch_epilogue"
+
+extern bool ix86_output_function_nops_prologue_epilogue (FILE *,
+ const char *,
+ const char *,
+ int);
+
extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
extern void ix86_expand_prologue (void);
extern void ix86_maybe_emit_epilogue_vzeroupper (void);
@@ -312,6 +322,7 @@ extern enum attr_cpu ix86_schedule;
#endif
extern const char * ix86_output_call_insn (rtx insn, rtx call_op);
+extern bool adjacent_mem_locations (rtx mem1, rtx mem2);
#ifdef RTX_CODE
/* Target data for multipass lookahead scheduling.
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index d7c592f48..df504335e 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -78,6 +78,7 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
+#include "cfgloop.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
@@ -5017,8 +5018,11 @@ ix86_in_large_data_p (tree exp)
HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
/* If this is an incomplete type with size 0, then we can't put it
- in data because it might be too big when completed. */
- if (!size || size > ix86_section_threshold)
+ in data because it might be too big when completed. Also,
+     int_size_in_bytes returns -1 if the size can vary or is larger than
+     an integer, in which case it is likewise safer to assume that it
+     goes in large data.  */
+ if (size <= 0 || size > ix86_section_threshold)
return true;
}
@@ -11730,6 +11734,246 @@ ix86_expand_epilogue (int style)
m->fs = frame_state_save;
}
+
+/* True if the current function should be patched with nops at prologue and
+ returns. */
+static bool patch_current_function_p = false;
+
+static inline bool
+has_attribute (const char* attribute_name)
+{
+ return lookup_attribute (attribute_name,
+ DECL_ATTRIBUTES (current_function_decl)) != NULL;
+}
+
+/* Return true if we patch the current function. By default a function
+ is patched if it has loops or if the number of insns is greater than
+ patch_functions_min_instructions (number of insns roughly translates
+ to number of instructions). */
+
+static bool
+check_should_patch_current_function (void)
+{
+ int num_insns = 0;
+ rtx insn;
+ const char *func_name = NULL;
+ struct loops *loops;
+ int num_loops = 0;
+ int min_functions_instructions;
+
+ /* If a function has an attribute forcing patching on or off, do as it
+ indicates. */
+ if (has_attribute ("always_patch_for_instrumentation"))
+ return true;
+ else if (has_attribute ("never_patch_for_instrumentation"))
+ return false;
+
+ /* Patch the function if it has at least a loop. */
+ if (!patch_functions_ignore_loops)
+ {
+ if (DECL_STRUCT_FUNCTION (current_function_decl)->cfg)
+ {
+ loops = flow_loops_find (NULL);
+ num_loops = loops->larray->length();
+          /* FIXME: deallocating the loops structure causes a seg-fault.  */
+#if 0
+ flow_loops_free (loops);
+#endif
+ /* We are not concerned with the function body as a loop. */
+ if (num_loops > 1)
+ return true;
+ }
+ }
+
+  /* Else, check whether the function has more than
+     patch_functions_min_instructions instructions.  */
+
+ /* Borrowed this code from rest_of_handle_final() in final.c. */
+ func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (!patch_functions_dont_always_patch_main &&
+ func_name &&
+ strcmp("main", func_name) == 0)
+ return true;
+
+ min_functions_instructions =
+ PARAM_VALUE (PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS);
+ if (min_functions_instructions > 0)
+ {
+ /* Calculate the number of instructions in this function and only emit
+ function patch for instrumentation if it is greater than
+ patch_functions_min_instructions. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (NONDEBUG_INSN_P (insn))
+ ++num_insns;
+ }
+ if (num_insns < min_functions_instructions)
+ return false;
+ }
+
+ return true;
+}
+
+/* Emit the 11-byte patch space for the function prologue for functions that
+ qualify. */
+
+static void
+ix86_output_function_prologue (FILE *file,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ /* Only for 64-bit target. */
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+      patch_current_function_p = check_should_patch_current_function ();
+      /* Emit the two-byte instruction 'jmp 09' followed by 9 more bytes,
+         for an 11-byte patch space of nops.  */
+ ix86_output_function_nops_prologue_epilogue (
+ file,
+ FUNCTION_PATCH_PROLOGUE_SECTION,
+ ASM_BYTE"0xeb,0x09",
+ 9);
+ }
+}
+
+/* Emit the nop bytes at function prologue or return (including tail call
+ jumps). The number of nop bytes generated is at least 8.
+ Also emits a section named SECTION_NAME, which is a backpointer section
+ holding the addresses of the nop bytes in the text section.
+ SECTION_NAME is either '_function_patch_prologue' or
+ '_function_patch_epilogue'. The backpointer section can be used to navigate
+ through all the function entry and exit points which are patched with nops.
+ PRE_INSTRUCTIONS are the instructions, if any, at the start of the nop byte
+   sequence.  NUM_REMAINING_NOPS is the number of nop bytes to fill,
+ excluding the number of bytes in PRE_INSTRUCTIONS.
+ Returns true if the function was patched, false otherwise. */
+
+bool
+ix86_output_function_nops_prologue_epilogue (FILE *file,
+ const char *section_name,
+ const char *pre_instructions,
+ int num_remaining_nops)
+{
+ static int labelno = 0;
+ char label[32], section_label[32];
+ section *section = NULL;
+ int num_actual_nops = num_remaining_nops - sizeof(void *);
+ unsigned int section_flags = SECTION_RELRO;
+ char *section_name_comdat = NULL;
+ const char *decl_section_name = NULL;
+ const char *func_name = NULL;
+ char *section_name_function_sections = NULL;
+ size_t len;
+
+ gcc_assert (num_remaining_nops >= 0);
+
+ if (!patch_current_function_p)
+ return false;
+
+ ASM_GENERATE_INTERNAL_LABEL (label, "LFPEL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (section_label, "LFPESL", labelno++);
+
+ /* Align the start of nops to 2-byte boundary so that the 2-byte jump
+ instruction can be patched atomically at run time. */
+ ASM_OUTPUT_ALIGN (file, 1);
+
+ /* Emit nop bytes. They look like the following:
+ $LFPEL0:
+ <pre_instruction>
+ 0x90 (repeated num_actual_nops times)
+ .quad $LFPESL0 - .
+ followed by section 'section_name' which contains the address
+ of instruction at 'label'.
+ */
+ ASM_OUTPUT_INTERNAL_LABEL (file, label);
+ if (pre_instructions)
+ fprintf (file, "%s\n", pre_instructions);
+
+ while (num_actual_nops-- > 0)
+ asm_fprintf (file, ASM_BYTE"0x90\n");
+
+ fprintf (file, ASM_QUAD);
+ /* Output "section_label - ." for the relative address of the entry in
+ the section 'section_name'. */
+ assemble_name_raw (file, section_label);
+ fprintf (file, " - .");
+ fprintf (file, "\n");
+
+ /* Emit the backpointer section. For functions belonging to comdat group,
+ we emit a different section named '<section_name>.foo' where 'foo' is
+ the name of the comdat section. This section is later renamed to
+ '<section_name>' by ix86_elf_asm_named_section().
+ We emit a unique section name for the back pointer section for comdat
+ functions because otherwise the 'get_section' call may return an existing
+ non-comdat section with the same name, leading to references from
+     a non-comdat section to comdat functions.
+ */
+ if (current_function_decl != NULL_TREE &&
+ DECL_ONE_ONLY (current_function_decl) &&
+ HAVE_COMDAT_GROUP)
+ {
+ decl_section_name =
+ TREE_STRING_POINTER (DECL_SECTION_NAME (current_function_decl));
+ len = strlen (decl_section_name) + strlen (section_name) + 2;
+ section_name_comdat = (char *) alloca (len);
+ sprintf (section_name_comdat, "%s.%s", section_name, decl_section_name);
+ section_name = section_name_comdat;
+ section_flags |= SECTION_LINKONCE;
+ }
+ else if (flag_function_sections)
+ {
+ func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+ if (func_name)
+ {
+ len = strlen (func_name) + strlen (section_name) + 2;
+ section_name_function_sections = (char *) alloca (len);
+ sprintf (section_name_function_sections, "%s.%s", section_name,
+ func_name);
+ section_name = section_name_function_sections;
+ }
+ }
+ section = get_section (section_name, section_flags, current_function_decl);
+ switch_to_section (section);
+ /* Align the section to 8-byte boundary. */
+ ASM_OUTPUT_ALIGN (file, 3);
+
+ /* Emit address of the start of nop bytes in the section:
+ $LFPESP0:
+ .quad $LFPEL0
+ */
+ ASM_OUTPUT_INTERNAL_LABEL (file, section_label);
+  fprintf (file, ASM_QUAD);
+ assemble_name_raw (file, label);
+ fprintf (file, "\n");
+
+ /* Switching back to text section. */
+ switch_to_section (function_section (current_function_decl));
+ return true;
+}
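Working the arithmetic for the prologue case above (pre_instructions = the two-byte 0xeb,0x09 jump, num_remaining_nops = 9, 64-bit so sizeof (void *) = 8): num_actual_nops = 1, giving an 11-byte patch space. The emitted assembly is roughly the following (directive and label spellings approximate):

	.p2align 1
.LFPEL0:
	.byte	0xeb,0x09	# jmp .+9: skip the patch space when unpatched
	.byte	0x90		# 9 - 8 = 1 actual nop byte
	.quad	.LFPESL0 - .	# self-relative link to the back-pointer entry

	.section _function_patch_prologue
	.p2align 3
.LFPESL0:
	.quad	.LFPEL0		# address of the patch site in .text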
+
+/* Strips the characters after '_function_patch_prologue' or
+ '_function_patch_epilogue' and emits the section. */
+
+static void
+ix86_elf_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ const char *section_name = name;
+  if (!flag_function_sections && HAVE_COMDAT_GROUP
+      && (flags & SECTION_LINKONCE))
+ {
+ const int prologue_section_name_length =
+ sizeof(FUNCTION_PATCH_PROLOGUE_SECTION) - 1;
+ const int epilogue_section_name_length =
+ sizeof(FUNCTION_PATCH_EPILOGUE_SECTION) - 1;
+
+ if (strncmp (name, FUNCTION_PATCH_PROLOGUE_SECTION,
+ prologue_section_name_length) == 0)
+ section_name = FUNCTION_PATCH_PROLOGUE_SECTION;
+ else if (strncmp (name, FUNCTION_PATCH_EPILOGUE_SECTION,
+ epilogue_section_name_length) == 0)
+ section_name = FUNCTION_PATCH_EPILOGUE_SECTION;
+ }
+ default_elf_asm_named_section (section_name, flags, decl);
+}
+
/* Reset from the function's potential modifications. */
static void
@@ -12659,7 +12903,9 @@ legitimate_pic_address_disp_p (rtx disp)
return true;
}
else if (!SYMBOL_REF_FAR_ADDR_P (op0)
- && SYMBOL_REF_LOCAL_P (op0)
+ && (SYMBOL_REF_LOCAL_P (op0)
+ || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie
+ && !SYMBOL_REF_FUNCTION_P (op0)))
&& ix86_cmodel != CM_LARGE_PIC)
return true;
break;
@@ -21507,7 +21753,7 @@ ix86_expand_vec_perm (rtx operands[])
t1 = gen_reg_rtx (V32QImode);
t2 = gen_reg_rtx (V32QImode);
t3 = gen_reg_rtx (V32QImode);
- vt2 = GEN_INT (128);
+ vt2 = GEN_INT (-128);
for (i = 0; i < 32; i++)
vec[i] = vt2;
vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
@@ -23794,7 +24040,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
{
const struct stringop_algs * algs;
bool optimize_for_speed;
- int max = -1;
+ int max = 0;
const struct processor_costs *cost;
int i;
bool any_alg_usable_p = false;
@@ -23832,7 +24078,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If expected size is not known but max size is small enough
so inline version is a win, set expected size into
the range. */
- if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
+ if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
&& expected_size == -1)
expected_size = min_size / 2 + max_size / 2;
@@ -23921,7 +24167,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*dynamic_check = 128;
return loop_1_byte;
}
- if (max == -1)
+ if (max <= 0)
max = 4096;
alg = decide_alg (count, max / 2, min_size, max_size, memset,
zero_memset, dynamic_check, noalign);
@@ -24945,6 +25191,15 @@ ix86_output_call_insn (rtx insn, rtx call_op)
else
xasm = "jmp\t%A0";
+  /* Just before the sibling call, add 11 bytes of nops to patch the
+     function exit: 2 bytes for 'jmp 09' plus the remaining 9 bytes.  */
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ ix86_output_function_nops_prologue_epilogue (
+ asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ ASM_BYTE"0xeb, 0x09",
+ 9);
+
output_asm_insn (xasm, &call_op);
return "";
}
@@ -26238,13 +26493,17 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
edge e;
edge_iterator ei;
- /* Assume that region is SCC, i.e. all immediate predecessors
- of non-head block are in the same region. */
+
+ /* Regions are SCCs with the exception of selective
+ scheduling with pipelining of outer blocks enabled.
+ So also check that immediate predecessors of a non-head
+ block are in the same region. */
FOR_EACH_EDGE (e, ei, bb->preds)
{
/* Avoid creating of loop-carried dependencies through
- using topological odering in region. */
- if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
+ using topological ordering in the region. */
+ if (rgn == CONTAINING_RGN (e->src->index)
+ && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
add_dependee_for_func_arg (first_arg, e->src);
}
}
@@ -28789,7 +29048,8 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
ix86_builtins_isa[(int) code].isa = mask;
mask &= ~OPTION_MASK_ISA_64BIT;
- if (mask == 0
+ if (flag_dyn_ipa
+ || mask == 0
|| (mask & ix86_isa_flags) != 0
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
@@ -37802,10 +38062,10 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
*total = 2;
else if (flag_pic && SYMBOLIC_CONST (x)
- && (!TARGET_64BIT
- || (!GET_CODE (x) != LABEL_REF
- && (GET_CODE (x) != SYMBOL_REF
- || !SYMBOL_REF_LOCAL_P (x)))))
+ && !(TARGET_64BIT
+ && (GET_CODE (x) == LABEL_REF
+ || (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_LOCAL_P (x)))))
*total = 1;
else
*total = 0;
@@ -46745,6 +47005,70 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
atomic_feraiseexcept_call);
}
+/* Try to determine BASE/OFFSET/SIZE parts of the given MEM.
+ Return true if successful, false if all the values couldn't
+ be determined.
+
+ This function only looks for REG/SYMBOL or REG/SYMBOL+CONST
+ address forms. */
+
+static bool
+get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
+ HOST_WIDE_INT *size)
+{
+ rtx addr_rtx;
+  if (MEM_SIZE_KNOWN_P (mem))
+ *size = MEM_SIZE (mem);
+ else
+ return false;
+
+ if (GET_CODE (XEXP (mem, 0)) == CONST)
+ addr_rtx = XEXP (XEXP (mem, 0), 0);
+ else
+ addr_rtx = (XEXP (mem, 0));
+
+ if (GET_CODE (addr_rtx) == REG
+ || GET_CODE (addr_rtx) == SYMBOL_REF)
+ {
+ *base = addr_rtx;
+ *offset = 0;
+ }
+ else if (GET_CODE (addr_rtx) == PLUS
+ && CONST_INT_P (XEXP (addr_rtx, 1)))
+ {
+ *base = XEXP (addr_rtx, 0);
+ *offset = INTVAL (XEXP (addr_rtx, 1));
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* If MEM1 is adjacent to MEM2 and MEM1 has lower address,
+ return true. */
+
+extern bool
+adjacent_mem_locations (rtx mem1, rtx mem2)
+{
+ rtx base1, base2;
+ HOST_WIDE_INT off1, size1, off2, size2;
+
+ if (get_memref_parts (mem1, &base1, &off1, &size1)
+ && get_memref_parts (mem2, &base2, &off2, &size2))
+ {
+ if (GET_CODE (base1) == SYMBOL_REF
+ && GET_CODE (base2) == SYMBOL_REF
+ && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2))
+ return (off1 + size1 == off2);
+ else if (REG_P (base1)
+ && REG_P (base2)
+ && REGNO (base1) == REGNO (base2))
+ return (off1 + size1 == off2);
+ }
+ return false;
+}
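In other words, two references are adjacent exactly when the first ends where the second begins: with a common base, off1 = 0, size1 = 4, off2 = 4 qualifies (0 + 4 == 4), while off2 = 8 does not. A standalone restatement of the test (illustrative only):

/* mem1 at (base, off1), size1 bytes, sits immediately below mem2.  */
static int ends_where_next_begins (long off1, long size1, long off2)
{
  return off1 + size1 == off2;
}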
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -46787,9 +47111,15 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
+#undef TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue
+
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION ix86_elf_asm_named_section
+
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 9f103cf30..058702904 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -3201,7 +3201,7 @@
(const_string "1")
(const_string "*")))
(set (attr "mode")
- (cond [(eq_attr "alternative" "3,4,9,10,13,14,15")
+ (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15")
(const_string "SI")
(eq_attr "alternative" "11")
(const_string "DI")
@@ -4933,66 +4933,37 @@
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
-;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC
-;; targets benefit from this optimization. Also note that fild
-;; loads from memory only.
+;; SImode values to stack. Also note that fild loads from memory only.
-(define_insn "*floatunssi<mode>2_1"
- [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+(define_insn_and_split "*floatunssi<mode>2_i387_with_xmm"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
(unsigned_float:X87MODEF
- (match_operand:SI 1 "nonimmediate_operand" "x,m")))
- (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
- (clobber (match_scratch:SI 3 "=X,x"))]
+ (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:DI 3 "=x"))
+ (clobber (match_operand:DI 2 "memory_operand" "=m"))]
"!TARGET_64BIT
&& TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE"
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
"#"
- [(set_attr "type" "multi")
- (set_attr "mode" "<MODE>")])
-
-(define_split
- [(set (match_operand:X87MODEF 0 "register_operand")
- (unsigned_float:X87MODEF
- (match_operand:SI 1 "register_operand")))
- (clobber (match_operand:DI 2 "memory_operand"))
- (clobber (match_scratch:SI 3))]
- "!TARGET_64BIT
- && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE
- && reload_completed"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0)
- (float:X87MODEF (match_dup 2)))]
- "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
-
-(define_split
- [(set (match_operand:X87MODEF 0 "register_operand")
- (unsigned_float:X87MODEF
- (match_operand:SI 1 "memory_operand")))
- (clobber (match_operand:DI 2 "memory_operand"))
- (clobber (match_scratch:SI 3))]
- "!TARGET_64BIT
- && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE
- && reload_completed"
- [(set (match_dup 2) (match_dup 3))
+ "&& reload_completed"
+ [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
+ (set (match_dup 2) (match_dup 3))
(set (match_dup 0)
(float:X87MODEF (match_dup 2)))]
-{
- emit_move_insn (operands[3], operands[1]);
- operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
-})
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
(define_expand "floatunssi<mode>2"
[(parallel
[(set (match_operand:X87MODEF 0 "register_operand")
(unsigned_float:X87MODEF
(match_operand:SI 1 "nonimmediate_operand")))
- (clobber (match_dup 2))
- (clobber (match_scratch:SI 3))])]
+ (clobber (match_scratch:DI 3))
+ (clobber (match_dup 2))])]
"!TARGET_64BIT
&& ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
- && TARGET_SSE)
+ && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
@@ -9627,7 +9598,7 @@
(define_insn "x86_64_shrd"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
- (ior:DI (ashiftrt:DI (match_dup 0)
+ (ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Jc"))
(ashift:DI (match_operand:DI 1 "register_operand" "r")
(minus:QI (const_int 64) (match_dup 2)))))
@@ -9643,7 +9614,7 @@
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
- (ior:SI (ashiftrt:SI (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Ic"))
(ashift:SI (match_operand:SI 1 "register_operand" "r")
(minus:QI (const_int 32) (match_dup 2)))))
@@ -10095,13 +10066,13 @@
[(set (match_dup 3) (match_dup 4))
(parallel
[(set (match_dup 4)
- (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+ (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
(ashift:DWIH (match_dup 5)
(minus:QI (match_dup 6) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 5)
- (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+ (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2))
(ashift:DWIH (match_dup 3)
(minus:QI (match_dup 6) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))])]
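The ashiftrt-to-lshiftrt changes make the RTL match what SHRD actually computes: the bits vacated at the top of operand 0 are filled from operand 1, never by sign extension, so the right shift must be logical. In C terms (a sketch, valid for shift counts 1..63):

#include <stdint.h>

/* Double-width shift right of src:dst, as x86 SHRD performs it.  */
uint64_t shrd64 (uint64_t dst, uint64_t src, unsigned n)
{
  return (dst >> n) | (src << (64 - n));  /* logical, not arithmetic */
}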
@@ -11611,7 +11582,18 @@
(define_insn "simple_return_internal"
[(simple_return)]
"reload_completed"
- "ret"
+{
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+ /* Emit 10 nop bytes after ret. */
+ if (ix86_output_function_nops_prologue_epilogue (asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ "\tret",
+ 10))
+ return "";
+ }
+ return "ret";
+}
[(set_attr "length" "1")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
@@ -11624,7 +11606,18 @@
[(simple_return)
(unspec [(const_int 0)] UNSPEC_REP)]
"reload_completed"
- "rep%; ret"
+{
+ if (TARGET_64BIT && patch_functions_for_instrumentation)
+ {
+ /* Emit 9 nop bytes after rep;ret. */
+ if (ix86_output_function_nops_prologue_epilogue (asm_out_file,
+ FUNCTION_PATCH_EPILOGUE_SECTION,
+ "\trep\;ret",
+ 9))
+ return "";
+ }
+ return "rep\;ret";
+}
[(set_attr "length" "2")
(set_attr "atom_unit" "jeu")
(set_attr "length_immediate" "0")
diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt
index 0f463a238..1e00b660e 100644
--- a/gcc-4.9/gcc/config/i386/i386.opt
+++ b/gcc-4.9/gcc/config/i386/i386.opt
@@ -108,6 +108,10 @@ int x_ix86_dump_tunes
TargetSave
int x_ix86_force_align_arg_pointer
+;; -mcopyrelocs=
+TargetSave
+int x_ix86_copyrelocs
+
;; -mforce-drap=
TargetSave
int x_ix86_force_drap
@@ -291,6 +295,10 @@ mfancy-math-387
Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
Generate sin, cos, sqrt for FPU
+mcopyrelocs
+Target Report Var(ix86_pie_copyrelocs) Init(0)
+Assume support for copy relocations in PIE builds.
+
mforce-drap
Target Report Var(ix86_force_drap)
Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack
@@ -781,6 +789,18 @@ mrtm
Target Report Mask(ISA_RTM) Var(ix86_isa_flags) Save
Support RTM built-in functions and code generation
+mpatch-functions-for-instrumentation
+Target RejectNegative Report Var(patch_functions_for_instrumentation) Save
+Patch function prologues and epilogues with custom NOPs for dynamic instrumentation. By default, functions with loops (see -mpatch-functions-ignore-loops) or functions with more instructions than -mpatch-functions-min-instructions are patched.
+
+mpatch-functions-ignore-loops
+Target RejectNegative Report Var(patch_functions_ignore_loops) Save
+Ignore loops when deciding whether to patch a function for instrumentation (for use with -mpatch-functions-for-instrumentation).
+
+mno-patch-functions-main-always
+Target Report RejectNegative Var(patch_functions_dont_always_patch_main) Save
+Treat 'main' as any other function and only patch it if it meets the criteria for loops and minimum number of instructions (for use with -mpatch-functions-for-instrumentation).
+
mstack-protector-guard=
Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS)
Use given stack-protector guard
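A note on the new -mcopyrelocs flag above: it lets the compiler assume the linker supports copy relocations in PIE links, so an extern global can be accessed directly rather than through the GOT. A hedged illustration (the codegen difference described in the comment is the assumption here, not a guarantee):

    /* Compiled with -fPIE: without copy-reloc support this load goes
       through the GOT; with -mcopyrelocs the compiler may emit a direct
       PC-relative access and let the linker create a copy relocation.  */
    extern int shared_counter;

    int
    read_counter (void)
    {
      return shared_counter;
    }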
diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h
index 1fb1e0321..27d68b5db 100644
--- a/gcc-4.9/gcc/config/i386/linux.h
+++ b/gcc-4.9/gcc/config/i386/linux.h
@@ -20,4 +20,22 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define GNU_USER_LINK_EMULATION "elf_i386"
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+
+/* These may be provided by config/linux-grtev*.h. */
+#ifndef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#ifndef SUBTARGET_EXTRA_SPECS_STR
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS
+#else
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS \
+ SUBTARGET_EXTRA_SPECS_STR
+#endif
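The rewrite above relies on C string-literal concatenation: a build that defines RUNTIME_ROOT_PREFIX gets it glued onto the dynamic-linker path, while the empty default leaves the path unchanged. For example (the /grte/v4 value is hypothetical):

    #define RUNTIME_ROOT_PREFIX "/grte/v4"
    #define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
    /* GLIBC_DYNAMIC_LINKER now expands to "/grte/v4/lib/ld-linux.so.2";
       with the default empty prefix it stays "/lib/ld-linux.so.2".  */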
diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h
index a90171e8c..5124a341b 100644
--- a/gcc-4.9/gcc/config/i386/linux64.h
+++ b/gcc-4.9/gcc/config/i386/linux64.h
@@ -27,6 +27,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define GNU_USER_LINK_EMULATION64 "elf_x86_64"
#define GNU_USER_LINK_EMULATIONX32 "elf32_x86_64"
-#define GLIBC_DYNAMIC_LINKER32 "/lib/ld-linux.so.2"
-#define GLIBC_DYNAMIC_LINKER64 "/lib64/ld-linux-x86-64.so.2"
-#define GLIBC_DYNAMIC_LINKERX32 "/libx32/ld-linux-x32.so.2"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2"
+#define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2"
+
+/* These may be provided by config/linux-grtev*.h. */
+#ifndef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ LINUX_GRTE_EXTRA_SPECS
+
diff --git a/gcc-4.9/gcc/config/i386/sse.md b/gcc-4.9/gcc/config/i386/sse.md
index 72a4d6d07..27ade1964 100644
--- a/gcc-4.9/gcc/config/i386/sse.md
+++ b/gcc-4.9/gcc/config/i386/sse.md
@@ -8255,6 +8255,36 @@
DONE;
})
+(define_expand "usadv16qi"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand")
+ (match_operand:V16QI 2 "nonimmediate_operand")
+ (match_operand:V4SI 3 "nonimmediate_operand")]
+ "TARGET_SSE2"
+{
+ rtx t1 = gen_reg_rtx (V2DImode);
+ rtx t2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
+ DONE;
+})
+
+(define_expand "usadv32qi"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V32QI 1 "register_operand")
+ (match_operand:V32QI 2 "nonimmediate_operand")
+ (match_operand:V8SI 3 "nonimmediate_operand")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (V4DImode);
+ rtx t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
+ DONE;
+})
+
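As a reading aid, the usadv16qi/usadv32qi expanders above map the vectorizer's unsigned-sum-of-absolute-differences pattern onto psadbw plus a vector add. A scalar model of what the SSE2 path computes, offered as a sketch of my reading of psadbw's two 8-byte lanes:

    #include <stdint.h>

    /* Model: psadbw sums |a[i]-b[i]| over each 8-byte half; the two sums
       land in 32-bit elements 0 and 2 of the V4SI view, then operand 3
       is added element-wise.  */
    static void
    usadv16qi_model (uint32_t out[4], const uint8_t a[16],
                     const uint8_t b[16], const uint32_t acc[4])
    {
      uint32_t sad[4] = { 0, 0, 0, 0 };
      for (int i = 0; i < 16; i++)
        sad[(i / 8) * 2] += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
      for (int i = 0; i < 4; i++)
        out[i] = sad[i] + acc[i];
    }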
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
@@ -15606,3 +15636,37 @@
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+
+;; Merge movsd/movhpd into movupd when TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+;; is true.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (match_operand:DF 1 "memory_operand"))
+ (set (match_operand:V2DF 2 "register_operand")
+ (vec_concat:V2DF (match_dup 0)
+ (match_operand:DF 3 "memory_operand")))]
+ "TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && adjacent_mem_locations (operands[1], operands[3])"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+{
+  operands[4] = gen_rtx_MEM (V2DFmode, XEXP (operands[1], 0));
+})
+
+;; Merge movsd/movhpd into movupd when TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+;; is true.
+(define_peephole2
+ [(set (match_operand:DF 0 "memory_operand")
+ (vec_select:DF (match_operand:V2DF 1 "register_operand")
+ (parallel [(const_int 0)])))
+ (set (match_operand:DF 2 "memory_operand")
+ (vec_select:DF (match_dup 1)
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+ && adjacent_mem_locations (operands[0], operands[2])"
+ [(set (match_dup 3)
+ (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+{
+  operands[3] = gen_rtx_MEM (V2DFmode, XEXP (operands[0], 0));
+})
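The two peepholes above fuse a movsd/movhpd pair (or the matching store pair) touching adjacent memory into one unaligned 16-byte access on targets where that is cheap. In source terms the transformation matches what a compiler already does for an unaligned vector load, e.g. (a sketch; load_pair is an illustrative name):

    #include <string.h>

    typedef double v2df __attribute__ ((vector_size (16)));

    /* p need not be 16-byte aligned; on hardware where
       TARGET_SSE_UNALIGNED_LOAD_OPTIMAL holds, one movupd is as fast as
       the movsd+movhpd pair.  */
    static v2df
    load_pair (const double *p)
    {
      v2df v;
      memcpy (&v, p, sizeof v);   /* compiles to an unaligned vector load */
      return v;
    }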
diff --git a/gcc-4.9/gcc/config/initfini-array.h b/gcc-4.9/gcc/config/initfini-array.h
index f7ae836e6..67e66f6f2 100644
--- a/gcc-4.9/gcc/config/initfini-array.h
+++ b/gcc-4.9/gcc/config/initfini-array.h
@@ -14,8 +14,13 @@
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifdef HAVE_INITFINI_ARRAY_SUPPORT
diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h
new file mode 100644
index 000000000..31e8a94ce
--- /dev/null
+++ b/gcc-4.9/gcc/config/linux-grte.h
@@ -0,0 +1,41 @@
+/* Definitions for Linux-based GRTE (Google RunTime Environment).
+ Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc.
+ Contributed by Chris Demetriou and Ollie Wild.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Overrides LIB_SPEC from gnu-user.h. */
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}"
+
+/* When GRTE links statically, it needs its NSS and resolver libraries
+ linked in as well. Note that when linking statically, these are
+ enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */
+#undef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS \
+ { "libc", "%{static:%(libc_static);:-lc}" }, \
+ { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \
+ { "libc_static", "-lc -lresolv" }, \
+ { "libc_p_static", "-lc_p -lresolv_p" },
diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c
index 6162675d8..2f1cd8e63 100644
--- a/gcc-4.9/gcc/config/linux.c
+++ b/gcc-4.9/gcc/config/linux.c
@@ -38,7 +38,9 @@ linux_libc_has_function (enum function_class fn_class)
return true;
if (OPTION_BIONIC)
if (fn_class == function_c94
- || fn_class == function_c99_misc)
+ || fn_class == function_c99_misc
+	|| (fn_class == function_sincos
+	    && !TARGET_ANDROID))
return true;
return false;
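The hunk above lets a Bionic-based (but non-Android) target report sincos as available, which in turn allows GCC's sincos-CSE pass to contract paired sin/cos calls. An illustration (whether the contraction fires depends on the optimization level; this is a sketch):

    #include <math.h>

    /* With sincos available in libc, GCC may rewrite the two calls
       below into a single sincos (t, s, c) library call.  */
    void
    polar (double t, double *s, double *c)
    {
      *s = sin (t);
      *c = cos (t);
    }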
diff --git a/gcc-4.9/gcc/config/linux.h b/gcc-4.9/gcc/config/linux.h
index d38ef81e3..7c2a8e2c0 100644
--- a/gcc-4.9/gcc/config/linux.h
+++ b/gcc-4.9/gcc/config/linux.h
@@ -73,13 +73,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
GLIBC_DYNAMIC_LINKER must be defined for each target using them, or
GLIBC_DYNAMIC_LINKER32 and GLIBC_DYNAMIC_LINKER64 for targets
supporting both 32-bit and 64-bit compilation. */
-#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0"
-#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
-#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
-#define UCLIBC_DYNAMIC_LINKERX32 "/lib/ldx32-uClibc.so.0"
-#define BIONIC_DYNAMIC_LINKER "/system/bin/linker"
-#define BIONIC_DYNAMIC_LINKER32 "/system/bin/linker"
-#define BIONIC_DYNAMIC_LINKER64 "/system/bin/linker64"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define UCLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib/ld64-uClibc.so.0"
+#define UCLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/lib/ldx32-uClibc.so.0"
+#define BIONIC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/system/bin/linker"
+#define BIONIC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/system/bin/linker"
+#define BIONIC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/system/bin/linker64"
#define BIONIC_DYNAMIC_LINKERX32 "/system/bin/linkerx32"
#define GNU_USER_DYNAMIC_LINKER \
diff --git a/gcc-4.9/gcc/config/msp430/msp430-opts.h b/gcc-4.9/gcc/config/msp430/msp430-opts.h
new file mode 100644
index 000000000..119cfcb7f
--- /dev/null
+++ b/gcc-4.9/gcc/config/msp430/msp430-opts.h
@@ -0,0 +1,32 @@
+/* GCC option-handling definitions for the TI MSP430
+ Copyright (C) 2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef MSP430_OPTS_H
+#define MSP430_OPTS_H
+
+enum msp430_hwmult_types
+{
+ NONE,
+ AUTO,
+ SMALL,
+ LARGE,
+ F5SERIES
+};
+
+#endif
diff --git a/gcc-4.9/gcc/config/msp430/msp430.c b/gcc-4.9/gcc/config/msp430/msp430.c
index c844aa2a1..9eb140974 100644
--- a/gcc-4.9/gcc/config/msp430/msp430.c
+++ b/gcc-4.9/gcc/config/msp430/msp430.c
@@ -730,6 +730,97 @@ msp430_get_raw_result_mode (int regno ATTRIBUTE_UNUSED)
{
return Pmode;
}
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR msp430_gimplify_va_arg_expr
+
+#include "gimplify.h"
+#include "gimple-expr.h"
+
+static tree
+msp430_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ tree addr, t, type_size, rounded_size, valist_tmp;
+ unsigned HOST_WIDE_INT align, boundary;
+ bool indirect;
+
+ indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect)
+ type = build_pointer_type (type);
+
+ align = PARM_BOUNDARY / BITS_PER_UNIT;
+ boundary = targetm.calls.function_arg_boundary (TYPE_MODE (type), type);
+
+  /* When the caller aligns a parameter on the stack and the requested
+     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it is clamped
+     to MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here with
+     the caller.  */
+ if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
+ boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
+
+ boundary /= BITS_PER_UNIT;
+
+ /* Hoist the valist value into a temporary for the moment. */
+ valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
+
+ /* va_list pointer is aligned to PARM_BOUNDARY. If argument actually
+ requires greater alignment, we must perform dynamic alignment. */
+ if (boundary > align
+ && !integer_zerop (TYPE_SIZE (type)))
+ {
+      /* FIXME: This is where this function diverges from targhooks.c:
+	 std_gimplify_va_arg_expr ().  It works, but I do not know why...  */
+ if (! POINTER_TYPE_P (type))
+ {
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
+ fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
+ gimplify_and_add (t, pre_p);
+
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
+ fold_build2 (BIT_AND_EXPR, TREE_TYPE (valist),
+ valist_tmp,
+ build_int_cst (TREE_TYPE (valist), -boundary)));
+ gimplify_and_add (t, pre_p);
+ }
+ }
+ else
+ boundary = align;
+
+ /* If the actual alignment is less than the alignment of the type,
+ adjust the type accordingly so that we don't assume strict alignment
+ when dereferencing the pointer. */
+ boundary *= BITS_PER_UNIT;
+ if (boundary < TYPE_ALIGN (type))
+ {
+ type = build_variant_type_copy (type);
+ TYPE_ALIGN (type) = boundary;
+ }
+
+ /* Compute the rounded size of the type. */
+ type_size = size_in_bytes (type);
+ rounded_size = round_up (type_size, align);
+
+ /* Reduce rounded_size so it's sharable with the postqueue. */
+ gimplify_expr (&rounded_size, pre_p, post_p, is_gimple_val, fb_rvalue);
+
+ /* Get AP. */
+ addr = valist_tmp;
+
+ /* Compute new value for AP. */
+ t = fold_build_pointer_plus (valist_tmp, rounded_size);
+ t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
+ gimplify_and_add (t, pre_p);
+
+ addr = fold_convert (build_pointer_type (type), addr);
+
+ if (indirect)
+ addr = build_va_arg_indirect_ref (addr);
+
+ addr = build_va_arg_indirect_ref (addr);
+
+ return addr;
+}
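The hook above follows the usual pointer-bump va_arg scheme: align the va pointer dynamically when the argument needs more than PARM_BOUNDARY, then advance it by the rounded argument size. A compact model (va_arg_step is a hypothetical helper, not part of the patch):

    #include <stdint.h>
    #include <stddef.h>

    /* Returns the argument address and advances *ap past it.  */
    static void *
    va_arg_step (char **ap, size_t size, size_t align, size_t boundary)
    {
      uintptr_t p = (uintptr_t) *ap;
      if (boundary > align)               /* dynamic over-alignment */
        p = (p + boundary - 1) & ~(uintptr_t) (boundary - 1);
      *ap = (char *) (p + ((size + align - 1) & ~(uintptr_t) (align - 1)));
      return (void *) p;
    }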
/* Addressing Modes */
@@ -2185,8 +2276,32 @@ msp430_print_operand (FILE * file, rtx op, int letter)
msp430_print_operand_addr (file, addr);
break;
- case CONST_INT:
case CONST:
+ if (GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT)
+ {
+ op = XEXP (op, 0);
+ switch (INTVAL (XEXP (op, 2)))
+ {
+ case 0:
+ fprintf (file, "#lo (");
+ msp430_print_operand_raw (file, XEXP (op, 0));
+ fprintf (file, ")");
+ break;
+
+ case 16:
+ fprintf (file, "#hi (");
+ msp430_print_operand_raw (file, XEXP (op, 0));
+ fprintf (file, ")");
+ break;
+
+ default:
+ output_operand_lossage ("invalid zero extract");
+ break;
+ }
+ break;
+ }
+ /* Fall through. */
+ case CONST_INT:
case SYMBOL_REF:
case LABEL_REF:
if (letter == 0)
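The new CONST case above prints ZERO_EXTRACTs at bit positions 0 and 16 as the assembler's #lo()/#hi() halves of a 20-bit MSP430X address. Numerically (a sketch; addr_lo/addr_hi are illustrative names):

    #include <stdint.h>

    static uint16_t addr_lo (uint32_t a20) { return a20 & 0xFFFF; }       /* #lo */
    static uint16_t addr_hi (uint32_t a20) { return (a20 >> 16) & 0xF; }  /* #hi */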
diff --git a/gcc-4.9/gcc/config/msp430/msp430.h b/gcc-4.9/gcc/config/msp430/msp430.h
index 65d6ad66d..044e558a3 100644
--- a/gcc-4.9/gcc/config/msp430/msp430.h
+++ b/gcc-4.9/gcc/config/msp430/msp430.h
@@ -55,8 +55,8 @@ extern bool msp430x;
"%{mcpu=*:-mcpu=%*}%{!mcpu=*:%{mmcu=*:-mmcu=%*}} " /* Pass the CPU type on to the assembler. */ \
"%{mrelax=-mQ} " /* Pass the relax option on to the assembler. */ \
"%{mlarge:-ml} " /* Tell the assembler if we are building for the LARGE pointer model. */ \
- "%{!msim:-md} %{msim:%{mlarge:-md}}" /* Copy data from ROM to RAM if necessary. */ \
- "%{ffunction-sections:-gdwarf-sections}" /* If function sections are being created then create DWARF line number sections as well. */
+ "%{!msim:-md} %{msim:%{mlarge:-md}} " /* Copy data from ROM to RAM if necessary. */ \
+ "%{ffunction-sections:-gdwarf-sections} " /* If function sections are being created then create DWARF line number sections as well. */
/* Enable linker section garbage collection by default, unless we
are creating a relocatable binary (gc does not work) or debugging
diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md
index 74a98b480..5e890eced 100644
--- a/gcc-4.9/gcc/config/msp430/msp430.md
+++ b/gcc-4.9/gcc/config/msp430/msp430.md
@@ -362,8 +362,8 @@
; halves.
(define_split
[(set (match_operand:SI 0 "msp430_nonsubreg_operand")
- (plus:SI (match_operand:SI 1 "nonimmediate_operand")
- (match_operand:SI 2 "general_operand")))
+ (plus:SI (match_operand:SI 1 "msp430_nonsubreg_operand")
+ (match_operand:SI 2 "msp430_nonsubreg_or_imm_operand")))
]
""
[(parallel [(set (match_operand:HI 3 "nonimmediate_operand" "=&rm")
@@ -609,9 +609,15 @@
; when the PSI value is negative..
;
; Note: using PUSHM.A #1 is two bytes smaller than using PUSHX.A....
+;
+; Note: We use a + constraint on operand 0 as otherwise GCC gets confused
+; about extending a single PSI mode register into a pair of SImode registers
+; with the same starting register. It thinks that the upper register of
+; the pair is unused and so it can clobber it. Try compiling 20050826-2.c
+; at -O2 to see this.
(define_insn "zero_extendpsisi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "register_operand" "+r")
(zero_extend:SI (match_operand:PSI 1 "register_operand" "r")))]
""
"*
@@ -1311,9 +1317,9 @@
""
"*
if (REGNO (operands[0]) != REGNO (operands[1]))
- return \"MOV.W\t%1, %0 { SUB.W\t#0, %0 { AND.W\t%2, %0\";
+ return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
else
- return \"SUB.W\t#0, %0 { AND.W\t%2, %0\";
+ return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\";
"
)
@@ -1324,9 +1330,9 @@
"optimize > 2 && msp430_hwmult_type != NONE"
"*
if (msp430_use_f5_series_hwmult ())
- return \"PUSH.W sr { DINT { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C2 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\";
else
- return \"PUSH.W sr { DINT { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0132 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\";
"
)
@@ -1337,9 +1343,9 @@
"optimize > 2 && msp430_hwmult_type != NONE"
"*
if (msp430_use_f5_series_hwmult ())
- return \"PUSH.W sr { DINT { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x04C0 { MOV.W %2, &0x04C8 { MOV.W &0x04CA, %L0 { MOV.W &0x04CC, %H0 { POP.W sr\";
else
- return \"PUSH.W sr { DINT { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %1, &0x0130 { MOV.W %2, &0x0138 { MOV.W &0x013A, %L0 { MOV.W &0x013C, %H0 { POP.W sr\";
"
)
@@ -1350,9 +1356,9 @@
"optimize > 2 && msp430_hwmult_type != NONE"
"*
if (msp430_use_f5_series_hwmult ())
- return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D4 { MOV.W %H1, &0x04D6 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\";
else
- return \"PUSH.W sr { DINT { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0144 { MOV.W %H1, &0x0146 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
"
)
@@ -1363,8 +1369,8 @@
"optimize > 2 && msp430_hwmult_type != NONE"
"*
if (msp430_use_f5_series_hwmult ())
- return \"PUSH.W sr { DINT { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x04D0 { MOV.W %H1, &0x04D2 { MOV.W %L2, &0x04E0 { MOV.W %H2, &0x04E2 { MOV.W &0x04E4, %A0 { MOV.W &0x04E6, %B0 { MOV.W &0x04E8, %C0 { MOV.W &0x04EA, %D0 { POP.W sr\";
else
- return \"PUSH.W sr { DINT { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
+ return \"PUSH.W sr { DINT { NOP { MOV.W %L1, &0x0140 { MOV.W %H1, &0x0142 { MOV.W %L2, &0x0150 { MOV.W %H2, &0x0152 { MOV.W &0x0154, %A0 { MOV.W &0x0156, %B0 { MOV.W &0x0158, %C0 { MOV.W &0x015A, %D0 { POP.W sr\";
"
)
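All of the hardware-multiply sequences above gain a NOP after DINT. The working assumption is that on MSP430 the interrupt disable takes effect only after one more instruction completes, so without the NOP an interrupt could still fire inside the multiplier-register protocol. Sketched as inline assembly (hwmult_begin/hwmult_end are hypothetical helpers, not part of the patch):

    /* Assumed model: DINT latches one instruction late, so a NOP must
       separate it from the first protected access.  */
    static inline void
    hwmult_begin (void)
    {
      __asm__ volatile ("push.w sr \n\t dint \n\t nop" ::: "memory");
    }

    static inline void
    hwmult_end (void)
    {
      __asm__ volatile ("pop.w sr" ::: "memory");
    }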
diff --git a/gcc-4.9/gcc/config/msp430/predicates.md b/gcc-4.9/gcc/config/msp430/predicates.md
index 9a8e2da0a..94a628cd5 100644
--- a/gcc-4.9/gcc/config/msp430/predicates.md
+++ b/gcc-4.9/gcc/config/msp430/predicates.md
@@ -73,6 +73,10 @@
(define_predicate "msp430_nonsubreg_operand"
(match_code "reg,mem"))
+(define_predicate "msp430_nonsubreg_or_imm_operand"
+ (ior (match_operand 0 "msp430_nonsubreg_operand")
+ (match_operand 0 "immediate_operand")))
+
; TRUE for constants which are bit positions for zero_extract
(define_predicate "msp430_bitpos"
(and (match_code "const_int")
diff --git a/gcc-4.9/gcc/config/newlib-stdint.h b/gcc-4.9/gcc/config/newlib-stdint.h
index f4a78a544..47445e42d 100644
--- a/gcc-4.9/gcc/config/newlib-stdint.h
+++ b/gcc-4.9/gcc/config/newlib-stdint.h
@@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* newlib uses 32-bit long in certain cases for all non-SPU
diff --git a/gcc-4.9/gcc/config/rs6000/htm.md b/gcc-4.9/gcc/config/rs6000/htm.md
index 9dbb499ce..ca7f7fdf4 100644
--- a/gcc-4.9/gcc/config/rs6000/htm.md
+++ b/gcc-4.9/gcc/config/rs6000/htm.md
@@ -179,7 +179,7 @@
(const_int 0)]
UNSPECV_HTM_TABORTWCI))
(set (subreg:CC (match_dup 2) 0) (match_dup 1))
- (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 24)))
+ (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 28)))
(parallel [(set (match_operand:SI 0 "int_reg_operand" "")
(and:SI (match_dup 3) (const_int 15)))
(clobber (scratch:CC))])]
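The shift-count fix above matters because the tabortwci. result lives in CR0, the top nibble of the 32-bit condition-register image: shifting by 24 would have extracted CR1 instead. In C terms:

    /* CR0 is bits 31:28 of the CR image, so its field is reached with a
       logical shift by 28 before masking with 15.  */
    static unsigned
    cr0_field (unsigned cr)
    {
      return (cr >> 28) & 0xF;
    }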
diff --git a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h
index 38dc066d3..bf7fe3a75 100644
--- a/gcc-4.9/gcc/config/rs6000/htmxlintrin.h
+++ b/gcc-4.9/gcc/config/rs6000/htmxlintrin.h
@@ -46,12 +46,17 @@ extern "C" {
typedef char TM_buff_type[16];
+/* Compatibility macro with s390. This macro can be used to determine
+ whether a transaction was successfully started from the __TM_begin()
+ and __TM_simple_begin() intrinsic functions below. */
+#define _HTM_TBEGIN_STARTED 1
+
extern __inline long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__TM_simple_begin (void)
{
if (__builtin_expect (__builtin_tbegin (0), 1))
- return 1;
+ return _HTM_TBEGIN_STARTED;
return 0;
}
@@ -61,7 +66,7 @@ __TM_begin (void* const TM_buff)
{
*_TEXASRL_PTR (TM_buff) = 0;
if (__builtin_expect (__builtin_tbegin (0), 1))
- return 1;
+ return _HTM_TBEGIN_STARTED;
#ifdef __powerpc64__
*_TEXASR_PTR (TM_buff) = __builtin_get_texasr ();
#else
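The new _HTM_TBEGIN_STARTED macro mirrors s390's htmxlintrin.h so portable HTM code can compare against a named constant rather than a bare 1. Typical use (a sketch; requires -mhtm):

    #include <htmxlintrin.h>

    long
    try_transactional_increment (long *p)
    {
      if (__TM_simple_begin () == _HTM_TBEGIN_STARTED)
        {
          ++*p;                 /* transactional path */
          __TM_end ();
          return 1;
        }
      return 0;                 /* caller falls back, e.g. to a lock */
    }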
diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h
new file mode 100644
index 000000000..53997f027
--- /dev/null
+++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h
@@ -0,0 +1,41 @@
+/* Definitions for Linux-based GRTE (Google RunTime Environment).
+ Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc.
+ Contributed by Chris Demetriou and Ollie Wild.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Overrides LIB_LINUX_SPEC from sysv4.h. */
+#undef LIB_LINUX_SPEC
+#define LIB_LINUX_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}"
+
+/* When GRTE links statically, it needs its NSS and resolver libraries
+ linked in as well. Note that when linking statically, these are
+ enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */
+#undef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS \
+ { "libc", "%{static:%(libc_static);:-lc}" }, \
+ { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \
+ { "libc_static", "-lc -lresolv" }, \
+ { "libc_p_static", "-lc_p -lresolv_p" },
diff --git a/gcc-4.9/gcc/config/rs6000/linux64.h b/gcc-4.9/gcc/config/rs6000/linux64.h
index dbc9a527e..52c233b7d 100644
--- a/gcc-4.9/gcc/config/rs6000/linux64.h
+++ b/gcc-4.9/gcc/config/rs6000/linux64.h
@@ -367,11 +367,11 @@ extern int dot_symbols;
#undef LINK_OS_DEFAULT_SPEC
#define LINK_OS_DEFAULT_SPEC "%(link_os_linux)"
-#define GLIBC_DYNAMIC_LINKER32 "/lib/ld.so.1"
+#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld.so.1"
#ifdef LINUX64_DEFAULT_ABI_ELFv2
-#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:/lib64/ld64.so.1;:/lib64/ld64.so.2}"
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv1:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2}"
#else
-#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:/lib64/ld64.so.2;:/lib64/ld64.so.1}"
+#define GLIBC_DYNAMIC_LINKER64 "%{mabi=elfv2:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.2;:" RUNTIME_ROOT_PREFIX "/lib64/ld64.so.1}"
#endif
#define UCLIBC_DYNAMIC_LINKER32 "/lib/ld-uClibc.so.0"
#define UCLIBC_DYNAMIC_LINKER64 "/lib/ld64-uClibc.so.0"
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
index 8e15bdf16..220d1e970 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
@@ -622,19 +622,12 @@
| RS6000_BTC_TERNARY), \
CODE_FOR_ ## ICODE) /* ICODE */
-/* Miscellaneous builtins. */
-#define BU_MISC_1(ENUM, NAME, ATTR, ICODE) \
- RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
- "__builtin_" NAME, /* NAME */ \
- RS6000_BTM_HARD_FLOAT, /* MASK */ \
- (RS6000_BTC_ ## ATTR /* ATTR */ \
- | RS6000_BTC_UNARY), \
- CODE_FOR_ ## ICODE) /* ICODE */
-
-#define BU_MISC_2(ENUM, NAME, ATTR, ICODE) \
+/* 128-bit long double floating point builtins. */
+#define BU_LDBL128_2(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_2 (MISC_BUILTIN_ ## ENUM, /* ENUM */ \
"__builtin_" NAME, /* NAME */ \
- RS6000_BTM_HARD_FLOAT, /* MASK */ \
+ (RS6000_BTM_HARD_FLOAT /* MASK */ \
+ | RS6000_BTM_LDBL128), \
(RS6000_BTC_ ## ATTR /* ATTR */ \
| RS6000_BTC_BINARY), \
CODE_FOR_ ## ICODE) /* ICODE */
@@ -1593,10 +1586,8 @@ BU_P8V_MISC_3 (BCDSUB_OV, "bcdsub_ov", CONST, bcdsub_unordered)
BU_DFP_MISC_2 (PACK_TD, "pack_dec128", CONST, packtd)
BU_DFP_MISC_2 (UNPACK_TD, "unpack_dec128", CONST, unpacktd)
-BU_MISC_2 (PACK_TF, "pack_longdouble", CONST, packtf)
-BU_MISC_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf)
-BU_MISC_1 (UNPACK_TF_0, "longdouble_dw0", CONST, unpacktf_0)
-BU_MISC_1 (UNPACK_TF_1, "longdouble_dw1", CONST, unpacktf_1)
+BU_LDBL128_2 (PACK_TF, "pack_longdouble", CONST, packtf)
+BU_LDBL128_2 (UNPACK_TF, "unpack_longdouble", CONST, unpacktf)
BU_P7_MISC_2 (PACK_V1TI, "pack_vector_int128", CONST, packv1ti)
BU_P7_MISC_2 (UNPACK_V1TI, "unpack_vector_int128", CONST, unpackv1ti)
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
index 69bb26331..785f6ce1b 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
@@ -163,7 +163,7 @@ extern tree altivec_resolve_overloaded_builtin (location_t, tree, void *);
extern rtx rs6000_libcall_value (enum machine_mode);
extern rtx rs6000_va_arg (tree, tree);
extern int function_ok_for_sibcall (tree);
-extern int rs6000_reg_parm_stack_space (tree);
+extern int rs6000_reg_parm_stack_space (tree, bool);
extern void rs6000_elf_declare_function_name (FILE *, const char *, tree);
extern bool rs6000_elf_in_small_data_p (const_tree);
#ifdef ARGS_SIZE_RTX
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c
index 4f1a399a2..bf67e7298 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.c
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.c
@@ -3037,7 +3037,8 @@ rs6000_builtin_mask_calculate (void)
| ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
| ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
| ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
- | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0));
+ | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
+ | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
}
/* Override command line options. Mostly we process the processor type and
@@ -6118,7 +6119,8 @@ mem_operand_gpr (rtx op, enum machine_mode mode)
return false;
extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
- gcc_assert (extra >= 0);
+ if (extra < 0)
+ extra = 0;
if (GET_CODE (addr) == LO_SUM)
/* For lo_sum addresses, we must allow any offset except one that
@@ -10477,35 +10479,65 @@ rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
list, or passes any parameter in memory. */
static bool
-rs6000_function_parms_need_stack (tree fun)
+rs6000_function_parms_need_stack (tree fun, bool incoming)
{
- function_args_iterator args_iter;
- tree arg_type;
+ tree fntype, result;
CUMULATIVE_ARGS args_so_far_v;
cumulative_args_t args_so_far;
if (!fun)
/* Must be a libcall, all of which only use reg parms. */
return false;
+
+ fntype = fun;
if (!TYPE_P (fun))
- fun = TREE_TYPE (fun);
+ fntype = TREE_TYPE (fun);
/* Varargs functions need the parameter save area. */
- if (!prototype_p (fun) || stdarg_p (fun))
+ if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
return true;
- INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
args_so_far = pack_cumulative_args (&args_so_far_v);
- if (aggregate_value_p (TREE_TYPE (fun), fun))
+ /* When incoming, we will have been passed the function decl.
+ It is necessary to use the decl to handle K&R style functions,
+ where TYPE_ARG_TYPES may not be available. */
+ if (incoming)
{
- tree type = build_pointer_type (TREE_TYPE (fun));
- rs6000_parm_needs_stack (args_so_far, type);
+ gcc_assert (DECL_P (fun));
+ result = DECL_RESULT (fun);
}
+ else
+ result = TREE_TYPE (fntype);
- FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
- if (rs6000_parm_needs_stack (args_so_far, arg_type))
- return true;
+ if (result && aggregate_value_p (result, fntype))
+ {
+ if (!TYPE_P (result))
+ result = TREE_TYPE (result);
+ result = build_pointer_type (result);
+ rs6000_parm_needs_stack (args_so_far, result);
+ }
+
+ if (incoming)
+ {
+ tree parm;
+
+ for (parm = DECL_ARGUMENTS (fun);
+ parm && parm != void_list_node;
+ parm = TREE_CHAIN (parm))
+ if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
+ return true;
+ }
+ else
+ {
+ function_args_iterator args_iter;
+ tree arg_type;
+
+ FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
+ return true;
+ }
return false;
}
@@ -10517,7 +10549,7 @@ rs6000_function_parms_need_stack (tree fun)
all parameters in registers. */
int
-rs6000_reg_parm_stack_space (tree fun)
+rs6000_reg_parm_stack_space (tree fun, bool incoming)
{
int reg_parm_stack_space;
@@ -10535,7 +10567,7 @@ rs6000_reg_parm_stack_space (tree fun)
case ABI_ELFv2:
/* ??? Recomputing this every time is a bit expensive. Is there
a place to cache this information? */
- if (rs6000_function_parms_need_stack (fun))
+ if (rs6000_function_parms_need_stack (fun, incoming))
reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
else
reg_parm_stack_space = 0;
@@ -13559,11 +13591,15 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
== (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
error ("Builtin function %s requires the -mhard-dfp and"
- "-mpower8-vector options", name);
+ " -mpower8-vector options", name);
else if ((fnmask & RS6000_BTM_DFP) != 0)
error ("Builtin function %s requires the -mhard-dfp option", name);
else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
error ("Builtin function %s requires the -mpower8-vector option", name);
+ else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
+ == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
+ error ("Builtin function %s requires the -mhard-float and"
+ " -mlong-double-128 options", name);
else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
error ("Builtin function %s requires the -mhard-float option", name);
else
@@ -31313,6 +31349,7 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "htm", RS6000_BTM_HTM, false, false },
{ "hard-dfp", RS6000_BTM_DFP, false, false },
{ "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
+ { "long-double-128", RS6000_BTM_LDBL128, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
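The incoming/outgoing split above exists because a K&R-style definition carries no prototype: the callee must walk DECL_ARGUMENTS rather than TYPE_ARG_TYPES to decide whether the ELFv2 parameter save area is needed. For reference, a K&R definition looks like:

    /* Old-style (K&R) definition: no prototype is recorded, yet the
       callee still needs correct REG_PARM_STACK_SPACE information.  */
    double
    scale (x, f)
         int x;
         double f;
    {
      return x * f;
    }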
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.h b/gcc-4.9/gcc/config/rs6000/rs6000.h
index 21330dc65..2b5d033f8 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.h
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.h
@@ -1602,7 +1602,14 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
/* Define this if stack space is still allocated for a parameter passed
in a register. The value is the number of bytes allocated to this
area. */
-#define REG_PARM_STACK_SPACE(FNDECL) rs6000_reg_parm_stack_space((FNDECL))
+#define REG_PARM_STACK_SPACE(FNDECL) \
+ rs6000_reg_parm_stack_space ((FNDECL), false)
+
+/* Define this macro if the space guaranteed when compiling a function
+   body differs from the space required when making a call, a situation
+   that can arise with K&R style function definitions.  */
+#define INCOMING_REG_PARM_STACK_SPACE(FNDECL) \
+ rs6000_reg_parm_stack_space ((FNDECL), true)
/* Define this if the above stack space is to be considered part of the
space allocated by the caller. */
@@ -2501,8 +2508,8 @@ extern int frame_pointer_needed;
#define RS6000_BTC_SAT RS6000_BTC_MISC /* saturate sets VSCR. */
/* Builtin targets. For now, we reuse the masks for those options that are in
- target flags, and pick two random bits for SPE and paired which aren't in
- target_flags. */
+ target flags, and pick three random bits for SPE, paired and ldbl128 which
+ aren't in target_flags. */
#define RS6000_BTM_ALWAYS 0 /* Always enabled. */
#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */
#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */
@@ -2519,6 +2526,7 @@ extern int frame_pointer_needed;
#define RS6000_BTM_CELL MASK_FPRND /* Target is cell powerpc. */
#define RS6000_BTM_DFP MASK_DFP /* Decimal floating point. */
#define RS6000_BTM_HARD_FLOAT MASK_SOFT_FLOAT /* Hardware floating point. */
+#define RS6000_BTM_LDBL128 MASK_MULTIPLE /* 128-bit long double. */
#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \
| RS6000_BTM_VSX \
@@ -2532,7 +2540,8 @@ extern int frame_pointer_needed;
| RS6000_BTM_POPCNTD \
| RS6000_BTM_CELL \
| RS6000_BTM_DFP \
- | RS6000_BTM_HARD_FLOAT)
+ | RS6000_BTM_HARD_FLOAT \
+ | RS6000_BTM_LDBL128)
/* Define builtin enum index. */
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md
index e853bc4f9..26d0d1530 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.md
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.md
@@ -746,7 +746,7 @@
(define_insn "*extendsidi2_lfiwax"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wu")
- (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r,r,Z,Z")))]
"TARGET_POWERPC64 && TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
@@ -769,7 +769,7 @@
(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
- (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "Y,r")))]
"TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
@@ -15806,26 +15806,6 @@
""
"")
-;; The Advance Toolchain 7.0-3 added private builtins: __builtin_longdouble_dw0
-;; and __builtin_longdouble_dw1 to optimize glibc. Add support for these
-;; builtins here.
-
-(define_expand "unpacktf_0"
- [(set (match_operand:DF 0 "nonimmediate_operand" "")
- (unspec:DF [(match_operand:TF 1 "register_operand" "")
- (const_int 0)]
- UNSPEC_UNPACK_128BIT))]
- ""
- "")
-
-(define_expand "unpacktf_1"
- [(set (match_operand:DF 0 "nonimmediate_operand" "")
- (unspec:DF [(match_operand:TF 1 "register_operand" "")
- (const_int 1)]
- UNSPEC_UNPACK_128BIT))]
- ""
- "")
-
(define_insn_and_split "unpack<mode>_dm"
[(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "=d,m,d,r,m")
(unspec:<FP128_64>
diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h
index 3fb6bd59f..afbd2892e 100644
--- a/gcc-4.9/gcc/config/rs6000/sysv4.h
+++ b/gcc-4.9/gcc/config/rs6000/sysv4.h
@@ -761,7 +761,10 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
#define LINK_START_LINUX_SPEC ""
-#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld.so.1"
#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0"
#if DEFAULT_LIBC == LIBC_UCLIBC
#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}"
@@ -843,6 +846,11 @@ ncrtn.o%s"
#define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
#endif
+/* These may be provided by rs6000/linux-grtev2.h. */
+#ifndef LINUX_GRTE_EXTRA_SPECS
+#define LINUX_GRTE_EXTRA_SPECS
+#endif
+
/* Define any extra SPECS that the compiler needs to generate. */
/* Override rs6000.h definition. */
#undef SUBTARGET_EXTRA_SPECS
@@ -908,6 +916,7 @@ ncrtn.o%s"
{ "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \
{ "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \
{ "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \
+ LINUX_GRTE_EXTRA_SPECS \
SUBSUBTARGET_EXTRA_SPECS
#define SUBSUBTARGET_EXTRA_SPECS
diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md
index 23d85ab06..6d20eab11 100644
--- a/gcc-4.9/gcc/config/rs6000/vsx.md
+++ b/gcc-4.9/gcc/config/rs6000/vsx.md
@@ -24,6 +24,13 @@
;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])
+;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
+;; lxvd2x to properly handle swapping words on little endian
+(define_mode_iterator VSX_LE [V2DF
+ V2DI
+ V1TI
+ (TI "VECTOR_MEM_VSX_P (TImode)")])
+
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -228,8 +235,8 @@
;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (match_operand:VSX_D 1 "memory_operand" "Z"))]
+ [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+ (match_operand:VSX_LE 1 "memory_operand" "Z"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
"#"
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -342,16 +349,16 @@
(set_attr "length" "8")])
(define_insn "*vsx_le_perm_store_<mode>"
- [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
- (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
+ [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
+ (match_operand:VSX_LE 1 "vsx_register_operand" "+wa"))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
"#"
[(set_attr "type" "vecstore")
(set_attr "length" "12")])
(define_split
- [(set (match_operand:VSX_D 0 "memory_operand" "")
- (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ [(set (match_operand:VSX_LE 0 "memory_operand" "")
+ (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
[(set (match_dup 2)
(vec_select:<MODE>
@@ -369,8 +376,8 @@
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
- [(set (match_operand:VSX_D 0 "memory_operand" "")
- (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+ [(set (match_operand:VSX_LE 0 "memory_operand" "")
+ (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
[(set (match_dup 1)
(vec_select:<MODE>
@@ -1352,9 +1359,9 @@
;; xxpermdi for little endian loads and stores. We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_select:VSX_D
- (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_LE
+ (match_operand:VSX_LE 1 "vsx_register_operand" "wa")
(parallel [(const_int 1) (const_int 0)])))]
"!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxpermdi %x0,%x1,%x1,2"
@@ -1401,9 +1408,9 @@
;; lxvd2x for little endian loads. We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_select:VSX_D
- (match_operand:VSX_D 1 "memory_operand" "Z")
+ [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+ (vec_select:VSX_LE
+ (match_operand:VSX_LE 1 "memory_operand" "Z")
(parallel [(const_int 1) (const_int 0)])))]
"!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
"lxvd2x %x0,%y1"
@@ -1450,9 +1457,9 @@
;; stxvd2x for little endian stores. We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
- [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
- (vec_select:VSX_D
- (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
+ (vec_select:VSX_LE
+ (match_operand:VSX_LE 1 "vsx_register_operand" "wa")
(parallel [(const_int 1) (const_int 0)])))]
"!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
"stxvd2x %x1,%y0"
@@ -1683,7 +1690,7 @@
{
if (GET_CODE (op3) == SCRATCH)
op3 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
+ emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
tmp = op3;
}
emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
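Extending these patterns from VSX_D to the new VSX_LE iterator lets V1TI (and TI when VECTOR_MEM_VSX_P) reuse the little-endian lxvd2x/stxvd2x handling, where the load presents the two doublewords in big-endian order and an xxpermdi with selector 2 swaps them back. A scalar model of the load side (my sketch):

    #include <stdint.h>

    /* mem[] holds the value in LE memory order; lxvd2x presents the two
       doublewords swapped, and the xxpermdi in the split undoes that.  */
    static void
    le_load_2dw (uint64_t reg[2], const uint64_t mem[2])
    {
      uint64_t raw[2] = { mem[1], mem[0] };   /* lxvd2x's view */
      reg[0] = raw[1];                        /* xxpermdi ,2: swap halves */
      reg[1] = raw[0];
    }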
diff --git a/gcc-4.9/gcc/config/rtems.h b/gcc-4.9/gcc/config/rtems.h
index 3da27c57e..f14aed36a 100644
--- a/gcc-4.9/gcc/config/rtems.h
+++ b/gcc-4.9/gcc/config/rtems.h
@@ -13,8 +13,13 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* The system headers under RTEMS are C++-aware. */
diff --git a/gcc-4.9/gcc/config/sol2-clearcap.map b/gcc-4.9/gcc/config/sol2-clearcap.map
new file mode 100644
index 000000000..2d880c983
--- /dev/null
+++ b/gcc-4.9/gcc/config/sol2-clearcap.map
@@ -0,0 +1,2 @@
+# Clear all hardware capabilities emitted by Sun as.
+hwcap_1 = V0x0 OVERRIDE;
diff --git a/gcc-4.9/gcc/config/sol2-clearcapv2.map b/gcc-4.9/gcc/config/sol2-clearcapv2.map
new file mode 100644
index 000000000..3c0cacedb
--- /dev/null
+++ b/gcc-4.9/gcc/config/sol2-clearcapv2.map
@@ -0,0 +1,7 @@
+# Clear all hardware capabilities emitted by Sun as.
+#
+# Uses mapfile v2 syntax which is the only way to clear AT_SUN_CAP_HW2 flags.
+$mapfile_version 2
+CAPABILITY {
+ HW = ;
+};
diff --git a/gcc-4.9/gcc/config/sol2.h b/gcc-4.9/gcc/config/sol2.h
index eb8e0b91f..db7844b29 100644
--- a/gcc-4.9/gcc/config/sol2.h
+++ b/gcc-4.9/gcc/config/sol2.h
@@ -183,12 +183,21 @@ along with GCC; see the file COPYING3. If not see
#define LINK_LIBGCC_MAPFILE_SPEC ""
#endif
+/* Clear hardware capabilities, either explicitly or with OpenMP:
+   #pragma omp declare simd creates clones for SSE2, AVX, and AVX2. */
+#ifdef HAVE_LD_CLEARCAP
+#define LINK_CLEARCAP_SPEC " %{mclear-hwcap|fopenmp*:-M %sclearcap.map}"
+#else
+#define LINK_CLEARCAP_SPEC ""
+#endif
+
#undef LINK_SPEC
#define LINK_SPEC \
"%{h*} %{v:-V} \
%{!shared:%{!static:%{rdynamic: " RDYNAMIC_SPEC "}}} \
%{static:-dn -Bstatic} \
- %{shared:-G -dy %{!mimpure-text:-z text}} " LINK_LIBGCC_MAPFILE_SPEC " \
+ %{shared:-G -dy %{!mimpure-text:-z text}} " \
+ LINK_LIBGCC_MAPFILE_SPEC LINK_CLEARCAP_SPEC " \
%{symbolic:-Bsymbolic -G -dy -z text} \
%(link_arch) \
%{Qy:} %{!Qn:-Qy}"
diff --git a/gcc-4.9/gcc/config/sol2.opt b/gcc-4.9/gcc/config/sol2.opt
index a5ae7c510..16a3e5f58 100644
--- a/gcc-4.9/gcc/config/sol2.opt
+++ b/gcc-4.9/gcc/config/sol2.opt
@@ -27,6 +27,10 @@ Driver Joined
Ym,
Driver Joined
+mclear-hwcap
+Target Report
+Clear hardware capabilities when linking
+
mimpure-text
Target Report
Pass -z text to linker
diff --git a/gcc-4.9/gcc/config/t-sol2 b/gcc-4.9/gcc/config/t-sol2
index a4c4af4ad..25feb0486 100644
--- a/gcc-4.9/gcc/config/t-sol2
+++ b/gcc-4.9/gcc/config/t-sol2
@@ -35,3 +35,10 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c
sol2.o: $(srcdir)/config/sol2.c
$(COMPILE) $<
$(POSTCOMPILE)
+
+# Install clearcap.map if present.
+install: install-clearcap-map
+
+# Ignore failures: file only exists if linker supports it.
+install-clearcap-map:
+ -$(INSTALL_DATA) clearcap.map $(DESTDIR)$(libdir)
diff --git a/gcc-4.9/gcc/config/v850/rtems.h b/gcc-4.9/gcc/config/v850/rtems.h
index 01dff3e52..42ebb9fc0 100644
--- a/gcc-4.9/gcc/config/v850/rtems.h
+++ b/gcc-4.9/gcc/config/v850/rtems.h
@@ -13,8 +13,13 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Specify predefined symbols in preprocessor. */
diff --git a/gcc-4.9/gcc/config/v850/v850-opts.h b/gcc-4.9/gcc/config/v850/v850-opts.h
index a91b1b059..c3d9b7aac 100644
--- a/gcc-4.9/gcc/config/v850/v850-opts.h
+++ b/gcc-4.9/gcc/config/v850/v850-opts.h
@@ -13,8 +13,13 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef V850_OPTS_H
diff --git a/gcc-4.9/gcc/config/v850/v850.h b/gcc-4.9/gcc/config/v850/v850.h
index 92db20a44..6ccdc5d65 100644
--- a/gcc-4.9/gcc/config/v850/v850.h
+++ b/gcc-4.9/gcc/config/v850/v850.h
@@ -14,8 +14,13 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef GCC_V850_H