Diffstat (limited to 'gcc-4.9/gcc/config/aarch64')
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h |  59
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-linux.h         |  35
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-modes.def       |   1
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-simd.md         | 461
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.c               |  20
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.md              |  28
-rw-r--r--  gcc-4.9/gcc/config/aarch64/arm_neon.h              |  50
-rw-r--r--  gcc-4.9/gcc/config/aarch64/iterators.md            |  16
8 files changed, 522 insertions(+), 148 deletions(-)
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
new file mode 100644
index 000000000..91d235ff1
--- /dev/null
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h
@@ -0,0 +1,59 @@
+/* Machine description for AArch64 architecture.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_LINUX_ANDROID_H
+#define GCC_AARCH64_LINUX_ANDROID_H
+
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ ANDROID_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
+ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
+
+#undef CC1_SPEC
+#define CC1_SPEC \
+ LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
+ GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
+
+#define CC1PLUS_SPEC \
+ LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
+
+#undef LIB_SPEC
+#define LIB_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
+ GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+#endif /* GCC_AARCH64_LINUX_ANDROID_H */
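For context, the LINUX_OR_ANDROID_CC and LINUX_OR_ANDROID_LD selectors used
above come from gcc/config/linux-android.h; a sketch of their shape, assuming
the usual definitions (check the copy in this tree):

    /* Each selector picks the Linux spec unless Android mode is active
       (sketch; the exact text lives in gcc/config/linux-android.h).  */
    #define NOANDROID "mno-android"

    #define LINUX_OR_ANDROID_CC(LINUX_SPEC, ANDROID_SPEC) \
      "%{" NOANDROID "|tno-android-cc:" LINUX_SPEC ";:" ANDROID_SPEC "}"

    #define LINUX_OR_ANDROID_LD(LINUX_SPEC, ANDROID_SPEC) \
      "%{" NOANDROID "|tno-android-ld:" LINUX_SPEC ";:" ANDROID_SPEC "}"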
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
index f32d19f16..f8a97c899 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
@@ -21,8 +21,10 @@
#ifndef GCC_AARCH64_LINUX_H
#define GCC_AARCH64_LINUX_H
-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
-#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
#define CPP_SPEC "%{pthread:-D_REENTRANT}"
@@ -36,38 +38,13 @@
%{mbig-endian:-EB} %{mlittle-endian:-EL} \
-maarch64linux%{mbig-endian:b}"
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+
#define TARGET_OS_CPP_BUILTINS() \
do \
{ \
GNU_USER_TARGET_OS_CPP_BUILTINS(); \
- ANDROID_TARGET_OS_CPP_BUILTINS(); \
} \
while (0)
-#undef LINK_SPEC
-#define LINK_SPEC \
- LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
- LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC)
-
-#undef CC1_SPEC
-#define CC1_SPEC \
- LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \
- GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic"))
-
-#define CC1PLUS_SPEC \
- LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC)
-
-#undef LIB_SPEC
-#define LIB_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
- GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC \
- LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
-
#endif /* GCC_AARCH64_LINUX_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
index 1d2cc7679..f9c436948 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def
@@ -31,6 +31,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */
VECTOR_MODES (FLOAT, 8); /* V2SF. */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */
+VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */
INT_MODE (OI, 32);
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
index 73aee2c3d..1f827b57d 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
@@ -934,14 +934,22 @@
[(set_attr "type" "neon_minmax<q>")]
)
-;; Move into low-half clearing high half to 0.
+;; vec_concat gives a new vector with the low elements from operand 1, and
+;; the high elements from operand 2.  That is to say, given op1 = { a, b }
+;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
+;; This means that the RTL descriptions of the patterns below need to
+;; change depending on endianness.
-(define_insn "move_lo_quad_<mode>"
+;; Move to the low architectural bits of the register.
+;; On little-endian this is { operand, zeroes }
+;; On big-endian this is { zeroes, operand }
+
+(define_insn "move_lo_quad_internal_<mode>"
[(set (match_operand:VQ 0 "register_operand" "=w,w,w")
(vec_concat:VQ
(match_operand:<VHALF> 1 "register_operand" "w,r,r")
(vec_duplicate:<VHALF> (const_int 0))))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
dup\\t%d0, %1.d[0]
fmov\\t%d0, %1
@@ -952,7 +960,39 @@
(set_attr "length" "4")]
)
-;; Move into high-half.
+(define_insn "move_lo_quad_internal_be_<mode>"
+ [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
+ (vec_concat:VQ
+ (vec_duplicate:<VHALF> (const_int 0))
+ (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "@
+ dup\\t%d0, %1.d[0]
+ fmov\\t%d0, %1
+ dup\\t%d0, %1"
+ [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
+ (set_attr "simd" "yes,*,yes")
+ (set_attr "fp" "*,yes,*")
+ (set_attr "length" "4")]
+)
+
+(define_expand "move_lo_quad_<mode>"
+ [(match_operand:VQ 0 "register_operand")
+ (match_operand:VQ 1 "register_operand")]
+ "TARGET_SIMD"
+{
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
+ else
+ emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
+ DONE;
+}
+)
+
+;; Move operand1 to the high architectural bits of the register, keeping
+;; the low architectural bits of operand2.
+;; For little-endian this is { operand2, operand1 }
+;; For big-endian this is { operand1, operand2 }
(define_insn "aarch64_simd_move_hi_quad_<mode>"
[(set (match_operand:VQ 0 "register_operand" "+w,w")
@@ -961,12 +1001,25 @@
(match_dup 0)
(match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
(match_operand:<VHALF> 1 "register_operand" "w,r")))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
ins\\t%0.d[1], %1.d[0]
ins\\t%0.d[1], %1"
- [(set_attr "type" "neon_ins")
- (set_attr "length" "4")]
+ [(set_attr "type" "neon_ins")]
+)
+
+(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
+ [(set (match_operand:VQ 0 "register_operand" "+w,w")
+ (vec_concat:VQ
+ (match_operand:<VHALF> 1 "register_operand" "w,r")
+ (vec_select:<VHALF>
+ (match_dup 0)
+ (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "@
+ ins\\t%0.d[1], %1.d[0]
+ ins\\t%0.d[1], %1"
+ [(set_attr "type" "neon_ins")]
)
(define_expand "move_hi_quad_<mode>"
@@ -974,9 +1027,13 @@
(match_operand:<VHALF> 1 "register_operand" "")]
"TARGET_SIMD"
{
- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
- emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
- operands[1], p));
+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
+ operands[1], p));
+ else
+ emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
+ operands[1], p));
DONE;
})
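The lane ordering these patterns describe is visible from C through the NEON
intrinsics; a minimal sketch (the function name is illustrative):

    #include <arm_neon.h>

    /* vcombine_s32 places its first argument in the low half (lanes 0-1)
       and its second in the high half (lanes 2-3).  On big-endian the RTL
       vec_concat operands must be swapped to describe the same register,
       which is what the _be variants above do.  */
    int32x4_t
    concat_halves (int32x2_t lo, int32x2_t hi)
    {
      return vcombine_s32 (lo, hi);   /* { lo[0], lo[1], hi[0], hi[1] }  */
    }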
@@ -2321,12 +2378,44 @@
(vec_concat:<VDBL>
(match_operand:VDIC 1 "register_operand" "w")
(match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"mov\\t%0.8b, %1.8b"
[(set_attr "type" "neon_move<q>")]
)
-(define_insn_and_split "aarch64_combine<mode>"
+(define_insn "*aarch64_combinez_be<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ (vec_concat:<VDBL>
+ (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")
+ (match_operand:VDIC 1 "register_operand" "w")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "mov\\t%0.8b, %1.8b"
+ [(set_attr "type" "neon_move<q>")]
+)
+
+(define_expand "aarch64_combine<mode>"
+ [(match_operand:<VDBL> 0 "register_operand")
+ (match_operand:VDC 1 "register_operand")
+ (match_operand:VDC 2 "register_operand")]
+ "TARGET_SIMD"
+{
+ rtx op1, op2;
+ if (BYTES_BIG_ENDIAN)
+ {
+ op1 = operands[2];
+ op2 = operands[1];
+ }
+ else
+ {
+ op1 = operands[1];
+ op2 = operands[2];
+ }
+ emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
+ DONE;
+}
+)
+
+(define_insn_and_split "aarch64_combine_internal<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=&w")
(vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
(match_operand:VDC 2 "register_operand" "w")))]
@@ -2335,16 +2424,19 @@
"&& reload_completed"
[(const_int 0)]
{
- aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ if (BYTES_BIG_ENDIAN)
+ aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
+ else
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
DONE;
}
[(set_attr "type" "multiple")]
)
(define_expand "aarch64_simd_combine<mode>"
- [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
- (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
- (match_operand:VDC 2 "register_operand" "w")))]
+ [(match_operand:<VDBL> 0 "register_operand")
+ (match_operand:VDC 1 "register_operand")
+ (match_operand:VDC 2 "register_operand")]
"TARGET_SIMD"
{
emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
@@ -2633,7 +2725,41 @@
;; sq<r>dmulh_lane
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2649,7 +2775,41 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_expand "aarch64_sqdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_laneq<mode>"
+ [(match_operand:VDQHS 0 "register_operand" "")
+ (match_operand:VDQHS 1 "register_operand" "")
+ (match_operand:<VCONQ> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_laneq<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal"
[(set (match_operand:VDQHS 0 "register_operand" "=w")
(unspec:VDQHS
[(match_operand:VDQHS 1 "register_operand" "w")
@@ -2659,24 +2819,56 @@
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_expand "aarch64_sqdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_expand "aarch64_sqrdmulh_lane<mode>"
+ [(match_operand:SD_HSI 0 "register_operand" "")
+ (match_operand:SD_HSI 1 "register_operand" "")
+ (match_operand:<VCOND> 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" "")]
+ "TARGET_SIMD"
+ {
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
(vec_select:<VEL>
- (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
- operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
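These expanders all funnel the lane index through ENDIAN_LANE_N before the
_internal pattern prints it; a sketch of that remapping, assuming the macro
keeps its usual backend definition:

    /* Architectural lane 0 is the lowest-numbered RTL lane on little-endian
       but the highest-numbered one on big-endian (sketch of the macro in
       aarch64.h; verify against this tree).  */
    #define ENDIAN_LANE_N(mode, n) \
      (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - (n) : (n))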
@@ -2712,7 +2904,31 @@
(sign_extend:<VWIDE>
(vec_duplicate:VD_HSI
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ ))
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
))
(const_int 1))))]
@@ -2735,7 +2951,30 @@
(match_operand:SD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ )
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
)
(const_int 1))))]
@@ -2752,11 +2991,12 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2767,12 +3007,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlal_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
DONE;
@@ -2782,11 +3023,12 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
@@ -2797,12 +3039,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "0")
(match_operand:VSD_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlsl_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4]));
DONE;
@@ -2890,7 +3133,33 @@
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])
+ ))))
+ (const_int 1))))]
+ "TARGET_SIMD"
+ {
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
+ return
+ "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (SBINQOPS:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
))))
(const_int 1))))]
@@ -2907,12 +3176,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2923,13 +3193,14 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
DONE;
@@ -2939,12 +3210,13 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
@@ -2955,13 +3227,14 @@
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:<VWIDE> 1 "register_operand" "w")
(match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:<VCON> 3 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(match_operand:SI 4 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
+ emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
operands[4], p));
DONE;
@@ -3041,7 +3314,28 @@
(sign_extend:<VWIDE>
(vec_duplicate:VD_HSI
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
@@ -3061,7 +3355,27 @@
(match_operand:SD_HSI 1 "register_operand" "w"))
(sign_extend:<VWIDE>
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 1 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))
))
(const_int 1)))]
@@ -3076,11 +3390,12 @@
(define_expand "aarch64_sqdmull_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VSD_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3]));
DONE;
@@ -3089,12 +3404,13 @@
(define_expand "aarch64_sqdmull_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VD_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
- emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmull_laneq<mode>_internal
(operands[0], operands[1], operands[2], operands[3]));
DONE;
})
@@ -3143,7 +3459,7 @@
(define_expand "aarch64_sqdmull2<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "w")]
+ (match_operand:VQ_HSI 2 "register_operand" "w")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
@@ -3165,7 +3481,30 @@
(sign_extend:<VWIDE>
(vec_duplicate:<VHALF>
(vec_select:<VEL>
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
+ ))
+ (const_int 1)))]
+ "TARGET_SIMD"
+ {
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
+ return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+ }
+ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:<VHALF>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
(const_int 1)))]
@@ -3180,12 +3519,13 @@
(define_expand "aarch64_sqdmull2_lane<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCOND> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
@@ -3195,13 +3535,14 @@
(define_expand "aarch64_sqdmull2_laneq<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:<VCON> 2 "register_operand" "<vwx>")
+ (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(match_operand:SI 3 "immediate_operand" "i")]
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
- emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
+ emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
operands[2], operands[3],
p));
DONE;
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c
index 7b6c2b38e..bf35031ec 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.c
@@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
int ncrn, nvrn, nregs;
bool allocate_ncrn, allocate_nvrn;
+ HOST_WIDE_INT size;
/* We need to do this once per argument. */
if (pcum->aapcs_arg_processed)
@@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
pcum->aapcs_arg_processed = true;
+ /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
+ size
+ = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
+ UNITS_PER_WORD);
+
allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
mode,
@@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
}
ncrn = pcum->aapcs_ncrn;
- nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
- + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
-
+ nregs = size / UNITS_PER_WORD;
/* C6 - C9. though the sign and zero extension semantics are
handled elsewhere. This is the case where the argument fits
@@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
pcum->aapcs_nextncrn = NUM_ARG_REGS;
/* The argument is passed on stack; record the needed number of words for
- this argument (we can re-use NREGS) and align the total size if
- necessary. */
+ this argument and align the total size if necessary. */
on_stack:
- pcum->aapcs_stack_words = nregs;
+ pcum->aapcs_stack_words = size / UNITS_PER_WORD;
if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
- 16 / UNITS_PER_WORD) + 1;
+ 16 / UNITS_PER_WORD);
return;
}
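The size computation above leans on AARCH64_ROUND_UP; a sketch of the macro
plus a worked case (macro body assumed from aarch64.h):

    #define AARCH64_ROUND_UP(X, ALIGNMENT) \
      (((X) + ((ALIGNMENT) - 1)) & ~((ALIGNMENT) - 1))

    /* A 12-byte argument rounds up to 16 bytes, i.e. two 8-byte stack
       words.  The deleted "+ 1" used to charge one spurious extra word
       whenever the argument required 16-byte alignment.  */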
@@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode)
|| mode == V16QImode || mode == V2DImode
|| mode == V2SImode || mode == V4HImode
|| mode == V8QImode || mode == V2SFmode
- || mode == V4SFmode || mode == V2DFmode))
+ || mode == V4SFmode || mode == V2DFmode
+ || mode == V1DFmode))
return true;
return false;
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md
index c86a29d8e..df81045e9 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.md
@@ -2823,17 +2823,18 @@
;; Arithmetic right shift using SISD or Integer instruction
(define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r")
(ashiftrt:GPI
- (match_operand:GPI 1 "register_operand" "w,w,r")
- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,rUs<cmode>")))]
+ (match_operand:GPI 1 "register_operand" "w,w,w,r")
+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,0,rUs<cmode>")))]
""
"@
sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
#
+ #
asr\t%<w>0, %<w>1, %<w>2"
- [(set_attr "simd" "yes,yes,no")
- (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
+ [(set_attr "simd" "yes,yes,yes,no")
+ (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>,shift_reg")]
)
(define_split
@@ -2842,11 +2843,13 @@
(match_operand:DI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))]
- ""
+ (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))]
+{
+ operands[3] = gen_lowpart (QImode, operands[0]);
+}
)
(define_split
@@ -2855,11 +2858,13 @@
(match_operand:SI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))]
- ""
+ (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))]
+{
+ operands[3] = gen_lowpart (QImode, operands[0]);
+}
)
(define_insn "*aarch64_sisd_ushl"
@@ -3608,6 +3613,7 @@
(unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")]
UNSPEC_TLSDESC))
(clobber (reg:DI LR_REGNUM))
+ (clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:DI 1 "=r"))]
"TARGET_TLS_DESC"
"adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h
index b03d11422..c01669b2c 100644
--- a/gcc-4.9/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h
@@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
@@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
- int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
@@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
- int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}
@@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}
@@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
+vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
@@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
{
- int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
+vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
int const __d)
{
return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
@@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
{
- int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
+ return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
+vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}
@@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
+vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
@@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}
@@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
@@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
+vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
{
return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
}
@@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
{
- int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
+ return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
@@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
+vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
{
return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
}
@@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
{
- int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0)));
- return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
+ return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
}
__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
@@ -21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}
@@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
@@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
+vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}
@@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
+vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
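With the prototypes corrected to take 64-bit vectors, the lane argument is
bounded by the narrower type; a minimal usage sketch:

    #include <arm_neon.h>

    int32x4_t
    widen_mul (int16x4_t a, int16x4_t b)
    {
      /* __b is int16x4_t after this change, so the lane index must be
         in [0, 3] rather than [0, 7].  */
      return vqdmull_lane_s16 (a, b, 1);
    }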
diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md
index f1339b8cc..e76e3ef10 100644
--- a/gcc-4.9/gcc/config/aarch64/iterators.md
+++ b/gcc-4.9/gcc/config/aarch64/iterators.md
@@ -396,14 +396,15 @@
(SI "SI") (HI "HI")
(QI "QI")])
-;; Define container mode for lane selection.
-(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+;; 64-bit container modes for the inner or scalar source mode.
+(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
+ (V4HI "V4HI") (V8HI "V4HI")
(V2SI "V2SI") (V4SI "V2SI")
(DI "DI") (V2DI "DI")
(V2SF "V2SF") (V4SF "V2SF")
(V2DF "DF")])
-;; Define container mode for lane selection.
+;; 128-bit container modes for the inner or scalar source mode.
(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
(V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI")
@@ -412,15 +413,6 @@
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")])
-;; Define container mode for lane selection.
-(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
- (V4HI "V8HI") (V8HI "V8HI")
- (V2SI "V4SI") (V4SI "V4SI")
- (DI "V2DI") (V2DI "V2DI")
- (V2SF "V4SF") (V4SF "V4SF")
- (V2DF "V2DF") (SI "V4SI")
- (HI "V8HI") (QI "V16QI")])
-
;; Half modes of all vector modes.
(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V4HI "V2HI") (V8HI "V4HI")