Diffstat (limited to 'gcc-4.9/gcc/config')
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 154
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h | 8
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 11
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 1
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 39
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.c | 12
-rw-r--r--  gcc-4.9/gcc/config/aarch64/aarch64.md | 55
-rw-r--r--  gcc-4.9/gcc/config/aarch64/arm_neon.h | 494
-rw-r--r--  gcc-4.9/gcc/config/alpha/alpha.c | 6
-rw-r--r--  gcc-4.9/gcc/config/alpha/alpha.md | 63
-rw-r--r--  gcc-4.9/gcc/config/arm/arm-protos.h | 3
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.c | 88
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.h | 11
-rw-r--r--  gcc-4.9/gcc/config/arm/arm.md | 64
-rw-r--r--  gcc-4.9/gcc/config/arm/constraints.md | 11
-rw-r--r--  gcc-4.9/gcc/config/arm/linux-grte.h | 27
-rw-r--r--  gcc-4.9/gcc/config/arm/t-aprofile | 3
-rw-r--r--  gcc-4.9/gcc/config/avr/avr-dimode.md | 7
-rw-r--r--  gcc-4.9/gcc/config/avr/avr-fixed.md | 41
-rw-r--r--  gcc-4.9/gcc/config/avr/avr-protos.h | 9
-rw-r--r--  gcc-4.9/gcc/config/avr/avr.c | 109
-rw-r--r--  gcc-4.9/gcc/config/avr/avr.md | 155
-rw-r--r--  gcc-4.9/gcc/config/darwin-c.c | 25
-rw-r--r--  gcc-4.9/gcc/config/darwin-driver.c | 35
-rw-r--r--  gcc-4.9/gcc/config/gnu-user.h | 7
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.c | 349
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.md | 62
-rw-r--r--  gcc-4.9/gcc/config/i386/i386.opt | 8
-rw-r--r--  gcc-4.9/gcc/config/i386/linux.h | 15
-rw-r--r--  gcc-4.9/gcc/config/i386/linux64.h | 9
-rw-r--r--  gcc-4.9/gcc/config/i386/mmx.md | 36
-rw-r--r--  gcc-4.9/gcc/config/i386/x86-tune.def | 8
-rw-r--r--  gcc-4.9/gcc/config/ia64/ia64.c | 5
-rw-r--r--  gcc-4.9/gcc/config/linux-grte.h | 41
-rw-r--r--  gcc-4.9/gcc/config/linux.c | 2
-rw-r--r--  gcc-4.9/gcc/config/msp430/msp430.md | 6
-rw-r--r--  gcc-4.9/gcc/config/nios2/nios2.c | 15
-rw-r--r--  gcc-4.9/gcc/config/pa/pa.c | 26
-rw-r--r--  gcc-4.9/gcc/config/pa/pa.md | 25
-rw-r--r--  gcc-4.9/gcc/config/pa/predicates.md | 21
-rw-r--r--  gcc-4.9/gcc/config/rs6000/altivec.h | 6
-rw-r--r--  gcc-4.9/gcc/config/rs6000/altivec.md | 56
-rw-r--r--  gcc-4.9/gcc/config/rs6000/darwin.h | 6
-rw-r--r--  gcc-4.9/gcc/config/rs6000/linux-grte.h | 41
-rw-r--r--  gcc-4.9/gcc/config/rs6000/predicates.md | 75
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 10
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000-c.c | 147
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 5
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.c | 1367
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.md | 88
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rs6000.opt | 4
-rw-r--r--  gcc-4.9/gcc/config/rs6000/rtems.h | 3
-rw-r--r--  gcc-4.9/gcc/config/rs6000/sysv4.h | 6
-rw-r--r--  gcc-4.9/gcc/config/rs6000/t-rtems | 73
-rw-r--r--  gcc-4.9/gcc/config/rs6000/vsx.md | 227
-rw-r--r--  gcc-4.9/gcc/config/rs6000/xcoff.h | 7
-rw-r--r--  gcc-4.9/gcc/config/rx/rx.c | 2
-rw-r--r--  gcc-4.9/gcc/config/rx/rx.h | 6
-rw-r--r--  gcc-4.9/gcc/config/s390/s390.c | 19
-rw-r--r--  gcc-4.9/gcc/config/s390/s390.md | 2
-rw-r--r--  gcc-4.9/gcc/config/sh/predicates.md | 2
-rw-r--r--  gcc-4.9/gcc/config/sh/sh-mem.cc | 24
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.c | 2
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.md | 47
-rw-r--r--  gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 24
-rw-r--r--  gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 49
-rw-r--r--  gcc-4.9/gcc/config/sh/sync.md | 18
-rw-r--r--  gcc-4.9/gcc/config/sparc/leon.md | 14
-rw-r--r--  gcc-4.9/gcc/config/sparc/linux.h | 6
-rw-r--r--  gcc-4.9/gcc/config/sparc/linux64.h | 6
-rw-r--r--  gcc-4.9/gcc/config/sparc/sparc-opts.h | 1
-rw-r--r--  gcc-4.9/gcc/config/sparc/sparc.c | 40
-rw-r--r--  gcc-4.9/gcc/config/sparc/sparc.h | 51
-rw-r--r--  gcc-4.9/gcc/config/sparc/sparc.md | 1
-rw-r--r--  gcc-4.9/gcc/config/sparc/sparc.opt | 3
-rw-r--r--  gcc-4.9/gcc/config/sparc/t-rtems | 13
76 files changed, 3459 insertions, 1058 deletions
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c
index 55cfe0ab2..a5af874bf 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c
@@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
enum aarch64_builtins
{
AARCH64_BUILTIN_MIN,
+
+ AARCH64_BUILTIN_GET_FPCR,
+ AARCH64_BUILTIN_SET_FPCR,
+ AARCH64_BUILTIN_GET_FPSR,
+ AARCH64_BUILTIN_SET_FPSR,
+
AARCH64_SIMD_BUILTIN_BASE,
#include "aarch64-simd-builtins.def"
AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
@@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void)
void
aarch64_init_builtins (void)
{
+ tree ftype_set_fpr
+ = build_function_type_list (void_type_node, unsigned_type_node, NULL);
+ tree ftype_get_fpr
+ = build_function_type_list (unsigned_type_node, NULL);
+
+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
+ = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
+ AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
+ = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
+ AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
+ = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
+ AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
+ = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
+ AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
+
if (TARGET_SIMD)
aarch64_init_simd_builtins ();
}
@@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp,
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
int fcode = DECL_FUNCTION_CODE (fndecl);
+ int icode;
+ rtx pat, op0;
+ tree arg0;
+
+ switch (fcode)
+ {
+ case AARCH64_BUILTIN_GET_FPCR:
+ case AARCH64_BUILTIN_SET_FPCR:
+ case AARCH64_BUILTIN_GET_FPSR:
+ case AARCH64_BUILTIN_SET_FPSR:
+ if ((fcode == AARCH64_BUILTIN_GET_FPCR)
+ || (fcode == AARCH64_BUILTIN_GET_FPSR))
+ {
+ icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
+ CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
+ target = gen_reg_rtx (SImode);
+ pat = GEN_FCN (icode) (target);
+ }
+ else
+ {
+ target = NULL_RTX;
+ icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ pat = GEN_FCN (icode) (op0);
+ }
+ emit_insn (pat);
+ return target;
+ }
if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
return changed;
}
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+ const unsigned AARCH64_FE_INVALID = 1;
+ const unsigned AARCH64_FE_DIVBYZERO = 2;
+ const unsigned AARCH64_FE_OVERFLOW = 4;
+ const unsigned AARCH64_FE_UNDERFLOW = 8;
+ const unsigned AARCH64_FE_INEXACT = 16;
+ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
+ | AARCH64_FE_DIVBYZERO
+ | AARCH64_FE_OVERFLOW
+ | AARCH64_FE_UNDERFLOW
+ | AARCH64_FE_INEXACT);
+ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
+ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+ /* Generate the equivalent of:
+ unsigned int fenv_cr;
+ fenv_cr = __builtin_aarch64_get_fpcr ();
+
+ unsigned int fenv_sr;
+ fenv_sr = __builtin_aarch64_get_fpsr ();
+
+ Now set all exceptions to non-stop
+ unsigned int mask_cr
+ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
+ unsigned int masked_cr;
+ masked_cr = fenv_cr & mask_cr;
+
+ And clear all exception flags
+ unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
+ unsigned int masked_sr;
+ masked_sr = fenv_sr & mask_sr;
+
+ __builtin_aarch64_set_fpcr (masked_cr);
+ __builtin_aarch64_set_fpsr (masked_sr); */
+
+ fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+ fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+ mask_cr = build_int_cst (unsigned_type_node,
+ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
+ mask_sr = build_int_cst (unsigned_type_node,
+ ~(AARCH64_FE_ALL_EXCEPT));
+
+ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+ fenv_cr, build_call_expr (get_fpcr, 0));
+ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+ fenv_sr, build_call_expr (get_fpsr, 0));
+
+ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+ hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+ hold_fnclex_sr);
+ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+ masked_fenv_sr);
+ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+ *hold = build2 (COMPOUND_EXPR, void_type_node,
+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+ hold_fnclex);
+
+ /* Store the value of masked_fenv to clear the exceptions:
+ __builtin_aarch64_set_fpsr (masked_fenv_sr); */
+
+ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+ /* Generate the equivalent of :
+ unsigned int new_fenv_var;
+ new_fenv_var = __builtin_aarch64_get_fpsr ();
+
+ __builtin_aarch64_set_fpsr (fenv_sr);
+
+ __atomic_feraiseexcept (new_fenv_var); */
+
+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
+ new_fenv_var, build_call_expr (get_fpsr, 0));
+ restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
+ update_call = build_call_expr (atomic_feraiseexcept, 1,
+ fold_convert (integer_type_node, new_fenv_var));
+ *update = build2 (COMPOUND_EXPR, void_type_node,
+ build2 (COMPOUND_EXPR, void_type_node,
+ reload_fenv, restore_fnenv), update_call);
+}
+
+
#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef BUILTIN_DX
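For illustration only (not part of the patch): a minimal C sketch of how the four FPCR/FPSR builtins registered above could be exercised on an AArch64 target. The builtin names come from the hunk itself; the function name is hypothetical, and the 0x1f mask and bit-8 trap-enable position are assumptions mirroring the AARCH64_FE_* constants and AARCH64_FE_EXCEPT_SHIFT in aarch64_atomic_assign_expand_fenv.

/* Sketch: mask the five FP trap-enable bits in FPCR (assumed at bits
   8..12) and clear the five sticky exception flags in FPSR, returning
   the flags that were pending.  */
unsigned int
fp_disable_traps_and_clear_flags (void)
{
  unsigned int fpcr = __builtin_aarch64_get_fpcr ();
  unsigned int fpsr = __builtin_aarch64_get_fpsr ();

  __builtin_aarch64_set_fpcr (fpcr & ~(0x1fu << 8));
  __builtin_aarch64_set_fpsr (fpsr & ~0x1fu);

  return fpsr & 0x1fu;
}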
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h
index eafdd551d..bb5c88d53 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h
@@ -33,6 +33,14 @@
" %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
#endif
+#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
+#define CA53_ERR_835769_SPEC \
+ " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}"
+#else
+#define CA53_ERR_835769_SPEC \
+ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
+#endif
+
#ifndef LINK_SPEC
#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \
-maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
index b77becd23..651abe3ce 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h
@@ -50,7 +50,16 @@
#define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 CA53_ERR_835769_SPEC
-#define LINK_SPEC LINUX_TARGET_LINK_SPEC
+#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
+#define CA53_ERR_835769_SPEC \
+ " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}"
+#else
+#define CA53_ERR_835769_SPEC \
+ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
+#endif
+
+#define LINK_SPEC LINUX_TARGET_LINK_SPEC \
+ CA53_ERR_835769_SPEC
#define TARGET_OS_CPP_BUILTINS() \
do \
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
index bef58bf71..8b0a70538 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
@@ -291,4 +291,5 @@ extern bool aarch64_madd_needs_nop (rtx);
extern void aarch64_final_prescan_insn (rtx);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
index 851e77a02..7626ed31f 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md
@@ -934,6 +934,41 @@
[(set_attr "type" "neon_minmax<q>")]
)
+(define_expand "<su><maxmin>v2di3"
+ [(parallel [
+ (set (match_operand:V2DI 0 "register_operand" "")
+ (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_SIMD"
+{
+ enum rtx_code cmp_operator;
+ rtx cmp_fmt;
+
+ switch (<CODE>)
+ {
+ case UMIN:
+ cmp_operator = LTU;
+ break;
+ case SMIN:
+ cmp_operator = LT;
+ break;
+ case UMAX:
+ cmp_operator = GTU;
+ break;
+ case SMAX:
+ cmp_operator = GT;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
+ emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
+ operands[2], cmp_fmt, operands[1], operands[2]));
+ DONE;
+})
+
;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2. That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
@@ -4565,8 +4600,8 @@
})
(define_insn "*aarch64_simd_ld1r<mode>"
- [(set (match_operand:VALLDI 0 "register_operand" "=w")
- (vec_duplicate:VALLDI
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+ (vec_duplicate:VALL
(match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
"TARGET_SIMD"
"ld1r\\t{%0.<Vtype>}, %1"
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c
index 2ff6c7cb8..029c54ca3 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.c
@@ -3874,7 +3874,7 @@ aarch64_print_operand_address (FILE *f, rtx x)
switch (GET_CODE (x))
{
case PRE_INC:
- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
+ asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
GET_MODE_SIZE (aarch64_memory_reference_mode));
return;
case POST_INC:
@@ -5152,7 +5152,6 @@ aarch64_parse_cpu (void)
if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
{
selected_cpu = cpu;
- selected_tune = cpu;
aarch64_isa_flags = selected_cpu->flags;
if (ext != NULL)
@@ -5248,9 +5247,8 @@ aarch64_override_options (void)
gcc_assert (selected_cpu);
- /* The selected cpu may be an architecture, so lookup tuning by core ID. */
if (!selected_tune)
- selected_tune = &all_cores[selected_cpu->core];
+ selected_tune = selected_cpu;
aarch64_tune_flags = selected_tune->flags;
aarch64_tune = selected_tune->core;
@@ -7194,7 +7192,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
x = XVECEXP (vals, 0, 0);
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
n_var = 1, one_var = 0;
-
+
for (i = 1; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
@@ -8642,6 +8640,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
aarch64_autovectorize_vector_sizes
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
+ aarch64_atomic_assign_expand_fenv
+
/* Section anchor support. */
#undef TARGET_MIN_ANCHOR_OFFSET
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md
index 319f80591..05f5e1b35 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.md
@@ -107,6 +107,10 @@
(define_c_enum "unspecv" [
UNSPECV_EH_RETURN ; Represent EH_RETURN
+ UNSPECV_GET_FPCR ; Represent fetch of FPCR content.
+ UNSPECV_SET_FPCR ; Represent assign of FPCR content.
+ UNSPECV_GET_FPSR ; Represent fetch of FPSR content.
+ UNSPECV_SET_FPSR ; Represent assign of FPSR content.
]
)
@@ -1102,7 +1106,7 @@
add\\t%x0, %x1, %x2
sub\\t%x0, %x1, #%n2
add\\t%d0, %d1, %d2"
- [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg")
+ [(set_attr "type" "alu_imm,alu_reg,alu_imm,neon_add")
(set_attr "simd" "*,*,*,yes")]
)
@@ -2782,7 +2786,7 @@
;; Logical right shift using SISD or Integer instruction
(define_insn "*aarch64_lshr_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=w,&w,r")
(lshiftrt:GPI
(match_operand:GPI 1 "register_operand" "w,w,r")
(match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
@@ -2801,11 +2805,13 @@
(match_operand:DI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))]
- ""
+ (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_USHL))]
+ {
+ operands[3] = gen_lowpart (QImode, operands[0]);
+ }
)
(define_split
@@ -2814,11 +2820,13 @@
(match_operand:SI 1 "aarch64_simd_register")
(match_operand:QI 2 "aarch64_simd_register")))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 2)
+ [(set (match_dup 3)
(unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
(set (match_dup 0)
- (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))]
- ""
+ (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_USHL_2S))]
+ {
+ operands[3] = gen_lowpart (QImode, operands[0]);
+ }
)
;; Arithmetic right shift using SISD or Integer instruction
@@ -3642,6 +3650,37 @@
DONE;
})
+;; Write Floating-point Control Register.
+(define_insn "set_fpcr"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
+ ""
+ "msr\\tfpcr, %0\;isb"
+ [(set_attr "type" "mrs")])
+
+;; Read Floating-point Control Register.
+(define_insn "get_fpcr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
+ ""
+ "mrs\\t%0, fpcr"
+ [(set_attr "type" "mrs")])
+
+;; Write Floating-point Status Register.
+(define_insn "set_fpsr"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
+ ""
+ "msr\\tfpsr, %0"
+ [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status Register.
+(define_insn "get_fpsr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
+ ""
+ "mrs\\t%0, fpsr"
+ [(set_attr "type" "mrs")])
+
+
;; AdvSIMD Stuff
(include "aarch64-simd.md")
diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h
index c01669b2c..ae0ae9c1b 100644
--- a/gcc-4.9/gcc/config/aarch64/arm_neon.h
+++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h
@@ -39,9 +39,6 @@ typedef __builtin_aarch64_simd_hi int16x4_t
typedef __builtin_aarch64_simd_si int32x2_t
__attribute__ ((__vector_size__ (8)));
typedef int64_t int64x1_t;
-typedef int32_t int32x1_t;
-typedef int16_t int16x1_t;
-typedef int8_t int8x1_t;
typedef double float64x1_t;
typedef __builtin_aarch64_simd_sf float32x2_t
__attribute__ ((__vector_size__ (8)));
@@ -56,9 +53,6 @@ typedef __builtin_aarch64_simd_uhi uint16x4_t
typedef __builtin_aarch64_simd_usi uint32x2_t
__attribute__ ((__vector_size__ (8)));
typedef uint64_t uint64x1_t;
-typedef uint32_t uint32x1_t;
-typedef uint16_t uint16x1_t;
-typedef uint8_t uint8x1_t;
typedef __builtin_aarch64_simd_qi int8x16_t
__attribute__ ((__vector_size__ (16)));
typedef __builtin_aarch64_simd_hi int16x8_t
@@ -8400,7 +8394,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
#define vmull_high_lane_s16(a, b, c) \
__extension__ \
({ \
- int16x8_t b_ = (b); \
+ int16x4_t b_ = (b); \
int16x8_t a_ = (a); \
int32x4_t result; \
__asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \
@@ -8413,7 +8407,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
#define vmull_high_lane_s32(a, b, c) \
__extension__ \
({ \
- int32x4_t b_ = (b); \
+ int32x2_t b_ = (b); \
int32x4_t a_ = (a); \
int64x2_t result; \
__asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \
@@ -8426,7 +8420,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
#define vmull_high_lane_u16(a, b, c) \
__extension__ \
({ \
- uint16x8_t b_ = (b); \
+ uint16x4_t b_ = (b); \
uint16x8_t a_ = (a); \
uint32x4_t result; \
__asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \
@@ -8439,7 +8433,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
#define vmull_high_lane_u32(a, b, c) \
__extension__ \
({ \
- uint32x4_t b_ = (b); \
+ uint32x2_t b_ = (b); \
uint32x4_t a_ = (a); \
uint64x2_t result; \
__asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \
@@ -20925,42 +20919,42 @@ vqabsq_s64 (int64x2_t __a)
return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqabsb_s8 (int8x1_t __a)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqabsb_s8 (int8_t __a)
{
- return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
+ return (int8_t) __builtin_aarch64_sqabsqi (__a);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqabsh_s16 (int16x1_t __a)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqabsh_s16 (int16_t __a)
{
- return (int16x1_t) __builtin_aarch64_sqabshi (__a);
+ return (int16_t) __builtin_aarch64_sqabshi (__a);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqabss_s32 (int32x1_t __a)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqabss_s32 (int32_t __a)
{
- return (int32x1_t) __builtin_aarch64_sqabssi (__a);
+ return (int32_t) __builtin_aarch64_sqabssi (__a);
}
/* vqadd */
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqaddb_s8 (int8x1_t __a, int8x1_t __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqaddb_s8 (int8_t __a, int8_t __b)
{
- return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqaddqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqaddh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqaddh_s16 (int16_t __a, int16_t __b)
{
- return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqaddhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqadds_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqadds_s32 (int32_t __a, int32_t __b)
{
- return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqaddsi (__a, __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -20969,22 +20963,22 @@ vqaddd_s64 (int64x1_t __a, int64x1_t __b)
return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqaddb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqaddh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vqadds_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -21095,26 +21089,26 @@ vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
+vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
+vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
}
@@ -21221,26 +21215,26 @@ vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c)
{
return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
+vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c)
{
return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
+vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
}
@@ -21271,26 +21265,26 @@ vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqdmulhh_s16 (int16_t __a, int16_t __b)
{
- return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmulhs_s32 (int32_t __a, int32_t __b)
{
- return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
@@ -21393,26 +21387,26 @@ vqdmull_n_s32 (int32x2_t __a, int32_t __b)
return __builtin_aarch64_sqdmull_nv2si (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmullh_s16 (int16_t __a, int16_t __b)
{
- return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
+vqdmulls_s32 (int32_t __a, int32_t __b)
{
return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
+vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
}
@@ -21455,40 +21449,40 @@ vqmovn_u64 (uint64x2_t __a)
return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqmovnh_s16 (int16x1_t __a)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqmovnh_s16 (int16_t __a)
{
- return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
+ return (int8_t) __builtin_aarch64_sqmovnhi (__a);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqmovns_s32 (int32x1_t __a)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqmovns_s32 (int32_t __a)
{
- return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
+ return (int16_t) __builtin_aarch64_sqmovnsi (__a);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovnd_s64 (int64x1_t __a)
{
- return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
+ return (int32_t) __builtin_aarch64_sqmovndi (__a);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqmovnh_u16 (uint16x1_t __a)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqmovnh_u16 (uint16_t __a)
{
- return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
+ return (uint8_t) __builtin_aarch64_uqmovnhi (__a);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqmovns_u32 (uint32x1_t __a)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqmovns_u32 (uint32_t __a)
{
- return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
+ return (uint16_t) __builtin_aarch64_uqmovnsi (__a);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqmovnd_u64 (uint64x1_t __a)
{
- return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
+ return (uint32_t) __builtin_aarch64_uqmovndi (__a);
}
/* vqmovun */
@@ -21511,22 +21505,22 @@ vqmovun_s64 (int64x2_t __a)
return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqmovunh_s16 (int16x1_t __a)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqmovunh_s16 (int16_t __a)
{
- return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
+ return (int8_t) __builtin_aarch64_sqmovunhi (__a);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqmovuns_s32 (int32x1_t __a)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqmovuns_s32 (int32_t __a)
{
- return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
+ return (int16_t) __builtin_aarch64_sqmovunsi (__a);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqmovund_s64 (int64x1_t __a)
{
- return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
+ return (int32_t) __builtin_aarch64_sqmovundi (__a);
}
/* vqneg */
@@ -21537,22 +21531,22 @@ vqnegq_s64 (int64x2_t __a)
return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqnegb_s8 (int8x1_t __a)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqnegb_s8 (int8_t __a)
{
- return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
+ return (int8_t) __builtin_aarch64_sqnegqi (__a);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqnegh_s16 (int16x1_t __a)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqnegh_s16 (int16_t __a)
{
- return (int16x1_t) __builtin_aarch64_sqneghi (__a);
+ return (int16_t) __builtin_aarch64_sqneghi (__a);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqnegs_s32 (int32x1_t __a)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqnegs_s32 (int32_t __a)
{
- return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
+ return (int32_t) __builtin_aarch64_sqnegsi (__a);
}
/* vqrdmulh */
@@ -21581,26 +21575,26 @@ vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmulhh_s16 (int16_t __a, int16_t __b)
{
- return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmulhs_s32 (int32_t __a, int32_t __b)
{
- return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
@@ -21703,20 +21697,20 @@ vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqrshlb_s8 (int8_t __a, int8_t __b)
{
return __builtin_aarch64_sqrshlqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrshlh_s16 (int16_t __a, int16_t __b)
{
return __builtin_aarch64_sqrshlhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqrshls_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrshls_s32 (int32_t __a, int32_t __b)
{
return __builtin_aarch64_sqrshlsi (__a, __b);
}
@@ -21727,22 +21721,22 @@ vqrshld_s64 (int64x1_t __a, int64x1_t __b)
return __builtin_aarch64_sqrshldi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqrshlb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqrshlh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vqrshls_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -21789,40 +21783,40 @@ vqrshrn_n_u64 (uint64x2_t __a, const int __b)
return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqrshrnh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqrshrnh_n_s16 (int16_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrshrns_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrshrns_n_s32 (int32_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64x1_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqrshrnh_n_u16 (uint16_t __a, const int __b)
{
- return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqrshrns_n_u32 (uint32x1_t __a, const int __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqrshrns_n_u32 (uint32_t __a, const int __b)
{
- return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
{
- return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
}
/* vqrshrun */
@@ -21845,22 +21839,22 @@ vqrshrun_n_s64 (int64x2_t __a, const int __b)
return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqrshrunh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqrshrunh_n_s16 (int16_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqrshruns_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrshruns_n_s32 (int32_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64x1_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
/* vqshl */
@@ -21961,20 +21955,20 @@ vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqshlb_s8 (int8x1_t __a, int8x1_t __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqshlb_s8 (int8_t __a, int8_t __b)
{
return __builtin_aarch64_sqshlqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqshlh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqshlh_s16 (int16_t __a, int16_t __b)
{
return __builtin_aarch64_sqshlhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqshls_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqshls_s32 (int32_t __a, int32_t __b)
{
return __builtin_aarch64_sqshlsi (__a, __b);
}
@@ -21985,22 +21979,22 @@ vqshld_s64 (int64x1_t __a, int64x1_t __b)
return __builtin_aarch64_sqshldi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqshlb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqshlh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vqshls_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -22105,22 +22099,22 @@ vqshlq_n_u64 (uint64x2_t __a, const int __b)
return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqshlb_n_s8 (int8x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqshlb_n_s8 (int8_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqshlh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqshlh_n_s16 (int16_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqshls_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqshls_n_s32 (int32_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22129,22 +22123,22 @@ vqshld_n_s64 (int64x1_t __a, const int __b)
return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqshlb_n_u8 (uint8x1_t __a, const int __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqshlb_n_u8 (uint8_t __a, const int __b)
{
- return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqshlh_n_u16 (uint16x1_t __a, const int __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqshlh_n_u16 (uint16_t __a, const int __b)
{
- return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqshls_n_u32 (uint32x1_t __a, const int __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vqshls_n_u32 (uint32_t __a, const int __b)
{
- return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -22203,22 +22197,22 @@ vqshluq_n_s64 (int64x2_t __a, const int __b)
return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqshlub_n_s8 (int8x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqshlub_n_s8 (int8_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqshluh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqshluh_n_s16 (int16_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqshlus_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqshlus_n_s32 (int32_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22265,40 +22259,40 @@ vqshrn_n_u64 (uint64x2_t __a, const int __b)
return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqshrnh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqshrnh_n_s16 (int16_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqshrns_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqshrns_n_s32 (int32_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64x1_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqshrnh_n_u16 (uint16x1_t __a, const int __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqshrnh_n_u16 (uint16_t __a, const int __b)
{
- return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqshrns_n_u32 (uint32x1_t __a, const int __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqshrns_n_u32 (uint32_t __a, const int __b)
{
- return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64x1_t __a, const int __b)
{
- return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
}
/* vqshrun */
@@ -22321,42 +22315,42 @@ vqshrun_n_s64 (int64x2_t __a, const int __b)
return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqshrunh_n_s16 (int16x1_t __a, const int __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqshrunh_n_s16 (int16_t __a, const int __b)
{
- return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqshruns_n_s32 (int32x1_t __a, const int __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqshruns_n_s32 (int32_t __a, const int __b)
{
- return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64x1_t __a, const int __b)
{
- return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}
/* vqsub */
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vqsubb_s8 (int8x1_t __a, int8x1_t __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vqsubb_s8 (int8_t __a, int8_t __b)
{
- return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
+ return (int8_t) __builtin_aarch64_sqsubqi (__a, __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vqsubh_s16 (int16x1_t __a, int16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqsubh_s16 (int16_t __a, int16_t __b)
{
- return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
+ return (int16_t) __builtin_aarch64_sqsubhi (__a, __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vqsubs_s32 (int32x1_t __a, int32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqsubs_s32 (int32_t __a, int32_t __b)
{
- return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
+ return (int32_t) __builtin_aarch64_sqsubsi (__a, __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
@@ -22365,22 +22359,22 @@ vqsubd_s64 (int64x1_t __a, int64x1_t __b)
return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vqsubb_u8 (uint8_t __a, uint8_t __b)
{
- return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
+ return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vqsubh_u16 (uint16_t __a, uint16_t __b)
{
- return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
+ return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vqsubs_u32 (uint32_t __a, uint32_t __b)
{
- return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
+ return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -23596,22 +23590,22 @@ vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
(int64x2_t) __b);
}
-__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
-vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
+__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
+vsqaddb_u8 (uint8_t __a, int8_t __b)
{
- return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
+ return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b);
}
-__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
-vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
+__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
+vsqaddh_u16 (uint16_t __a, int16_t __b)
{
- return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
+ return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b);
}
-__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
-vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vsqadds_u32 (uint32_t __a, int32_t __b)
{
- return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
+ return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
@@ -25251,22 +25245,22 @@ vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
}
-__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
-vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
+__extension__ static __inline int8_t __attribute__ ((__always_inline__))
+vuqaddb_s8 (int8_t __a, uint8_t __b)
{
- return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
+ return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b);
}
-__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
-vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vuqaddh_s16 (int16_t __a, uint16_t __b)
{
- return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
+ return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b);
}
-__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
-vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vuqadds_s32 (int32_t __a, uint32_t __b)
{
- return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
+ return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
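For illustration only (not part of the patch): the effect of the arm_neon.h retyping above on user code. The scalar saturating intrinsics now take and return the standard <stdint.h> scalar types instead of the removed GCC-private int8x1_t/int16x1_t/int32x1_t (and unsigned counterparts), so they compose directly with ordinary integer code. The wrapper function is a hypothetical example.

#include <stdint.h>
#include <arm_neon.h>

/* Sketch: with the new prototypes, vqaddh_s16 operates on plain
   int16_t values, no cast through int16x1_t is needed.  */
int16_t
saturating_add16 (int16_t a, int16_t b)
{
  return vqaddh_s16 (a, b);
}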
diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c
index d5c7908be..19ae3665a 100644
--- a/gcc-4.9/gcc/config/alpha/alpha.c
+++ b/gcc-4.9/gcc/config/alpha/alpha.c
@@ -9918,12 +9918,6 @@ alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
-/* The Alpha architecture does not require sequential consistency. See
- http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
- for an example of how it can be violated in practice. */
-#undef TARGET_RELAXED_ORDERING
-#define TARGET_RELAXED_ORDERING true
-
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE alpha_option_override
diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md
index 795b4df3f..1179d572d 100644
--- a/gcc-4.9/gcc/config/alpha/alpha.md
+++ b/gcc-4.9/gcc/config/alpha/alpha.md
@@ -5984,16 +5984,38 @@
[(set_attr "type" "jsr")
(set_attr "length" "*,*,8")])
-(define_insn_and_split "call_value_osf_tlsgd"
+(define_int_iterator TLS_CALL
+ [UNSPEC_TLSGD_CALL
+ UNSPEC_TLSLDM_CALL])
+
+(define_int_attr tls
+ [(UNSPEC_TLSGD_CALL "tlsgd")
+ (UNSPEC_TLSLDM_CALL "tlsldm")])
+
+(define_insn "call_value_osf_<tls>"
[(set (match_operand 0)
(call (mem:DI (match_operand:DI 1 "symbolic_operand"))
(const_int 0)))
- (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL)
+ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL)
(use (reg:DI 29))
(clobber (reg:DI 26))]
"HAVE_AS_TLS"
- "#"
- "&& reload_completed"
+ "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_<tls>!%2\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"
+ [(set_attr "type" "jsr")
+ (set_attr "length" "16")])
+
+;; We must use peep2 instead of a split because we need accurate life
+;; information for $gp.
+(define_peephole2
+ [(parallel
+ [(set (match_operand 0)
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand"))
+ (const_int 0)))
+ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL)
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "HAVE_AS_TLS && reload_completed
+ && peep2_regno_dead_p (1, 29)"
[(set (match_dup 3)
(unspec:DI [(match_dup 5)
(match_dup 1)
@@ -6001,10 +6023,9 @@
(parallel [(set (match_dup 0)
(call (mem:DI (match_dup 3))
(const_int 0)))
- (set (match_dup 5)
- (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1))
+ (use (match_dup 5))
(use (match_dup 1))
- (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL))
+ (use (unspec [(match_dup 2)] TLS_CALL))
(clobber (reg:DI 26))])
(set (match_dup 5)
(unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))]
@@ -6012,19 +6033,18 @@
operands[3] = gen_rtx_REG (Pmode, 27);
operands[4] = GEN_INT (alpha_next_sequence_number++);
operands[5] = pic_offset_table_rtx;
-}
- [(set_attr "type" "multi")])
+})
-(define_insn_and_split "call_value_osf_tlsldm"
- [(set (match_operand 0)
- (call (mem:DI (match_operand:DI 1 "symbolic_operand"))
- (const_int 0)))
- (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL)
- (use (reg:DI 29))
- (clobber (reg:DI 26))]
- "HAVE_AS_TLS"
- "#"
- "&& reload_completed"
+(define_peephole2
+ [(parallel
+ [(set (match_operand 0)
+ (call (mem:DI (match_operand:DI 1 "symbolic_operand"))
+ (const_int 0)))
+ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL)
+ (use (reg:DI 29))
+ (clobber (reg:DI 26))])]
+ "HAVE_AS_TLS && reload_completed
+ && !peep2_regno_dead_p (1, 29)"
[(set (match_dup 3)
(unspec:DI [(match_dup 5)
(match_dup 1)
@@ -6035,7 +6055,7 @@
(set (match_dup 5)
(unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1))
(use (match_dup 1))
- (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL))
+ (use (unspec [(match_dup 2)] TLS_CALL))
(clobber (reg:DI 26))])
(set (match_dup 5)
(unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))]
@@ -6043,8 +6063,7 @@
operands[3] = gen_rtx_REG (Pmode, 27);
operands[4] = GEN_INT (alpha_next_sequence_number++);
operands[5] = pic_offset_table_rtx;
-}
- [(set_attr "type" "multi")])
+})
(define_insn "*call_value_osf_1"
[(set (match_operand 0)
diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h b/gcc-4.9/gcc/config/arm/arm-protos.h
index 13874ee6e..2ac3b3009 100644
--- a/gcc-4.9/gcc/config/arm/arm-protos.h
+++ b/gcc-4.9/gcc/config/arm/arm-protos.h
@@ -56,6 +56,7 @@ extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
extern int legitimate_pic_operand_p (rtx);
extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
extern rtx legitimize_tls_address (rtx, rtx);
+extern bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int);
extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT);
extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int,
@@ -294,4 +295,6 @@ extern void arm_emit_eabi_attribute (const char *, int, int);
/* Defined in gcc/common/config/arm-common.c. */
extern const char *arm_rewrite_selected_cpu (const char *name);
+extern bool arm_is_constant_pool_ref (rtx);
+
#endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c
index 3c237cb6d..b79bb48b1 100644
--- a/gcc-4.9/gcc/config/arm/arm.c
+++ b/gcc-4.9/gcc/config/arm/arm.c
@@ -89,7 +89,6 @@ static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
-static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
@@ -13952,9 +13951,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
HOST_WIDE_INT srcoffset, dstoffset;
HOST_WIDE_INT src_autoinc, dst_autoinc;
rtx mem, addr;
-
+
gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
-
+
/* Use hard registers if we have aligned source or destination so we can use
load/store multiple with contiguous registers. */
if (dst_aligned || src_aligned)
@@ -13968,7 +13967,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
src = copy_addr_to_reg (XEXP (srcbase, 0));
srcoffset = dstoffset = 0;
-
+
/* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
For copying the last bytes we want to subtract this offset again. */
src_autoinc = dst_autoinc = 0;
@@ -14022,14 +14021,14 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining -= block_size_bytes;
}
-
+
/* Copy any whole words left (note these aren't interleaved with any
subsequent halfword/byte load/stores in the interests of simplicity). */
-
+
words = remaining / UNITS_PER_WORD;
gcc_assert (words < interleave_factor);
-
+
if (src_aligned && words > 1)
{
emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
@@ -14069,11 +14068,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
}
remaining -= words * UNITS_PER_WORD;
-
+
gcc_assert (remaining < 4);
-
+
/* Copy a halfword if necessary. */
-
+
if (remaining >= 2)
{
halfword_tmp = gen_reg_rtx (SImode);
@@ -14097,11 +14096,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining -= 2;
srcoffset += 2;
}
-
+
gcc_assert (remaining < 2);
-
+
/* Copy last byte. */
-
+
if ((remaining & 1) != 0)
{
byte_tmp = gen_reg_rtx (SImode);
@@ -14122,9 +14121,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining--;
srcoffset++;
}
-
+
/* Store last halfword if we haven't done so already. */
-
+
if (halfword_tmp)
{
addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
@@ -14143,7 +14142,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
dstoffset++;
}
-
+
gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
@@ -14162,7 +14161,7 @@ arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
rtx *loop_mem)
{
*loop_reg = copy_addr_to_reg (XEXP (mem, 0));
-
+
/* Although the new mem does not refer to a known location,
it does keep up to LENGTH bytes of alignment. */
*loop_mem = change_address (mem, BLKmode, *loop_reg);
@@ -14182,14 +14181,14 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
{
rtx label, src_reg, dest_reg, final_src, test;
HOST_WIDE_INT leftover;
-
+
leftover = length % bytes_per_iter;
length -= leftover;
-
+
/* Create registers and memory references for use within the loop. */
arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
-
+
/* Calculate the value that SRC_REG should have after the last iteration of
the loop. */
final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
@@ -14198,7 +14197,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
/* Emit the start of the loop. */
label = gen_label_rtx ();
emit_label (label);
-
+
/* Emit the loop body. */
arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
interleave_factor);
@@ -14206,11 +14205,11 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
/* Move on to the next block. */
emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
-
+
/* Emit the loop condition. */
test = gen_rtx_NE (VOIDmode, src_reg, final_src);
emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
-
+
/* Mop up any left-over bytes. */
if (leftover)
arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
@@ -14224,7 +14223,7 @@ static int
arm_movmemqi_unaligned (rtx *operands)
{
HOST_WIDE_INT length = INTVAL (operands[2]);
-
+
if (optimize_size)
{
bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
@@ -14235,7 +14234,7 @@ arm_movmemqi_unaligned (rtx *operands)
resulting code can be smaller. */
unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
-
+
if (length > 12)
arm_block_move_unaligned_loop (operands[0], operands[1], length,
interleave_factor, bytes_per_iter);
@@ -14253,7 +14252,7 @@ arm_movmemqi_unaligned (rtx *operands)
else
arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
}
-
+
return 1;
}
@@ -28520,7 +28519,11 @@ arm_set_return_address (rtx source, rtx scratch)
addr = plus_constant (Pmode, addr, delta);
}
- emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ /* The store needs to be marked as frame related in order to prevent
+ DSE from deleting it as dead if it is based on fp. */
+ rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
}
}
@@ -28572,7 +28575,11 @@ thumb_set_return_address (rtx source, rtx scratch)
else
addr = plus_constant (Pmode, addr, delta);
- emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ /* The store needs to be marked as frame related in order to prevent
+ DSE from deleting it as dead if it is based on fp. */
+ rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
}
else
emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
@@ -29828,10 +29835,10 @@ int
vfp3_const_double_for_fract_bits (rtx operand)
{
REAL_VALUE_TYPE r0;
-
+
if (!CONST_DOUBLE_P (operand))
return 0;
-
+
REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
if (exact_real_inverse (DFmode, &r0))
{
@@ -30825,7 +30832,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
else
return false;
}
-
+
return true;
case ARM_POST_DEC:
@@ -30842,10 +30849,10 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
return false;
return true;
-
+
default:
return false;
-
+
}
return false;
@@ -30856,7 +30863,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
Additionally, the default expansion code is not available or suitable
for post-reload insn splits (this can occur when the register allocator
chooses not to do a shift in NEON).
-
+
This function is used in both initial expand and post-reload splits, and
handles all kinds of 64-bit shifts.
@@ -31109,7 +31116,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
enum rtx_code code = GET_CODE (*comparison);
int code_int;
- enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
+ enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
? GET_MODE (*op2) : GET_MODE (*op1);
gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
@@ -31163,7 +31170,7 @@ arm_asan_shadow_offset (void)
/* This is a temporary fix for PR60655. Ideally we need
to handle most of these cases in the generic part but
- currently we reject minus (..) (sym_ref). We try to
+ currently we reject minus (..) (sym_ref). We try to
ameliorate the case with minus (sym_ref1) (sym_ref2)
where they are in the same section. */
@@ -31393,4 +31400,13 @@ arm_load_global_address (rtx symbol, rtx offset_reg,
df_insn_rescan (load_insn);
}
+/* Return TRUE if X is a reference to a value in a constant pool.  */
+extern bool
+arm_is_constant_pool_ref (rtx x)
+{
+ return (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
+}
+
#include "gt-arm.h"
diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h
index ab5167a8b..433a3dd77 100644
--- a/gcc-4.9/gcc/config/arm/arm.h
+++ b/gcc-4.9/gcc/config/arm/arm.h
@@ -74,8 +74,8 @@ extern char arm_arch_name[];
builtin_define_with_int_value ( \
"__ARM_SIZEOF_MINIMAL_ENUM", \
flag_short_enums ? 1 : 4); \
- builtin_define_with_int_value ( \
- "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \
+ builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \
+ wchar_type_node); \
if (TARGET_ARM_ARCH_PROFILE) \
builtin_define_with_int_value ( \
"__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \
@@ -2139,9 +2139,10 @@ extern int making_const_table;
? reverse_condition_maybe_unordered (code) \
: reverse_condition (code))
-/* The arm5 clz instruction returns 32. */
-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE))
#define CC_STATUS_INIT \
do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0)
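The CLZ/CTZ_DEFINED_VALUE_AT_ZERO change makes the result for a zero input track the element width (GET_MODE_UNIT_BITSIZE) instead of a hard-coded 32, which matters for vector modes where NEON vclz operates per lane. A stand-alone sketch of that convention for an 8-bit lane; the names are illustrative, not from the backend.

/* Reference model: a zero element yields the element's bit width (8 for a
   QImode lane) rather than a fixed 32.  */
#include <stdint.h>
#include <stdio.h>

static int
clz8_at_zero_is_width (uint8_t x)
{
  int n = 8;
  while (x)
    {
      x >>= 1;
      n--;
    }
  return n;  /* clz8 (0) == 8, the QImode lane width */
}

int
main (void)
{
  /* Prints "8 3": 0x10 has three leading zero bits in 8 bits.  */
  printf ("%d %d\n", clz8_at_zero_is_width (0), clz8_at_zero_is_width (0x10));
  return 0;
}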
diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md
index 467f9ce4e..1153a1e34 100644
--- a/gcc-4.9/gcc/config/arm/arm.md
+++ b/gcc-4.9/gcc/config/arm/arm.md
@@ -127,9 +127,10 @@
; This can be "a" for ARM, "t" for either of the Thumbs, "32" for
; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6"
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
-; arm_arch6. This attribute is used to compute attribute "enabled",
-; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
+; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is
+; used to compute attribute "enabled", use type "any" to enable an
+; alternative in all cases.
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -164,6 +165,10 @@
(match_test "TARGET_32BIT && !arm_arch6"))
(const_string "yes")
+ (and (eq_attr "arch" "v6t2")
+ (match_test "TARGET_32BIT && arm_arch6 && arm_arch_thumb2"))
+ (const_string "yes")
+
(and (eq_attr "arch" "avoid_neon_for_64bits")
(match_test "TARGET_NEON")
(not (match_test "TARGET_PREFER_NEON_64BITS")))
@@ -3631,7 +3636,7 @@
[(match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "s_register_operand" "r")]))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_32BIT && optimize_function_for_size_p (cfun)"
+ "TARGET_32BIT && optimize_function_for_size_p (cfun) && !arm_restrict_it"
"*
operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode,
operands[1], operands[2]);
@@ -4374,7 +4379,7 @@
(define_insn "unaligned_loadhis"
[(set (match_operand:SI 0 "s_register_operand" "=l,r")
(sign_extend:SI
- (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+ (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,Uh")]
UNSPEC_UNALIGNED_LOAD)))]
"unaligned_access && TARGET_32BIT"
"ldr%(sh%)\t%0, %1\t@ unaligned"
@@ -5287,7 +5292,7 @@
(define_insn "*arm_zero_extendhisi2_v6"
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
- (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))]
"TARGET_ARM && arm_arch6"
"@
uxth%?\\t%0, %1
@@ -5381,7 +5386,7 @@
(define_insn "*arm_zero_extendqisi2_v6"
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
- (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,Uh")))]
"TARGET_ARM && arm_arch6"
"@
uxtb%(%)\\t%0, %1
@@ -5615,31 +5620,27 @@
(define_insn "*arm_extendhisi2"
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
- (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))]
"TARGET_ARM && arm_arch4 && !arm_arch6"
"@
#
ldr%(sh%)\\t%0, %1"
[(set_attr "length" "8,4")
(set_attr "type" "alu_shift_reg,load_byte")
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,256")
- (set_attr "neg_pool_range" "*,244")]
+ (set_attr "predicable" "yes")]
)
;; ??? Check Thumb-2 pool range
(define_insn "*arm_extendhisi2_v6"
[(set (match_operand:SI 0 "s_register_operand" "=r,r")
- (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))]
"TARGET_32BIT && arm_arch6"
"@
sxth%?\\t%0, %1
ldr%(sh%)\\t%0, %1"
[(set_attr "type" "extend,load_byte")
(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "pool_range" "*,256")
- (set_attr "neg_pool_range" "*,244")]
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*arm_extendhisi2addsi"
@@ -5682,9 +5683,7 @@
"TARGET_ARM && arm_arch4"
"ldr%(sb%)\\t%0, %1"
[(set_attr "type" "load_byte")
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "256")
- (set_attr "neg_pool_range" "244")]
+ (set_attr "predicable" "yes")]
)
(define_expand "extendqisi2"
@@ -5724,9 +5723,7 @@
ldr%(sb%)\\t%0, %1"
[(set_attr "length" "8,4")
(set_attr "type" "alu_shift_reg,load_byte")
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,256")
- (set_attr "neg_pool_range" "*,244")]
+ (set_attr "predicable" "yes")]
)
(define_insn "*arm_extendqisi_v6"
@@ -5738,9 +5735,7 @@
sxtb%?\\t%0, %1
ldr%(sb%)\\t%0, %1"
[(set_attr "type" "extend,load_byte")
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,256")
- (set_attr "neg_pool_range" "*,244")]
+ (set_attr "predicable" "yes")]
)
(define_insn "*arm_extendqisi2addsi"
@@ -6973,8 +6968,8 @@
;; Pattern to recognize insn generated default case above
(define_insn "*movhi_insn_arch4"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r")
- (match_operand:HI 1 "general_operand" "rI,K,r,mi"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r")
+ (match_operand:HI 1 "general_operand" "rI,K,n,r,mi"))]
"TARGET_ARM
&& arm_arch4
&& (register_operand (operands[0], HImode)
@@ -6982,16 +6977,19 @@
"@
mov%?\\t%0, %1\\t%@ movhi
mvn%?\\t%0, #%B1\\t%@ movhi
+ movw%?\\t%0, %L1\\t%@ movhi
str%(h%)\\t%1, %0\\t%@ movhi
ldr%(h%)\\t%0, %1\\t%@ movhi"
[(set_attr "predicable" "yes")
- (set_attr "pool_range" "*,*,*,256")
- (set_attr "neg_pool_range" "*,*,*,244")
+ (set_attr "pool_range" "*,*,*,*,256")
+ (set_attr "neg_pool_range" "*,*,*,*,244")
+ (set_attr "arch" "*,*,v6t2,*,*")
(set_attr_alternative "type"
[(if_then_else (match_operand 1 "const_int_operand" "")
(const_string "mov_imm" )
(const_string "mov_reg"))
(const_string "mvn_imm")
+ (const_string "mov_imm")
(const_string "store1")
(const_string "load1")])]
)
@@ -10944,10 +10942,16 @@
enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]),
operands[3], operands[4]);
enum rtx_code rc = GET_CODE (operands[5]);
-
operands[6] = gen_rtx_REG (mode, CC_REGNUM);
gcc_assert (!(mode == CCFPmode || mode == CCFPEmode));
- rc = reverse_condition (rc);
+ if (REGNO (operands[2]) != REGNO (operands[0]))
+ rc = reverse_condition (rc);
+ else
+ {
+ rtx tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx);
}
diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md
index 85dd116ce..f848664d5 100644
--- a/gcc-4.9/gcc/config/arm/constraints.md
+++ b/gcc-4.9/gcc/config/arm/constraints.md
@@ -36,7 +36,7 @@
;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
-;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
+;; in ARM/Thumb-2 state: Q, Uh, Ut, Uv, Uy, Un, Um, Us
;; in ARM state: Uq
;; in Thumb state: Uu, Uw
@@ -348,6 +348,12 @@
An address valid for loading/storing register exclusive"
(match_operand 0 "mem_noofs_operand"))
+(define_memory_constraint "Uh"
+ "@internal
+ An address suitable for byte and half-word loads which does not point inside a constant pool"
+ (and (match_code "mem")
+ (match_test "arm_legitimate_address_p (GET_MODE (op), XEXP (op, 0), false) && !arm_is_constant_pool_ref (op)")))
+
(define_memory_constraint "Ut"
"@internal
In ARM/Thumb-2 state an address valid for loading/storing opaque structure
@@ -394,7 +400,8 @@
(and (match_code "mem")
(match_test "TARGET_ARM
&& arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0),
- SIGN_EXTEND, 0)")))
+ SIGN_EXTEND, 0)
+ && !arm_is_constant_pool_ref (op)")))
(define_memory_constraint "Q"
"@internal
diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h
index 7ee5806b7..e69de29bb 100644
--- a/gcc-4.9/gcc/config/arm/linux-grte.h
+++ b/gcc-4.9/gcc/config/arm/linux-grte.h
@@ -1,27 +0,0 @@
-/* Definitions for ARM Linux-based GRTE (Google RunTime Environment).
- Copyright (C) 2011 Free Software Foundation, Inc.
- Contributed by Chris Demetriou.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-#undef SUBSUBTARGET_EXTRA_SPECS
-#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS
diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile
index ff9e2e1b3..86741e6b0 100644
--- a/gcc-4.9/gcc/config/arm/t-aprofile
+++ b/gcc-4.9/gcc/config/arm/t-aprofile
@@ -88,6 +88,9 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53
MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57
MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53
+# Arch Matches
+MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc
+
# FPU matches
MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3
MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16
diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md
index 639810518..56cd30458 100644
--- a/gcc-4.9/gcc/config/avr/avr-dimode.md
+++ b/gcc-4.9/gcc/config/avr/avr-dimode.md
@@ -68,6 +68,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
if (DImode == <MODE>mode
@@ -145,6 +146,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
if (const_operand (operands[2], GET_MODE (operands[2])))
@@ -201,6 +203,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
if (const_operand (operands[2], GET_MODE (operands[2])))
@@ -249,6 +252,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
if (const_operand (operands[2], GET_MODE (operands[2])))
@@ -338,6 +342,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
if (s8_operand (operands[2], VOIDmode))
@@ -424,6 +429,7 @@
{
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A));
emit_move_insn (acc_a, operands[1]);
emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]);
emit_insn (gen_<code_stdname><mode>3_insn ());
@@ -457,6 +463,7 @@
(clobber (any_extend:SI (match_dup 1)))])]
"avr_have_dimode"
{
+ avr_fix_inputs (operands, 1 << 2, regmask (SImode, 22));
emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]);
emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]);
emit_insn (gen_<extend_u>mulsidi3_insn());
diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md
index 9c8489edd..6763f596e 100644
--- a/gcc-4.9/gcc/config/avr/avr-fixed.md
+++ b/gcc-4.9/gcc/config/avr/avr-fixed.md
@@ -231,7 +231,11 @@
(clobber (reg:HI 24))])
(set (match_operand:QQ 0 "register_operand" "")
(reg:QQ 23))]
- "!AVR_HAVE_MUL")
+ "!AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (QQmode, 24));
+ })
+
(define_expand "muluqq3_nomul"
[(set (reg:UQQ 22)
@@ -246,7 +250,10 @@
(clobber (reg:HI 22))])
(set (match_operand:UQQ 0 "register_operand" "")
(reg:UQQ 25))]
- "!AVR_HAVE_MUL")
+ "!AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (UQQmode, 22));
+ })
(define_insn "*mulqq3.call"
[(set (reg:QQ 23)
@@ -274,7 +281,10 @@
(clobber (reg:HI 22))])
(set (match_operand:ALL2QA 0 "register_operand" "")
(reg:ALL2QA 24))]
- "AVR_HAVE_MUL")
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 18));
+ })
;; "*mulhq3.call" "*muluhq3.call"
;; "*mulha3.call" "*muluha3.call"
@@ -302,7 +312,10 @@
(reg:ALL4A 20)))
(set (match_operand:ALL4A 0 "register_operand" "")
(reg:ALL4A 24))]
- "AVR_HAVE_MUL")
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 16));
+ })
;; "*mulsa3.call" "*mulusa3.call"
(define_insn "*mul<mode>3.call"
@@ -330,7 +343,12 @@
(reg:ALL1Q 22)))
(clobber (reg:QI 25))])
(set (match_operand:ALL1Q 0 "register_operand" "")
- (reg:ALL1Q 24))])
+ (reg:ALL1Q 24))]
+ ""
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 25));
+ })
+
;; "*divqq3.call" "*udivuqq3.call"
(define_insn "*<code><mode>3.call"
@@ -356,7 +374,11 @@
(clobber (reg:HI 26))
(clobber (reg:QI 21))])
(set (match_operand:ALL2QA 0 "register_operand" "")
- (reg:ALL2QA 24))])
+ (reg:ALL2QA 24))]
+ ""
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 26));
+ })
;; "*divhq3.call" "*udivuhq3.call"
;; "*divha3.call" "*udivuha3.call"
@@ -385,7 +407,11 @@
(clobber (reg:HI 26))
(clobber (reg:HI 30))])
(set (match_operand:ALL4A 0 "register_operand" "")
- (reg:ALL4A 22))])
+ (reg:ALL4A 22))]
+ ""
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 24));
+ })
;; "*divsa3.call" "*udivusa3.call"
(define_insn "*<code><mode>3.call"
@@ -435,6 +461,7 @@
operands[3] = gen_rtx_REG (<MODE>mode, regno_out[(size_t) GET_MODE_SIZE (<MODE>mode)]);
operands[4] = gen_rtx_REG (<MODE>mode, regno_in[(size_t) GET_MODE_SIZE (<MODE>mode)]);
+ avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, REGNO (operands[4])));
operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0);
// $2 is no more needed, but is referenced for expand.
operands[2] = const0_rtx;
diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h
index c5ce78429..4a899a27c 100644
--- a/gcc-4.9/gcc/config/avr/avr-protos.h
+++ b/gcc-4.9/gcc/config/avr/avr-protos.h
@@ -124,6 +124,15 @@ extern bool avr_mem_memx_p (rtx);
extern bool avr_load_libgcc_p (rtx);
extern bool avr_xload_libgcc_p (enum machine_mode);
+static inline unsigned
+regmask (enum machine_mode mode, unsigned regno)
+{
+ return ((1u << GET_MODE_SIZE (mode)) - 1) << regno;
+}
+
+extern void avr_fix_inputs (rtx*, unsigned, unsigned);
+extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned);
+
extern rtx lpm_reg_rtx;
extern rtx lpm_addr_reg_rtx;
extern rtx tmp_reg_rtx;
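regmask() turns a (mode, first register) pair into a bit mask over R0..R31, so the fixed-register expanders below can test whether an operand overlaps a clobbered hard-register range. A worked, stand-alone example with the AVR mode sizes hard-coded (inside the compiler GET_MODE_SIZE supplies them); the two values shown are the ones combined as regmask (DImode, 18) | regmask (HImode, 26) in the avr.md hunks.

/* Stand-alone illustration of regmask(): DImode = 8 bytes, HImode = 2 bytes.  */
#include <stdio.h>

static unsigned
regmask_example (unsigned mode_size_bytes, unsigned regno)
{
  return ((1u << mode_size_bytes) - 1) << regno;
}

int
main (void)
{
  unsigned di18 = regmask_example (8, 18);  /* R18..R25 -> 0x03fc0000 */
  unsigned hi26 = regmask_example (2, 26);  /* R26..R27 -> 0x0c000000 */
  printf ("0x%08x 0x%08x 0x%08x\n", di18, hi26, di18 | hi26);
  return 0;
}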
diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c
index fa979df46..4c65f5efa 100644
--- a/gcc-4.9/gcc/config/avr/avr.c
+++ b/gcc-4.9/gcc/config/avr/avr.c
@@ -11118,6 +11118,115 @@ avr_convert_to_type (tree type, tree expr)
}
+/* PR63633: The middle-end might come up with hard regs as input operands.
+
+ RMASK is a bit mask representing a subset of hard registers R0...R31:
+ Rn is an element of that set iff bit n of RMASK is set.
+ OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then
+ OP[n] has to be fixed; otherwise OP[n] is left alone.
+
+ For each element of OPMASK which is a hard register overlapping RMASK,
+ replace OP[n] with a newly created pseudo register
+
+ HREG == 0: Also emit a move insn that copies the contents of that
+ hard register into the new pseudo.
+
+ HREG != 0: Also set HREG[n] to the hard register. */
+
+static void
+avr_fix_operands (rtx *op, rtx *hreg, unsigned opmask, unsigned rmask)
+{
+ for (; opmask; opmask >>= 1, op++)
+ {
+ rtx reg = *op;
+
+ if (hreg)
+ *hreg = NULL_RTX;
+
+ if ((opmask & 1)
+ && REG_P (reg)
+ && REGNO (reg) < FIRST_PSEUDO_REGISTER
+ // This hard-reg overlaps other prohibited hard regs?
+ && (rmask & regmask (GET_MODE (reg), REGNO (reg))))
+ {
+ *op = gen_reg_rtx (GET_MODE (reg));
+ if (hreg == NULL)
+ emit_move_insn (*op, reg);
+ else
+ *hreg = reg;
+ }
+
+ if (hreg)
+ hreg++;
+ }
+}
+
+
+void
+avr_fix_inputs (rtx *op, unsigned opmask, unsigned rmask)
+{
+ avr_fix_operands (op, NULL, opmask, rmask);
+}
+
+
+/* Helper for the function below: If bit n of MASK is set and
+ HREG[n] != NULL, then emit a move insn to copy OP[n] to HREG[n].
+ Otherwise do nothing for that n. Return TRUE. */
+
+static bool
+avr_move_fixed_operands (rtx *op, rtx *hreg, unsigned mask)
+{
+ for (; mask; mask >>= 1, op++, hreg++)
+ if ((mask & 1)
+ && *hreg)
+ emit_move_insn (*hreg, *op);
+
+ return true;
+}
+
+
+/* PR63633: The middle-end might come up with hard regs as output operands.
+
+ GEN is a sequence generating function like gen_mulsi3 with 3 operands OP[].
+ RMASK is a bit mask representing a subset of hard registers R0...R31:
+ Rn is an element of that set iff bit n of RMASK is set.
+ OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then
+ OP[n] has to be fixed; otherwise OP[n] is left alone.
+
+ Emit the insn sequence as generated by GEN() with all elements of OPMASK
+ which are hard registers overlapping RMASK replaced by newly created
+ pseudo registers. After the sequence has been emitted, emit insns that
+ move the contents of respective pseudos to their hard regs. */
+
+bool
+avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op,
+ unsigned opmask, unsigned rmask)
+{
+ const int n = 3;
+ rtx hreg[n];
+
+ /* It is legitimate for GEN to call this function, and in order not to
+ get self-recursive we use the following static kludge. This is the
+ only way not to duplicate all expanders and to avoid ugly and
+ hard-to-maintain C-code instead of the much more appreciated RTL
+ representation as supplied by define_expand. */
+ static bool lock = false;
+
+ gcc_assert (opmask < (1u << n));
+
+ if (lock)
+ return false;
+
+ avr_fix_operands (op, hreg, opmask, rmask);
+
+ lock = true;
+ emit_insn (gen (op[0], op[1], op[2]));
+ lock = false;
+
+ return avr_move_fixed_operands (op, hreg, opmask);
+}
+
+
/* Worker function for movmemhi expander.
XOP[0] Destination as MEM:BLK
XOP[1] Source " "

diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md
index 3bb2a914a..3f4181dab 100644
--- a/gcc-4.9/gcc/config/avr/avr.md
+++ b/gcc-4.9/gcc/config/avr/avr.md
@@ -1482,7 +1482,11 @@
(set (reg:QI 22) (match_operand:QI 2 "register_operand" ""))
(parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22)))
(clobber (reg:QI 22))])
- (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))])
+ (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]
+ ""
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24));
+ })
(define_insn "*mulqi3_call"
[(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22)))
@@ -2210,7 +2214,13 @@
(parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22)))
(clobber (reg:HI 22))
(clobber (reg:QI 21))])
- (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ {
+ avr_fix_inputs (operands, (1 << 2), regmask (HImode, 24));
+ })
+
(define_insn "*mulhi3_call"
[(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22)))
@@ -2248,6 +2258,10 @@
emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
DONE;
}
+
+ if (avr_emit3_fix_outputs (gen_mulsi3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
})
(define_insn_and_split "*mulsi3"
@@ -2287,7 +2301,23 @@
;; "muluqisi3"
;; "muluhisi3"
-(define_insn_and_split "mulu<mode>si3"
+(define_expand "mulu<mode>si3"
+ [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" ""))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "")))
+ (clobber (reg:HI 26))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u);
+ if (avr_emit3_fix_outputs (gen_mulu<mode>si3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
+
+;; "*muluqisi3"
+;; "*muluhisi3"
+(define_insn_and_split "*mulu<mode>si3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
@@ -2323,7 +2353,23 @@
;; "mulsqisi3"
;; "mulshisi3"
-(define_insn_and_split "muls<mode>si3"
+(define_expand "muls<mode>si3"
+ [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "")
+ (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" ""))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "")))
+ (clobber (reg:HI 26))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u);
+ if (avr_emit3_fix_outputs (gen_muls<mode>si3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
+
+;; "*mulsqisi3"
+;; "*mulshisi3"
+(define_insn_and_split "*muls<mode>si3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
(match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
@@ -2366,7 +2412,22 @@
;; One-extend operand 1
-(define_insn_and_split "mulohisi3"
+(define_expand "mulohisi3"
+ [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "")
+ (mult:SI (not:SI (zero_extend:SI
+ (not:HI (match_operand:HI 1 "pseudo_register_operand" ""))))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "")))
+ (clobber (reg:HI 26))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u);
+ if (avr_emit3_fix_outputs (gen_mulohisi3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
+
+(define_insn_and_split "*mulohisi3"
[(set (match_operand:SI 0 "pseudo_register_operand" "=r")
(mult:SI (not:SI (zero_extend:SI
(not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
@@ -2394,7 +2455,12 @@
(any_extend:SI (match_operand:HI 2 "register_operand" ""))))
(clobber (reg:HI 26))
(clobber (reg:DI 18))])]
- "AVR_HAVE_MUL")
+ "AVR_HAVE_MUL"
+ {
+ if (avr_emit3_fix_outputs (gen_<extend_u>mulhisi3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
(define_expand "usmulhisi3"
[(parallel [(set (match_operand:SI 0 "register_operand" "")
@@ -2402,7 +2468,12 @@
(sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
(clobber (reg:HI 26))
(clobber (reg:DI 18))])]
- "AVR_HAVE_MUL")
+ "AVR_HAVE_MUL"
+ {
+ if (avr_emit3_fix_outputs (gen_usmulhisi3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
@@ -2474,7 +2545,10 @@
(clobber (reg:HI 22))])
(set (match_operand:HI 0 "register_operand" "")
(reg:HI 24))]
- "AVR_HAVE_MUL")
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, 1 << 2, regmask (HImode, 18));
+ })
(define_insn "*mulsi3_call"
@@ -2697,6 +2771,10 @@
emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
DONE;
}
+
+ if (avr_emit3_fix_outputs (gen_mulpsi3, operands, 1u << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
})
(define_insn "*umulqihipsi3"
@@ -2729,7 +2807,21 @@
[(set_attr "length" "7")
(set_attr "cc" "clobber")])
-(define_insn_and_split "mulsqipsi3"
+(define_expand "mulsqipsi3"
+ [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
+ (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" ""))
+ (match_operand:PSI 2 "pseudo_register_or_const_int_operand""")))
+ (clobber (reg:HI 26))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ {
+ avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u);
+ if (avr_emit3_fix_outputs (gen_mulsqipsi3, operands, 1 << 0,
+ regmask (DImode, 18) | regmask (HImode, 26)))
+ DONE;
+ })
+
+(define_insn_and_split "*mulsqipsi3"
[(set (match_operand:PSI 0 "pseudo_register_operand" "=r")
(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r"))
(match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn")))
@@ -4931,8 +5023,9 @@
(unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")]
UNSPEC_INDEX_JMP))
(use (label_ref (match_operand 1 "" "")))
- (clobber (match_dup 0))]
- ""
+ (clobber (match_dup 0))
+ (clobber (const_int 0))]
+ "!AVR_HAVE_EIJMP_EICALL"
"@
ijmp
push %A0\;push %B0\;ret
@@ -4941,6 +5034,19 @@
(set_attr "isa" "rjmp,rjmp,jmp")
(set_attr "cc" "none,none,clobber")])
+(define_insn "*tablejump.3byte-pc"
+ [(set (pc)
+ (unspec:HI [(reg:HI REG_Z)]
+ UNSPEC_INDEX_JMP))
+ (use (label_ref (match_operand 0 "" "")))
+ (clobber (reg:HI REG_Z))
+ (clobber (reg:QI 24))]
+ "AVR_HAVE_EIJMP_EICALL"
+ "clr r24\;subi r30,pm_lo8(-(%0))\;sbci r31,pm_hi8(-(%0))\;sbci r24,pm_hh8(-(%0))\;jmp __tablejump2__"
+ [(set_attr "length" "6")
+ (set_attr "isa" "eijmp")
+ (set_attr "cc" "clobber")])
+
(define_expand "casesi"
[(parallel [(set (match_dup 6)
@@ -4958,15 +5064,31 @@
(label_ref (match_operand 4 "" ""))
(pc)))
- (set (match_dup 6)
- (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" ""))))
+ (set (match_dup 10)
+ (match_dup 7))
- (parallel [(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP))
+ (parallel [(set (pc)
+ (unspec:HI [(match_dup 10)] UNSPEC_INDEX_JMP))
(use (label_ref (match_dup 3)))
- (clobber (match_dup 6))])]
+ (clobber (match_dup 10))
+ (clobber (match_dup 8))])]
""
{
operands[6] = gen_reg_rtx (HImode);
+
+ if (AVR_HAVE_EIJMP_EICALL)
+ {
+ operands[7] = operands[6];
+ operands[8] = all_regs_rtx[24];
+ operands[10] = gen_rtx_REG (HImode, REG_Z);
+ }
+ else
+ {
+ operands[7] = gen_rtx_PLUS (HImode, operands[6],
+ gen_rtx_LABEL_REF (VOIDmode, operands[3]));
+ operands[8] = const0_rtx;
+ operands[10] = operands[6];
+ }
})
@@ -6034,6 +6156,7 @@
emit_insn (gen_fmul_insn (operand0, operand1, operand2));
DONE;
}
+ avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24));
})
(define_insn "fmul_insn"
@@ -6077,6 +6200,7 @@
emit_insn (gen_fmuls_insn (operand0, operand1, operand2));
DONE;
}
+ avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24));
})
(define_insn "fmuls_insn"
@@ -6120,6 +6244,7 @@
emit_insn (gen_fmulsu_insn (operand0, operand1, operand2));
DONE;
}
+ avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24));
})
(define_insn "fmulsu_insn"
diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c
index 892ba3547..7fe4b1f2e 100644
--- a/gcc-4.9/gcc/config/darwin-c.c
+++ b/gcc-4.9/gcc/config/darwin-c.c
@@ -571,21 +571,34 @@ find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp)
}
/* Return the value of darwin_macosx_version_min suitable for the
- __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro,
- so '10.4.2' becomes 1040. The lowest digit is always zero.
+ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, so '10.4.2'
+ becomes 1040 and '10.10.0' becomes 101000. The lowest digit is
+ always zero, as is the second lowest for '10.10.x' and above.
Print a warning if the version number can't be understood. */
static const char *
version_as_macro (void)
{
- static char result[] = "1000";
+ static char result[7] = "1000";
+ int minorDigitIdx;
if (strncmp (darwin_macosx_version_min, "10.", 3) != 0)
goto fail;
if (! ISDIGIT (darwin_macosx_version_min[3]))
goto fail;
- result[2] = darwin_macosx_version_min[3];
- if (darwin_macosx_version_min[4] != '\0'
- && darwin_macosx_version_min[4] != '.')
+
+ minorDigitIdx = 3;
+ result[2] = darwin_macosx_version_min[minorDigitIdx++];
+ if (ISDIGIT (darwin_macosx_version_min[minorDigitIdx]))
+ {
+ /* Starting with OS X 10.10, the macro ends '00' rather than '0',
+ i.e. 10.10.x becomes 101000 rather than 10100. */
+ result[3] = darwin_macosx_version_min[minorDigitIdx++];
+ result[4] = '0';
+ result[5] = '0';
+ result[6] = '\0';
+ }
+ if (darwin_macosx_version_min[minorDigitIdx] != '\0'
+ && darwin_macosx_version_min[minorDigitIdx] != '.')
goto fail;
return result;
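The rewritten version_as_macro emits a six-digit value once the minor release needs two digits. A stand-alone sketch of the mapping is below; sketch_version_as_macro is a hypothetical name, and the extra reset of result[3..4] exists only so repeated calls in this demo stay correct.

/* Illustration of the mapping above: "10.4.2" -> 1040, "10.9" -> 1090,
   "10.10.1" -> 101000.  */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static const char *
sketch_version_as_macro (const char *min)
{
  static char result[7] = "1000";
  int i = 3;
  if (strncmp (min, "10.", 3) != 0 || !isdigit ((unsigned char) min[3]))
    return NULL;
  result[2] = min[i++];
  result[3] = '0';
  result[4] = '\0';
  if (isdigit ((unsigned char) min[i]))
    {
      /* Two-digit minor: 10.10.x and later expand to six digits.  */
      result[3] = min[i++];
      result[4] = '0';
      result[5] = '0';
      result[6] = '\0';
    }
  if (min[i] != '\0' && min[i] != '.')
    return NULL;
  return result;
}

int
main (void)
{
  printf ("%s\n", sketch_version_as_macro ("10.4.2"));   /* 1040 */
  printf ("%s\n", sketch_version_as_macro ("10.9"));     /* 1090 */
  printf ("%s\n", sketch_version_as_macro ("10.10.1"));  /* 101000 */
  return 0;
}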
diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c
index 8b6ae9391..541e10bc0 100644
--- a/gcc-4.9/gcc/config/darwin-driver.c
+++ b/gcc-4.9/gcc/config/darwin-driver.c
@@ -29,8 +29,8 @@ along with GCC; see the file COPYING3. If not see
#include <sys/sysctl.h>
#include "xregex.h"
-static bool
-darwin_find_version_from_kernel (char *new_flag)
+static char *
+darwin_find_version_from_kernel (void)
{
char osversion[32];
size_t osversion_len = sizeof (osversion) - 1;
@@ -39,6 +39,7 @@ darwin_find_version_from_kernel (char *new_flag)
char minor_vers[6];
char * version_p;
char * version_pend;
+ char * new_flag;
/* Determine the version of the running OS. If we can't, warn user,
and do nothing. */
@@ -46,7 +47,7 @@ darwin_find_version_from_kernel (char *new_flag)
&osversion_len, NULL, 0) == -1)
{
warning (0, "sysctl for kern.osversion failed: %m");
- return false;
+ return NULL;
}
/* Try to parse the first two parts of the OS version number. Warn
@@ -57,8 +58,6 @@ darwin_find_version_from_kernel (char *new_flag)
version_p = osversion + 1;
if (ISDIGIT (*version_p))
major_vers = major_vers * 10 + (*version_p++ - '0');
- if (major_vers > 4 + 9)
- goto parse_failed;
if (*version_p++ != '.')
goto parse_failed;
version_pend = strchr(version_p, '.');
@@ -74,17 +73,16 @@ darwin_find_version_from_kernel (char *new_flag)
if (major_vers - 4 <= 4)
/* On 10.4 and earlier, the old linker is used which does not
support three-component system versions. */
- sprintf (new_flag, "10.%d", major_vers - 4);
+ asprintf (&new_flag, "10.%d", major_vers - 4);
else
- sprintf (new_flag, "10.%d.%s", major_vers - 4,
- minor_vers);
+ asprintf (&new_flag, "10.%d.%s", major_vers - 4, minor_vers);
- return true;
+ return new_flag;
parse_failed:
warning (0, "couldn%'t understand kern.osversion %q.*s",
(int) osversion_len, osversion);
- return false;
+ return NULL;
}
#endif
@@ -105,7 +103,7 @@ darwin_default_min_version (unsigned int *decoded_options_count,
const unsigned int argc = *decoded_options_count;
struct cl_decoded_option *const argv = *decoded_options;
unsigned int i;
- static char new_flag[sizeof ("10.0.0") + 6];
+ const char *new_flag;
/* If the command-line is empty, just return. */
if (argc <= 1)
@@ -142,16 +140,16 @@ darwin_default_min_version (unsigned int *decoded_options_count,
#ifndef CROSS_DIRECTORY_STRUCTURE
- /* Try to find the version from the kernel, if we fail - we print a message
- and give up. */
- if (!darwin_find_version_from_kernel (new_flag))
- return;
+ /* Try to find the version from the kernel, if we fail - we print a message
+ and give up. */
+ new_flag = darwin_find_version_from_kernel ();
+ if (!new_flag)
+ return;
#else
- /* For cross-compilers, default to the target OS version. */
-
- strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag));
+ /* For cross-compilers, default to the target OS version. */
+ new_flag = DEF_MIN_OSX_VERSION;
#endif /* CROSS_DIRECTORY_STRUCTURE */
@@ -165,7 +163,6 @@ darwin_default_min_version (unsigned int *decoded_options_count,
memcpy (*decoded_options + 2, argv + 1,
(argc - 1) * sizeof (struct cl_decoded_option));
return;
-
}
/* Translate -filelist and -framework options in *DECODED_OPTIONS
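Switching from a fixed-size new_flag buffer filled by sprintf to asprintf lets the callee size the allocation, so a three-component version such as 10.10.1 can never overflow. A stand-alone illustration follows; the variable values are examples, not taken from the patch.

/* asprintf is a BSD/GNU extension (_GNU_SOURCE is needed with glibc).  */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  char *flag = NULL;
  int major_vers = 14;           /* Darwin 14 corresponds to OS X 10.10.  */
  const char *minor_vers = "1";
  if (asprintf (&flag, "10.%d.%s", major_vers - 4, minor_vers) < 0)
    return 1;
  puts (flag);                   /* prints "10.10.1" */
  free (flag);
  return 0;
}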
diff --git a/gcc-4.9/gcc/config/gnu-user.h b/gcc-4.9/gcc/config/gnu-user.h
index 2af0a54ed..1a9a487a8 100644
--- a/gcc-4.9/gcc/config/gnu-user.h
+++ b/gcc-4.9/gcc/config/gnu-user.h
@@ -118,7 +118,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* Link -lasan early on the command line. For -static-libasan, don't link
it for -shared link, the executable should be compiled with -static-libasan
in that case, and for executable link link with --{,no-}whole-archive around
- it to force everything into the executable. And similarly for -ltsan. */
+ it to force everything into the executable. And similarly for -ltsan
+ and -llsan. */
#if defined(HAVE_LD_STATIC_DYNAMIC)
#undef LIBASAN_EARLY_SPEC
#define LIBASAN_EARLY_SPEC "%{!shared:libasan_preinit%O%s} " \
@@ -129,4 +130,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define LIBTSAN_EARLY_SPEC "%{static-libtsan:%{!shared:" \
LD_STATIC_OPTION " --whole-archive -ltsan --no-whole-archive " \
LD_DYNAMIC_OPTION "}}%{!static-libtsan:-ltsan}"
+#undef LIBLSAN_EARLY_SPEC
+#define LIBLSAN_EARLY_SPEC "%{static-liblsan:%{!shared:" \
+ LD_STATIC_OPTION " --whole-archive -llsan --no-whole-archive " \
+ LD_DYNAMIC_OPTION "}}%{!static-liblsan:-llsan}"
#endif
diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c
index a598b8eef..54942d520 100644
--- a/gcc-4.9/gcc/config/i386/i386.c
+++ b/gcc-4.9/gcc/config/i386/i386.c
@@ -2465,7 +2465,7 @@ struct ptt
const int align_func;
};
-/* This table must be in sync with enum processor_type in i386.h. */
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
{"generic", &generic_cost, 16, 10, 16, 10, 16},
@@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p,
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
- | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
+ | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
| PTA_XSAVEOPT | PTA_FSGSBASE},
{"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
- | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
- | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
+ | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
+ | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
| PTA_MOVBE},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
@@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p,
/* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
on and OPTION_MASK_ABI_64 is off. We turn off
OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
- -m64. */
- if (TARGET_LP64_P (opts->x_ix86_isa_flags))
+ -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
+ if (TARGET_LP64_P (opts->x_ix86_isa_flags)
+ || TARGET_16BIT_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
}
@@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p,
opts->x_target_flags |= MASK_NO_RED_ZONE;
}
+ if (!global_options_set.x_flag_shrink_wrap_frame_pointer)
+ flag_shrink_wrap_frame_pointer = 1;
+
+ /* -fshrink-wrap-frame-pointer is an optimization based on
+ -fno-omit-frame-pointer mode, so it is only effective when
+ flag_omit_frame_pointer is false.
+ Frame pointer shrinkwrap may increase code size, so disable
+ it when optimize_size is true. */
+ if (flag_omit_frame_pointer
+ || optimize == 0
+ || optimize_size)
+ flag_shrink_wrap_frame_pointer = 0;
+
+ /* Unless -mno-omit-leaf-frame-pointer is explicitly specified,
+ -fshrink-wrap-frame-pointer enables omitting the leaf frame
+ pointer by default. */
+ if (flag_shrink_wrap_frame_pointer
+ && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags)
+ && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)))
+ opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER;
+
/* Keep nonleaf frame pointers. */
if (opts->x_flag_omit_frame_pointer)
opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
- else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
- opts->x_flag_omit_frame_pointer = 1;
/* If we're doing fast math, we don't care about comparison order
wrt NaNs. This lets us use a shorter comparison sequence. */
@@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p,
/* For all chips supporting SSE2, -mfpmath=sse performs better than
fpmath=387. The second is however default at many targets since the
extra 80bit precision of temporaries is considered to be part of ABI.
- Overwrite the default at least for -ffast-math.
+ Overwrite the default at least for -ffast-math.
TODO: -mfpmath=both seems to produce same performing code with bit
smaller binaries. It is however not clear if register allocation is
ready for this setting.
@@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void)
c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
: TARGET_64BIT ? (1 << 2)
: (1 << 1));
-
+
CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
@@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl,
tree old_optimize = build_optimization_node (&global_options);
- /* Get the optimization options of the current function. */
+ /* Get the optimization options of the current function. */
tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
-
+
if (!func_optimize)
func_optimize = old_optimize;
@@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl,
memset (&func_options, 0, sizeof (func_options));
init_options_struct (&func_options, NULL);
lang_hooks.init_options_struct (&func_options);
-
+
cl_optimization_restore (&func_options,
TREE_OPTIMIZATION (func_optimize));
@@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp)
if (TREE_CODE (exp) == FUNCTION_DECL)
return false;
+ /* Automatic variables are never large data. */
+ if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
+ return false;
+
if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
{
const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
@@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
unsigned HOST_WIDE_INT align)
{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
+ if (ix86_in_large_data_p (decl))
{
const char *sname = NULL;
unsigned int flags = SECTION_WRITE;
@@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
- && ix86_in_large_data_p (decl))
+ if (ix86_in_large_data_p (decl))
{
const char *prefix = NULL;
/* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
@@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file,
{
if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
&& size > (unsigned int)ix86_section_threshold)
- fputs (".largecomm\t", file);
+ fputs ("\t.largecomm\t", file);
else
fputs (COMMON_ASM_OP, file);
assemble_name (file, name);
@@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype)
if (abi == SYSV_ABI)
{
if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
- abi = MS_ABI;
+ {
+ if (TARGET_X32)
+ {
+ static bool warned = false;
+ if (!warned)
+ {
+ error ("X32 does not support ms_abi attribute");
+ warned = true;
+ }
+ }
+ abi = MS_ABI;
+ }
}
else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
abi = SYSV_ABI;
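With the hunk above, requesting the Microsoft calling convention under the x32 ABI is diagnosed once instead of being accepted silently. A hypothetical test case that should trigger the new error when compiled with -mx32:

/* Compile with: gcc -mx32 -c ms-abi-x32.c
   Expected diagnostic: "X32 does not support ms_abi attribute".  */
long long __attribute__ ((ms_abi))
callee (long long a, long long b)
{
  return a + b;
}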
@@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
The middle-end can't deal with the vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
- NULL.
+ NULL.
If INT_RETURN is true, warn ABI change if the vector mode isn't
available for function return value. */
@@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void)
if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
return true;
- /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
- turns off the frame pointer by default. Turn it back on now if
- we've not got a leaf function. */
- if (TARGET_OMIT_LEAF_FRAME_POINTER
- && (!crtl->is_leaf
- || ix86_current_function_calls_tls_descriptor))
- return true;
-
if (crtl->profile && !flag_fentry)
return true;
return false;
}
+/* Return true if the frame pointer of the function could be omitted. */
+
+static bool
+ix86_can_omit_leaf_frame_pointer (void)
+{
+ return TARGET_OMIT_LEAF_FRAME_POINTER
+ && (crtl->is_leaf
+ && !ix86_current_function_calls_tls_descriptor);
+}
+
/* Record that the current function accesses previous call frames. */
void
@@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
offset += UNITS_PER_WORD;
/* Skip saved base pointer. */
- if (frame_pointer_needed)
+ if (frame_pointer_needed || frame_pointer_partially_needed)
offset += UNITS_PER_WORD;
frame->hfp_save_offset = offset;
@@ -10890,6 +10925,26 @@ ix86_expand_prologue (void)
m->fs.fp_valid = true;
}
}
+ else if (frame_pointer_partially_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (fpset_needed_in_prologue)
+ {
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for
+ pushes in prologue, so use fp as cfa_reg to reduce .eh_frame
+ size when possible. */
+ if (!any_fp_def)
+ {
+ RTX_FRAME_RELATED_P (insn) = 1;
+ if (m->fs.cfa_reg == stack_pointer_rtx)
+ m->fs.cfa_reg = hard_frame_pointer_rtx;
+ m->fs.fp_offset = m->fs.sp_offset;
+ m->fs.fp_valid = true;
+ }
+ }
+ }
if (!int_registers_saved)
{
@@ -11067,6 +11122,10 @@ ix86_expand_prologue (void)
if (sp_is_cfa_reg)
m->fs.cfa_offset += UNITS_PER_WORD;
RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ -UNITS_PER_WORD)));
}
}
@@ -11080,6 +11139,10 @@ ix86_expand_prologue (void)
if (sp_is_cfa_reg)
m->fs.cfa_offset += UNITS_PER_WORD;
RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx,
+ -UNITS_PER_WORD)));
}
}
@@ -11231,6 +11294,34 @@ ix86_expand_prologue (void)
emit_insn (gen_prologue_use (stack_pointer_rtx));
}
+/* Get frame pointer setting insn based on frame state. */
+static rtx
+ix86_set_fp_insn ()
+{
+ rtx r, seq;
+ struct ix86_frame frame;
+ HOST_WIDE_INT offset;
+
+ ix86_compute_frame_layout (&frame);
+ gcc_assert (frame_pointer_partially_needed);
+ offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset;
+
+ if (TARGET_64BIT && (offset > 0x7fffffff))
+ {
+ r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset));
+ emit_insn (r);
+ r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx);
+ r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
+ }
+ else
+ {
+ r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
+ r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
+ }
+ emit_insn (r);
+ return r;
+}
+
/* Emit code to restore REG using a POP insn. */
static void
@@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style)
|| m->fs.sp_offset == frame.stack_pointer_offset);
/* The FP must be valid if the frame pointer is present. */
- gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ if (!frame_pointer_partially_needed)
+ gcc_assert (frame_pointer_needed == m->fs.fp_valid);
+ else
+ gcc_assert (!(any_fp_def && m->fs.fp_valid));
+
gcc_assert (!m->fs.fp_valid
|| m->fs.fp_offset == frame.hard_frame_pointer_offset);
@@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style)
/* If we used a stack pointer and haven't already got rid of it,
then do so now. */
- if (m->fs.fp_valid)
+ if (m->fs.fp_valid || frame_pointer_partially_needed)
{
/* If the stack pointer is valid and pointing at the frame
pointer store address, then we only need a pop. */
@@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style)
ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
- else if (TARGET_USE_LEAVE
- || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
- || !cfun->machine->use_fast_prologue_epilogue)
+ else if (m->fs.fp_valid
+ && (TARGET_USE_LEAVE
+ || optimize_function_for_size_p (cfun)
+ || !cfun->machine->use_fast_prologue_epilogue))
ix86_emit_leave ();
else
{
+ rtx dest, offset;
+ dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx;
+ offset = (m->fs.fp_valid) ? const0_rtx :
+ GEN_INT (m->fs.sp_offset - frame.hfp_save_offset);
pro_epilogue_adjust_stack (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- const0_rtx, style, !using_drap);
+ dest,
+ offset, style, !using_drap);
ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
}
}
@@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file,
fprintf (file, "\n");
/* Switching back to text section. */
- switch_to_section (function_section (current_function_decl));
+ switch_to_section (current_function_section ());
return true;
}
@@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
addr = XEXP (addr, 0);
if (CONST_INT_P (addr))
return 0;
- }
+ }
else if (GET_CODE (addr) == AND
&& const_32bit_mask (XEXP (addr, 1), DImode))
{
@@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp)
return true;
}
else if (!SYMBOL_REF_FAR_ADDR_P (op0)
- && (SYMBOL_REF_LOCAL_P (op0)
- || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie
+ && (SYMBOL_REF_LOCAL_P (op0)
+ || (HAVE_LD_PIE_COPYRELOC
+ && flag_pie
+ && !(SYMBOL_REF_WEAK (op0)
+ /* TODO: Temporary fix for weak defined symbols. Weak defined
+ symbols in an executable cannot be overridden even with
+ a non-weak symbol in a shared library.
+ Revert after the fix is checked in here:
+ http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html  */
+ && SYMBOL_REF_EXTERNAL_P (op0))
&& !SYMBOL_REF_FUNCTION_P (op0)))
&& ix86_cmodel != CM_LARGE_PIC)
return true;
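A hedged sketch of the case the HAVE_LD_PIE_COPYRELOC path above is aimed at (the symbol name is made up): in a -fpie executable linked with a copy-reloc-capable linker, a non-weak external data symbol may be reached through a copy relocation, i.e. a direct RIP-relative access, instead of through the GOT.

  /* Example only: with -O2 -fpie this load can be emitted as a direct
     RIP-relative access rather than a GOT load.  */
  extern int shared_counter;

  int
  read_counter (void)
  {
    return shared_counter;
  }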
@@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x,
(reg:DI 2 cx))
This RTX is rejected from ix86_legitimate_address_p due to
- non-strictness of base register 97. Following this rejection,
+ non-strictness of base register 97. Following this rejection,
reload pushes all three components into separate registers,
creating invalid memory address RTX.
@@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x,
rtx base, index;
bool something_reloaded = false;
- base = XEXP (XEXP (x, 0), 1);
+ base = XEXP (XEXP (x, 0), 1);
if (!REG_OK_FOR_BASE_STRICT_P (base))
{
push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
@@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport)
#ifdef SUB_TARGET_RECORD_STUB
SUB_TARGET_RECORD_STUB (name);
#endif
- }
+ }
rtl = gen_const_mem (Pmode, rtl);
set_mem_alias_set (rtl, ix86_GOT_alias_set ());
@@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg)
return x;
}
-/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
+/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
is true if we require the result be a register. */
static rtx
@@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
if (mode == CCmode)
suffix = "b";
else if (mode == CCCmode)
- suffix = "c";
+ suffix = fp ? "b" : "c";
else
gcc_unreachable ();
break;
@@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
break;
case GEU:
if (mode == CCmode)
- suffix = fp ? "nb" : "ae";
+ suffix = "nb";
else if (mode == CCCmode)
- suffix = "nc";
+ suffix = fp ? "nb" : "nc";
else
gcc_unreachable ();
break;
@@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
case 2:
putc ('w', file);
break;
-
+
case 4:
putc ('l', file);
break;
@@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn)
}
/* Check if a 256bit AVX register is referenced in stores. */
-
+
static void
ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
{
@@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
bool *used = (bool *) data;
*used = true;
}
- }
+ }
/* Calculate mode of upper 128bit AVX registers after the insn. */
@@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
t = gen_reg_rtx (V4SFmode);
else
t = op0;
-
+
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
emit_move_insn (t, CONST0_RTX (V4SFmode));
else
@@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-
+
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
@@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[])
if (TARGET_XOP)
{
- /* The XOP VPPERM insn supports three inputs. By ignoring the
+ /* The XOP VPPERM insn supports three inputs. By ignoring the
one_operand_shuffle special case, we avoid creating another
set of constant vectors in memory. */
one_operand_shuffle = false;
@@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
DONE_LABEL is a label after the whole copying sequence. The label is created
on demand if *DONE_LABEL is NULL.
MIN_SIZE is minimal size of block copied. This value gets adjusted for new
- bounds after the initial copies.
+ bounds after the initial copies.
DESTMEM/SRCMEM are memory expressions pointing to the copies block,
DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
@@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
return dst;
}
-/* Return true if ALG can be used in current context.
+/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
alg_usable_p (enum stringop_alg alg, bool memset)
@@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*noalign = alg_noalign;
return alg;
}
- break;
+ else if (!any_alg_usable_p)
+ break;
}
else if (alg_usable_p (candidate, memset))
{
@@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
alg = decide_alg (count, max / 2, min_size, max_size, memset,
zero_memset, dynamic_check, noalign);
gcc_assert (*dynamic_check == -1);
- gcc_assert (alg != libcall);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
+ else
+ gcc_assert (alg != libcall);
return alg;
}
return (alg_usable_p (algs->unknown_size, memset)
@@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
with specified algorithm.
4) Epilogue: code copying tail of the block that is too small to be
- handled by main body (or up to size guarded by prologue guard).
+ handled by main body (or up to size guarded by prologue guard).
Misaligned move sequence
@@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
- front of all code.
+ front of all code.
For now the misaligned move sequences do not have fast path
without broadcasting. */
if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
@@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
}
else
{
- /* Static functions and indirect calls don't need the pic register. */
+ /* Static functions and indirect calls don't need the pic register.  Also
+ check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
+ attribute, which makes this an indirect call. */
if (flag_pic
&& (!TARGET_64BIT
|| (ix86_cmodel == CM_LARGE_PIC
&& DEFAULT_ABI != MS_ABI))
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
- && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
+ && flag_plt
+ && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
+ || !lookup_attribute ("noplt",
+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
use_reg (&use, pic_offset_table_rtx);
}
@@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
return call;
}
+/* Return true if the function being called was marked with attribute "noplt"
+ or -fno-plt was used, and we are compiling for non-PIC x86_64. We need to
+ handle the non-PIC case in the backend because there is no easy interface
+ for the front-end to force non-PLT calls to use the GOT. This is currently
+ used only with 64-bit ELF targets to call a function marked "noplt"
+ indirectly. */
+
+static bool
+ix86_nopic_noplt_attribute_p (rtx call_op)
+{
+ if (flag_pic || ix86_cmodel == CM_LARGE
+ || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
+ || SYMBOL_REF_LOCAL_P (call_op))
+ return false;
+
+ tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+ if (!flag_plt
+ || (symbol_decl != NULL_TREE
+ && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
+ return true;
+
+ return false;
+}
+
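For illustration of the case ix86_nopic_noplt_attribute_p detects (the declaration below is only an assumed usage): on 64-bit ELF without -fpic, a call to an external function marked "noplt" is expected to bypass the PLT and go through the GOT, i.e. call *foo@GOTPCREL(%rip) as emitted by the hunks that follow.

  /* Example only: a non-PIC x86_64 call to an external "noplt" function
     should be emitted as an indirect call through the GOT.  */
  extern void foo (void) __attribute__ ((noplt));

  void
  bar (void)
  {
    foo ();
  }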
/* Output the assembly for a call instruction. */
const char *
@@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op)
if (SIBLING_CALL_P (insn))
{
- if (direct_p)
+ if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
+ xasm = "jmp\t*%p0@GOTPCREL(%%rip)";
+ else if (direct_p)
xasm = "jmp\t%P0";
/* SEH epilogue detection requires the indirect branch case
to include REX.W. */
@@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op)
seh_nop_p = true;
}
- if (direct_p)
+ if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
+ xasm = "call\t*%p0@GOTPCREL(%%rip)";
+ else if (direct_p)
xasm = "call\t%P0";
else
xasm = "call\t%A0";
@@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail)
using topological ordering in the region. */
if (rgn == CONTAINING_RGN (e->src->index)
&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
- add_dependee_for_func_arg (first_arg, e->src);
+ add_dependee_for_func_arg (first_arg, e->src);
}
}
insn = first_arg;
@@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
other unit can not rely on the alignment.
Exclude va_list type. It is the common case of local array where
- we can not benefit from the alignment.
+ we can not benefit from the alignment.
TODO: Probably one should optimize for size only when var is not escaping. */
if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
@@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
build_fold_addr_expr (version_decl));
result_var = create_tmp_var (ptr_type_node, NULL);
- convert_stmt = gimple_build_assign (result_var, convert_expr);
+ convert_stmt = gimple_build_assign (result_var, convert_expr);
return_stmt = gimple_build_return (result_var);
if (predicate_chain == NULL_TREE)
@@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_seq_add_stmt (&gseq, call_cond_stmt);
predicate_chain = TREE_CHAIN (predicate_chain);
-
+
if (and_expr_var == NULL)
and_expr_var = cond_var;
else
@@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_set_bb (return_stmt, bb2);
bb3 = e23->dest;
- make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+ make_edge (bb1, bb3, EDGE_FALSE_VALUE);
remove_edge (e23);
make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
@@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
P_FMA4,
P_XOP,
P_PROC_XOP,
- P_FMA,
+ P_FMA,
P_PROC_FMA,
P_AVX2,
P_PROC_AVX2
@@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
cl_target_option_save (&cur_target, &global_options);
target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
&global_options_set);
-
+
gcc_assert (target_node);
new_target = TREE_TARGET_OPTION (target_node);
gcc_assert (new_target);
-
+
if (new_target->arch_specified && new_target->arch > 0)
{
switch (new_target->arch)
@@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
arg_str = "bdver4";
priority = P_PROC_AVX2;
break;
- }
- }
-
+ }
+ }
+
cl_target_option_restore (&global_options, &cur_target);
-
+
if (predicate_list && arg_str == NULL)
{
error_at (DECL_SOURCE_LOCATION (decl),
"No dispatcher found for the versioning attributes");
return 0;
}
-
+
if (predicate_list)
{
predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
@@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
*predicate_list = predicate_chain;
}
- return priority;
+ return priority;
}
/* This compares the priority of target features in function DECL1
@@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2)
/* V1 and V2 point to function versions with different priorities
based on the target ISA. This function compares their priorities. */
-
+
static int
feature_compare (const void *v1, const void *v2)
{
@@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2)
result = true;
XDELETEVEC (target1);
- XDELETEVEC (target2);
-
+ XDELETEVEC (target2);
+
return result;
}
-static tree
+static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
/* For function version, add the target suffix to the assembler name. */
@@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl)
fn_type = TREE_TYPE (decl);
func_type = build_function_type (TREE_TYPE (fn_type),
TYPE_ARG_TYPES (fn_type));
-
+
func_decl = build_fn_decl (func_name, func_type);
XDELETEVEC (func_name);
TREE_USED (func_decl) = 1;
@@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl)
/* This will be of type IFUNCs have to be externally visible. */
TREE_PUBLIC (func_decl) = 1;
- return func_decl;
+ return func_decl;
}
#endif
@@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl)
tree dispatch_decl = NULL;
struct cgraph_function_version_info *default_version_info = NULL;
-
+
gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
node = cgraph_get_node (fn);
@@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl)
node_v = get_cgraph_node_version (node);
gcc_assert (node_v != NULL);
-
+
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
@@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl,
gcc_assert (dispatch_decl != NULL);
/* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
- DECL_ATTRIBUTES (dispatch_decl)
+ DECL_ATTRIBUTES (dispatch_decl)
= make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
/* Create the alias for dispatch to resolver here. */
@@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl,
provide the code to dispatch the right function at run-time. NODE points
to the dispatcher decl whose body will be created. */
-static tree
+static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
tree resolver_decl;
@@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p)
}
dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
- rebuild_cgraph_edges ();
+ rebuild_cgraph_edges ();
pop_cfun ();
return resolver_decl;
}
@@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_AMDFAM15H,
M_INTEL_SILVERMONT,
M_AMD_BTVER1,
- M_AMD_BTVER2,
+ M_AMD_BTVER2,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
@@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"barcelona", M_AMDFAM10H_BARCELONA},
{"shanghai", M_AMDFAM10H_SHANGHAI},
{"istanbul", M_AMDFAM10H_ISTANBUL},
- {"btver1", M_AMD_BTVER1},
+ {"btver1", M_AMD_BTVER1},
{"amdfam15h", M_AMDFAM15H},
{"bdver1", M_AMDFAM15H_BDVER1},
{"bdver2", M_AMDFAM15H_BDVER2},
{"bdver3", M_AMDFAM15H_BDVER3},
{"bdver4", M_AMDFAM15H_BDVER4},
- {"btver2", M_AMD_BTVER2},
+ {"btver2", M_AMD_BTVER2},
};
static struct _isa_names_table
@@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
{
/* Make it call __cpu_indicator_init in libgcc. */
tree call_expr, fndecl, type;
- type = build_function_type_list (integer_type_node, NULL_TREE);
+ type = build_function_type_list (integer_type_node, NULL_TREE);
fndecl = build_fn_decl ("__cpu_indicator_init", type);
- call_expr = build_call_expr (fndecl, 0);
+ call_expr = build_call_expr (fndecl, 0);
return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
}
case IX86_BUILTIN_CPU_IS:
@@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first)
{
default_encode_section_info (decl, rtl, first);
- if (TREE_CODE (decl) == VAR_DECL
- && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl))
+ || TREE_CODE(decl) == STRING_CST)
&& ix86_in_large_data_p (decl))
SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
@@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
op0 = gen_lowpart (V4DImode, d->op0);
op1 = gen_lowpart (V4DImode, d->op1);
rperm[0]
- = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
- || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
+ = GEN_INT ((d->perm[0] / (nelt / 2))
+ | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
if (target != d->target)
emit_move_insn (d->target, gen_lowpart (d->vmode, target));
@@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
+#undef TARGET_SET_FP_INSN
+#define TARGET_SET_FP_INSN ix86_set_fp_insn
+
#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
@@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
+#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER
+#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
@@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2)
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
ix86_float_exceptions_rounding_supported_p
+#undef TARGET_STRICT_ALIGN
+#define TARGET_STRICT_ALIGN true
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md
index 39d395875..2369e4b40 100644
--- a/gcc-4.9/gcc/config/i386/i386.md
+++ b/gcc-4.9/gcc/config/i386/i386.md
@@ -12159,18 +12159,52 @@
(set_attr "mode" "<MODE>")])
;; BMI2 instructions.
-(define_insn "bmi2_bzhi_<mode>3"
+(define_expand "bmi2_bzhi_<mode>3"
+ [(parallel
+ [(set (match_operand:SWI48 0 "register_operand")
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (umin:SWI48
+ (and:SWI48 (match_operand:SWI48 2 "register_operand")
+ (const_int 255))
+ (match_dup 3))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_BMI2"
+ "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);")
+
+(define_insn "*bmi2_bzhi_<mode>3"
[(set (match_operand:SWI48 0 "register_operand" "=r")
- (and:SWI48 (lshiftrt:SWI48 (const_int -1)
- (match_operand:SWI48 2 "register_operand" "r"))
- (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (umin:SWI48
+ (and:SWI48 (match_operand:SWI48 2 "register_operand" "r")
+ (const_int 255))
+ (match_operand:SWI48 3 "const_int_operand" "n"))
+ (const_int 0)))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_BMI2"
+ "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
"bzhi\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "bitmanip")
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+(define_mode_attr k [(SI "k") (DI "q")])
+(define_insn "*bmi2_bzhi_<mode>3_1"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (zero_extract:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+ (umin:SWI48
+ (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r"))
+ (match_operand:SWI48 3 "const_int_operand" "n"))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+ "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
+ [(set_attr "type" "bitmanip")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
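A rough C model of what the zero_extract form above expresses, assuming SImode (a sketch of the semantics, not of the generated code): BZHI keeps the low n bits of the source, where n is the low byte of the index operand, saturating at the operand width. The <immintrin.h> intrinsic _bzhi_u32 maps to the same instruction.

  #include <immintrin.h>

  /* Example only: semantic model of bzhi for 32-bit operands.  */
  static unsigned int
  bzhi_model (unsigned int src, unsigned int idx)
  {
    unsigned int n = idx & 0xff;          /* only the low byte is used */
    if (n >= 32)
      return src;                         /* width >= operand size: copy all */
    return src & ((1u << n) - 1);         /* keep the low n bits */
  }

  unsigned int
  use_intrinsic (unsigned int src, unsigned int idx)
  {
    return _bzhi_u32 (src, idx);          /* requires -mbmi2 */
  }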
(define_insn "bmi2_pdep_<mode>3"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
@@ -13518,7 +13552,8 @@
(set (reg:CCFP FPSR_REG)
(unspec:CCFP [(match_dup 2) (match_dup 3)]
UNSPEC_C2_FLAG))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
"fprem"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])
@@ -13527,7 +13562,8 @@
[(use (match_operand:XF 0 "register_operand"))
(use (match_operand:XF 1 "general_operand"))
(use (match_operand:XF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
{
rtx label = gen_label_rtx ();
@@ -13550,7 +13586,8 @@
[(use (match_operand:MODEF 0 "register_operand"))
(use (match_operand:MODEF 1 "general_operand"))
(use (match_operand:MODEF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
{
rtx (*gen_truncxf) (rtx, rtx);
@@ -13589,7 +13626,8 @@
(set (reg:CCFP FPSR_REG)
(unspec:CCFP [(match_dup 2) (match_dup 3)]
UNSPEC_C2_FLAG))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
"fprem1"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])
@@ -13598,7 +13636,8 @@
[(use (match_operand:XF 0 "register_operand"))
(use (match_operand:XF 1 "general_operand"))
(use (match_operand:XF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
{
rtx label = gen_label_rtx ();
@@ -13621,7 +13660,8 @@
[(use (match_operand:MODEF 0 "register_operand"))
(use (match_operand:MODEF 1 "general_operand"))
(use (match_operand:MODEF 2 "general_operand"))]
- "TARGET_USE_FANCY_MATH_387"
+ "TARGET_USE_FANCY_MATH_387
+ && flag_finite_math_only"
{
rtx (*gen_truncxf) (rtx, rtx);
diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt
index 1e00b660e..f64a9e1eb 100644
--- a/gcc-4.9/gcc/config/i386/i386.opt
+++ b/gcc-4.9/gcc/config/i386/i386.opt
@@ -108,10 +108,6 @@ int x_ix86_dump_tunes
TargetSave
int x_ix86_force_align_arg_pointer
-;; -mcopyrelocs=
-TargetSave
-int x_ix86_copyrelocs
-
;; -mforce-drap=
TargetSave
int x_ix86_force_drap
@@ -295,10 +291,6 @@ mfancy-math-387
Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
Generate sin, cos, sqrt for FPU
-mcopyrelocs
-Target Report Var(ix86_pie_copyrelocs) Init(0)
-Assume copy relocations support for pie builds.
-
mforce-drap
Target Report Var(ix86_force_drap)
Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack
diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h
index 27d68b5db..bfc7746bc 100644
--- a/gcc-4.9/gcc/config/i386/linux.h
+++ b/gcc-4.9/gcc/config/i386/linux.h
@@ -24,18 +24,3 @@ along with GCC; see the file COPYING3. If not see
#define RUNTIME_ROOT_PREFIX ""
#endif
#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
-
-/* These may be provided by config/linux-grtev*.h. */
-#ifndef LINUX_GRTE_EXTRA_SPECS
-#define LINUX_GRTE_EXTRA_SPECS
-#endif
-
-#undef SUBTARGET_EXTRA_SPECS
-#ifndef SUBTARGET_EXTRA_SPECS_STR
-#define SUBTARGET_EXTRA_SPECS \
- LINUX_GRTE_EXTRA_SPECS
-#else
-#define SUBTARGET_EXTRA_SPECS \
- LINUX_GRTE_EXTRA_SPECS \
- SUBTARGET_EXTRA_SPECS_STR
-#endif
diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h
index 5124a341b..b71616fea 100644
--- a/gcc-4.9/gcc/config/i386/linux64.h
+++ b/gcc-4.9/gcc/config/i386/linux64.h
@@ -34,12 +34,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2"
#define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2"
-/* These may be provided by config/linux-grtev*.h. */
-#ifndef LINUX_GRTE_EXTRA_SPECS
-#define LINUX_GRTE_EXTRA_SPECS
-#endif
-
-#undef SUBTARGET_EXTRA_SPECS
-#define SUBTARGET_EXTRA_SPECS \
- LINUX_GRTE_EXTRA_SPECS
-
diff --git a/gcc-4.9/gcc/config/i386/mmx.md b/gcc-4.9/gcc/config/i386/mmx.md
index 214acde23..a7d2a7eec 100644
--- a/gcc-4.9/gcc/config/i386/mmx.md
+++ b/gcc-4.9/gcc/config/i386/mmx.md
@@ -600,20 +600,25 @@
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "*vec_extractv2sf_1"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r")
(vec_select:SF
- (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o")
+ (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o")
(parallel [(const_int 1)])))]
"TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
- unpckhps\t%0, %0
+ %vmovshdup\t{%1, %0|%0, %1}
+ shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
#
#
#
#"
- [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov")
- (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")])
+ [(set_attr "isa" "*,sse3,noavx,*,*,*,*")
+ (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
+ (set_attr "length_immediate" "*,*,1,*,*,*,*")
+ (set_attr "prefix_rep" "*,1,*,*,*,*,*")
+ (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
+ (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
(define_split
[(set (match_operand:SF 0 "register_operand")
@@ -1288,26 +1293,23 @@
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "*vec_extractv2si_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,x,y,x,r")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r")
(vec_select:SI
- (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,x,0,o,o,o")
+ (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o")
(parallel [(const_int 1)])))]
"TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
- punpckhdq\t%0, %0
- pshufd\t{$85, %1, %0|%0, %1, 85}
- unpckhps\t%0, %0
+ %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5}
+ shufps\t{$0xe5, %1, %0|%0, %1, 0xe5}
#
#
#"
- [(set (attr "isa")
- (if_then_else (eq_attr "alternative" "1,2")
- (const_string "sse2")
- (const_string "*")))
- (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov")
- (set_attr "length_immediate" "*,*,1,*,*,*,*")
- (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")])
+ [(set_attr "isa" "*,sse2,noavx,*,*,*")
+ (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov")
+ (set_attr "length_immediate" "*,1,1,*,*,*")
+ (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig")
+ (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")])
(define_split
[(set (match_operand:SI 0 "register_operand")
diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def
index ddf1d21c9..215c71f4d 100644
--- a/gcc-4.9/gcc/config/i386/x86-tune.def
+++ b/gcc-4.9/gcc/config/i386/x86-tune.def
@@ -97,25 +97,25 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
conditional jump instruction for 32 bit TARGET.
FIXME: revisit for generic. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32",
- m_CORE_ALL | m_BDVER)
+ m_GENERIC | m_CORE_ALL | m_BDVER)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent
conditional jump instruction for TARGET_64BIT.
FIXME: revisit for generic. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
- m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
+ m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a
subsequent conditional jump instruction when the condition jump
check sign flag (SF) or overflow flag (OF). */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
- m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
+ m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER)
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
jump instruction when the alu instruction produces the CCFLAG consumed by
the conditional jump instruction. */
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
- m_SANDYBRIDGE | m_HASWELL)
+ m_GENERIC | m_SANDYBRIDGE | m_HASWELL)
/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
during reassociation of integer computation. */
diff --git a/gcc-4.9/gcc/config/ia64/ia64.c b/gcc-4.9/gcc/config/ia64/ia64.c
index 41adc4adc..4ec3e3abe 100644
--- a/gcc-4.9/gcc/config/ia64/ia64.c
+++ b/gcc-4.9/gcc/config/ia64/ia64.c
@@ -602,11 +602,6 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
-/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
- in an order different from the specified program order. */
-#undef TARGET_RELAXED_ORDERING
-#define TARGET_RELAXED_ORDERING true
-
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h
index 31e8a94ce..e69de29bb 100644
--- a/gcc-4.9/gcc/config/linux-grte.h
+++ b/gcc-4.9/gcc/config/linux-grte.h
@@ -1,41 +0,0 @@
-/* Definitions for Linux-based GRTE (Google RunTime Environment).
- Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc.
- Contributed by Chris Demetriou and Ollie Wild.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-/* Overrides LIB_SPEC from gnu-user.h. */
-#undef LIB_SPEC
-#define LIB_SPEC \
- "%{pthread:-lpthread} \
- %{shared:-lc} \
- %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}"
-
-/* When GRTE links statically, it needs its NSS and resolver libraries
- linked in as well. Note that when linking statically, these are
- enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */
-#undef LINUX_GRTE_EXTRA_SPECS
-#define LINUX_GRTE_EXTRA_SPECS \
- { "libc", "%{static:%(libc_static);:-lc}" }, \
- { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \
- { "libc_static", "-lc -lresolv" }, \
- { "libc_p_static", "-lc_p -lresolv_p" },
diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c
index 7c3c5a461..cdb2b5bdd 100644
--- a/gcc-4.9/gcc/config/linux.c
+++ b/gcc-4.9/gcc/config/linux.c
@@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see
#include "tm.h"
#include "linux-protos.h"
-/* Android does not support GNU indirect functions. */
-
bool
linux_has_ifunc_p (void)
{
diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md
index 5e890eced..3f29d6d62 100644
--- a/gcc-4.9/gcc/config/msp430/msp430.md
+++ b/gcc-4.9/gcc/config/msp430/msp430.md
@@ -559,7 +559,7 @@
[(set (match_operand:PSI 0 "nonimmediate_operand" "=r")
(subreg:PSI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) 0))]
"TARGET_LARGE"
- "RLAM #4, %0 { RRAM #4, %0"
+ "RLAM.A #4, %0 { RRAM.A #4, %0"
)
;; Look for cases where integer/pointer conversions are suboptimal due
@@ -587,7 +587,7 @@
(ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0"))
(const_int 1)))]
"TARGET_LARGE"
- "RLAM #4, %0 { RRAM #3, %0"
+ "RLAM.A #4, %0 { RRAM.A #3, %0"
)
(define_insn "extend_and_shift2_hipsi2"
@@ -595,7 +595,7 @@
(ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0"))
(const_int 2)))]
"TARGET_LARGE"
- "RLAM #4, %0 { RRAM #2, %0"
+ "RLAM.A #4, %0 { RRAM.A #2, %0"
)
; Nasty - we are sign-extending a 20-bit PSI value in one register into
diff --git a/gcc-4.9/gcc/config/nios2/nios2.c b/gcc-4.9/gcc/config/nios2/nios2.c
index cdd2e6bc9..047b615ba 100644
--- a/gcc-4.9/gcc/config/nios2/nios2.c
+++ b/gcc-4.9/gcc/config/nios2/nios2.c
@@ -2135,6 +2135,18 @@ nios2_output_dwarf_dtprel (FILE *file, int size, rtx x)
fprintf (file, ")");
}
+/* Implement TARGET_ASM_FILE_END. */
+
+static void
+nios2_asm_file_end (void)
+{
+ /* The Nios II Linux stack is mapped non-executable by default, so only
+ add a .note.GNU-stack section requesting an executable stack when
+ trampolines are generated. */
+ if (TARGET_LINUX_ABI && trampolines_created)
+ file_end_indicate_exec_stack ();
+}
+
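As a hedged example of the situation the new TARGET_ASM_FILE_END hook flags: taking the address of a nested function (a GNU C extension) makes GCC build a trampoline on the stack, and only such translation units should get the executable-stack note.

  /* Example only: add_base's address is taken, so a stack trampoline is
     created and trampolines_created becomes true for this file.  */
  static int
  apply (int (*fn) (int), int x)
  {
    return fn (x);
  }

  int
  outer (int base)
  {
    int add_base (int v) { return v + base; }
    return apply (add_base, 1);
  }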
/* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
static void
nios2_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
@@ -3313,6 +3325,9 @@ nios2_merge_decl_attributes (tree olddecl, tree newdecl)
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nios2_output_addr_const_extra
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END nios2_asm_file_end
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nios2_option_override
diff --git a/gcc-4.9/gcc/config/pa/pa.c b/gcc-4.9/gcc/config/pa/pa.c
index 5a7598ca7..801982068 100644
--- a/gcc-4.9/gcc/config/pa/pa.c
+++ b/gcc-4.9/gcc/config/pa/pa.c
@@ -3235,7 +3235,12 @@ pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
&& aligned_p
&& function_label_operand (x, VOIDmode))
{
- fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
+ fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
+
+ /* We don't want an OPD when generating fast indirect calls. */
+ if (!TARGET_FAST_INDIRECT_CALLS)
+ fputs ("P%", asm_out_file);
+
output_addr_const (asm_out_file, x);
fputc ('\n', asm_out_file);
return true;
@@ -4203,9 +4208,12 @@ pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
last_address = extra_nop ? 4 : 0;
insn = get_last_nonnote_insn ();
- last_address += INSN_ADDRESSES (INSN_UID (insn));
- if (INSN_P (insn))
- last_address += insn_default_length (insn);
+ if (insn)
+ {
+ last_address += INSN_ADDRESSES (INSN_UID (insn));
+ if (INSN_P (insn))
+ last_address += insn_default_length (insn);
+ }
last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
& ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
}
@@ -9308,6 +9316,12 @@ pa_function_value (const_tree valtype,
|| TREE_CODE (valtype) == COMPLEX_TYPE
|| TREE_CODE (valtype) == VECTOR_TYPE)
{
+ HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
+
+ /* Handle aggregates that fit exactly in a word or double word. */
+ if ((valsize & (UNITS_PER_WORD - 1)) == 0)
+ return gen_rtx_REG (TYPE_MODE (valtype), 28);
+
if (TARGET_64BIT)
{
/* Aggregates with a size less than or equal to 128 bits are
@@ -9316,7 +9330,7 @@ pa_function_value (const_tree valtype,
memory. */
rtx loc[2];
int i, offset = 0;
- int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
+ int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
for (i = 0; i < ub; i++)
{
@@ -9328,7 +9342,7 @@ pa_function_value (const_tree valtype,
return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
}
- else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
+ else if (valsize > UNITS_PER_WORD)
{
/* Aggregates 5 to 8 bytes in size are returned in general
registers r28-r29 in the same manner as other non
diff --git a/gcc-4.9/gcc/config/pa/pa.md b/gcc-4.9/gcc/config/pa/pa.md
index a9421ac2e..43b909e35 100644
--- a/gcc-4.9/gcc/config/pa/pa.md
+++ b/gcc-4.9/gcc/config/pa/pa.md
@@ -123,7 +123,7 @@
;; type "binary" insns have two input operands (1,2) and one output (0)
(define_attr "type"
- "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload"
+ "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload,trap"
(const_string "binary"))
(define_attr "pa_combine_type"
@@ -166,7 +166,7 @@
;; For conditional branches. Frame related instructions are not allowed
;; because they confuse the unwind support.
(define_attr "in_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch")
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap")
(eq_attr "length" "4")
(not (match_test "RTX_FRAME_RELATED_P (insn)")))
(const_string "true")
@@ -175,7 +175,7 @@
;; Disallow instructions which use the FPU since they will tie up the FPU
;; even if the instruction is nullified.
(define_attr "in_nullified_branch_delay" "false,true"
- (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
+ (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch,trap")
(eq_attr "length" "4")
(not (match_test "RTX_FRAME_RELATED_P (insn)")))
(const_string "true")
@@ -184,7 +184,7 @@
;; For calls and millicode calls. Allow unconditional branches in the
;; delay slot.
(define_attr "in_call_delay" "false,true"
- (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch")
+ (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap")
(eq_attr "length" "4")
(not (match_test "RTX_FRAME_RELATED_P (insn)")))
(const_string "true")
@@ -5331,6 +5331,15 @@
[(set_attr "type" "binary,binary")
(set_attr "length" "4,4")])
+;; Trap instructions.
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "{addit|addi,tc},<> 1,%%r0,%%r0"
+ [(set_attr "type" "trap")
+ (set_attr "length" "4")])
+
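For reference, __builtin_trap is what expands through the standard named "trap" pattern added above; a minimal example (illustrative only):

  /* Example only: on PA this should now emit the conditional-trap form
     "{addit|addi,tc},<> 1,%r0,%r0" instead of calling abort.  */
  void
  hard_stop (void)
  {
    __builtin_trap ();
  }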
;; Clobbering a "register_operand" instead of a match_scratch
;; in operand3 of millicode calls avoids spilling %r1 and
;; produces better code.
@@ -8926,14 +8935,14 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
;; strength reduction is used. It is actually created when the instruction
;; combination phase combines the special loop test. Since this insn
;; is both a jump insn and has an output, it must deal with its own
-;; reloads, hence the `m' constraints. The `!' constraints direct reload
+;; reloads, hence the `Q' constraints. The `!' constraints direct reload
;; to not choose the register alternatives in the event a reload is needed.
(define_insn "decrement_and_branch_until_zero"
[(set (pc)
(if_then_else
(match_operator 2 "comparison_operator"
[(plus:SI
- (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m")
+ (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*Q")
(match_operand:SI 1 "int5_operand" "L,L,L"))
(const_int 0)])
(label_ref (match_operand 3 "" ""))
@@ -9022,7 +9031,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
[(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
(label_ref (match_operand 3 "" ""))
(pc)))
- (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q")
(match_dup 1))]
""
"* return pa_output_movb (operands, insn, which_alternative, 0); "
@@ -9094,7 +9103,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
[(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)])
(pc)
(label_ref (match_operand 3 "" ""))))
- (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q")
+ (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q")
(match_dup 1))]
""
"* return pa_output_movb (operands, insn, which_alternative, 1); "
diff --git a/gcc-4.9/gcc/config/pa/predicates.md b/gcc-4.9/gcc/config/pa/predicates.md
index 8dcfce0e9..405cf7f63 100644
--- a/gcc-4.9/gcc/config/pa/predicates.md
+++ b/gcc-4.9/gcc/config/pa/predicates.md
@@ -528,20 +528,29 @@
;; This predicate is used for branch patterns that internally handle
;; register reloading. We need to accept non-symbolic memory operands
;; after reload to ensure that the pattern is still valid if reload
-;; didn't find a hard register for the operand.
+;; didn't find a hard register for the operand.  We also reject index
+;; and lo_sum DLT addresses, as these are invalid for move destinations.
(define_predicate "reg_before_reload_operand"
(match_code "reg,mem")
{
+ rtx op0;
+
if (register_operand (op, mode))
return true;
- if (reload_completed
- && memory_operand (op, mode)
- && !symbolic_memory_operand (op, mode))
- return true;
+ if (!reload_in_progress && !reload_completed)
+ return false;
- return false;
+ if (! MEM_P (op))
+ return false;
+
+ op0 = XEXP (op, 0);
+
+ return (memory_address_p (mode, op0)
+ && !IS_INDEX_ADDR_P (op0)
+ && !IS_LO_SUM_DLT_ADDR_P (op0)
+ && !symbolic_memory_operand (op, mode));
})
;; True iff OP is a register or const_0 operand for MODE.
diff --git a/gcc-4.9/gcc/config/rs6000/altivec.h b/gcc-4.9/gcc/config/rs6000/altivec.h
index 129cf6fa1..9ee0ae5ec 100644
--- a/gcc-4.9/gcc/config/rs6000/altivec.h
+++ b/gcc-4.9/gcc/config/rs6000/altivec.h
@@ -124,6 +124,7 @@
#define vec_vcfux __builtin_vec_vcfux
#define vec_cts __builtin_vec_cts
#define vec_ctu __builtin_vec_ctu
+#define vec_cpsgn __builtin_vec_copysign
#define vec_expte __builtin_vec_expte
#define vec_floor __builtin_vec_floor
#define vec_loge __builtin_vec_loge
@@ -214,8 +215,10 @@
#define vec_lvsl __builtin_vec_lvsl
#define vec_lvsr __builtin_vec_lvsr
#define vec_max __builtin_vec_max
+#define vec_mergee __builtin_vec_vmrgew
#define vec_mergeh __builtin_vec_mergeh
#define vec_mergel __builtin_vec_mergel
+#define vec_mergeo __builtin_vec_vmrgow
#define vec_min __builtin_vec_min
#define vec_mladd __builtin_vec_mladd
#define vec_msum __builtin_vec_msum
@@ -319,6 +322,8 @@
#define vec_sqrt __builtin_vec_sqrt
#define vec_vsx_ld __builtin_vec_vsx_ld
#define vec_vsx_st __builtin_vec_vsx_st
+#define vec_xl __builtin_vec_vsx_ld
+#define vec_xst __builtin_vec_vsx_st
/* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
instead of __builtin_vec_<xxx> */
@@ -336,6 +341,7 @@
#define vec_vadduqm __builtin_vec_vadduqm
#define vec_vbpermq __builtin_vec_vbpermq
#define vec_vclz __builtin_vec_vclz
+#define vec_cntlz __builtin_vec_vclz
#define vec_vclzb __builtin_vec_vclzb
#define vec_vclzd __builtin_vec_vclzd
#define vec_vclzh __builtin_vec_vclzh
diff --git a/gcc-4.9/gcc/config/rs6000/altivec.md b/gcc-4.9/gcc/config/rs6000/altivec.md
index a8cfcb739..02ea14237 100644
--- a/gcc-4.9/gcc/config/rs6000/altivec.md
+++ b/gcc-4.9/gcc/config/rs6000/altivec.md
@@ -67,7 +67,7 @@
UNSPEC_VCTSXS
UNSPEC_VLOGEFP
UNSPEC_VEXPTEFP
- UNSPEC_VLSDOI
+ UNSPEC_VSLDOI
UNSPEC_VUNPACK_HI_SIGN
UNSPEC_VUNPACK_LO_SIGN
UNSPEC_VUNPACK_HI_SIGN_DIRECT
@@ -2077,7 +2077,7 @@
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
(match_operand:QI 3 "immediate_operand" "i")]
- UNSPEC_VLSDOI))]
+ UNSPEC_VSLDOI))]
"TARGET_ALTIVEC"
"vsldoi %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
@@ -2297,7 +2297,31 @@
"dststt %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_lvsl"
+(define_expand "altivec_lvsl"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsl_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+})
+
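The classic use the expander above has to keep working when the vector element order is little-endian is the lvsl/vperm unaligned-load idiom; a sketch with standard AltiVec intrinsics (illustrative only):

  #include <altivec.h>

  /* Example only: merge two aligned loads into one unaligned vector using
     the permute mask produced by lvsl.  */
  vector unsigned char
  load_unaligned (const unsigned char *p)
  {
    vector unsigned char lo   = vec_ld (0, p);
    vector unsigned char hi   = vec_ld (15, p);
    vector unsigned char mask = vec_lvsl (0, p);
    return vec_perm (lo, hi, mask);
  }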
+(define_insn "altivec_lvsl_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSL))]
@@ -2305,7 +2329,31 @@
"lvsl %0,%y1"
[(set_attr "type" "vecload")])
-(define_insn "altivec_lvsr"
+(define_expand "altivec_lvsr"
+ [(use (match_operand:V16QI 0 "register_operand" ""))
+ (use (match_operand:V16QI 1 "memory_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1]));
+ else
+ {
+ int i;
+ rtx mask, perm[16], constv, vperm;
+ mask = gen_reg_rtx (V16QImode);
+ emit_insn (gen_altivec_lvsr_direct (mask, operands[1]));
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (i);
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv),
+ UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm));
+ }
+ DONE;
+})
+
+(define_insn "altivec_lvsr_direct"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")]
UNSPEC_LVSR))]
diff --git a/gcc-4.9/gcc/config/rs6000/darwin.h b/gcc-4.9/gcc/config/rs6000/darwin.h
index 0329f3f62..dfd181e43 100644
--- a/gcc-4.9/gcc/config/rs6000/darwin.h
+++ b/gcc-4.9/gcc/config/rs6000/darwin.h
@@ -206,7 +206,11 @@ extern int darwin_emit_branch_islands;
"vrsave", "vscr", \
"spe_acc", "spefscr", \
"sfp", \
- "tfhar", "tfiar", "texasr" \
+ "tfhar", "tfiar", "texasr", \
+ "rh0", "rh1", "rh2", "rh3", "rh4", "rh5", "rh6", "rh7", \
+ "rh8", "rh9", "rh10", "rh11", "rh12", "rh13", "rh14", "rh15", \
+ "rh16", "rh17", "rh18", "rh19", "rh20", "rh21", "rh22", "rh23", \
+ "rh24", "rh25", "rh26", "rh27", "rh28", "rh29", "rh30", "rh31" \
}
/* This outputs NAME to FILE. */
diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h
index 53997f027..e69de29bb 100644
--- a/gcc-4.9/gcc/config/rs6000/linux-grte.h
+++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h
@@ -1,41 +0,0 @@
-/* Definitions for Linux-based GRTE (Google RunTime Environment).
- Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc.
- Contributed by Chris Demetriou and Ollie Wild.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-/* Overrides LIB_LINUX_SPEC from sysv4.h. */
-#undef LIB_LINUX_SPEC
-#define LIB_LINUX_SPEC \
- "%{pthread:-lpthread} \
- %{shared:-lc} \
- %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}"
-
-/* When GRTE links statically, it needs its NSS and resolver libraries
- linked in as well. Note that when linking statically, these are
- enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */
-#undef LINUX_GRTE_EXTRA_SPECS
-#define LINUX_GRTE_EXTRA_SPECS \
- { "libc", "%{static:%(libc_static);:-lc}" }, \
- { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \
- { "libc_static", "-lc -lresolv" }, \
- { "libc_p_static", "-lc_p -lresolv_p" },
diff --git a/gcc-4.9/gcc/config/rs6000/predicates.md b/gcc-4.9/gcc/config/rs6000/predicates.md
index 8c384b380..2f4046215 100644
--- a/gcc-4.9/gcc/config/rs6000/predicates.md
+++ b/gcc-4.9/gcc/config/rs6000/predicates.md
@@ -1783,7 +1783,7 @@
(define_predicate "fusion_gpr_mem_load"
(match_code "mem,sign_extend,zero_extend")
{
- rtx addr;
+ rtx addr, base, offset;
/* Handle sign/zero extend. */
if (GET_CODE (op) == ZERO_EXTEND
@@ -1813,24 +1813,79 @@
}
addr = XEXP (op, 0);
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ return 0;
+
+ base = XEXP (addr, 0);
+ if (!base_reg_operand (base, GET_MODE (base)))
+ return 0;
+
+ offset = XEXP (addr, 1);
+
if (GET_CODE (addr) == PLUS)
+ return satisfies_constraint_I (offset);
+
+ else if (GET_CODE (addr) == LO_SUM)
{
- rtx base = XEXP (addr, 0);
- rtx offset = XEXP (addr, 1);
+ if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
+ return small_toc_ref (offset, GET_MODE (offset));
- return (base_reg_operand (base, GET_MODE (base))
- && satisfies_constraint_I (offset));
+ else if (TARGET_ELF && !TARGET_POWERPC64)
+ return CONSTANT_P (offset);
}
- else if (GET_CODE (addr) == LO_SUM)
+ return 0;
+})
+
+;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
+;; memory field with both the addis and the memory offset. Sign extension
+;; is not handled here, since lha and lwa are not fused.
+(define_predicate "fusion_gpr_mem_combo"
+ (match_code "mem,zero_extend")
+{
+ rtx addr, base, offset;
+
+ /* Handle zero extend. */
+ if (GET_CODE (op) == ZERO_EXTEND)
{
- rtx base = XEXP (addr, 0);
- rtx offset = XEXP (addr, 1);
+ op = XEXP (op, 0);
+ mode = GET_MODE (op);
+ }
+
+ if (!MEM_P (op))
+ return 0;
- if (!base_reg_operand (base, GET_MODE (base)))
+ switch (mode)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ break;
+
+ case DImode:
+ if (!TARGET_POWERPC64)
return 0;
+ break;
- else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
+ default:
+ return 0;
+ }
+
+ addr = XEXP (op, 0);
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ return 0;
+
+ base = XEXP (addr, 0);
+ if (!fusion_gpr_addis (base, GET_MODE (base)))
+ return 0;
+
+ offset = XEXP (addr, 1);
+ if (GET_CODE (addr) == PLUS)
+ return satisfies_constraint_I (offset);
+
+ else if (GET_CODE (addr) == LO_SUM)
+ {
+ if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
return small_toc_ref (offset, GET_MODE (offset));
else if (TARGET_ELF && !TARGET_POWERPC64)
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
index 220d1e970..9bb870394 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def
@@ -1258,6 +1258,16 @@ BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df)
BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di)
BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df)
BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di)
+BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df)
+BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di)
+BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di)
+BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di)
+BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di)
+
+BU_VSX_2 (XVCVSXDDP_SCALE, "xvcvsxddp_scale", CONST, vsx_xvcvsxddp_scale)
+BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", CONST, vsx_xvcvuxddp_scale)
+BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale)
+BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale)
/* VSX abs builtin functions. */
BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2)
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-c.c b/gcc-4.9/gcc/config/rs6000/rs6000-c.c
index 46c4a9d8c..8dedeec26 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000-c.c
+++ b/gcc-4.9/gcc/config/rs6000/rs6000-c.c
@@ -597,6 +597,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ROUND, VSX_BUILTIN_XVRDPI,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF,
@@ -877,6 +879,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND,
RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
@@ -931,6 +945,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC,
RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
@@ -1118,18 +1144,30 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX,
RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVSXDDP_SCALE,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0},
+ { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVUXDDP_SCALE,
+ RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0},
{ ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX,
RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX,
RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS,
RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTS, VSX_BUILTIN_XVCVDPSXDS_SCALE,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CTU, VSX_BUILTIN_XVCVDPUXDS_SCALE,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 },
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI,
@@ -1595,6 +1633,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW,
@@ -1643,6 +1691,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW,
@@ -1771,6 +1829,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB,
@@ -1812,6 +1874,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
@@ -1842,6 +1916,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR,
RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
@@ -1945,6 +2031,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
@@ -2127,6 +2215,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW,
@@ -2519,6 +2615,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
@@ -2778,6 +2886,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI },
+ { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI },
{ ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI,
@@ -2818,6 +2928,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI },
{ ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DI },
{ ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI },
{ ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF,
@@ -3267,6 +3383,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 },
+ { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 },
{ VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI,
@@ -3321,6 +3439,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF },
+ { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF,
+ RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI },
{ VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI,
@@ -3431,6 +3551,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
{ ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
{ ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P,
@@ -3889,12 +4021,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
@@ -4128,7 +4264,8 @@ altivec_build_resolved_builtin (tree *args, int n,
argument) is reversed. Patch the arguments here before building
the resolved CALL_EXPR. */
if (desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P
- && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P)
+ && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P
+ && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P)
{
tree t;
t = args[2], args[2] = args[1], args[1] = t;
@@ -4186,6 +4323,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
(int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+ /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */
+ if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !VECTOR_ELT_ORDER_BIG)
+ warning (OPT_Wdeprecated, "vec_lvsl is deprecated for little endian; use \
+assignment for unaligned loads and stores");
+ else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !VECTOR_ELT_ORDER_BIG)
+ warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \
+assignment for unaligned loads and stores");
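Illustration (not part of the patch): the replacement the new warning recommends is a plain vector assignment, which the compiler lowers to an unaligned VSX access, instead of the vec_lvsl/vec_lvsr plus vec_perm realignment idiom. A minimal sketch, assuming a VSX-enabled little-endian target:

    #include <altivec.h>

    /* Unaligned load and store written as assignment, as the warning suggests.  */
    vector int
    load_v4si (const int *p)
    {
      return *(const vector int *) p;   /* unaligned vector load  */
    }

    void
    store_v4si (int *p, vector int v)
    {
      *(vector int *) p = v;            /* unaligned vector store */
    }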
/* For now treat vec_splats and vec_promote as the same. */
if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
index 067a74aa6..aa8e76249 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h
@@ -65,6 +65,7 @@ extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned);
extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
+extern void rs6000_scale_v2df (rtx, rtx, int);
extern void build_mask64_2_operands (rtx, rtx *);
extern int expand_block_clear (rtx[]);
extern int expand_block_move (rtx[]);
@@ -79,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
extern bool direct_move_p (rtx, rtx);
extern bool quad_load_store_p (rtx, rtx);
-extern bool fusion_gpr_load_p (rtx *, bool);
+extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx);
extern void expand_fusion_gpr_load (rtx *);
-extern const char *emit_fusion_gpr_load (rtx *);
+extern const char *emit_fusion_gpr_load (rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c
index 28ccf86df..730e6c8a6 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.c
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.c
@@ -79,6 +79,9 @@
#include "dumpfile.h"
#include "cgraph.h"
#include "target-globals.h"
+#include "real.h"
+#include "context.h"
+#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
#endif
@@ -1171,6 +1174,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
enum machine_mode,
secondary_reload_info *,
bool);
+rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
@@ -1541,17 +1545,6 @@ static const struct attribute_spec rs6000_attribute_table[] =
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif
-/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
- The PowerPC architecture requires only weak consistency among
- processors--that is, memory accesses between processors need not be
- sequentially consistent and memory accesses among processors can occur
- in any order. The ability to order memory accesses weakly provides
- opportunities for more efficient use of the system bus. Unless a
- dependency exists, the 604e allows read operations to precede store
- operations. */
-#undef TARGET_RELAXED_ORDERING
-#define TARGET_RELAXED_ORDERING true
-
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
@@ -4084,6 +4077,15 @@ static void
rs6000_option_override (void)
{
(void) rs6000_option_override_internal (true);
+
+ /* Register machine-specific passes. This needs to be done at start-up.
+ It's convenient to do it here (like i386 does). */
+ opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
+
+ static struct register_pass_info analyze_swaps_info
+ = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
+
+ register_pass (&analyze_swaps_info);
}
@@ -6896,24 +6898,6 @@ rs6000_delegitimize_address (rtx orig_x)
if (GET_CODE (y) == UNSPEC
&& XINT (y, 1) == UNSPEC_TOCREL)
{
-#ifdef ENABLE_CHECKING
- if (REG_P (XVECEXP (y, 0, 1))
- && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
- {
- /* All good. */
- }
- else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
- {
- /* Weirdness alert. df_note_compute can replace r2 with a
- debug_expr when this unspec is in a debug_insn.
- Seen in gcc.dg/pr51957-1.c */
- }
- else
- {
- debug_rtx (orig_x);
- abort ();
- }
-#endif
y = XVECEXP (y, 0, 0);
#ifdef HAVE_AS_TLS
@@ -13842,8 +13826,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
- : (int) CODE_FOR_altivec_lvsl);
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
@@ -13871,7 +13855,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
- /*pat = gen_altivec_lvsr (target, op);*/
pat = GEN_FCN (icode) (target, op);
if (!pat)
return 0;
@@ -16654,10 +16637,13 @@ rs6000_secondary_reload (bool in_p,
: (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
&& (offset & 3) != 0))
{
+ /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
if (in_p)
- sri->icode = CODE_FOR_reload_di_load;
+ sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
+ : CODE_FOR_reload_di_load);
else
- sri->icode = CODE_FOR_reload_di_store;
+ sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
+ : CODE_FOR_reload_di_store);
sri->extra_cost = 2;
ret = NO_REGS;
}
@@ -30923,6 +30909,23 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
}
+/* Scale the V2DF vector SRC by 2**SCALE and place the result in TGT. */
+void
+rs6000_scale_v2df (rtx tgt, rtx src, int scale)
+{
+ HOST_WIDE_INT hwi_scale (scale);
+ REAL_VALUE_TYPE r_pow;
+ rtvec v = rtvec_alloc (2);
+ rtx elt;
+ rtx scale_vec = gen_reg_rtx (V2DFmode);
+ (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
+ elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
+ RTVEC_ELT (v, 0) = elt;
+ RTVEC_ELT (v, 1) = elt;
+ rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
+ emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
+}
+
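A scalar model of the new helper's semantics, for illustration only (hypothetical function name, not part of the patch): each V2DF element is multiplied by 2**SCALE.

    #include <math.h>

    /* Scalar sketch of rs6000_scale_v2df: tgt[i] = src[i] * 2**scale.  */
    static void
    scale_v2df_model (double tgt[2], const double src[2], int scale)
    {
      double factor = ldexp (1.0, scale);   /* 2 raised to the power SCALE  */
      tgt[0] = src[0] * factor;
      tgt[1] = src[1] * factor;
    }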
/* Return an RTX representing where to find the function value of a
function returning MODE. */
static rtx
@@ -32551,6 +32554,14 @@ rs6000_split_logical_inner (rtx dest,
if (complement_op2_p)
op2 = gen_rtx_NOT (mode, op2);
+ /* For canonical RTL, if only one arm is inverted it is the first. */
+ if (!complement_op1_p && complement_op2_p)
+ {
+ rtx temp = op1;
+ op1 = op2;
+ op2 = temp;
+ }
+
bool_rtx = ((code == NOT)
? gen_rtx_NOT (mode, op1)
: gen_rtx_fmt_ee (code, mode, op1, op2));
@@ -32755,25 +32766,14 @@ rs6000_split_logical (rtx operands[3],
/* Return true if the peephole2 can combine a load involving a combination of
an addis instruction and a load with an offset that can be fused together on
- a power8.
-
- The operands are:
- operands[0] register set with addis
- operands[1] value set via addis
- operands[2] target register being loaded
- operands[3] D-form memory reference using operands[0].
-
- In addition, we are passed a boolean that is true if this is a peephole2,
- and we can use see if the addis_reg is dead after the insn and can be
- replaced by the target register. */
+ a power8. */
bool
-fusion_gpr_load_p (rtx *operands, bool peep2_p)
+fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
+ rtx addis_value, /* addis value. */
+ rtx target, /* target register that is loaded. */
+ rtx mem) /* bottom part of the memory addr. */
{
- rtx addis_reg = operands[0];
- rtx addis_value = operands[1];
- rtx target = operands[2];
- rtx mem = operands[3];
rtx addr;
rtx base_reg;
@@ -32787,9 +32787,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
return false;
- if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
- return false;
-
/* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND
|| (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
@@ -32798,22 +32795,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!MEM_P (mem))
return false;
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
+ return false;
+
addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return false;
/* Validate that the register used to load the high value is either the
- register being loaded, or we can safely replace its use in a peephole2.
+ register being loaded, or we can safely replace its use.
- If this is a peephole2, we assume that there are 2 instructions in the
- peephole (addis and load), so we want to check if the target register was
- not used in the memory address and the register to hold the addis result
- is dead after the peephole. */
+ This function is only called from the peephole2 pass and we assume that
+ there are 2 instructions in the peephole (addis and load), so we want to
+ check if the target register was not used in the memory address and the
+ register to hold the addis result is dead after the peephole. */
if (REGNO (addis_reg) != REGNO (target))
{
- if (!peep2_p)
- return false;
-
if (reg_mentioned_p (target, mem))
return false;
@@ -32854,9 +32851,6 @@ expand_fusion_gpr_load (rtx *operands)
enum machine_mode extend_mode = target_mode;
enum machine_mode ptr_mode = Pmode;
enum rtx_code extend = UNKNOWN;
- rtx addis_reg = ((ptr_mode == target_mode)
- ? target
- : simplify_subreg (ptr_mode, target, target_mode, 0));
if (GET_CODE (orig_mem) == ZERO_EXTEND
|| (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
@@ -32873,13 +32867,14 @@ expand_fusion_gpr_load (rtx *operands)
gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
offset = XEXP (orig_addr, 1);
- new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
- new_mem = change_address (orig_mem, target_mode, new_addr);
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
+ new_mem = replace_equiv_address_nv (orig_mem, new_addr);
if (extend != UNKNOWN)
new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
- emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
+ new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
+ UNSPEC_FUSION_GPR);
emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
if (extend == SIGN_EXTEND)
@@ -32898,55 +32893,40 @@ expand_fusion_gpr_load (rtx *operands)
}
/* Return a string to fuse an addis instruction with a gpr load to the same
- register that we loaded up the addis instruction. The code is complicated,
- so we call output_asm_insn directly, and just return "".
+ register that we loaded up the addis instruction. The address that is used
+ is the logical address that was formed during peephole2:
+ (lo_sum (high) (low-part))
- The operands are:
- operands[0] register set with addis (must be same reg as target).
- operands[1] value set via addis
- operands[2] target register being loaded
- operands[3] D-form memory reference using operands[0]. */
+ The code is complicated, so we call output_asm_insn directly, and just
+ return "". */
const char *
-emit_fusion_gpr_load (rtx *operands)
+emit_fusion_gpr_load (rtx target, rtx mem)
{
- rtx addis_reg = operands[0];
- rtx addis_value = operands[1];
- rtx target = operands[2];
- rtx mem = operands[3];
+ rtx addis_value;
rtx fuse_ops[10];
rtx addr;
rtx load_offset;
const char *addis_str = NULL;
const char *load_str = NULL;
- const char *extend_insn = NULL;
const char *mode_name = NULL;
char insn_template[80];
enum machine_mode mode;
const char *comment_str = ASM_COMMENT_START;
- bool sign_p = false;
- gcc_assert (REG_P (addis_reg) && REG_P (target));
- gcc_assert (REGNO (addis_reg) == REGNO (target));
-
- if (*comment_str == ' ')
- comment_str++;
-
- /* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
- else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
- {
- sign_p = true;
- mem = XEXP (mem, 0);
- }
+ gcc_assert (REG_P (target) && MEM_P (mem));
+
+ if (*comment_str == ' ')
+ comment_str++;
- gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
gcc_unreachable ();
+ addis_value = XEXP (addr, 0);
load_offset = XEXP (addr, 1);
/* Now emit the load instruction to the same register. */
@@ -32956,29 +32936,22 @@ emit_fusion_gpr_load (rtx *operands)
case QImode:
mode_name = "char";
load_str = "lbz";
- extend_insn = "extsb %0,%0";
break;
case HImode:
mode_name = "short";
load_str = "lhz";
- extend_insn = "extsh %0,%0";
break;
case SImode:
mode_name = "int";
load_str = "lwz";
- extend_insn = "extsw %0,%0";
break;
case DImode:
- if (TARGET_POWERPC64)
- {
- mode_name = "long";
- load_str = "ld";
- }
- else
- gcc_unreachable ();
+ gcc_assert (TARGET_POWERPC64);
+ mode_name = "long";
+ load_str = "ld";
break;
default:
@@ -33122,17 +33095,1191 @@ emit_fusion_gpr_load (rtx *operands)
else
fatal_insn ("Unable to generate load offset for fusion", load_offset);
- /* Handle sign extension. The peephole2 pass generates this as a separate
- insn, but we handle it just in case it got reattached. */
- if (sign_p)
+ return "";
+}
+
+/* Analyze vector computations and remove unnecessary doubleword
+ swaps (xxswapdi instructions). This pass is performed only
+ for little-endian VSX code generation.
+
+ For this specific case, loads and stores of 4x32 and 2x64 vectors
+ are inefficient. These are implemented using the lxvd2x and
+ stxvd2x instructions, which invert the order of doublewords in
+ a vector register. Thus the code generation inserts an xxswapdi
+ after each such load, and prior to each such store. (For spill
+ code after register assignment, an additional xxswapdi is inserted
+ following each store in order to return a hard register to its
+ unpermuted value.)
+
+ The extra xxswapdi instructions reduce performance. This can be
+ particularly bad for vectorized code. The purpose of this pass
+ is to reduce the number of xxswapdi instructions required for
+ correctness.
+
+ The primary insight is that much code that operates on vectors
+ does not care about the relative order of elements in a register,
+ so long as the correct memory order is preserved. If we have
+ a computation where all input values are provided by lxvd2x/xxswapdi
+ sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
+ and all intermediate computations are pure SIMD (independent of
+ element order), then all the xxswapdi's associated with the loads
+ and stores may be removed.
+
+ This pass uses some of the infrastructure and logical ideas from
+ the "web" pass in web.c. We create maximal webs of computations
+ fitting the description above using union-find. Each such web is
+ then optimized by removing its unnecessary xxswapdi instructions.
+
+ The pass is placed prior to global optimization so that we can
+ perform the optimization in the safest and simplest way possible;
+ that is, by replacing each xxswapdi insn with a register copy insn.
+ Subsequent forward propagation will remove copies where possible.
+
+ There are some operations sensitive to element order for which we
+ can still allow the operation, provided we modify those operations.
+ These include CONST_VECTORs, for which we must swap the first and
+ second halves of the constant vector; and SUBREGs, for which we
+ must adjust the byte offset to account for the swapped doublewords.
+ A remaining opportunity would be non-immediate-form splats, for
+ which we should adjust the selected lane of the input. We should
+ also make code generation adjustments for sum-across operations,
+ since this is a common vectorizer reduction.
+
+ Because we run prior to the first split, we can see loads and stores
+ here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
+ vector loads and stores that have not yet been split into a permuting
+ load/store and a swap. (One way this can happen is with a builtin
+ call to vec_vsx_{ld,st}.) We can handle these as well, but rather
+ than deleting a swap, we convert the load/store into a permuting
+ load/store (which effectively removes the swap). */
+
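Illustration of the kind of code the pass targets (hypothetical source, not from the patch): when the loop below is vectorized for little-endian VSX, each vector load is followed by an xxswapdi and each vector store is preceded by one; the addition is pure SIMD, so every such swap is removable.

    /* Element order never matters in this computation, so the xxswapdi
       instructions bracketing its vector loads and stores are unnecessary.  */
    void
    vadd (int *restrict a, const int *restrict b, const int *restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + c[i];
    }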
+/* Notes on Permutes
+
+ We do not currently handle computations that contain permutes. There
+ is a general transformation that can be performed correctly, but it
+ may introduce more expensive code than it replaces. To handle these
+ would require a cost model to determine when to perform the optimization.
+ This commentary records how this could be done if desired.
+
+ The most general permute is something like this (example for V16QI):
+
+ (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
+ (parallel [(const_int a0) (const_int a1)
+ ...
+ (const_int a14) (const_int a15)]))
+
+ where a0,...,a15 are in [0,31] and select which elements of op1 and op2
+ appear in the result.
+
+ Regardless of mode, we can convert the PARALLEL to a mask of 16
+ byte-element selectors. Let's call this M, with M[i] representing
+ the ith byte-element selector value. Then if we swap doublewords
+ throughout the computation, we can get correct behavior by replacing
+ M with M' as follows:
+
+           { M[i+8]+8 : i < 8,  M[i+8] in [0,7]  U [16,23]
+   M'[i] = { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
+           { M[i-8]+8 : i >= 8, M[i-8] in [0,7]  U [16,23]
+           { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
+
+ This seems promising at first, since we are just replacing one mask
+ with another. But certain masks are preferable to others. If M
+ is a mask that matches a vmrghh pattern, for example, M' certainly
+ will not. Instead of a single vmrghh, we would generate a load of
+ M' and a vperm. So we would need to know how many xxswapd's we can
+ remove as a result of this transformation to determine if it's
+ profitable; and preferably the logic would need to be aware of all
+ the special preferable masks.
+
+ Another form of permute is an UNSPEC_VPERM, in which the mask is
+ already in a register. In some cases, this mask may be a constant
+ that we can discover with ud-chains, in which case the above
+ transformation is ok. However, the common usage here is for the
+ mask to be produced by an UNSPEC_LVSL, in which case the mask
+ cannot be known at compile time. In such a case we would have to
+ generate several instructions to compute M' as above at run time,
+ and a cost model is needed again. */
+
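A stand-alone sketch of the M -> M' rewrite described in the notes above, operating on the 16 byte-element selectors of a V16QI permute (illustrative helper, not part of the patch):

    /* Rewrite a permute mask for doubleword-swapped inputs and output,
       following the piecewise definition of M' given above.  */
    static void
    remap_permute_mask (unsigned char m_prime[16], const unsigned char m[16])
    {
      int i;
      for (i = 0; i < 16; i++)
        {
          unsigned char sel = m[(i + 8) % 16];   /* swap the two halves of the mask  */
          if ((sel % 16) < 8)                    /* selector in [0,7] U [16,23]      */
            m_prime[i] = sel + 8;
          else                                   /* selector in [8,15] U [24,31]     */
            m_prime[i] = sel - 8;
        }
    }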
+/* This is based on the union-find logic in web.c. web_entry_base is
+ defined in df.h. */
+class swap_web_entry : public web_entry_base
+{
+ public:
+ /* Pointer to the insn. */
+ rtx insn;
+ /* Set if insn contains a mention of a vector register. All other
+ fields are undefined if this field is unset. */
+ unsigned int is_relevant : 1;
+ /* Set if insn is a load. */
+ unsigned int is_load : 1;
+ /* Set if insn is a store. */
+ unsigned int is_store : 1;
+ /* Set if insn is a doubleword swap. This can either be a register swap
+ or a permuting load or store (test is_load and is_store for this). */
+ unsigned int is_swap : 1;
+ /* Set if the insn has a live-in use of a parameter register. */
+ unsigned int is_live_in : 1;
+ /* Set if the insn has a live-out def of a return register. */
+ unsigned int is_live_out : 1;
+ /* Set if the insn contains a subreg reference of a vector register. */
+ unsigned int contains_subreg : 1;
+ /* Set if the insn contains a 128-bit integer operand. */
+ unsigned int is_128_int : 1;
+ /* Set if this is a call-insn. */
+ unsigned int is_call : 1;
+ /* Set if this insn does not perform a vector operation for which
+ element order matters, or if we know how to fix it up if it does.
+ Undefined if is_swap is set. */
+ unsigned int is_swappable : 1;
+ /* A nonzero value indicates what kind of special handling for this
+ insn is required if doublewords are swapped. Undefined if
+ is_swappable is not set. */
+ unsigned int special_handling : 3;
+ /* Set if the web represented by this entry cannot be optimized. */
+ unsigned int web_not_optimizable : 1;
+ /* Set if this insn should be deleted. */
+ unsigned int will_delete : 1;
+};
+
+enum special_handling_values {
+ SH_NONE = 0,
+ SH_CONST_VECTOR,
+ SH_SUBREG,
+ SH_NOSWAP_LD,
+ SH_NOSWAP_ST,
+ SH_EXTRACT,
+ SH_SPLAT
+};
+
+/* Union INSN with all insns containing definitions that reach USE.
+ Detect whether USE is live-in to the current function. */
+static void
+union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
+{
+ struct df_link *link = DF_REF_CHAIN (use);
+
+ if (!link)
+ insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+ while (link)
{
- gcc_assert (extend_insn != NULL);
- output_asm_insn (extend_insn, fuse_ops);
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+ if (DF_REF_INSN_INFO (link->ref))
+ {
+ rtx def_insn = DF_REF_INSN (link->ref);
+ (void)unionfind_union (insn_entry + INSN_UID (insn),
+ insn_entry + INSN_UID (def_insn));
+ }
+
+ link = link->next;
}
+}
- return "";
+/* Union INSN with all insns containing uses reached from DEF.
+ Detect whether DEF is live-out from the current function. */
+static void
+union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
+{
+ struct df_link *link = DF_REF_CHAIN (def);
+
+ if (!link)
+ insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+ while (link)
+ {
+ /* This could be an eh use or some other artificial use;
+ we treat these all the same (killing the optimization). */
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+ if (DF_REF_INSN_INFO (link->ref))
+ {
+ rtx use_insn = DF_REF_INSN (link->ref);
+ (void)unionfind_union (insn_entry + INSN_UID (insn),
+ insn_entry + INSN_UID (use_insn));
+ }
+
+ link = link->next;
+ }
+}
+
+/* Return 1 iff INSN is a load insn, including permuting loads that
+ represent an lxvd2x instruction; else return 0. */
+static unsigned int
+insn_is_load_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+
+ if (GET_CODE (body) == SET)
+ {
+ if (GET_CODE (SET_SRC (body)) == MEM)
+ return 1;
+
+ if (GET_CODE (SET_SRC (body)) == VEC_SELECT
+ && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
+ return 1;
+
+ return 0;
+ }
+
+ if (GET_CODE (body) != PARALLEL)
+ return 0;
+
+ rtx set = XVECEXP (body, 0, 0);
+
+ if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
+ return 1;
+
+ return 0;
+}
+
+/* Return 1 iff INSN is a store insn, including permuting stores that
+ represent an stxvd2x instruction; else return 0. */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
+ return 1;
+ if (GET_CODE (body) != PARALLEL)
+ return 0;
+ rtx set = XVECEXP (body, 0, 0);
+ if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
+ return 1;
+ return 0;
}
+/* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
+ a permuting load, or a permuting store. */
+static unsigned int
+insn_is_swap_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ if (GET_CODE (body) != SET)
+ return 0;
+ rtx rhs = SET_SRC (body);
+ if (GET_CODE (rhs) != VEC_SELECT)
+ return 0;
+ rtx parallel = XEXP (rhs, 1);
+ if (GET_CODE (parallel) != PARALLEL)
+ return 0;
+ unsigned int len = XVECLEN (parallel, 0);
+ if (len != 2 && len != 4 && len != 8 && len != 16)
+ return 0;
+ for (unsigned int i = 0; i < len / 2; ++i)
+ {
+ rtx op = XVECEXP (parallel, 0, i);
+ if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
+ return 0;
+ }
+ for (unsigned int i = len / 2; i < len; ++i)
+ {
+ rtx op = XVECEXP (parallel, 0, i);
+ if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
+ return 0;
+ }
+ return 1;
+}
+
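For reference, the selector this predicate accepts is the one exchanging the two doubleword halves, e.g. {2, 3, 0, 1} for V4SI or {8, ..., 15, 0, ..., 7} for V16QI. A stand-alone model of the check (illustration only):

    /* Return 1 if SEL of length LEN selects the second half of the elements
       followed by the first half, i.e. a doubleword swap.  */
    static int
    selector_is_dword_swap (const int *sel, int len)
    {
      int i;
      for (i = 0; i < len / 2; i++)
        if (sel[i] != len / 2 + i)
          return 0;
      for (i = len / 2; i < len; i++)
        if (sel[i] != i - len / 2)
          return 0;
      return 1;
    }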
+/* Return 1 iff OP is an operand that will not be affected by having
+ vector doublewords swapped in memory. */
+static unsigned int
+rtx_is_swappable_p (rtx op, unsigned int *special)
+{
+ enum rtx_code code = GET_CODE (op);
+ int i, j;
+ rtx parallel;
+
+ switch (code)
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CLOBBER:
+ case REG:
+ return 1;
+
+ case VEC_CONCAT:
+ case ASM_INPUT:
+ case ASM_OPERANDS:
+ return 0;
+
+ case CONST_VECTOR:
+ {
+ *special = SH_CONST_VECTOR;
+ return 1;
+ }
+
+ case VEC_DUPLICATE:
+ /* Opportunity: If XEXP (op, 0) has the same mode as the result,
+ and XEXP (op, 1) is a PARALLEL with a single QImode const int,
+ it represents a vector splat for which we can do special
+ handling. */
+ if (GET_CODE (XEXP (op, 0)) == CONST_INT)
+ return 1;
+ else if (GET_CODE (XEXP (op, 0)) == REG
+ && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
+ /* This catches V2DF and V2DI splat, at a minimum. */
+ return 1;
+ else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
+ /* If the duplicated item is from a select, defer to the select
+ processing to see if we can change the lane for the splat. */
+ return rtx_is_swappable_p (XEXP (op, 0), special);
+ else
+ return 0;
+
+ case VEC_SELECT:
+ /* A vec_extract operation is ok if we change the lane. */
+ if (GET_CODE (XEXP (op, 0)) == REG
+ && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 1
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
+ {
+ *special = SH_EXTRACT;
+ return 1;
+ }
+ else
+ return 0;
+
+ case UNSPEC:
+ {
+ /* Various operations are unsafe for this optimization, at least
+ without significant additional work. Permutes are obviously
+ problematic, as both the permute control vector and the ordering
+ of the target values are invalidated by doubleword swapping.
+ Vector pack and unpack modify the number of vector lanes.
+ Merge-high/low will not operate correctly on swapped operands.
+ Vector shifts across element boundaries are clearly unsafe,
+ as are vector select and concatenate operations. Vector
+ sum-across instructions define one operand with a specific
+ order-dependent element, so additional fixup code would be
+ needed to make those work. Vector set and non-immediate-form
+ vector splat are element-order sensitive. A few of these
+ cases might be workable with special handling if required. */
+ int val = XINT (op, 1);
+ switch (val)
+ {
+ default:
+ break;
+ case UNSPEC_VMRGH_DIRECT:
+ case UNSPEC_VMRGL_DIRECT:
+ case UNSPEC_VPACK_SIGN_SIGN_SAT:
+ case UNSPEC_VPACK_SIGN_UNS_SAT:
+ case UNSPEC_VPACK_UNS_UNS_MOD:
+ case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
+ case UNSPEC_VPACK_UNS_UNS_SAT:
+ case UNSPEC_VPERM:
+ case UNSPEC_VPERM_UNS:
+ case UNSPEC_VPERMHI:
+ case UNSPEC_VPERMSI:
+ case UNSPEC_VPKPX:
+ case UNSPEC_VSLDOI:
+ case UNSPEC_VSLO:
+ case UNSPEC_VSRO:
+ case UNSPEC_VSUM2SWS:
+ case UNSPEC_VSUM4S:
+ case UNSPEC_VSUM4UBS:
+ case UNSPEC_VSUMSWS:
+ case UNSPEC_VSUMSWS_DIRECT:
+ case UNSPEC_VSX_CONCAT:
+ case UNSPEC_VSX_SET:
+ case UNSPEC_VSX_SLDWI:
+ case UNSPEC_VUNPACK_HI_SIGN:
+ case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
+ case UNSPEC_VUNPACK_LO_SIGN:
+ case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
+ case UNSPEC_VUPKHPX:
+ case UNSPEC_VUPKHS_V4SF:
+ case UNSPEC_VUPKHU_V4SF:
+ case UNSPEC_VUPKLPX:
+ case UNSPEC_VUPKLS_V4SF:
+ case UNSPEC_VUPKLU_V4SF:
+ /* The following could be handled as an idiom with XXSPLTW.
+ These place a scalar in BE element zero, but the XXSPLTW
+ will currently expect it in BE element 2 in a swapped
+ region. When one of these feeds an XXSPLTW with no other
+ defs/uses either way, we can avoid the lane change for
+ XXSPLTW and things will be correct. TBD. */
+ case UNSPEC_VSX_CVDPSPN:
+ case UNSPEC_VSX_CVSPDP:
+ case UNSPEC_VSX_CVSPDPN:
+ return 0;
+ case UNSPEC_VSPLT_DIRECT:
+ *special = SH_SPLAT;
+ return 1;
+ }
+ }
+
+ default:
+ break;
+ }
+
+ const char *fmt = GET_RTX_FORMAT (code);
+ int ok = 1;
+
+ for (i = 0; i < GET_RTX_LENGTH (code); ++i)
+ if (fmt[i] == 'e' || fmt[i] == 'u')
+ {
+ unsigned int special_op = SH_NONE;
+ ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
+ /* Ensure we never have two kinds of special handling
+ for the same insn. */
+ if (*special != SH_NONE && special_op != SH_NONE
+ && *special != special_op)
+ return 0;
+ *special = special_op;
+ }
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (op, i); ++j)
+ {
+ unsigned int special_op = SH_NONE;
+ ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
+ /* Ensure we never have two kinds of special handling
+ for the same insn. */
+ if (*special != SH_NONE && special_op != SH_NONE
+ && *special != special_op)
+ return 0;
+ *special = special_op;
+ }
+
+ return ok;
+}
+
+/* Return 1 iff INSN is an operand that will not be affected by
+ having vector doublewords swapped in memory (in which case
+ *SPECIAL is unchanged), or that can be modified to be correct
+ if vector doublewords are swapped in memory (in which case
+ *SPECIAL is changed to a value indicating how). */
+static unsigned int
+insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
+ unsigned int *special)
+{
+ /* Calls are always bad. */
+ if (GET_CODE (insn) == CALL_INSN)
+ return 0;
+
+ /* Loads and stores seen here are not permuting, but we can still
+ fix them up by converting them to permuting ones. Exceptions:
+ UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
+ body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
+ for the SET source. */
+ rtx body = PATTERN (insn);
+ int i = INSN_UID (insn);
+
+ if (insn_entry[i].is_load)
+ {
+ if (GET_CODE (body) == SET)
+ {
+ *special = SH_NOSWAP_LD;
+ return 1;
+ }
+ else
+ return 0;
+ }
+
+ if (insn_entry[i].is_store)
+ {
+ if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
+ {
+ *special = SH_NOSWAP_ST;
+ return 1;
+ }
+ else
+ return 0;
+ }
+
+ /* Otherwise check the operands for vector lane violations. */
+ return rtx_is_swappable_p (body, special);
+}
+
+enum chain_purpose { FOR_LOADS, FOR_STORES };
+
+/* Return true if the UD or DU chain headed by LINK is non-empty,
+ and every entry on the chain references an insn that is a
+ register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
+ register swap must have only permuting loads as reaching defs.
+ If PURPOSE is FOR_STORES, each such register swap must have only
+ register swaps or permuting stores as reached uses. */
+static bool
+chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
+ enum chain_purpose purpose)
+{
+ if (!link)
+ return false;
+
+ for (; link; link = link->next)
+ {
+ if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
+ continue;
+
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ return false;
+
+ rtx reached_insn = DF_REF_INSN (link->ref);
+ unsigned uid = INSN_UID (reached_insn);
+
+ if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
+ || insn_entry[uid].is_store)
+ return false;
+
+ if (purpose == FOR_LOADS)
+ {
+ df_ref *use_rec;
+ for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
+ {
+ df_ref use = *use_rec;
+ struct df_link *swap_link = DF_REF_CHAIN (use);
+
+ while (swap_link)
+ {
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ return false;
+
+ rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
+ unsigned uid2 = INSN_UID (swap_def_insn);
+
+ /* Only permuting loads are allowed. */
+ if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
+ return false;
+
+ swap_link = swap_link->next;
+ }
+ }
+ }
+ else if (purpose == FOR_STORES)
+ {
+ df_ref *def_rec;
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ struct df_link *swap_link = DF_REF_CHAIN (def);
+
+ while (swap_link)
+ {
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ return false;
+
+ rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
+ unsigned uid2 = INSN_UID (swap_use_insn);
+
+ /* Permuting stores or register swaps are allowed. */
+ if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
+ return false;
+
+ swap_link = swap_link->next;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/* Mark the xxswapdi instructions associated with permuting loads and
+ stores for removal. Note that we only flag them for deletion here,
+ as there is a possibility of a swap being reached from multiple
+ loads, etc. */
+static void
+mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
+{
+ rtx insn = insn_entry[i].insn;
+ unsigned uid = INSN_UID (insn);
+
+ if (insn_entry[i].is_load)
+ {
+ df_ref *def_rec;
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ struct df_link *link = DF_REF_CHAIN (def);
+
+ /* We know by now that these are swaps, so we can delete
+ them confidently. */
+ while (link)
+ {
+ rtx use_insn = DF_REF_INSN (link->ref);
+ insn_entry[INSN_UID (use_insn)].will_delete = 1;
+ link = link->next;
+ }
+ }
+ }
+ else if (insn_entry[i].is_store)
+ {
+ df_ref *use_rec;
+ for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
+ {
+ df_ref use = *use_rec;
+ /* Ignore uses for addressability. */
+ machine_mode mode = GET_MODE (DF_REF_REG (use));
+ if (!VECTOR_MODE_P (mode))
+ continue;
+
+ struct df_link *link = DF_REF_CHAIN (use);
+
+ /* We know by now that these are swaps, so we can delete
+ them confidently. */
+ while (link)
+ {
+ rtx def_insn = DF_REF_INSN (link->ref);
+ insn_entry[INSN_UID (def_insn)].will_delete = 1;
+ link = link->next;
+ }
+ }
+ }
+}
+
+/* OP is either a CONST_VECTOR or an expression containing one.
+ Swap the first half of the vector with the second in the first
+ case. Recurse to find it in the second. */
+static void
+swap_const_vector_halves (rtx op)
+{
+ int i;
+ enum rtx_code code = GET_CODE (op);
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
+ for (i = 0; i < half_units; ++i)
+ {
+ rtx temp = CONST_VECTOR_ELT (op, i);
+ CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
+ CONST_VECTOR_ELT (op, i + half_units) = temp;
+ }
+ }
+ else
+ {
+ int j;
+ const char *fmt = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); ++i)
+ if (fmt[i] == 'e' || fmt[i] == 'u')
+ swap_const_vector_halves (XEXP (op, i));
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (op, i); ++j)
+ swap_const_vector_halves (XVECEXP (op, i, j));
+ }
+}
+
+/* Find all subregs of a vector expression that perform a narrowing,
+ and adjust the subreg index to account for doubleword swapping. */
+static void
+adjust_subreg_index (rtx op)
+{
+ enum rtx_code code = GET_CODE (op);
+ if (code == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (op))
+ < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
+ {
+ unsigned int index = SUBREG_BYTE (op);
+ if (index < 8)
+ index += 8;
+ else
+ index -= 8;
+ SUBREG_BYTE (op) = index;
+ }
+
+ const char *fmt = GET_RTX_FORMAT (code);
+ int i, j;
+ for (i = 0; i < GET_RTX_LENGTH (code); ++i)
+ if (fmt[i] == 'e' || fmt[i] == 'u')
+ adjust_subreg_index (XEXP (op, i));
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (op, i); ++j)
+ adjust_subreg_index (XVECEXP (op, i, j));
+}
+
+/* Convert the non-permuting load INSN to a permuting one. */
+static void
+permute_load (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ rtx mem_op = SET_SRC (body);
+ rtx tgt_reg = SET_DEST (body);
+ machine_mode mode = GET_MODE (tgt_reg);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int half_elts = n_elts / 2;
+ rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
+ int i, j;
+ for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
+ XVECEXP (par, 0, i) = GEN_INT (j);
+ for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
+ XVECEXP (par, 0, i) = GEN_INT (j);
+ rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
+ SET_SRC (body) = sel;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Replacing load %d with permuted load\n",
+ INSN_UID (insn));
+}
+
+/* Convert the non-permuting store INSN to a permuting one. */
+static void
+permute_store (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ rtx src_reg = SET_SRC (body);
+ machine_mode mode = GET_MODE (src_reg);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int half_elts = n_elts / 2;
+ rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
+ int i, j;
+ for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
+ XVECEXP (par, 0, i) = GEN_INT (j);
+ for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
+ XVECEXP (par, 0, i) = GEN_INT (j);
+ rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
+ SET_SRC (body) = sel;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Replacing store %d with permuted store\n",
+ INSN_UID (insn));
+}
+
+/* Given INSN whose body contains a vector extract operation, adjust the
+ index of the extracted lane to account for the doubleword swap. */
+static void
+adjust_extract (rtx insn)
+{
+ rtx src = SET_SRC (PATTERN (insn));
+ /* The vec_select may be wrapped in a vec_duplicate for a splat, so
+ account for that. */
+ rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
+ rtx par = XEXP (sel, 1);
+ int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
+ int lane = INTVAL (XVECEXP (par, 0, 0));
+ lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
+ XVECEXP (par, 0, 0) = GEN_INT (lane);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
+}
+
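The lane adjustment in adjust_extract above, and the one in adjust_splat below, both reduce to the same remapping: a lane index moves by half the vector length once the doublewords are swapped. A tiny model (illustration only):

    /* Map a lane index onto the corresponding lane of the doubleword-swapped
       register; e.g. lane 1 of a V4SI becomes lane 3, and vice versa.  */
    static int
    remap_lane (int lane, int n_elts)
    {
      int half_elts = n_elts / 2;
      return lane >= half_elts ? lane - half_elts : lane + half_elts;
    }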
+/* Given INSN whose body contains a vector direct-splat operation, adjust
+ the index of the source lane to account for the doubleword swap. */
+static void
+adjust_splat (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ rtx unspec = XEXP (body, 1);
+ int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
+ int lane = INTVAL (XVECEXP (unspec, 0, 1));
+ lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
+ XVECEXP (unspec, 0, 1) = GEN_INT (lane);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
+}
+
+/* The insn described by INSN_ENTRY[I] can be swapped, but only
+ with special handling. Take care of that here. */
+static void
+handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
+{
+ rtx insn = insn_entry[i].insn;
+ rtx body = PATTERN (insn);
+
+ switch (insn_entry[i].special_handling)
+ {
+ default:
+ gcc_unreachable ();
+ case SH_CONST_VECTOR:
+ {
+ /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
+ gcc_assert (GET_CODE (body) == SET);
+ rtx rhs = SET_SRC (body);
+ swap_const_vector_halves (rhs);
+ if (dump_file)
+ fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
+ break;
+ }
+ case SH_SUBREG:
+ /* A subreg of the same size is already safe. For subregs that
+ select a smaller portion of a reg, adjust the index for
+ swapped doublewords. */
+ adjust_subreg_index (body);
+ if (dump_file)
+ fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
+ break;
+ case SH_NOSWAP_LD:
+ /* Convert a non-permuting load to a permuting one. */
+ permute_load (insn);
+ break;
+ case SH_NOSWAP_ST:
+ /* Convert a non-permuting store to a permuting one. */
+ permute_store (insn);
+ break;
+ case SH_EXTRACT:
+ /* Change the lane on an extract operation. */
+ adjust_extract (insn);
+ break;
+ case SH_SPLAT:
+ /* Change the lane on a direct-splat operation. */
+ adjust_splat (insn);
+ break;
+ }
+}
+
+/* Find the insn from the Ith table entry, which is known to be a
+ register swap Y = SWAP(X). Replace it with a copy Y = X. */
+static void
+replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
+{
+ rtx insn = insn_entry[i].insn;
+ rtx body = PATTERN (insn);
+ rtx src_reg = XEXP (SET_SRC (body), 0);
+ rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
+ rtx new_insn = emit_insn_before (copy, insn);
+ set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
+ df_insn_rescan (new_insn);
+
+ if (dump_file)
+ {
+ unsigned int new_uid = INSN_UID (new_insn);
+ fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
+ }
+
+ df_insn_delete (insn);
+ remove_insn (insn);
+ INSN_DELETED_P (insn) = 1;
+}
+
+/* Dump the swap table to DUMP_FILE. */
+static void
+dump_swap_insn_table (swap_web_entry *insn_entry)
+{
+ int e = get_max_uid ();
+ fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
+
+ for (int i = 0; i < e; ++i)
+ if (insn_entry[i].is_relevant)
+ {
+ swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
+ fprintf (dump_file, "%6d %6d ", i,
+ pred_entry && pred_entry->insn
+ ? INSN_UID (pred_entry->insn) : 0);
+ if (insn_entry[i].is_load)
+ fputs ("load ", dump_file);
+ if (insn_entry[i].is_store)
+ fputs ("store ", dump_file);
+ if (insn_entry[i].is_swap)
+ fputs ("swap ", dump_file);
+ if (insn_entry[i].is_live_in)
+ fputs ("live-in ", dump_file);
+ if (insn_entry[i].is_live_out)
+ fputs ("live-out ", dump_file);
+ if (insn_entry[i].contains_subreg)
+ fputs ("subreg ", dump_file);
+ if (insn_entry[i].is_128_int)
+ fputs ("int128 ", dump_file);
+ if (insn_entry[i].is_call)
+ fputs ("call ", dump_file);
+ if (insn_entry[i].is_swappable)
+ {
+ fputs ("swappable ", dump_file);
+ if (insn_entry[i].special_handling == SH_CONST_VECTOR)
+ fputs ("special:constvec ", dump_file);
+ else if (insn_entry[i].special_handling == SH_SUBREG)
+ fputs ("special:subreg ", dump_file);
+ else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
+ fputs ("special:load ", dump_file);
+ else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
+ fputs ("special:store ", dump_file);
+ else if (insn_entry[i].special_handling == SH_EXTRACT)
+ fputs ("special:extract ", dump_file);
+ else if (insn_entry[i].special_handling == SH_SPLAT)
+ fputs ("special:splat ", dump_file);
+ }
+ if (insn_entry[i].web_not_optimizable)
+ fputs ("unoptimizable ", dump_file);
+ if (insn_entry[i].will_delete)
+ fputs ("delete ", dump_file);
+ fputs ("\n", dump_file);
+ }
+ fputs ("\n", dump_file);
+}
+
+/* Main entry point for this pass. */
+unsigned int
+rs6000_analyze_swaps (function *fun)
+{
+ swap_web_entry *insn_entry;
+ basic_block bb;
+ rtx insn;
+
+ /* Dataflow analysis for use-def chains. */
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+ df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+ df_analyze ();
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ /* Allocate structure to represent webs of insns. */
+ insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
+
+ /* Walk the insns to gather basic data. */
+ FOR_ALL_BB_FN (bb, fun)
+ FOR_BB_INSNS (bb, insn)
+ {
+ unsigned int uid = INSN_UID (insn);
+ if (NONDEBUG_INSN_P (insn))
+ {
+ insn_entry[uid].insn = insn;
+
+ if (GET_CODE (insn) == CALL_INSN)
+ insn_entry[uid].is_call = 1;
+
+ /* Walk the uses and defs to see if we mention vector regs.
+ Record any constraints on optimization of such mentions. */
+ df_ref *use_rec;
+ for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
+ {
+ df_ref mention = *use_rec;
+ /* We use DF_REF_REAL_REG here to get inside any subregs. */
+ machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
+
+ /* If a use gets its value from a call insn, it will be
+ a hard register and will look like (reg:V4SI 3 3).
+ The df analysis creates two mentions for GPR3 and GPR4,
+ both DImode. We must recognize this and treat it as a
+ vector mention to ensure the call is unioned with this
+ use. */
+ if (mode == DImode && DF_REF_INSN_INFO (mention))
+ {
+ rtx feeder = DF_REF_INSN (mention);
+ /* FIXME: It is pretty hard to get from the df mention
+ to the mode of the use in the insn. We arbitrarily
+ pick a vector mode here, even though the use might
+ be a real DImode. We can be too conservative
+ (create a web larger than necessary) because of
+ this, so consider eventually fixing this. */
+ if (GET_CODE (feeder) == CALL_INSN)
+ mode = V4SImode;
+ }
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ insn_entry[uid].is_relevant = 1;
+ if (mode == TImode || mode == V1TImode)
+ insn_entry[uid].is_128_int = 1;
+ if (DF_REF_INSN_INFO (mention))
+ insn_entry[uid].contains_subreg
+ = !rtx_equal_p (DF_REF_REG (mention),
+ DF_REF_REAL_REG (mention));
+ union_defs (insn_entry, insn, mention);
+ }
+ }
+ df_ref *def_rec;
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref mention = *def_rec;
+ /* We use DF_REF_REAL_REG here to get inside any subregs. */
+ machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
+
+ /* If we're loading up a hard vector register for a call,
+ it looks like (set (reg:V4SI 9 9) (...)). The df
+ analysis creates two mentions for GPR9 and GPR10, both
+ DImode. So relying on the mode from the mentions
+ isn't sufficient to ensure we union the call into the
+ web with the parameter setup code. */
+ if (mode == DImode && GET_CODE (insn) == SET
+ && VECTOR_MODE_P (GET_MODE (SET_DEST (insn))))
+ mode = GET_MODE (SET_DEST (insn));
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ insn_entry[uid].is_relevant = 1;
+ if (mode == TImode || mode == V1TImode)
+ insn_entry[uid].is_128_int = 1;
+ if (DF_REF_INSN_INFO (mention))
+ insn_entry[uid].contains_subreg
+ = !rtx_equal_p (DF_REF_REG (mention),
+ DF_REF_REAL_REG (mention));
+ /* REG_FUNCTION_VALUE_P is not valid for subregs. */
+ else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
+ insn_entry[uid].is_live_out = 1;
+ union_uses (insn_entry, insn, mention);
+ }
+ }
+
+ if (insn_entry[uid].is_relevant)
+ {
+ /* Determine if this is a load or store. */
+ insn_entry[uid].is_load = insn_is_load_p (insn);
+ insn_entry[uid].is_store = insn_is_store_p (insn);
+
+ /* Determine if this is a doubleword swap. If not,
+ determine whether it can legally be swapped. */
+ if (insn_is_swap_p (insn))
+ insn_entry[uid].is_swap = 1;
+ else
+ {
+ unsigned int special = SH_NONE;
+ insn_entry[uid].is_swappable
+ = insn_is_swappable_p (insn_entry, insn, &special);
+ if (special != SH_NONE && insn_entry[uid].contains_subreg)
+ insn_entry[uid].is_swappable = 0;
+ else if (special != SH_NONE)
+ insn_entry[uid].special_handling = special;
+ else if (insn_entry[uid].contains_subreg)
+ insn_entry[uid].special_handling = SH_SUBREG;
+ }
+ }
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nSwap insn entry table when first built\n");
+ dump_swap_insn_table (insn_entry);
+ }
+
+ /* Record unoptimizable webs. */
+ unsigned e = get_max_uid (), i;
+ for (i = 0; i < e; ++i)
+ {
+ if (!insn_entry[i].is_relevant)
+ continue;
+
+ swap_web_entry *root
+ = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
+ unsigned uid = INSN_UID (insn_entry[i].insn);
+
+ if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
+ || (insn_entry[i].contains_subreg
+ && insn_entry[i].special_handling != SH_SUBREG)
+ || insn_entry[i].is_128_int || insn_entry[i].is_call
+ || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
+ root->web_not_optimizable = 1;
+
+ /* If we have loads or stores that aren't permuting then the
+ optimization isn't appropriate. */
+ else if ((insn_entry[i].is_load || insn_entry[i].is_store)
+ && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
+ root->web_not_optimizable = 1;
+
+ /* If we have permuting loads or stores that are not accompanied
+ by a register swap, the optimization isn't appropriate. */
+ else if (insn_entry[i].is_load && insn_entry[i].is_swap)
+ {
+ df_ref *def_rec;
+
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ struct df_link *link = DF_REF_CHAIN (def);
+
+ if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
+ {
+ root->web_not_optimizable = 1;
+ break;
+ }
+ }
+ }
+ else if (insn_entry[i].is_store && insn_entry[i].is_swap)
+ {
+ df_ref *use_rec;
+
+ for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
+ {
+ df_ref use = *use_rec;
+ struct df_link *link = DF_REF_CHAIN (use);
+
+ if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
+ {
+ root->web_not_optimizable = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
+ dump_swap_insn_table (insn_entry);
+ }
+
+ /* For each load and store in an optimizable web (which implies
+ the loads and stores are permuting), find the associated
+ register swaps and mark them for removal. Due to various
+ optimizations we may mark the same swap more than once. Also
+ perform special handling for swappable insns that require it. */
+ for (i = 0; i < e; ++i)
+ if ((insn_entry[i].is_load || insn_entry[i].is_store)
+ && insn_entry[i].is_swap)
+ {
+ swap_web_entry* root_entry
+ = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+ if (!root_entry->web_not_optimizable)
+ mark_swaps_for_removal (insn_entry, i);
+ }
+ else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
+ {
+ swap_web_entry* root_entry
+ = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+ if (!root_entry->web_not_optimizable)
+ handle_special_swappables (insn_entry, i);
+ }
+
+ /* Now delete the swaps marked for removal. */
+ for (i = 0; i < e; ++i)
+ if (insn_entry[i].will_delete)
+ replace_swap_with_copy (insn_entry, i);
+
+ /* Clean up. */
+ free (insn_entry);
+ return 0;
+}
+
+const pass_data pass_data_analyze_swaps =
+{
+ RTL_PASS, /* type */
+ "swaps", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ true, /* has_gate */
+ true, /* has_execute */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+class pass_analyze_swaps : public rtl_opt_pass
+{
+public:
+ pass_analyze_swaps(gcc::context *ctxt)
+ : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate ()
+ {
+ return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
+ && rs6000_optimize_swaps);
+ }
+
+ unsigned int execute ()
+ {
+ return rs6000_analyze_swaps (cfun);
+ }
+
+}; // class pass_analyze_swaps
+
+rtl_opt_pass *
+make_pass_analyze_swaps (gcc::context *ctxt)
+{
+ return new pass_analyze_swaps (ctxt);
+}
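A minimal sketch, not part of the patch, of the kind of little-endian VSX code the new pass is aimed at: on power8 each vector load and store is emitted as an lxvd2x/stxvd2x paired with an xxpermdi doubleword swap, and when every insn in a web is a permuting load, permuting store or swap, the swaps are redundant and can be dropped.

    /* Illustrative only; compile with roughly -O2 -mcpu=power8 on
       powerpc64le.  Function and parameter names are made up.  */
    #include <altivec.h>

    void
    copy_vectors (vector int *dst, const vector int *src, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = src[i];   /* load-swap / swap-store pairs; the swaps
                              cancel once the whole web is analyzed.  */
    }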
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md
index d078491e1..f77754aa1 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.md
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.md
@@ -137,6 +137,7 @@
UNSPEC_UNPACK_128BIT
UNSPEC_PACK_128BIT
UNSPEC_LSQ
+ UNSPEC_FUSION_GPR
])
;;
@@ -328,8 +329,25 @@
(define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")])
; Definitions for 32-bit fpr direct move
+; At present, the decimal modes are not allowed in the traditional altivec
+; registers, so restrict the constraints to just the traditional FPRs.
(define_mode_attr f32_dm [(SF "wn") (SD "wh")])
+; Definitions for 32-bit VSX
+(define_mode_attr f32_vsx [(SF "ww") (SD "wn")])
+
+; Definitions for 32-bit use of altivec registers
+(define_mode_attr f32_av [(SF "wu") (SD "wn")])
+
+; Definitions for 64-bit VSX
+(define_mode_attr f64_vsx [(DF "ws") (DD "wn")])
+
+; Definitions for 64-bit direct move
+(define_mode_attr f64_dm [(DF "wk") (DD "wh")])
+
+; Definitions for 64-bit use of altivec registers
+(define_mode_attr f64_av [(DF "wv") (DD "wn")])
+
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -435,7 +453,7 @@
;; either.
;; Mode attribute for boolean operation register constraints for output
-(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v")
+(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wt,v")
(PTI "&r,r,r")
(V16QI "wa,v,&?r,?r,?r")
(V8HI "wa,v,&?r,?r,?r")
@@ -446,7 +464,7 @@
(V1TI "wa,v,&?r,?r,?r")])
;; Mode attribute for boolean operation register constraints for operand1
-(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v")
+(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wt,v")
(PTI "r,0,r")
(V16QI "wa,v,r,0,r")
(V8HI "wa,v,r,0,r")
@@ -457,7 +475,7 @@
(V1TI "wa,v,r,0,r")])
;; Mode attribute for boolean operation register constraints for operand2
-(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v")
+(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wt,v")
(PTI "r,r,0")
(V16QI "wa,v,r,r,0")
(V8HI "wa,v,r,r,0")
@@ -470,7 +488,7 @@
;; Mode attribute for boolean operation register constraints for operand1
;; for one_cmpl. To simplify things, we repeat the constraint where 0
;; is used for operand1 or operand2
-(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v")
+(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wt,v")
(PTI "r,0,0")
(V16QI "wa,v,r,0,0")
(V8HI "wa,v,r,0,0")
@@ -8582,8 +8600,8 @@
[(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>")
(match_operator:BOOL_128 3 "boolean_operator"
[(not:BOOL_128
- (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP1>"))
- (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP2>")]))]
+ (match_operand:BOOL_128 2 "vlogical_operand" "<BOOL_REGS_OP2>"))
+ (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_OP1>")]))]
"TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)"
{
if (TARGET_VSX && vsx_register_operand (operands[0], <MODE>mode))
@@ -8598,7 +8616,7 @@
&& reload_completed && int_reg_operand (operands[0], <MODE>mode)"
[(const_int 0)]
{
- rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true,
NULL_RTX);
DONE;
}
@@ -8620,14 +8638,14 @@
[(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r")
(match_operator:TI2 3 "boolean_operator"
[(not:TI2
- (match_operand:TI2 1 "int_reg_operand" "r,0,r"))
- (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))]
+ (match_operand:TI2 2 "int_reg_operand" "r,0,r"))
+ (match_operand:TI2 1 "int_reg_operand" "r,r,0")]))]
"!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
"#"
"reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)"
[(const_int 0)]
{
- rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false,
+ rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true,
NULL_RTX);
DONE;
}
@@ -9188,8 +9206,8 @@
}")
(define_insn "mov<mode>_hardfloat"
- [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wu,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
- (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wu,r,<f32_dm>,r,h,0,G,Fn"))]
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,<f32_vsx>,<f32_vsx>,<f32_lr>,<f32_sm>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r, h, 0, G,Fn"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -9390,8 +9408,8 @@
;; reloading.
(define_insn "*mov<mode>_hardfloat32"
- [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r")
- (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))]
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_vsx>,<f64_vsx>,Y,r,!r,!r,!r,!r")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,<f64_vsx>,j,r,Y,r,G,H,F"))]
"! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -9459,8 +9477,8 @@
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*mov<mode>_hardfloat64"
- [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wk")
- (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wk,r"))]
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,<f64_av>,Z,<f64_vsx>,<f64_vsx>,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,<f64_dm>")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,<f64_av>,<f64_vsx>,j,r,Y,r,r,h,0,G,H,F,wg,r,<f64_dm>,r"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -15714,22 +15732,9 @@
;; a GPR. The addis instruction must be adjacent to the load, and use the same
;; register that is being loaded. The fused ops must be physically adjacent.
-;; We use define_peephole for the actual addis/load, and the register used to
-;; hold the addis value must be the same as the register being loaded. We use
-;; define_peephole2 to change the register used for addis to be the register
-;; being loaded, since we can look at whether it is dead after the load insn.
-
-(define_peephole
- [(set (match_operand:P 0 "base_reg_operand" "")
- (match_operand:P 1 "fusion_gpr_addis" ""))
- (set (match_operand:INT1 2 "base_reg_operand" "")
- (match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
- "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)"
-{
- return emit_fusion_gpr_load (operands);
-}
- [(set_attr "type" "load")
- (set_attr "length" "8")])
+;; Find cases where the addis that feeds into a load instruction is either used
+;; once or is the same as the target register, and replace it with the fusion
+;; insn
(define_peephole2
[(set (match_operand:P 0 "base_reg_operand" "")
@@ -15737,15 +15742,28 @@
(set (match_operand:INT1 2 "base_reg_operand" "")
(match_operand:INT1 3 "fusion_gpr_mem_load" ""))]
"TARGET_P8_FUSION
- && (REGNO (operands[0]) != REGNO (operands[2])
- || GET_CODE (operands[3]) == SIGN_EXTEND)
- && fusion_gpr_load_p (operands, true)"
+ && fusion_gpr_load_p (operands[0], operands[1], operands[2],
+ operands[3])"
[(const_int 0)]
{
expand_fusion_gpr_load (operands);
DONE;
})
+;; Fusion insn, created by the define_peephole2 above (and eventually by
+;; reload)
+
+(define_insn "fusion_gpr_load_<mode>"
+ [(set (match_operand:INT1 0 "base_reg_operand" "=&b")
+ (unspec:INT1 [(match_operand:INT1 1 "fusion_gpr_mem_combo" "")]
+ UNSPEC_FUSION_GPR))]
+ "TARGET_P8_FUSION"
+{
+ return emit_fusion_gpr_load (operands[0], operands[1]);
+}
+ [(set_attr "type" "load")
+ (set_attr "length" "8")])
+
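As a rough illustration (not from the patch), the peephole2 above targets the addis/load pairs produced for TOC-relative accesses in the medium code model, for example:

    /* Hypothetical example.  With -mcpu=power8 this access is emitted as
       roughly
           addis r9,r2,x@toc@ha
           ld    r3,x@toc@l(r9)
       and the peephole2 folds the pair into fusion_gpr_load_di so the two
       instructions stay adjacent for power8 instruction fusion.  */
    extern long x;

    long
    get_x (void)
    {
      return x;
    }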
;; Miscellaneous ISA 2.06 (power7) instructions
(define_insn "addg6s"
diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.opt b/gcc-4.9/gcc/config/rs6000/rs6000.opt
index 4c1a02a52..4d0d5e73d 100644
--- a/gcc-4.9/gcc/config/rs6000/rs6000.opt
+++ b/gcc-4.9/gcc/config/rs6000/rs6000.opt
@@ -588,3 +588,7 @@ Allow double variables in upper registers with -mcpu=power7 or -mvsx
mupper-regs-sf
Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
Allow float variables in upper registers with -mcpu=power8 or -mp8-vector
+
+moptimize-swaps
+Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
+Analyze and remove doubleword swaps from VSX computations.
diff --git a/gcc-4.9/gcc/config/rs6000/rtems.h b/gcc-4.9/gcc/config/rs6000/rtems.h
index 2402d5336..046488034 100644
--- a/gcc-4.9/gcc/config/rs6000/rtems.h
+++ b/gcc-4.9/gcc/config/rs6000/rtems.h
@@ -52,7 +52,8 @@
%{mcpu=750: %{!Dppc*: %{!Dmpc*: -Dmpc750} } } \
%{mcpu=821: %{!Dppc*: %{!Dmpc*: -Dmpc821} } } \
%{mcpu=860: %{!Dppc*: %{!Dmpc*: -Dmpc860} } } \
-%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } }"
+%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } } \
+%{mcpu=e6500: -D__PPC_CPU_E6500__}"
#undef SUBSUBTARGET_EXTRA_SPECS
#define SUBSUBTARGET_EXTRA_SPECS \
diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h
index 7cc543319..9d456ddec 100644
--- a/gcc-4.9/gcc/config/rs6000/sysv4.h
+++ b/gcc-4.9/gcc/config/rs6000/sysv4.h
@@ -846,11 +846,6 @@ ncrtn.o%s"
#define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}"
#endif
-/* These may be provided by rs6000/linux-grtev2.h. */
-#ifndef LINUX_GRTE_EXTRA_SPECS
-#define LINUX_GRTE_EXTRA_SPECS
-#endif
-
/* Define any extra SPECS that the compiler needs to generate. */
/* Override rs6000.h definition. */
#undef SUBTARGET_EXTRA_SPECS
@@ -916,7 +911,6 @@ ncrtn.o%s"
{ "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \
{ "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \
{ "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \
- LINUX_GRTE_EXTRA_SPECS \
SUBSUBTARGET_EXTRA_SPECS
#define SUBSUBTARGET_EXTRA_SPECS
diff --git a/gcc-4.9/gcc/config/rs6000/t-rtems b/gcc-4.9/gcc/config/rs6000/t-rtems
index 426f75ac5..eadda0d20 100644
--- a/gcc-4.9/gcc/config/rs6000/t-rtems
+++ b/gcc-4.9/gcc/config/rs6000/t-rtems
@@ -18,16 +18,24 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-MULTILIB_OPTIONS = \
-mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540 \
-msoft-float/mfloat-gprs=double
+MULTILIB_OPTIONS =
+MULTILIB_DIRNAMES =
+MULTILIB_MATCHES =
+MULTILIB_EXCEPTIONS =
+MULTILIB_REQUIRED =
+
+MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500
+MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500
+
+MULTILIB_OPTIONS += m32
+MULTILIB_DIRNAMES += m32
-MULTILIB_DIRNAMES = \
-m403 m505 m603e m604 m860 m7400 m8540 \
-nof gprsdouble
+MULTILIB_OPTIONS += msoft-float/mfloat-gprs=double
+MULTILIB_DIRNAMES += nof gprsdouble
+
+MULTILIB_OPTIONS += mno-spe/mno-altivec
+MULTILIB_DIRNAMES += nospe noaltivec
-# MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT}
-MULTILIB_MATCHES =
MULTILIB_MATCHES += ${MULTILIB_MATCHES_ENDIAN}
MULTILIB_MATCHES += ${MULTILIB_MATCHES_SYSV}
# Map 405 to 403
@@ -52,37 +60,20 @@ MULTILIB_MATCHES += mcpu?8540=mcpu?8548
# (mfloat-gprs=single is implicit default)
MULTILIB_MATCHES += mcpu?8540=mcpu?8540/mfloat-gprs?single
-# Soft-float only, default implies msoft-float
-# NOTE: Must match with MULTILIB_MATCHES_FLOAT and MULTILIB_MATCHES
-MULTILIB_SOFTFLOAT_ONLY = \
-*mcpu=401/*msoft-float* \
-*mcpu=403/*msoft-float* \
-*mcpu=405/*msoft-float* \
-*mcpu=801/*msoft-float* \
-*mcpu=821/*msoft-float* \
-*mcpu=823/*msoft-float* \
-*mcpu=860/*msoft-float*
-
-# Hard-float only, take out msoft-float
-MULTILIB_HARDFLOAT_ONLY = \
-*mcpu=505/*msoft-float*
-
-# Targets which do not support gprs
-MULTILIB_NOGPRS = \
-mfloat-gprs=* \
-*mcpu=403/*mfloat-gprs=* \
-*mcpu=505/*mfloat-gprs=* \
-*mcpu=603e/*mfloat-gprs=* \
-*mcpu=604/*mfloat-gprs=* \
-*mcpu=860/*mfloat-gprs=* \
-*mcpu=7400/*mfloat-gprs=*
-
-MULTILIB_EXCEPTIONS =
-
-# Disallow -Dppc and -Dmpc without other options
-MULTILIB_EXCEPTIONS += Dppc* Dmpc*
+# Enumeration of multilibs
-MULTILIB_EXCEPTIONS += \
-${MULTILIB_SOFTFLOAT_ONLY} \
-${MULTILIB_HARDFLOAT_ONLY} \
-${MULTILIB_NOGPRS}
+MULTILIB_REQUIRED += msoft-float
+MULTILIB_REQUIRED += mcpu=403
+MULTILIB_REQUIRED += mcpu=505
+MULTILIB_REQUIRED += mcpu=603e
+MULTILIB_REQUIRED += mcpu=603e/msoft-float
+MULTILIB_REQUIRED += mcpu=604
+MULTILIB_REQUIRED += mcpu=604/msoft-float
+MULTILIB_REQUIRED += mcpu=7400
+MULTILIB_REQUIRED += mcpu=7400/msoft-float
+MULTILIB_REQUIRED += mcpu=8540
+MULTILIB_REQUIRED += mcpu=8540/msoft-float/mno-spe
+MULTILIB_REQUIRED += mcpu=8540/mfloat-gprs=double
+MULTILIB_REQUIRED += mcpu=860
+MULTILIB_REQUIRED += mcpu=e6500/m32
+MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec
diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md
index 2cf5e7a94..9aaf06428 100644
--- a/gcc-4.9/gcc/config/rs6000/vsx.md
+++ b/gcc-4.9/gcc/config/rs6000/vsx.md
@@ -260,6 +260,14 @@
UNSPEC_VSX_ROUND_IC
UNSPEC_VSX_SLDWI
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
+ UNSPEC_VSX_DIVSD
+ UNSPEC_VSX_DIVUD
+ UNSPEC_VSX_MULSD
+ UNSPEC_VSX_XVCVSXDDP
+ UNSPEC_VSX_XVCVUXDDP
+ UNSPEC_VSX_XVCVDPSXDS
+ UNSPEC_VSX_XVCVDPUXDS
])
;; VSX moves
@@ -746,6 +754,34 @@
[(set_attr "type" "<VStype_simple>")
(set_attr "fp_type" "<VSfptype_mul>")])
+; Emulate vector with scalar for vec_mul in V2DImode
+(define_insn_and_split "vsx_mul_v2di"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+ (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
+ (match_operand:V2DI 2 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_MULSD))]
+ "VECTOR_MEM_VSX_P (V2DImode)"
+ "#"
+ "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = gen_reg_rtx (DImode);
+ rtx op4 = gen_reg_rtx (DImode);
+ rtx op5 = gen_reg_rtx (DImode);
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
+ emit_insn (gen_muldi3 (op5, op3, op4));
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
+ emit_insn (gen_muldi3 (op3, op3, op4));
+ emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
+}"
+ [(set_attr "type" "vecdouble")])
+
(define_insn "*vsx_div<mode>3"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
(div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
@@ -755,6 +791,61 @@
[(set_attr "type" "<VStype_div>")
(set_attr "fp_type" "<VSfptype_div>")])
+; Emulate vector with scalar for vec_div in V2DImode
+(define_insn_and_split "vsx_div_v2di"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+ (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
+ (match_operand:V2DI 2 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_DIVSD))]
+ "VECTOR_MEM_VSX_P (V2DImode)"
+ "#"
+ "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = gen_reg_rtx (DImode);
+ rtx op4 = gen_reg_rtx (DImode);
+ rtx op5 = gen_reg_rtx (DImode);
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
+ emit_insn (gen_divdi3 (op5, op3, op4));
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
+ emit_insn (gen_divdi3 (op3, op3, op4));
+ emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
+}"
+ [(set_attr "type" "vecdiv")])
+
+(define_insn_and_split "vsx_udiv_v2di"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+ (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
+ (match_operand:V2DI 2 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_DIVUD))]
+ "VECTOR_MEM_VSX_P (V2DImode)"
+ "#"
+ "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = gen_reg_rtx (DImode);
+ rtx op4 = gen_reg_rtx (DImode);
+ rtx op5 = gen_reg_rtx (DImode);
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
+ emit_insn (gen_udivdi3 (op5, op3, op4));
+ emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
+ emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
+ emit_insn (gen_udivdi3 (op3, op3, op4));
+ emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
+}"
+ [(set_attr "type" "vecdiv")])
+
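A hedged usage sketch, not in the patch: power8 has no vector doubleword multiply or divide instruction, so the three patterns above lower the operation to two scalar DImode operations via vsx_extract_v2di/muldi3/divdi3 (or udivdi3) and reassemble the result with vsx_concat_v2di. Assuming the vec_mul/vec_div overloads for vector long long that the companion rs6000-c.c change is expected to provide:

    /* Illustrative only.  */
    #include <altivec.h>

    vector long long
    mul_then_div (vector long long a, vector long long b, vector long long c)
    {
      return vec_div (vec_mul (a, b), c);   /* vsx_mul_v2di, vsx_div_v2di */
    }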
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
[(set (match_dup 3)
@@ -904,11 +995,11 @@
;; multiply.
(define_insn "*vsx_fmav4sf4"
- [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v")
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
(fma:V4SF
- (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v")
- (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v")
- (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))]
+ (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
+ (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
+ (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
"VECTOR_UNIT_VSX_P (V4SFmode)"
"@
xvmaddasp %x0,%x1,%x2
@@ -919,11 +1010,11 @@
[(set_attr "type" "vecfloat")])
(define_insn "*vsx_fmav2df4"
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa")
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
(fma:V2DF
- (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa")
- (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0")
- (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))]
+ (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
+ (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
+ (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
"@
xvmaddadp %x0,%x1,%x2
@@ -1268,6 +1359,102 @@
"xscvspdpn %x0,%x1"
[(set_attr "type" "fp")])
+;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
+
+(define_expand "vsx_xvcvsxddp_scale"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V2DI 1 "vsx_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ int scale = INTVAL(operands[2]);
+ emit_insn (gen_vsx_xvcvsxddp (op0, op1));
+ if (scale != 0)
+ rs6000_scale_v2df (op0, op0, -scale);
+ DONE;
+})
+
+(define_insn "vsx_xvcvsxddp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
+ (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_XVCVSXDDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxddp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
+(define_expand "vsx_xvcvuxddp_scale"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V2DI 1 "vsx_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ int scale = INTVAL(operands[2]);
+ emit_insn (gen_vsx_xvcvuxddp (op0, op1));
+ if (scale != 0)
+ rs6000_scale_v2df (op0, op0, -scale);
+ DONE;
+})
+
+(define_insn "vsx_xvcvuxddp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
+ (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_XVCVUXDDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxddp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
+(define_expand "vsx_xvcvdpsxds_scale"
+ [(match_operand:V2DI 0 "vsx_register_operand" "")
+ (match_operand:V2DF 1 "vsx_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp = gen_reg_rtx (V2DFmode);
+ int scale = INTVAL(operands[2]);
+ if (scale != 0)
+ rs6000_scale_v2df (tmp, op1, scale);
+ emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
+ DONE;
+})
+
+(define_insn "vsx_xvcvdpsxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+ (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_XVCVDPSXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpsxds %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
+(define_expand "vsx_xvcvdpuxds_scale"
+ [(match_operand:V2DI 0 "vsx_register_operand" "")
+ (match_operand:V2DF 1 "vsx_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp = gen_reg_rtx (V2DFmode);
+ int scale = INTVAL(operands[2]);
+ if (scale != 0)
+ rs6000_scale_v2df (tmp, op1, scale);
+ emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
+ DONE;
+})
+
+(define_insn "vsx_xvcvdpuxds"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+ (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_XVCVDPUXDS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpuxds %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
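A small sketch (assuming the vec_ctf/vec_cts/vec_ctu overloads for 64-bit elements are wired up in the companion rs6000-c.c change): the *_scale expanders implement the builtin semantics of converting and then scaling by a power of two, calling rs6000_scale_v2df only when the scale operand is non-zero.

    /* Illustrative only.  */
    #include <altivec.h>

    vector double
    fixed_point_to_double (vector signed long long v)
    {
      /* Convert, then divide by 2**4: xvcvsxddp followed by a scale.  */
      return vec_ctf (v, 4);
    }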
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1359,8 +1546,8 @@
(define_insn "vsx_concat_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
(vec_concat:VSX_D
- (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,<VSa>")
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,<VSa>")))]
+ (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
+ (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
if (BYTES_BIG_ENDIAN)
@@ -1647,7 +1834,7 @@
[(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
(vec_select:<VS_scalar>
(match_operand:VSX_D 1 "memory_operand" "m,Z,m")
- (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
+ (parallel [(const_int 0)])))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
"@
lfd%U1%X1 %0,%1
@@ -1921,6 +2108,22 @@
"xxspltw %x0,%x1,%2"
[(set_attr "type" "vecperm")])
+;; V2DF/V2DI splat for use by vec_splat builtin
+(define_insn "vsx_xxspltd_<mode>"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
+ || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
+}
+ [(set_attr "type" "vecperm")])
+
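For illustration only (a sketch, not part of the patch): a vec_splat on a two-element vector can now be routed to this pattern, i.e. a single xxpermdi of the source with itself.

    /* Assumes the vec_splat overload for vector double maps to
       vsx_xxspltd_v2df.  */
    #include <altivec.h>

    vector double
    splat_element1 (vector double v)
    {
      return vec_splat (v, 1);   /* xxpermdi %x0,%x1,%x1,3 or ...,0,
                                    depending on element order */
    }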
;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
[(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
@@ -2041,7 +2244,7 @@
;; to the top element of the V2DF array without doing an extract.
(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
- [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
+ [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
(vec_select:DF
(VEC_reduc:V2DF
(vec_concat:V2DF
diff --git a/gcc-4.9/gcc/config/rs6000/xcoff.h b/gcc-4.9/gcc/config/rs6000/xcoff.h
index f2b7bd07a..10123313f 100644
--- a/gcc-4.9/gcc/config/rs6000/xcoff.h
+++ b/gcc-4.9/gcc/config/rs6000/xcoff.h
@@ -304,14 +304,15 @@
do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \
RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \
if ((ALIGN) > 32) \
- fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,%u\n", \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s%u_,%u\n", \
(SIZE), xcoff_bss_section_name, \
+ floor_log2 ((ALIGN) / BITS_PER_UNIT), \
floor_log2 ((ALIGN) / BITS_PER_UNIT)); \
else if ((SIZE) > 4) \
- fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,3\n", \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s3_,3\n", \
(SIZE), xcoff_bss_section_name); \
else \
- fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s\n", \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,2\n", \
(SIZE), xcoff_bss_section_name); \
} while (0)
#endif
diff --git a/gcc-4.9/gcc/config/rx/rx.c b/gcc-4.9/gcc/config/rx/rx.c
index 4242c1a97..11a825cfa 100644
--- a/gcc-4.9/gcc/config/rx/rx.c
+++ b/gcc-4.9/gcc/config/rx/rx.c
@@ -733,7 +733,7 @@ rx_print_operand (FILE * file, rtx op, int letter)
break;
case 'R':
- gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4);
+ gcc_assert (GET_MODE_SIZE (GET_MODE (op)) <= 4);
unsigned_load = true;
/* Fall through. */
case 'Q':
diff --git a/gcc-4.9/gcc/config/rx/rx.h b/gcc-4.9/gcc/config/rx/rx.h
index d99b19ad2..06a0ae850 100644
--- a/gcc-4.9/gcc/config/rx/rx.h
+++ b/gcc-4.9/gcc/config/rx/rx.h
@@ -433,9 +433,9 @@ typedef unsigned int CUMULATIVE_ARGS;
/* Compute the alignment needed for label X in various situations.
If the user has specified an alignment then honour that, otherwise
use rx_align_for_label. */
-#define JUMP_ALIGN(x) (align_jumps ? align_jumps : rx_align_for_label (x, 0))
-#define LABEL_ALIGN(x) (align_labels ? align_labels : rx_align_for_label (x, 3))
-#define LOOP_ALIGN(x) (align_loops ? align_loops : rx_align_for_label (x, 2))
+#define JUMP_ALIGN(x) (align_jumps > 1 ? align_jumps_log : rx_align_for_label (x, 0))
+#define LABEL_ALIGN(x) (align_labels > 1 ? align_labels_log : rx_align_for_label (x, 3))
+#define LOOP_ALIGN(x) (align_loops > 1 ? align_loops_log : rx_align_for_label (x, 2))
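A worked example of the difference, assuming the usual meaning of the align_* variables: with -falign-jumps=16, align_jumps is 16 and align_jumps_log is 4. JUMP_ALIGN is expected to return a log2 alignment, so the old definition handed back 16 where 4 was wanted; the new one returns align_jumps_log (4) and falls back to rx_align_for_label only when no explicit alignment (> 1) was requested.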
#define LABEL_ALIGN_AFTER_BARRIER(x) rx_align_for_label (x, 0)
#define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP) \
diff --git a/gcc-4.9/gcc/config/s390/s390.c b/gcc-4.9/gcc/config/s390/s390.c
index aac8de848..866de858c 100644
--- a/gcc-4.9/gcc/config/s390/s390.c
+++ b/gcc-4.9/gcc/config/s390/s390.c
@@ -9130,11 +9130,14 @@ s390_emit_epilogue (bool sibcall)
if (! sibcall)
{
/* Fetch return address from stack before load multiple,
- this will do good for scheduling. */
-
- if (cfun_frame_layout.save_return_addr_p
- || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
- && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
+ this will do good for scheduling.
+
+ Only do this if we already decided that r14 needs to be
+ saved to a stack slot. (And not just because r14 happens to
+ be in between two GPRs which need saving.) Otherwise it
+ would be difficult to take that decision back in
+ s390_optimize_prologue. */
+ if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
{
int return_regnum = find_unused_clobbered_reg();
if (!return_regnum)
@@ -9149,6 +9152,12 @@ s390_emit_epilogue (bool sibcall)
addr = gen_rtx_MEM (Pmode, addr);
set_mem_alias_set (addr, get_frame_alias_set ());
emit_move_insn (return_reg, addr);
+
+ /* Once we did that optimization we have to make sure
+ s390_optimize_prologue does not try to remove the
+ store of r14 since we will not be able to find the
+ load issued here. */
+ cfun_frame_layout.save_return_addr_p = true;
}
}
diff --git a/gcc-4.9/gcc/config/s390/s390.md b/gcc-4.9/gcc/config/s390/s390.md
index b17c1fac8..10d7a5a6d 100644
--- a/gcc-4.9/gcc/config/s390/s390.md
+++ b/gcc-4.9/gcc/config/s390/s390.md
@@ -460,7 +460,7 @@
;; This iterator and attribute allow to combine most atomic operations.
(define_code_iterator ATOMIC [and ior xor plus minus mult])
(define_code_iterator ATOMIC_Z196 [and ior xor plus])
-(define_code_attr atomic [(and "and") (ior "ior") (xor "xor")
+(define_code_attr atomic [(and "and") (ior "or") (xor "xor")
(plus "add") (minus "sub") (mult "nand")])
(define_code_attr noxa [(and "n") (ior "o") (xor "x") (plus "a")])
diff --git a/gcc-4.9/gcc/config/sh/predicates.md b/gcc-4.9/gcc/config/sh/predicates.md
index 73bb880d6..3af1f8a14 100644
--- a/gcc-4.9/gcc/config/sh/predicates.md
+++ b/gcc-4.9/gcc/config/sh/predicates.md
@@ -398,7 +398,7 @@
(define_predicate "general_extend_operand"
(match_code "subreg,reg,mem,truncate")
{
- if (GET_CODE (op) == TRUNCATE)
+ if (reload_completed && GET_CODE (op) == TRUNCATE)
return arith_operand (op, mode);
if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))
diff --git a/gcc-4.9/gcc/config/sh/sh-mem.cc b/gcc-4.9/gcc/config/sh/sh-mem.cc
index 45af23acb..e5ef165cf 100644
--- a/gcc-4.9/gcc/config/sh/sh-mem.cc
+++ b/gcc-4.9/gcc/config/sh/sh-mem.cc
@@ -1,5 +1,5 @@
/* Helper routines for memory move and comparison insns.
- Copyright (C) 2013-2014 Free Software Foundation, Inc.
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
This file is part of GCC.
@@ -226,7 +226,7 @@ sh_expand_cmpstr (rtx *operands)
emit_move_insn (tmp3, addr2);
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
- /*start long loop. */
+ /* start long loop. */
emit_label (L_loop_long);
emit_move_insn (tmp2, tmp3);
@@ -335,7 +335,7 @@ sh_expand_cmpnstr (rtx *operands)
rtx len = force_reg (SImode, operands[3]);
int constp = CONST_INT_P (operands[3]);
- /* Loop on a register count. */
+ /* Loop on a register count. */
if (constp)
{
rtx tmp0 = gen_reg_rtx (SImode);
@@ -364,7 +364,7 @@ sh_expand_cmpnstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_likely);
}
- /* word count. Do we have iterations ? */
+ /* word count. Do we have iterations ? */
emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
/*start long loop. */
@@ -407,6 +407,7 @@ sh_expand_cmpnstr (rtx *operands)
/* end loop. Reached max iterations. */
if (! sbytes)
{
+ emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
jump = emit_jump_insn (gen_jump_compact (L_return));
emit_barrier_after (jump);
}
@@ -482,6 +483,13 @@ sh_expand_cmpnstr (rtx *operands)
jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
emit_barrier_after (jump);
}
+ else
+ {
+ emit_insn (gen_cmpeqsi_t (len, const0_rtx));
+ emit_move_insn (operands[0], const0_rtx);
+ jump = emit_jump_insn (gen_branch_true (L_return));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+ }
addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
@@ -522,14 +530,14 @@ sh_expand_cmpnstr (rtx *operands)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
- emit_label (L_return);
-
emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
+ emit_label (L_return);
+
return true;
}
-/* Emit code to perform a strlen
+/* Emit code to perform a strlen.
OPERANDS[0] is the destination.
OPERANDS[1] is the string.
@@ -568,7 +576,7 @@ sh_expand_strlen (rtx *operands)
addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
- /*start long loop. */
+ /* start long loop. */
emit_label (L_loop_long);
/* tmp1 is aligned, OK to load. */
diff --git a/gcc-4.9/gcc/config/sh/sh.c b/gcc-4.9/gcc/config/sh/sh.c
index 3d4553a6e..06798181a 100644
--- a/gcc-4.9/gcc/config/sh/sh.c
+++ b/gcc-4.9/gcc/config/sh/sh.c
@@ -2957,7 +2957,7 @@ enum
struct ashl_lshr_sequence
{
char insn_count;
- char amount[6];
+ signed char amount[6];
char clobbers_t;
};
diff --git a/gcc-4.9/gcc/config/sh/sh.md b/gcc-4.9/gcc/config/sh/sh.md
index ab1f0a51c..d957e5540 100644
--- a/gcc-4.9/gcc/config/sh/sh.md
+++ b/gcc-4.9/gcc/config/sh/sh.md
@@ -868,9 +868,9 @@
(define_insn "*cmp_div0s_0"
[(set (reg:SI T_REG)
- (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand")
+ (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand" "%r")
(const_int 31))
- (ge:SI (match_operand:SI 1 "arith_reg_operand")
+ (ge:SI (match_operand:SI 1 "arith_reg_operand" "r")
(const_int 0))))]
"TARGET_SH1"
"div0s %0,%1"
@@ -4563,6 +4563,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -4803,6 +4809,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -4896,6 +4908,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -4995,6 +5013,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -5069,6 +5093,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -5263,6 +5293,12 @@ label:
{
if (TARGET_SHMEDIA)
{
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2]));
DONE;
}
@@ -6295,10 +6331,9 @@ label:
})
(define_expand "extendqihi2"
- [(set (match_operand:HI 0 "arith_reg_dest" "")
- (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))]
- ""
- "")
+ [(set (match_operand:HI 0 "arith_reg_dest")
+ (sign_extend:HI (match_operand:QI 1 "arith_reg_operand")))]
+ "TARGET_SH1")
(define_insn "*extendqihi2_compact_reg"
[(set (match_operand:HI 0 "arith_reg_dest" "=r")
diff --git a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc
index 313e5b5f4..3791cc76c 100644
--- a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc
+++ b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc
@@ -111,7 +111,7 @@ private:
// Given a start insn and its basic block, recursively determine all
// possible ccreg values in all basic block paths that can lead to the
// start insn.
- void find_last_ccreg_values (rtx start_insn, basic_block bb,
+ bool find_last_ccreg_values (rtx start_insn, basic_block bb,
std::vector<ccreg_value>& values_out,
std::vector<basic_block>& prev_visited_bb) const;
@@ -226,8 +226,8 @@ sh_optimize_sett_clrt::execute (void)
ccreg_values.clear ();
visited_bbs.clear ();
- find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values,
- visited_bbs);
+ bool ok = find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values,
+ visited_bbs);
log_msg ("number of ccreg values collected: %u\n",
(unsigned int)ccreg_values.size ());
@@ -235,7 +235,7 @@ sh_optimize_sett_clrt::execute (void)
// If all the collected values are equal and are equal to the
// constant value of the setcc insn, the setcc insn can be
// removed.
- if (all_ccreg_values_equal (ccreg_values)
+ if (ok && all_ccreg_values_equal (ccreg_values)
&& rtx_equal_p (ccreg_values.front ().value, setcc_val))
{
log_msg ("all values are ");
@@ -309,7 +309,7 @@ sh_optimize_sett_clrt
gcc_unreachable ();
}
-void
+bool
sh_optimize_sett_clrt
::find_last_ccreg_values (rtx start_insn, basic_block bb,
std::vector<ccreg_value>& values_out,
@@ -348,7 +348,7 @@ sh_optimize_sett_clrt
log_msg ("\n");
values_out.push_back (v);
- return;
+ return true;
}
if (any_condjump_p (i) && onlyjump_p (i) && !prev_visited_bb.empty ())
@@ -372,7 +372,7 @@ sh_optimize_sett_clrt
log_msg ("\n");
values_out.push_back (v);
- return;
+ return true;
}
}
@@ -393,10 +393,14 @@ sh_optimize_sett_clrt
for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei);
ei_next (&ei))
{
+ if (ei_edge (ei)->flags & EDGE_COMPLEX)
+ log_return (false, "aborting due to complex edge\n");
+
basic_block pred_bb = ei_edge (ei)->src;
pred_bb_count += 1;
- find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out,
- prev_visited_bb);
+ if (!find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out,
+ prev_visited_bb))
+ return false;
}
prev_visited_bb.pop_back ();
@@ -419,6 +423,8 @@ sh_optimize_sett_clrt
values_out.push_back (v);
}
+
+ return true;
}
bool
diff --git a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc
index e73604022..38e28038d 100644
--- a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc
+++ b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc
@@ -78,14 +78,17 @@ Example 1)
In [bb 4] elimination of the comparison would require inversion of the branch
condition and compensation of other BBs.
-Instead an inverting reg-move can be used:
+Instead the comparison in [bb 3] can be replaced with the comparison in [bb 5]
+by using a reg-reg move. In [bb 4] a logical not is used to compensate the
+inverted condition.
[bb 3]
(set (reg:SI 167) (reg:SI 173))
-> bb 5
[BB 4]
-(set (reg:SI 167) (not:SI (reg:SI 177)))
+(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0)))
+(set (reg:SI 167) (reg:SI 147 t))
-> bb 5
[bb 5]
@@ -214,9 +217,9 @@ In order to handle cases such as above the RTL pass does the following:
and replace the comparisons in the BBs with reg-reg copies to get the
operands in place (create new pseudo regs).
- - If the cstores differ, try to apply the special case
- (eq (reg) (const_int 0)) -> inverted = (not (reg)).
- for the subordinate cstore types and eliminate the dominating ones.
+ - If the cstores differ and the comparison is a test against zero,
+ use reg-reg copies for the dominating cstores and logical not cstores
+ for the subordinate cstores.
- If the comparison types in the BBs are not the same, or the first approach
doesn't work out for some reason, try to eliminate the comparison before the
@@ -558,7 +561,8 @@ private:
bool can_extend_ccreg_usage (const bb_entry& e,
const cbranch_trace& trace) const;
- // Create an insn rtx that is a negating reg move (not operation).
+ // Create an insn rtx that performs a logical not (test != 0) on the src_reg
+ // and stores the result in dst_reg.
rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const;
// Create an insn rtx that inverts the ccreg.
@@ -892,12 +896,32 @@ sh_treg_combine::can_remove_comparison (const bb_entry& e,
rtx
sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const
{
- // This will to go through expanders and may output multiple insns
- // for multi-word regs.
+ // On SH we can do only SImode and DImode comparisons.
+ if (! (GET_MODE (src_reg) == SImode || GET_MODE (src_reg) == DImode))
+ return NULL;
+
+ // On SH we can store the ccreg into an SImode or DImode reg only.
+ if (! (GET_MODE (dst_reg) == SImode || GET_MODE (dst_reg) == DImode))
+ return NULL;
+
start_sequence ();
- expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, m_ccreg,
+ gen_rtx_fmt_ee (EQ, SImode, src_reg, const0_rtx)));
+
+ if (GET_MODE (dst_reg) == SImode)
+ emit_move_insn (dst_reg, m_ccreg);
+ else if (GET_MODE (dst_reg) == DImode)
+ {
+ emit_move_insn (gen_lowpart (SImode, dst_reg), m_ccreg);
+ emit_move_insn (gen_highpart (SImode, dst_reg), const0_rtx);
+ }
+ else
+ gcc_unreachable ();
+
rtx i = get_insns ();
end_sequence ();
+
return i;
}
@@ -1080,7 +1104,12 @@ sh_treg_combine::try_combine_comparisons (cbranch_trace& trace,
// There is one special case though, where an integer comparison
// (eq (reg) (const_int 0))
// can be inverted with a sequence
- // (eq (not (reg)) (const_int 0))
+ // (set (t) (eq (reg) (const_int 0))
+ // (set (reg) (t))
+ // (eq (reg) (const_int 0))
+ //
+ // FIXME: On SH2A it might be better to use the nott insn in this case,
+ // i.e. do the try_eliminate_cstores approach instead.
if (inv_cstore_count != 0 && cstore_count != 0)
{
if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX)
diff --git a/gcc-4.9/gcc/config/sh/sync.md b/gcc-4.9/gcc/config/sh/sync.md
index a0a22a1f5..a3acaac56 100644
--- a/gcc-4.9/gcc/config/sh/sync.md
+++ b/gcc-4.9/gcc/config/sh/sync.md
@@ -466,6 +466,7 @@
(set (mem:SI (match_dup 1))
(unspec:SI
[(match_operand:SI 2 "arith_operand" "rI08")] UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))]
"TARGET_ATOMIC_HARD_LLCS
|| (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
@@ -484,6 +485,7 @@
(set (mem:QIHI (match_dup 1))
(unspec:QIHI
[(match_operand:QIHI 2 "register_operand" "r")] UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=1"))]
@@ -617,6 +619,7 @@
[(FETCHOP:SI (mem:SI (match_dup 1))
(match_operand:SI 2 "<fetchop_predicate>" "<fetchop_constraint>"))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))]
"TARGET_ATOMIC_HARD_LLCS
|| (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
@@ -637,6 +640,7 @@
[(FETCHOP:QIHI (mem:QIHI (match_dup 1))
(match_operand:QIHI 2 "<fetchop_predicate>" "<fetchop_constraint>"))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=1"))]
@@ -784,6 +788,7 @@
[(not:SI (and:SI (mem:SI (match_dup 1))
(match_operand:SI 2 "logical_operand" "rK08")))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))]
"TARGET_ATOMIC_HARD_LLCS
|| (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
@@ -805,6 +810,7 @@
[(not:QIHI (and:QIHI (mem:QIHI (match_dup 1))
(match_operand:QIHI 2 "logical_operand" "rK08")))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=1"))]
@@ -903,7 +909,7 @@
" and %0,%3" "\n"
" not %3,%3" "\n"
" mov.<bwl> %3,@%1" "\n"
- " stc %4,sr";
+ " ldc %4,sr";
}
[(set_attr "length" "20")])
@@ -960,7 +966,8 @@
(set (mem:SI (match_dup 1))
(unspec:SI
[(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2))]
- UNSPEC_ATOMIC))]
+ UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))]
"TARGET_ATOMIC_HARD_LLCS
|| (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
{
@@ -980,6 +987,7 @@
(unspec:QIHI
[(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_dup 2))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=1"))]
@@ -1124,7 +1132,8 @@
(set (mem:SI (match_dup 1))
(unspec:SI
[(not:SI (and:SI (mem:SI (match_dup 1)) (match_dup 2)))]
- UNSPEC_ATOMIC))]
+ UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))]
"TARGET_ATOMIC_HARD_LLCS
|| (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
{
@@ -1145,6 +1154,7 @@
(unspec:QIHI
[(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_dup 2)))]
UNSPEC_ATOMIC))
+ (set (reg:SI T_REG) (const_int 1))
(clobber (reg:SI R0_REG))
(clobber (match_scratch:SI 3 "=&r"))
(clobber (match_scratch:SI 4 "=1"))]
@@ -1353,7 +1363,7 @@
" ldc r0,sr" "\n"
" mov.b @%0,r0" "\n"
" mov.b %1,@%0" "\n"
- " stc %2,sr" "\n"
+ " ldc %2,sr" "\n"
" tst r0,r0";
}
[(set_attr "length" "16")])
diff --git a/gcc-4.9/gcc/config/sparc/leon.md b/gcc-4.9/gcc/config/sparc/leon.md
index 82b6a0d96..ad22e3b59 100644
--- a/gcc-4.9/gcc/config/sparc/leon.md
+++ b/gcc-4.9/gcc/config/sparc/leon.md
@@ -29,11 +29,11 @@
;; Use a double reservation to work around the load pipeline hazard on UT699.
(define_insn_reservation "leon3_load" 1
- (and (eq_attr "cpu" "leon3") (eq_attr "type" "load,sload"))
+ (and (eq_attr "cpu" "leon3,leon3v7") (eq_attr "type" "load,sload"))
"leon_memory*2")
(define_insn_reservation "leon_store" 2
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "store"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "store"))
"leon_memory*2")
;; This describes Gaisler Research's FPU
@@ -44,21 +44,21 @@
(define_cpu_unit "grfpu_ds" "grfpu")
(define_insn_reservation "leon_fp_alu" 4
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fp,fpcmp,fpmul"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fp,fpcmp,fpmul"))
"grfpu_alu, nothing*3")
(define_insn_reservation "leon_fp_divs" 16
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivs"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivs"))
"grfpu_ds*14, nothing*2")
(define_insn_reservation "leon_fp_divd" 17
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivd"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivd"))
"grfpu_ds*15, nothing*2")
(define_insn_reservation "leon_fp_sqrts" 24
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrts"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrts"))
"grfpu_ds*22, nothing*2")
(define_insn_reservation "leon_fp_sqrtd" 25
- (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrtd"))
+ (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrtd"))
"grfpu_ds*23, nothing*2")
diff --git a/gcc-4.9/gcc/config/sparc/linux.h b/gcc-4.9/gcc/config/sparc/linux.h
index c54ba2cb5..c40bb0b78 100644
--- a/gcc-4.9/gcc/config/sparc/linux.h
+++ b/gcc-4.9/gcc/config/sparc/linux.h
@@ -147,12 +147,6 @@ do { \
/* Static stack checking is supported by means of probes. */
#define STACK_CHECK_STATIC_BUILTIN 1
-/* Linux currently uses RMO in uniprocessor mode, which is equivalent to
- TMO, and TMO in multiprocessor mode. But they reserve the right to
- change their minds. */
-#undef SPARC_RELAXED_ORDERING
-#define SPARC_RELAXED_ORDERING true
-
#undef NEED_INDICATE_EXEC_STACK
#define NEED_INDICATE_EXEC_STACK 1
diff --git a/gcc-4.9/gcc/config/sparc/linux64.h b/gcc-4.9/gcc/config/sparc/linux64.h
index f00fb42ff..12bb3780b 100644
--- a/gcc-4.9/gcc/config/sparc/linux64.h
+++ b/gcc-4.9/gcc/config/sparc/linux64.h
@@ -261,12 +261,6 @@ do { \
/* Static stack checking is supported by means of probes. */
#define STACK_CHECK_STATIC_BUILTIN 1
-/* Linux currently uses RMO in uniprocessor mode, which is equivalent to
- TMO, and TMO in multiprocessor mode. But they reserve the right to
- change their minds. */
-#undef SPARC_RELAXED_ORDERING
-#define SPARC_RELAXED_ORDERING true
-
#undef NEED_INDICATE_EXEC_STACK
#define NEED_INDICATE_EXEC_STACK 1
diff --git a/gcc-4.9/gcc/config/sparc/sparc-opts.h b/gcc-4.9/gcc/config/sparc/sparc-opts.h
index 13b375ae1..26017edc0 100644
--- a/gcc-4.9/gcc/config/sparc/sparc-opts.h
+++ b/gcc-4.9/gcc/config/sparc/sparc-opts.h
@@ -31,6 +31,7 @@ enum processor_type {
PROCESSOR_HYPERSPARC,
PROCESSOR_LEON,
PROCESSOR_LEON3,
+ PROCESSOR_LEON3V7,
PROCESSOR_SPARCLITE,
PROCESSOR_F930,
PROCESSOR_F934,
diff --git a/gcc-4.9/gcc/config/sparc/sparc.c b/gcc-4.9/gcc/config/sparc/sparc.c
index 5b00cca47..f7fc957b4 100644
--- a/gcc-4.9/gcc/config/sparc/sparc.c
+++ b/gcc-4.9/gcc/config/sparc/sparc.c
@@ -786,9 +786,6 @@ char sparc_hard_reg_printed[8];
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif
-#undef TARGET_RELAXED_ORDERING
-#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING
-
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override
@@ -1246,6 +1243,7 @@ sparc_option_override (void)
{ TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
{ TARGET_CPU_leon, PROCESSOR_LEON },
{ TARGET_CPU_leon3, PROCESSOR_LEON3 },
+ { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
{ TARGET_CPU_sparclite, PROCESSOR_F930 },
{ TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
{ TARGET_CPU_sparclet, PROCESSOR_TSC701 },
@@ -1274,6 +1272,7 @@ sparc_option_override (void)
{ "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
{ "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
{ "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
+ { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
{ "sparclite", MASK_ISA, MASK_SPARCLITE },
/* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
{ "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
@@ -1526,6 +1525,7 @@ sparc_option_override (void)
sparc_costs = &leon_costs;
break;
case PROCESSOR_LEON3:
+ case PROCESSOR_LEON3V7:
sparc_costs = &leon3_costs;
break;
case PROCESSOR_SPARCLET:
@@ -6801,28 +6801,30 @@ function_arg_union_value (int size, enum machine_mode mode, int slotno,
}
/* Used by function_arg and sparc_function_value_1 to implement the conventions
- for passing and returning large (BLKmode) vectors.
+ for passing and returning BLKmode vectors.
Return an expression valid as a return value for the FUNCTION_ARG
and TARGET_FUNCTION_VALUE.
- SIZE is the size in bytes of the vector (at least 8 bytes).
+ SIZE is the size in bytes of the vector.
REGNO is the FP hard register the vector will be passed in. */
static rtx
function_arg_vector_value (int size, int regno)
{
- int i, nregs = size / 8;
- rtx regs;
-
- regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
+ const int nregs = MAX (1, size / 8);
+ rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
- for (i = 0; i < nregs; i++)
- {
+ if (size < 8)
+ XVECEXP (regs, 0, 0)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode, regno),
+ const0_rtx);
+ else
+ for (int i = 0; i < nregs; i++)
XVECEXP (regs, 0, i)
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (DImode, regno + 2*i),
GEN_INT (i*8));
- }
return regs;
}
@@ -6868,10 +6870,9 @@ sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode,
|| (TARGET_ARCH64 && size <= 16));
if (mode == BLKmode)
- return function_arg_vector_value (size,
- SPARC_FP_ARG_FIRST + 2*slotno);
- else
- mclass = MODE_FLOAT;
+ return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
+
+ mclass = MODE_FLOAT;
}
if (TARGET_ARCH32)
@@ -7315,10 +7316,9 @@ sparc_function_value_1 (const_tree type, enum machine_mode mode,
|| (TARGET_ARCH64 && size <= 32));
if (mode == BLKmode)
- return function_arg_vector_value (size,
- SPARC_FP_ARG_FIRST);
- else
- mclass = MODE_FLOAT;
+ return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
+
+ mclass = MODE_FLOAT;
}
if (TARGET_ARCH64 && type)
diff --git a/gcc-4.9/gcc/config/sparc/sparc.h b/gcc-4.9/gcc/config/sparc/sparc.h
index dd2b5ad9c..87f1d82d6 100644
--- a/gcc-4.9/gcc/config/sparc/sparc.h
+++ b/gcc-4.9/gcc/config/sparc/sparc.h
@@ -106,17 +106,6 @@ extern enum cmodel sparc_cmodel;
#define SPARC_DEFAULT_CMODEL CM_32
-/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO)
- which requires the following macro to be true if enabled. Prior to V9,
- there are no instructions to even talk about memory synchronization.
- Note that the UltraSPARC III processors don't implement RMO, unlike the
- UltraSPARC II processors. Niagara, Niagara-2, and Niagara-3 do not
- implement RMO either.
-
- Default to false; for example, Solaris never enables RMO, only ever uses
- total memory ordering (TMO). */
-#define SPARC_RELAXED_ORDERING false
-
/* Do not use the .note.GNU-stack convention by default. */
#define NEED_INDICATE_EXEC_STACK 0
@@ -137,21 +126,22 @@ extern enum cmodel sparc_cmodel;
#define TARGET_CPU_hypersparc 3
#define TARGET_CPU_leon 4
#define TARGET_CPU_leon3 5
-#define TARGET_CPU_sparclite 6
-#define TARGET_CPU_f930 6 /* alias */
-#define TARGET_CPU_f934 6 /* alias */
-#define TARGET_CPU_sparclite86x 7
-#define TARGET_CPU_sparclet 8
-#define TARGET_CPU_tsc701 8 /* alias */
-#define TARGET_CPU_v9 9 /* generic v9 implementation */
-#define TARGET_CPU_sparcv9 9 /* alias */
-#define TARGET_CPU_sparc64 9 /* alias */
-#define TARGET_CPU_ultrasparc 10
-#define TARGET_CPU_ultrasparc3 11
-#define TARGET_CPU_niagara 12
-#define TARGET_CPU_niagara2 13
-#define TARGET_CPU_niagara3 14
-#define TARGET_CPU_niagara4 15
+#define TARGET_CPU_leon3v7 6
+#define TARGET_CPU_sparclite 7
+#define TARGET_CPU_f930 7 /* alias */
+#define TARGET_CPU_f934 7 /* alias */
+#define TARGET_CPU_sparclite86x 8
+#define TARGET_CPU_sparclet 9
+#define TARGET_CPU_tsc701 9 /* alias */
+#define TARGET_CPU_v9 10 /* generic v9 implementation */
+#define TARGET_CPU_sparcv9 10 /* alias */
+#define TARGET_CPU_sparc64 10 /* alias */
+#define TARGET_CPU_ultrasparc 11
+#define TARGET_CPU_ultrasparc3 12
+#define TARGET_CPU_niagara 13
+#define TARGET_CPU_niagara2 14
+#define TARGET_CPU_niagara3 15
+#define TARGET_CPU_niagara4 16
#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
|| TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
@@ -239,6 +229,11 @@ extern enum cmodel sparc_cmodel;
#define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG
#endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_leon3v7
+#define CPP_CPU32_DEFAULT_SPEC "-D__leon__"
+#define ASM_CPU32_DEFAULT_SPEC AS_LEONV7_FLAG
+#endif
+
#endif
#if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC)
@@ -285,6 +280,7 @@ extern enum cmodel sparc_cmodel;
%{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \
%{mcpu=leon:-D__leon__ -D__sparc_v8__} \
%{mcpu=leon3:-D__leon__ -D__sparc_v8__} \
+%{mcpu=leon3v7:-D__leon__} \
%{mcpu=v9:-D__sparc_v9__} \
%{mcpu=ultrasparc:-D__sparc_v9__} \
%{mcpu=ultrasparc3:-D__sparc_v9__} \
@@ -334,6 +330,7 @@ extern enum cmodel sparc_cmodel;
%{mcpu=hypersparc:-Av8} \
%{mcpu=leon:" AS_LEON_FLAG "} \
%{mcpu=leon3:" AS_LEON_FLAG "} \
+%{mcpu=leon3v7:" AS_LEONV7_FLAG "} \
%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \
%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
@@ -1760,8 +1757,10 @@ extern int sparc_indent_opcode;
#ifdef HAVE_AS_LEON
#define AS_LEON_FLAG "-Aleon"
+#define AS_LEONV7_FLAG "-Aleon"
#else
#define AS_LEON_FLAG "-Av8"
+#define AS_LEONV7_FLAG "-Av7"
#endif
/* We use gcc _mcount for profiling. */
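Net effect of the sparc.h changes for -mcpu=leon3v7: the preprocessor spec defines only __leon__ (no __sparc_v8__, since this LEON3 variant is a SPARC V7 core), and the assembler spec passes AS_LEONV7_FLAG, i.e. -Aleon when the assembler understands it and -Av7 otherwise. A minimal sketch of code keying off those macros (the feature macro name below is made up for illustration, not part of the patch):

  /* Among LEON builds, the absence of __sparc_v8__ now indicates the V7 variant.  */
  #if defined (__leon__) && !defined (__sparc_v8__)
  #  define BUILD_FOR_LEON_V7 1   /* hypothetical feature macro */
  #else
  #  define BUILD_FOR_LEON_V7 0
  #endif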
diff --git a/gcc-4.9/gcc/config/sparc/sparc.md b/gcc-4.9/gcc/config/sparc/sparc.md
index 76c331597..954c297fd 100644
--- a/gcc-4.9/gcc/config/sparc/sparc.md
+++ b/gcc-4.9/gcc/config/sparc/sparc.md
@@ -221,6 +221,7 @@
hypersparc,
leon,
leon3,
+ leon3v7,
sparclite,
f930,
f934,
diff --git a/gcc-4.9/gcc/config/sparc/sparc.opt b/gcc-4.9/gcc/config/sparc/sparc.opt
index 64e40955a..3cd2b603a 100644
--- a/gcc-4.9/gcc/config/sparc/sparc.opt
+++ b/gcc-4.9/gcc/config/sparc/sparc.opt
@@ -153,6 +153,9 @@ EnumValue
Enum(sparc_processor_type) String(leon3) Value(PROCESSOR_LEON3)
EnumValue
+Enum(sparc_processor_type) String(leon3v7) Value(PROCESSOR_LEON3V7)
+
+EnumValue
Enum(sparc_processor_type) String(sparclite) Value(PROCESSOR_SPARCLITE)
EnumValue
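With the PROCESSOR_LEON3V7 EnumValue wired up, the driver accepts the new CPU name like any other -mcpu= value; an illustrative invocation (the exact RTEMS target triplet is an assumption, not taken from this change):

  sparc-rtems-gcc -O2 -mcpu=leon3v7 -c foo.c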
diff --git a/gcc-4.9/gcc/config/sparc/t-rtems b/gcc-4.9/gcc/config/sparc/t-rtems
index 86a230261..ae7a33ab4 100644
--- a/gcc-4.9/gcc/config/sparc/t-rtems
+++ b/gcc-4.9/gcc/config/sparc/t-rtems
@@ -17,6 +17,15 @@
# <http://www.gnu.org/licenses/>.
#
-MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3
-MULTILIB_DIRNAMES = soft v8 leon3
+MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3/mcpu=leon3v7 muser-mode
+MULTILIB_DIRNAMES = soft v8 leon3 leon3v7 user-mode
MULTILIB_MATCHES = msoft-float=mno-fpu
+
+MULTILIB_EXCEPTIONS = muser-mode
+MULTILIB_EXCEPTIONS += mcpu=leon3
+MULTILIB_EXCEPTIONS += mcpu=leon3v7
+MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3
+MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3v7
+MULTILIB_EXCEPTIONS += msoft-float/muser-mode
+MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/muser-mode
+MULTILIB_EXCEPTIONS += mcpu=v8/muser-mode
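Expanding the option matrix by hand, the eight exception patterns should trim the 2 x 4 x 2 = 16 combinations of {msoft-float} x {default, v8, leon3, leon3v7} x {muser-mode} down to eight built multilibs; a sketch of the surviving variants (derived from the fragment above, not from build output):

  #   .                        default: hard float, default CPU
  #   soft                     msoft-float
  #   v8                       mcpu=v8
  #   soft/v8                  msoft-float mcpu=v8
  #   leon3/user-mode          mcpu=leon3 muser-mode
  #   leon3v7/user-mode        mcpu=leon3v7 muser-mode
  #   soft/leon3/user-mode     msoft-float mcpu=leon3 muser-mode
  #   soft/leon3v7/user-mode   msoft-float mcpu=leon3v7 muser-mode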