From 1d9fec7937f45dde5e04cac966a2d9a12f2fc15a Mon Sep 17 00:00:00 2001 From: Yiran Wang Date: Tue, 23 Jun 2015 15:33:17 -0700 Subject: Synchronize with google/gcc-4_9 to r224707 (from r214835) Change-Id: I3d6f06fc613c8f8b6a82143dc44b7338483aac5d --- gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 154 +++ gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h | 8 + gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 11 +- gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 1 + gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 39 +- gcc-4.9/gcc/config/aarch64/aarch64.c | 12 +- gcc-4.9/gcc/config/aarch64/aarch64.md | 55 +- gcc-4.9/gcc/config/aarch64/arm_neon.h | 494 +++++---- gcc-4.9/gcc/config/alpha/alpha.c | 6 - gcc-4.9/gcc/config/alpha/alpha.md | 63 +- gcc-4.9/gcc/config/arm/arm-protos.h | 3 + gcc-4.9/gcc/config/arm/arm.c | 88 +- gcc-4.9/gcc/config/arm/arm.h | 11 +- gcc-4.9/gcc/config/arm/arm.md | 64 +- gcc-4.9/gcc/config/arm/constraints.md | 11 +- gcc-4.9/gcc/config/arm/linux-grte.h | 27 - gcc-4.9/gcc/config/arm/t-aprofile | 3 + gcc-4.9/gcc/config/avr/avr-dimode.md | 7 + gcc-4.9/gcc/config/avr/avr-fixed.md | 41 +- gcc-4.9/gcc/config/avr/avr-protos.h | 9 + gcc-4.9/gcc/config/avr/avr.c | 109 ++ gcc-4.9/gcc/config/avr/avr.md | 155 ++- gcc-4.9/gcc/config/darwin-c.c | 25 +- gcc-4.9/gcc/config/darwin-driver.c | 35 +- gcc-4.9/gcc/config/gnu-user.h | 7 +- gcc-4.9/gcc/config/i386/i386.c | 349 ++++-- gcc-4.9/gcc/config/i386/i386.md | 62 +- gcc-4.9/gcc/config/i386/i386.opt | 8 - gcc-4.9/gcc/config/i386/linux.h | 15 - gcc-4.9/gcc/config/i386/linux64.h | 9 - gcc-4.9/gcc/config/i386/mmx.md | 36 +- gcc-4.9/gcc/config/i386/x86-tune.def | 8 +- gcc-4.9/gcc/config/ia64/ia64.c | 5 - gcc-4.9/gcc/config/linux-grte.h | 41 - gcc-4.9/gcc/config/linux.c | 2 - gcc-4.9/gcc/config/msp430/msp430.md | 6 +- gcc-4.9/gcc/config/nios2/nios2.c | 15 + gcc-4.9/gcc/config/pa/pa.c | 26 +- gcc-4.9/gcc/config/pa/pa.md | 25 +- gcc-4.9/gcc/config/pa/predicates.md | 21 +- gcc-4.9/gcc/config/rs6000/altivec.h | 6 + gcc-4.9/gcc/config/rs6000/altivec.md | 56 +- gcc-4.9/gcc/config/rs6000/darwin.h | 6 +- gcc-4.9/gcc/config/rs6000/linux-grte.h | 41 - gcc-4.9/gcc/config/rs6000/predicates.md | 75 +- gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 10 + gcc-4.9/gcc/config/rs6000/rs6000-c.c | 147 ++- gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 5 +- gcc-4.9/gcc/config/rs6000/rs6000.c | 1367 ++++++++++++++++++++++-- gcc-4.9/gcc/config/rs6000/rs6000.md | 88 +- gcc-4.9/gcc/config/rs6000/rs6000.opt | 4 + gcc-4.9/gcc/config/rs6000/rtems.h | 3 +- gcc-4.9/gcc/config/rs6000/sysv4.h | 6 - gcc-4.9/gcc/config/rs6000/t-rtems | 73 +- gcc-4.9/gcc/config/rs6000/vsx.md | 227 +++- gcc-4.9/gcc/config/rs6000/xcoff.h | 7 +- gcc-4.9/gcc/config/rx/rx.c | 2 +- gcc-4.9/gcc/config/rx/rx.h | 6 +- gcc-4.9/gcc/config/s390/s390.c | 19 +- gcc-4.9/gcc/config/s390/s390.md | 2 +- gcc-4.9/gcc/config/sh/predicates.md | 2 +- gcc-4.9/gcc/config/sh/sh-mem.cc | 24 +- gcc-4.9/gcc/config/sh/sh.c | 2 +- gcc-4.9/gcc/config/sh/sh.md | 47 +- gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 24 +- gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 49 +- gcc-4.9/gcc/config/sh/sync.md | 18 +- gcc-4.9/gcc/config/sparc/leon.md | 14 +- gcc-4.9/gcc/config/sparc/linux.h | 6 - gcc-4.9/gcc/config/sparc/linux64.h | 6 - gcc-4.9/gcc/config/sparc/sparc-opts.h | 1 + gcc-4.9/gcc/config/sparc/sparc.c | 40 +- gcc-4.9/gcc/config/sparc/sparc.h | 51 +- gcc-4.9/gcc/config/sparc/sparc.md | 1 + gcc-4.9/gcc/config/sparc/sparc.opt | 3 + gcc-4.9/gcc/config/sparc/t-rtems | 13 +-
76 files changed, 3459 insertions(+), 1058 deletions(-)
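The headline aarch64 change in this sync is the addition of the __builtin_aarch64_get_fpcr/__builtin_aarch64_set_fpcr and __builtin_aarch64_get_fpsr/__builtin_aarch64_set_fpsr builtins, wired into the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook so that C11 atomic compound assignment on floating-point types can save, neuter, and restore the FP environment. As a rough orientation sketch (not part of the patch, and compilable only with a GCC that provides these builtins), the "hold" step that aarch64_atomic_assign_expand_fenv emits below corresponds to this user-level C, using the constants the patch defines:

/* Sketch of the fenv "hold" sequence; constants mirror the patch.  */
#define AARCH64_FE_ALL_EXCEPT   0x1f  /* invalid|divbyzero|overflow|underflow|inexact */
#define AARCH64_FE_EXCEPT_SHIFT 8     /* trap-enable bits sit at FPCR[12:8] */

static inline void
aarch64_fenv_hold (unsigned *fpcr_save, unsigned *fpsr_save)
{
  *fpcr_save = __builtin_aarch64_get_fpcr ();  /* control: trap enables */
  *fpsr_save = __builtin_aarch64_get_fpsr ();  /* status: sticky flags */
  /* Set all exceptions to non-stop mode...  */
  __builtin_aarch64_set_fpcr (*fpcr_save
                              & ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  /* ...and clear any already-raised exception flags.  */
  __builtin_aarch64_set_fpsr (*fpsr_save & ~AARCH64_FE_ALL_EXCEPT);
}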
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c index 55cfe0ab2..a5af874bf 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { enum aarch64_builtins { AARCH64_BUILTIN_MIN, + + AARCH64_BUILTIN_GET_FPCR, + AARCH64_BUILTIN_SET_FPCR, + AARCH64_BUILTIN_GET_FPSR, + AARCH64_BUILTIN_SET_FPSR, + AARCH64_SIMD_BUILTIN_BASE, #include "aarch64-simd-builtins.def" AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE @@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void) void aarch64_init_builtins (void) { + tree ftype_set_fpr + = build_function_type_list (void_type_node, unsigned_type_node, NULL); + tree ftype_get_fpr + = build_function_type_list (unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] + = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] + = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] + = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] + = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + if (TARGET_SIMD) aarch64_init_simd_builtins (); } @@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp, { tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + rtx pat, op0; + tree arg0; + + switch (fcode) + { + case AARCH64_BUILTIN_GET_FPCR: + case AARCH64_BUILTIN_SET_FPCR: + case AARCH64_BUILTIN_GET_FPSR: + case AARCH64_BUILTIN_SET_FPSR: + if ((fcode == AARCH64_BUILTIN_GET_FPCR) + || (fcode == AARCH64_BUILTIN_GET_FPSR)) + { + icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? + CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; + target = gen_reg_rtx (SImode); + pat = GEN_FCN (icode) (target); + } + else + { + target = NULL_RTX; + icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + pat = GEN_FCN (icode) (op0); + } + emit_insn (pat); + return target; + } if (fcode >= AARCH64_SIMD_BUILTIN_BASE) return aarch64_simd_expand_builtin (fcode, exp, target); @@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) return changed; } +void +aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned AARCH64_FE_INVALID = 1; + const unsigned AARCH64_FE_DIVBYZERO = 2; + const unsigned AARCH64_FE_OVERFLOW = 4; + const unsigned AARCH64_FE_UNDERFLOW = 8; + const unsigned AARCH64_FE_INEXACT = 16; + const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID + | AARCH64_FE_DIVBYZERO + | AARCH64_FE_OVERFLOW + | AARCH64_FE_UNDERFLOW + | AARCH64_FE_INEXACT); + const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; + tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; + tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; + tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; + tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; + + /* Generate the equivalent of: + unsigned int fenv_cr; + fenv_cr = __builtin_aarch64_get_fpcr (); + + unsigned int fenv_sr; + fenv_sr = __builtin_aarch64_get_fpsr (); + + Now set all exceptions to non-stop: + unsigned int mask_cr + = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); + unsigned int masked_cr; + masked_cr = fenv_cr & mask_cr; + + And clear all exception flags: + unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT; + unsigned int masked_sr; + masked_sr = fenv_sr & mask_sr; + + __builtin_aarch64_set_fpcr (masked_cr); + __builtin_aarch64_set_fpsr (masked_sr); */ + + fenv_cr = create_tmp_var (unsigned_type_node, NULL); + fenv_sr = create_tmp_var (unsigned_type_node, NULL); + + get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; + set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; + get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; + set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; + + mask_cr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); + mask_sr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT)); + + ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_cr, build_call_expr (get_fpcr, 0)); + ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_sr, build_call_expr (get_fpsr, 0)); + + masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); + masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); + + hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); + hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, + hold_fnclex_sr); + masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, + masked_fenv_sr); + ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); + + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __builtin_aarch64_set_fpsr (masked_fenv_sr); */ + + *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + /* Generate the equivalent of: + unsigned int new_fenv_var; + new_fenv_var = __builtin_aarch64_get_fpsr (); + + __builtin_aarch64_set_fpsr (fenv_sr); + +
__atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var (unsigned_type_node, NULL); + reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, + new_fenv_var, build_call_expr (get_fpsr, 0)); + restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, + reload_fenv, restore_fnenv), update_call); +} + + #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT #undef BUILTIN_DX diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h index eafdd551d..bb5c88d53 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -33,6 +33,14 @@ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" #endif +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + #ifndef LINK_SPEC #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h index b77becd23..651abe3ce 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -50,7 +50,16 @@ #define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 CA53_ERR_835769_SPEC -#define LINK_SPEC LINUX_TARGET_LINK_SPEC +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ + CA53_ERR_835769_SPEC #define TARGET_OS_CPP_BUILTINS() \ do \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h index bef58bf71..8b0a70538 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -291,4 +291,5 @@ extern bool aarch64_madd_needs_nop (rtx); extern void aarch64_final_prescan_insn (rtx); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 851e77a02..7626ed31f 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,6 +934,41 @@ [(set_attr "type" "neon_minmax")] ) +(define_expand "<su><maxmin>v2di3" + [(parallel [ + (set (match_operand:V2DI 0 "register_operand" "") + (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") + (match_operand:V2DI 2 "register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_SIMD" +{ + enum rtx_code cmp_operator; + rtx cmp_fmt; + + switch (<CODE>) + { + case UMIN: + cmp_operator = LTU; + break; + case SMIN: + cmp_operator = LT; + break; + case UMAX: + cmp_operator = GTU; + break; + case SMAX: + cmp_operator = GT; + break; + default: + gcc_unreachable (); + } + + cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); + emit_insn
(gen_aarch64_vcond_internalv2div2di (operands[0], operands[1], + operands[2], cmp_fmt, operands[1], operands[2])); + DONE; +}) + ;; vec_concat gives a new vector with the low elements from operand 1, and ;; the high elements from operand 2. That is to say, given op1 = { a, b } ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. @@ -4565,8 +4600,8 @@ }) (define_insn "*aarch64_simd_ld1r<mode>" - [(set (match_operand:VALLDI 0 "register_operand" "=w") - (vec_duplicate:VALLDI + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] "TARGET_SIMD" "ld1r\\t{%0.<Vtype>}, %1" diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 2ff6c7cb8..029c54ca3 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -3874,7 +3874,7 @@ aarch64_print_operand_address (FILE *f, rtx x) switch (GET_CODE (x)) { case PRE_INC: - asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case POST_INC: @@ -5152,7 +5152,6 @@ aarch64_parse_cpu (void) if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) { selected_cpu = cpu; - selected_tune = cpu; aarch64_isa_flags = selected_cpu->flags; if (ext != NULL) @@ -5248,9 +5247,8 @@ aarch64_override_options (void) gcc_assert (selected_cpu); - /* The selected cpu may be an architecture, so lookup tuning by core ID. */ if (!selected_tune) - selected_tune = &all_cores[selected_cpu->core]; + selected_tune = selected_cpu; aarch64_tune_flags = selected_tune->flags; aarch64_tune = selected_tune->core; @@ -7194,7 +7192,7 @@ aarch64_expand_vector_init (rtx target, rtx vals) x = XVECEXP (vals, 0, 0); if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) n_var = 1, one_var = 0; - + for (i = 1; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); @@ -8642,6 +8640,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from, #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ aarch64_autovectorize_vector_sizes +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ + aarch64_atomic_assign_expand_fenv + /* Section anchor support. */ #undef TARGET_MIN_ANCHOR_OFFSET diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index 319f80591..05f5e1b35 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -107,6 +107,10 @@ (define_c_enum "unspecv" [ UNSPECV_EH_RETURN ; Represent EH_RETURN + UNSPECV_GET_FPCR ; Represent fetch of FPCR content. + UNSPECV_SET_FPCR ; Represent assign of FPCR content. + UNSPECV_GET_FPSR ; Represent fetch of FPSR content. + UNSPECV_SET_FPSR ; Represent assign of FPSR content.
] ) @@ -1102,7 +1106,7 @@ add\\t%x0, %x1, %x2 sub\\t%x0, %x1, #%n2 add\\t%d0, %d1, %d2" - [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") + [(set_attr "type" "alu_imm,alu_reg,alu_imm,neon_add") (set_attr "simd" "*,*,*,yes")] ) @@ -2782,7 +2786,7 @@ ;; Logical right shift using SISD or Integer instruction (define_insn "*aarch64_lshr_sisd_or_int_<mode>3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "w,w,r") (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] @@ -2801,11 +2805,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_USHL))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) (define_split @@ -2814,11 +2820,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_USHL_2S))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) ;; Arithmetic right shift using SISD or Integer instruction @@ -3642,6 +3650,37 @@ DONE; }) +;; Write Floating-point Control Register. +(define_insn "set_fpcr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] + "" + "msr\\tfpcr, %0\;isb" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Control Register. +(define_insn "get_fpcr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] + "" + "mrs\\t%0, fpcr" + [(set_attr "type" "mrs")]) + +;; Write Floating-point Status Register. +(define_insn "set_fpsr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] + "" + "msr\\tfpsr, %0" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Status Register.
+(define_insn "get_fpsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] + "" + "mrs\\t%0, fpsr" + [(set_attr "type" "mrs")]) + + ;; AdvSIMD Stuff (include "aarch64-simd.md") diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index c01669b2c..ae0ae9c1b 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -39,9 +39,6 @@ typedef __builtin_aarch64_simd_hi int16x4_t typedef __builtin_aarch64_simd_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef int64_t int64x1_t; -typedef int32_t int32x1_t; -typedef int16_t int16x1_t; -typedef int8_t int8x1_t; typedef double float64x1_t; typedef __builtin_aarch64_simd_sf float32x2_t __attribute__ ((__vector_size__ (8))); @@ -56,9 +53,6 @@ typedef __builtin_aarch64_simd_uhi uint16x4_t typedef __builtin_aarch64_simd_usi uint32x2_t __attribute__ ((__vector_size__ (8))); typedef uint64_t uint64x1_t; -typedef uint32_t uint32x1_t; -typedef uint16_t uint16x1_t; -typedef uint8_t uint8x1_t; typedef __builtin_aarch64_simd_qi int8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_hi int16x8_t @@ -8400,7 +8394,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s16(a, b, c) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ + int16x4_t b_ = (b); \ int16x8_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8413,7 +8407,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s32(a, b, c) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ + int32x2_t b_ = (b); \ int32x4_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -8426,7 +8420,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ + uint16x4_t b_ = (b); \ uint16x8_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8439,7 +8433,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u32(a, b, c) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ + uint32x2_t b_ = (b); \ uint32x4_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -20925,42 +20919,42 @@ vqabsq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqabsb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqabsb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqabsqi (__a); + return (int8_t) __builtin_aarch64_sqabsqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqabsh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqabsh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqabshi (__a); + return (int16_t) __builtin_aarch64_sqabshi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqabss_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqabss_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqabssi (__a); + return (int32_t) __builtin_aarch64_sqabssi (__a); } /* vqadd */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqaddb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqaddb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) 
__builtin_aarch64_sqaddqi (__a, __b); + return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqaddh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqaddh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b); + return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqadds_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqadds_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b); + return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -20969,22 +20963,22 @@ vqaddd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqaddb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqaddh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqadds_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqadds_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21095,26 +21089,26 @@ vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlalhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlalsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ 
-21221,26 +21215,26 @@ vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlslhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlslsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21271,26 +21265,26 @@ vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21393,26 +21387,26 @@ vqdmull_n_s32 (int32x2_t __a, int32_t __b) return __builtin_aarch64_sqdmull_nv2si (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_s16 (int16_t __a, int16_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, 
const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_s32 (int32x1_t __a, int32x1_t __b) +vqdmulls_s32 (int32_t __a, int32_t __b) { return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21455,40 +21449,40 @@ vqmovn_u64 (uint64x2_t __a) return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovnh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovnh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovnhi (__a); + return (int8_t) __builtin_aarch64_sqmovnhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovnsi (__a); + return (int16_t) __builtin_aarch64_sqmovnsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovnd_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovndi (__a); + return (int32_t) __builtin_aarch64_sqmovndi (__a); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqmovnh_u16 (uint16x1_t __a) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqmovnh_u16 (uint16_t __a) { - return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a); + return (uint8_t) __builtin_aarch64_uqmovnhi (__a); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqmovns_u32 (uint32x1_t __a) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqmovns_u32 (uint32_t __a) { - return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a); + return (uint16_t) __builtin_aarch64_uqmovnsi (__a); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqmovnd_u64 (uint64x1_t __a) { - return (uint32x1_t) __builtin_aarch64_uqmovndi (__a); + return (uint32_t) __builtin_aarch64_uqmovndi (__a); } /* vqmovun */ @@ -21511,22 +21505,22 @@ vqmovun_s64 (int64x2_t __a) return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovunh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovunh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovunhi (__a); + return (int8_t) __builtin_aarch64_sqmovunhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovuns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovuns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovunsi (__a); + return (int16_t) __builtin_aarch64_sqmovunsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) 
+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovund_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovundi (__a); + return (int32_t) __builtin_aarch64_sqmovundi (__a); } /* vqneg */ @@ -21537,22 +21531,22 @@ vqnegq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqnegb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqnegb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqnegqi (__a); + return (int8_t) __builtin_aarch64_sqnegqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqnegh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqnegh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqneghi (__a); + return (int16_t) __builtin_aarch64_sqneghi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqnegs_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqnegs_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqnegsi (__a); + return (int32_t) __builtin_aarch64_sqnegsi (__a); } /* vqrdmulh */ @@ -21581,26 +21575,26 @@ vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } @@ -21703,20 +21697,20 @@ vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqrshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshlh_s16 (int16_t __a, int16_t __b) { return 
__builtin_aarch64_sqrshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqrshlsi (__a, __b); } @@ -21727,22 +21721,22 @@ vqrshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqrshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqrshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21789,40 +21783,40 @@ vqrshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); + return (uint16_t) 
__builtin_aarch64_uqrshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqrshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); } /* vqrshrun */ @@ -21845,22 +21839,22 @@ vqrshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); } /* vqshl */ @@ -21961,20 +21955,20 @@ vqshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_s16 (int16_t __a, int16_t __b) { return __builtin_aarch64_sqshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqshlsi (__a, __b); } @@ -21985,22 +21979,22 @@ vqshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_u32 (uint32x1_t __a, uint32x1_t __b) 
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22105,22 +22099,22 @@ vqshlq_n_u64 (uint64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22129,22 +22123,22 @@ vqshld_n_s64 (int64x1_t __a, const int __b) return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_u8 (uint8x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_n_u8 (uint8_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_n_u16 (uint16_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_n_u32 (uint32_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22203,22 +22197,22 @@ vqshluq_n_s64 (int64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlub_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlub_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshluh_n_s16 (int16x1_t __a, const int __b) 
+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshluh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshlus_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshlus_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22265,40 +22259,40 @@ vqshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b); } /* vqshrun */ @@ -22321,42 +22315,42 @@ vqshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ 
((__always_inline__)) -vqshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); } /* vqsub */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqsubb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqsubb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b); + return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqsubh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqsubh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b); + return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqsubs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqsubs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b); + return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22365,22 +22359,22 @@ vqsubd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqsubb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqsubh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqsubs_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -23596,22 +23590,22 @@ vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) (int64x2_t) __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vsqaddb_u8 (uint8_t __a, int8_t __b) { - return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); + return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); } -__extension__ static 
__inline uint16x1_t __attribute__ ((__always_inline__)) -vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vsqaddh_u16 (uint16_t __a, int16_t __b) { - return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); + return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vsqadds_u32 (uint32x1_t __a, int32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsqadds_u32 (uint32_t __a, int32_t __b) { - return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); + return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -25251,22 +25245,22 @@ vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8_t __a, uint8_t __b) { - return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); + return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16_t __a, uint16_t __b) { - return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); + return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32_t __a, uint32_t __b) { - return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); + return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index d5c7908be..19ae3665a 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -9918,12 +9918,6 @@ alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start -/* The Alpha architecture does not require sequential consistency. See - http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html - for an example of how it can be violated in practice. 
*/ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE alpha_option_override diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md index 795b4df3f..1179d572d 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.md +++ b/gcc-4.9/gcc/config/alpha/alpha.md @@ -5984,16 +5984,38 @@ [(set_attr "type" "jsr") (set_attr "length" "*,*,8")]) -(define_insn_and_split "call_value_osf_tlsgd" +(define_int_iterator TLS_CALL + [UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL]) + +(define_int_attr tls + [(UNSPEC_TLSGD_CALL "tlsgd") + (UNSPEC_TLSLDM_CALL "tlsldm")]) + +(define_insn "call_value_osf_<tls>" [(set (match_operand 0) (call (mem:DI (match_operand:DI 1 "symbolic_operand")) (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) (use (reg:DI 29)) (clobber (reg:DI 26))] "HAVE_AS_TLS" - "#" - "&& reload_completed" + "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_<tls>!%2\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6001,10 +6023,9 @@ (parallel [(set (match_dup 0) (call (mem:DI (match_dup 3)) (const_int 0))) - (set (match_dup 5) - (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 5)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6012,19 +6033,18 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) -(define_insn_and_split "call_value_osf_tlsldm" - [(set (match_operand 0) - (call (mem:DI (match_operand:DI 1 "symbolic_operand")) - (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL) - (use (reg:DI 29)) - (clobber (reg:DI 26))] - "HAVE_AS_TLS" - "#" - "&& reload_completed" +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && !peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6035,7 +6055,7 @@ (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6043,8 +6063,7 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) (define_insn "*call_value_osf_1" [(set (match_operand 0) diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h
b/gcc-4.9/gcc/config/arm/arm-protos.h index 13874ee6e..2ac3b3009 100644 --- a/gcc-4.9/gcc/config/arm/arm-protos.h +++ b/gcc-4.9/gcc/config/arm/arm-protos.h @@ -56,6 +56,7 @@ extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, extern int legitimate_pic_operand_p (rtx); extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx legitimize_tls_address (rtx, rtx); +extern bool arm_legitimate_address_p (enum machine_mode, rtx, bool); extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, @@ -294,4 +295,6 @@ extern void arm_emit_eabi_attribute (const char *, int, int); /* Defined in gcc/common/config/arm-common.c. */ extern const char *arm_rewrite_selected_cpu (const char *name); +extern bool arm_is_constant_pool_ref (rtx); + #endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 3c237cb6d..b79bb48b1 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -89,7 +89,6 @@ static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); inline static int thumb1_index_register_rtx_p (rtx, int); -static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); static int thumb_far_jump_used_p (void); static bool thumb_force_lr_save (void); static unsigned arm_size_return_regs (void); @@ -13952,9 +13951,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, HOST_WIDE_INT srcoffset, dstoffset; HOST_WIDE_INT src_autoinc, dst_autoinc; rtx mem, addr; - + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); - + /* Use hard registers if we have aligned source or destination so we can use load/store multiple with contiguous registers. */ if (dst_aligned || src_aligned) @@ -13968,7 +13967,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, src = copy_addr_to_reg (XEXP (srcbase, 0)); srcoffset = dstoffset = 0; - + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. For copying the last bytes we want to subtract this offset again. */ src_autoinc = dst_autoinc = 0; @@ -14022,14 +14021,14 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= block_size_bytes; } - + /* Copy any whole words left (note these aren't interleaved with any subsequent halfword/byte load/stores in the interests of simplicity). */ - + words = remaining / UNITS_PER_WORD; gcc_assert (words < interleave_factor); - + if (src_aligned && words > 1) { emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, @@ -14069,11 +14068,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, } remaining -= words * UNITS_PER_WORD; - + gcc_assert (remaining < 4); - + /* Copy a halfword if necessary. */ - + if (remaining >= 2) { halfword_tmp = gen_reg_rtx (SImode); @@ -14097,11 +14096,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= 2; srcoffset += 2; } - + gcc_assert (remaining < 2); - + /* Copy last byte. */ - + if ((remaining & 1) != 0) { byte_tmp = gen_reg_rtx (SImode); @@ -14122,9 +14121,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining--; srcoffset++; } - + /* Store last halfword if we haven't done so already. 
*/ - + if (halfword_tmp) { addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); @@ -14143,7 +14142,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); dstoffset++; } - + gcc_assert (remaining == 0 && srcoffset == dstoffset); } @@ -14162,7 +14161,7 @@ arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, rtx *loop_mem) { *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); - + /* Although the new mem does not refer to a known location, it does keep up to LENGTH bytes of alignment. */ *loop_mem = change_address (mem, BLKmode, *loop_reg); @@ -14182,14 +14181,14 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, { rtx label, src_reg, dest_reg, final_src, test; HOST_WIDE_INT leftover; - + leftover = length % bytes_per_iter; length -= leftover; - + /* Create registers and memory references for use within the loop. */ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); - + /* Calculate the value that SRC_REG should have after the last iteration of the loop. */ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), @@ -14198,7 +14197,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Emit the start of the loop. */ label = gen_label_rtx (); emit_label (label); - + /* Emit the loop body. */ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, interleave_factor); @@ -14206,11 +14205,11 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Move on to the next block. */ emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); - + /* Emit the loop condition. */ test = gen_rtx_NE (VOIDmode, src_reg, final_src); emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); - + /* Mop up any left-over bytes. */ if (leftover) arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); @@ -14224,7 +14223,7 @@ static int arm_movmemqi_unaligned (rtx *operands) { HOST_WIDE_INT length = INTVAL (operands[2]); - + if (optimize_size) { bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; @@ -14235,7 +14234,7 @@ arm_movmemqi_unaligned (rtx *operands) resulting code can be smaller. */ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; - + if (length > 12) arm_block_move_unaligned_loop (operands[0], operands[1], length, interleave_factor, bytes_per_iter); @@ -14253,7 +14252,7 @@ arm_movmemqi_unaligned (rtx *operands) else arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); } - + return 1; } @@ -28520,7 +28519,11 @@ arm_set_return_address (rtx source, rtx scratch) addr = plus_constant (Pmode, addr, delta); } - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. 
*/ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } } @@ -28572,7 +28575,11 @@ thumb_set_return_address (rtx source, rtx scratch) else addr = plus_constant (Pmode, addr, delta); - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. */ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } else emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); @@ -29828,10 +29835,10 @@ int vfp3_const_double_for_fract_bits (rtx operand) { REAL_VALUE_TYPE r0; - + if (!CONST_DOUBLE_P (operand)) return 0; - + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); if (exact_real_inverse (DFmode, &r0)) { @@ -30825,7 +30832,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) else return false; } - + return true; case ARM_POST_DEC: @@ -30842,10 +30849,10 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) return false; return true; - + default: return false; - + } return false; @@ -30856,7 +30863,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) Additionally, the default expansion code is not available or suitable for post-reload insn splits (this can occur when the register allocator chooses not to do a shift in NEON). - + This function is used in both initial expand and post-reload splits, and handles all kinds of 64-bit shifts. @@ -31109,7 +31116,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); int code_int; - enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? GET_MODE (*op2) : GET_MODE (*op1); gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); @@ -31163,7 +31170,7 @@ arm_asan_shadow_offset (void) /* This is a temporary fix for PR60655. Ideally we need to handle most of these cases in the generic part but - currently we reject minus (..) (sym_ref). We try to + currently we reject minus (..) (sym_ref). We try to ameliorate the case with minus (sym_ref1) (sym_ref2) where they are in the same section. */ @@ -31393,4 +31400,13 @@ arm_load_global_address (rtx symbol, rtx offset_reg, df_insn_rescan (load_insn); } +/* return TRUE if x is a reference to a value in a constant pool */ +extern bool +arm_is_constant_pool_ref (rtx x) +{ + return (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); +} + #include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index ab5167a8b..433a3dd77 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -74,8 +74,8 @@ extern char arm_arch_name[]; builtin_define_with_int_value ( \ "__ARM_SIZEOF_MINIMAL_ENUM", \ flag_short_enums ? 1 : 4); \ - builtin_define_with_int_value ( \ - "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ + wchar_type_node); \ if (TARGET_ARM_ARCH_PROFILE) \ builtin_define_with_int_value ( \ "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ @@ -2139,9 +2139,10 @@ extern int making_const_table; ? reverse_condition_maybe_unordered (code) \ : reverse_condition (code)) -/* The arm5 clz instruction returns 32. 
*/ -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) #define CC_STATUS_INIT \ do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 467f9ce4e..1153a1e34 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -127,9 +127,10 @@ ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for ; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6" ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without -; arm_arch6. This attribute is used to compute attribute "enabled", -; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" +; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is +; used to compute attribute "enabled", use type "any" to enable an +; alternative in all cases. +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -164,6 +165,10 @@ (match_test "TARGET_32BIT && !arm_arch6")) (const_string "yes") + (and (eq_attr "arch" "v6t2") + (match_test "TARGET_32BIT && arm_arch6 && arm_arch_thumb2")) + (const_string "yes") + (and (eq_attr "arch" "avoid_neon_for_64bits") (match_test "TARGET_NEON") (not (match_test "TARGET_PREFER_NEON_64BITS"))) @@ -3631,7 +3636,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && optimize_function_for_size_p (cfun)" + "TARGET_32BIT && optimize_function_for_size_p (cfun) && !arm_restrict_it" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -4374,7 +4379,7 @@ (define_insn "unaligned_loadhis" [(set (match_operand:SI 0 "s_register_operand" "=l,r") (sign_extend:SI - (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,Uh")] UNSPEC_UNALIGNED_LOAD)))] "unaligned_access && TARGET_32BIT" "ldr%(sh%)\t%0, %1\t@ unaligned" @@ -5287,7 +5292,7 @@ (define_insn "*arm_zero_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxth%?\\t%0, %1 @@ -5381,7 +5386,7 @@ (define_insn "*arm_zero_extendqisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxtb%(%)\\t%0, %1 @@ -5615,31 +5620,27 @@ (define_insn "*arm_extendhisi2" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch4 && !arm_arch6" "@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) ;; ??? 
Check Thumb-2 pool range (define_insn "*arm_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_32BIT && arm_arch6" "@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable_short_it" "no")] ) (define_insn "*arm_extendhisi2addsi" @@ -5682,9 +5683,7 @@ "TARGET_ARM && arm_arch4" "ldr%(sb%)\\t%0, %1" [(set_attr "type" "load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "256") - (set_attr "neg_pool_range" "244")] + (set_attr "predicable" "yes")] ) (define_expand "extendqisi2" @@ -5724,9 +5723,7 @@ ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi_v6" @@ -5738,9 +5735,7 @@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" [(set_attr "type" "extend,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi2addsi" @@ -6973,8 +6968,8 @@ ;; Pattern to recognize insn generated default case above (define_insn "*movhi_insn_arch4" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") - (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,n,r,mi"))] "TARGET_ARM && arm_arch4 && (register_operand (operands[0], HImode) @@ -6982,16 +6977,19 @@ "@ mov%?\\t%0, %1\\t%@ movhi mvn%?\\t%0, #%B1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" [(set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,256") - (set_attr "neg_pool_range" "*,*,*,244") + (set_attr "pool_range" "*,*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,*,244") + (set_attr "arch" "*,*,v6t2,*,*") (set_attr_alternative "type" [(if_then_else (match_operand 1 "const_int_operand" "") (const_string "mov_imm" ) (const_string "mov_reg")) (const_string "mvn_imm") + (const_string "mov_imm") (const_string "store1") (const_string "load1")])] ) @@ -10944,10 +10942,16 @@ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), operands[3], operands[4]); enum rtx_code rc = GET_CODE (operands[5]); - operands[6] = gen_rtx_REG (mode, CC_REGNUM); gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); - rc = reverse_condition (rc); + if (REGNO (operands[2]) != REGNO (operands[0])) + rc = reverse_condition (rc); + else + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); } diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md index 85dd116ce..f848664d5 100644 --- a/gcc-4.9/gcc/config/arm/constraints.md +++ b/gcc-4.9/gcc/config/arm/constraints.md @@ -36,7 +36,7 @@ ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: -;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM/Thumb-2 state: Q, Uh, Ut, Uv, Uy, Un, Um, Us ;; in ARM state: Uq ;; in Thumb state: Uu, Uw @@ -348,6 +348,12 @@ An address valid for 
loading/storing register exclusive" (match_operand 0 "mem_noofs_operand")) +(define_memory_constraint "Uh" + "@internal + An address suitable for byte and half-word loads which does not point inside a constant pool" + (and (match_code "mem") + (match_test "arm_legitimate_address_p (GET_MODE (op), XEXP (op, 0), false) && !arm_is_constant_pool_ref (op)"))) + (define_memory_constraint "Ut" "@internal In ARM/Thumb-2 state an address valid for loading/storing opaque structure @@ -394,7 +400,8 @@ (and (match_code "mem") (match_test "TARGET_ARM && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), - SIGN_EXTEND, 0)"))) + SIGN_EXTEND, 0) + && !arm_is_constant_pool_ref (op)"))) (define_memory_constraint "Q" "@internal diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h index 7ee5806b7..e69de29bb 100644 --- a/gcc-4.9/gcc/config/arm/linux-grte.h +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -1,27 +0,0 @@ -/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2011 Free Software Foundation, Inc. - Contributed by Chris Demetriou. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. 
*/ - -#undef SUBSUBTARGET_EXTRA_SPECS -#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile index ff9e2e1b3..86741e6b0 100644 --- a/gcc-4.9/gcc/config/arm/t-aprofile +++ b/gcc-4.9/gcc/config/arm/t-aprofile @@ -88,6 +88,9 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 +# Arch Matches +MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc + # FPU matches MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md index 639810518..56cd30458 100644 --- a/gcc-4.9/gcc/config/avr/avr-dimode.md +++ b/gcc-4.9/gcc/config/avr/avr-dimode.md @@ -68,6 +68,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (DImode == mode @@ -145,6 +146,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -201,6 +203,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -249,6 +252,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -338,6 +342,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (s8_operand (operands[2], VOIDmode)) @@ -424,6 +429,7 @@ { rtx acc_a = gen_rtx_REG (mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); emit_move_insn (acc_a, operands[1]); emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]); emit_insn (gen_3_insn ()); @@ -457,6 +463,7 @@ (clobber (any_extend:SI (match_dup 1)))])] "avr_have_dimode" { + avr_fix_inputs (operands, 1 << 2, regmask (SImode, 22)); emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); emit_insn (gen_mulsidi3_insn()); diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md index 9c8489edd..6763f596e 100644 --- a/gcc-4.9/gcc/config/avr/avr-fixed.md +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -231,7 +231,11 @@ (clobber (reg:HI 24))]) (set (match_operand:QQ 0 "register_operand" "") (reg:QQ 23))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (QQmode, 24)); + }) + (define_expand "muluqq3_nomul" [(set (reg:UQQ 22) @@ -246,7 +250,10 @@ (clobber (reg:HI 22))]) (set (match_operand:UQQ 0 "register_operand" "") (reg:UQQ 25))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (UQQmode, 22)); + }) (define_insn "*mulqq3.call" [(set (reg:QQ 23) @@ -274,7 +281,10 @@ (clobber (reg:HI 22))]) (set (match_operand:ALL2QA 0 "register_operand" "") (reg:ALL2QA 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 18)); + }) ;; "*mulhq3.call" "*muluhq3.call" ;; "*mulha3.call" "*muluha3.call" @@ -302,7 +312,10 @@ (reg:ALL4A 20))) (set (match_operand:ALL4A 0 "register_operand" "") (reg:ALL4A 24))] - "AVR_HAVE_MUL") + 
"AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 16)); + }) ;; "*mulsa3.call" "*mulusa3.call" (define_insn "*mul3.call" @@ -330,7 +343,12 @@ (reg:ALL1Q 22))) (clobber (reg:QI 25))]) (set (match_operand:ALL1Q 0 "register_operand" "") - (reg:ALL1Q 24))]) + (reg:ALL1Q 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 25)); + }) + ;; "*divqq3.call" "*udivuqq3.call" (define_insn "*3.call" @@ -356,7 +374,11 @@ (clobber (reg:HI 26)) (clobber (reg:QI 21))]) (set (match_operand:ALL2QA 0 "register_operand" "") - (reg:ALL2QA 24))]) + (reg:ALL2QA 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 26)); + }) ;; "*divhq3.call" "*udivuhq3.call" ;; "*divha3.call" "*udivuha3.call" @@ -385,7 +407,11 @@ (clobber (reg:HI 26)) (clobber (reg:HI 30))]) (set (match_operand:ALL4A 0 "register_operand" "") - (reg:ALL4A 22))]) + (reg:ALL4A 22))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (mode, 24)); + }) ;; "*divsa3.call" "*udivusa3.call" (define_insn "*3.call" @@ -435,6 +461,7 @@ operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); + avr_fix_inputs (operands, 1 << 2, regmask (mode, REGNO (operands[4]))); operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0); // $2 is no more needed, but is referenced for expand. operands[2] = const0_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h index c5ce78429..4a899a27c 100644 --- a/gcc-4.9/gcc/config/avr/avr-protos.h +++ b/gcc-4.9/gcc/config/avr/avr-protos.h @@ -124,6 +124,15 @@ extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); extern bool avr_xload_libgcc_p (enum machine_mode); +static inline unsigned +regmask (enum machine_mode mode, unsigned regno) +{ + return ((1u << GET_MODE_SIZE (mode)) - 1) << regno; +} + +extern void avr_fix_inputs (rtx*, unsigned, unsigned); +extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned); + extern rtx lpm_reg_rtx; extern rtx lpm_addr_reg_rtx; extern rtx tmp_reg_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c index fa979df46..4c65f5efa 100644 --- a/gcc-4.9/gcc/config/avr/avr.c +++ b/gcc-4.9/gcc/config/avr/avr.c @@ -11118,6 +11118,115 @@ avr_convert_to_type (tree type, tree expr) } +/* PR63633: The middle-end might come up with hard regs as input operands. + + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + For each element of OPMASK which is a hard register overlapping RMASK, + replace OP[n] with a newly created pseudo register + + HREG == 0: Also emit a move insn that copies the contents of that + hard register into the new pseudo. + + HREG != 0: Also set HREG[n] to the hard register. */ + +static void +avr_fix_operands (rtx *op, rtx *hreg, unsigned opmask, unsigned rmask) +{ + for (; opmask; opmask >>= 1, op++) + { + rtx reg = *op; + + if (hreg) + *hreg = NULL_RTX; + + if ((opmask & 1) + && REG_P (reg) + && REGNO (reg) < FIRST_PSEUDO_REGISTER + // This hard-reg overlaps other prohibited hard regs? 
+ && (rmask & regmask (GET_MODE (reg), REGNO (reg)))) + { + *op = gen_reg_rtx (GET_MODE (reg)); + if (hreg == NULL) + emit_move_insn (*op, reg); + else + *hreg = reg; + } + + if (hreg) + hreg++; + } +} + + +void +avr_fix_inputs (rtx *op, unsigned opmask, unsigned rmask) +{ + avr_fix_operands (op, NULL, opmask, rmask); +} + + +/* Helper for the function below: If bit n of MASK is set and + HREG[n] != NULL, then emit a move insn to copy OP[n] to HREG[n]. + Otherwise do nothing for that n. Return TRUE. */ + +static bool +avr_move_fixed_operands (rtx *op, rtx *hreg, unsigned mask) +{ + for (; mask; mask >>= 1, op++, hreg++) + if ((mask & 1) + && *hreg) + emit_move_insn (*hreg, *op); + + return true; +} + + +/* PR63633: The middle-end might come up with hard regs as output operands. + + GEN is a sequence generating function like gen_mulsi3 with 3 operands OP[]. + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + Emit the insn sequence as generated by GEN() with all elements of OPMASK + which are hard registers overlapping RMASK replaced by newly created + pseudo registers. After the sequence has been emitted, emit insns that + move the contents of respective pseudos to their hard regs. */ + +bool +avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, + unsigned opmask, unsigned rmask) +{ + const int n = 3; + rtx hreg[n]; + + /* It is legitimate for GEN to call this function, and in order not to + get self-recursive we use the following static kludge. This is the + only way not to duplicate all expanders and to avoid ugly and + hard-to-maintain C-code instead of the much more appreciated RTL + representation as supplied by define_expand. */ + static bool lock = false; + + gcc_assert (opmask < (1u << n)); + + if (lock) + return false; + + avr_fix_operands (op, hreg, opmask, rmask); + + lock = true; + emit_insn (gen (op[0], op[1], op[2])); + lock = false; + + return avr_move_fixed_operands (op, hreg, opmask); +} + + /* Worker function for movmemhi expander. 
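[Side note on the avr_fix_inputs/avr_emit3_fix_outputs machinery above: the regmask() helper added to avr-protos.h is simply a contiguous hard-register bit mask. A minimal standalone sketch, with the mode size passed as a plain byte count because GET_MODE_SIZE is a GCC internal:]

    /* A value SIZE bytes wide living at hard register REGNO occupies
       R[REGNO] .. R[REGNO+SIZE-1], i.e. SIZE consecutive bits shifted
       up by REGNO.  */
    static unsigned
    regmask_sketch (unsigned size, unsigned regno)
    {
      return ((1u << size) - 1) << regno;
    }

    /* regmask (HImode, 24) corresponds to regmask_sketch (2, 24)
       == 0x03000000: bits 24 and 25, i.e. hard registers R24/R25.
       That is exactly the mask the mulhi3 expander in avr.md below
       passes to avr_fix_inputs() so that middle-end-supplied hard-reg
       inputs are moved out of the R24/R25 result registers first.  */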
XOP[0] Destination as MEM:BLK XOP[1] Source " " diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md index 3bb2a914a..3f4181dab 100644 --- a/gcc-4.9/gcc/config/avr/avr.md +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -1482,7 +1482,11 @@ (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) (clobber (reg:QI 22))]) - (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); + }) (define_insn "*mulqi3_call" [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) @@ -2210,7 +2214,13 @@ (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) (clobber (reg:HI 22)) (clobber (reg:QI 21))]) - (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + { + avr_fix_inputs (operands, (1 << 2), regmask (HImode, 24)); + }) + (define_insn "*mulhi3_call" [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) @@ -2248,6 +2258,10 @@ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn_and_split "*mulsi3" @@ -2287,7 +2301,23 @@ ;; "muluqisi3" ;; "muluhisi3" -(define_insn_and_split "mulusi3" +(define_expand "mulusi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulusi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*muluqisi3" +;; "*muluhisi3" +(define_insn_and_split "*mulusi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2323,7 +2353,23 @@ ;; "mulsqisi3" ;; "mulshisi3" -(define_insn_and_split "mulssi3" +(define_expand "mulssi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulssi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*mulsqisi3" +;; "*mulshisi3" +(define_insn_and_split "*mulssi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2366,7 +2412,22 @@ ;; One-extend operand 1 -(define_insn_and_split "mulohisi3" +(define_expand "mulohisi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if 
(avr_emit3_fix_outputs (gen_mulohisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulohisi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (not:SI (zero_extend:SI (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) @@ -2394,7 +2455,12 @@ (any_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_mulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) (define_expand "usmulhisi3" [(parallel [(set (match_operand:SI 0 "register_operand" "") @@ -2402,7 +2468,12 @@ (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_usmulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) ;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" ;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" @@ -2474,7 +2545,10 @@ (clobber (reg:HI 22))]) (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (HImode, 18)); + }) (define_insn "*mulsi3_call" @@ -2697,6 +2771,10 @@ emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulpsi3, operands, 1u << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn "*umulqihipsi3" @@ -2729,7 +2807,21 @@ [(set_attr "length" "7") (set_attr "cc" "clobber")]) -(define_insn_and_split "mulsqipsi3" +(define_expand "mulsqipsi3" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand"""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulsqipsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulsqipsi3" [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -4931,8 +5023,9 @@ (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")] UNSPEC_INDEX_JMP)) (use (label_ref (match_operand 1 "" ""))) - (clobber (match_dup 0))] - "" + (clobber (match_dup 0)) + (clobber (const_int 0))] + "!AVR_HAVE_EIJMP_EICALL" "@ ijmp push %A0\;push %B0\;ret @@ -4941,6 +5034,19 @@ (set_attr "isa" "rjmp,rjmp,jmp") (set_attr "cc" "none,none,clobber")]) +(define_insn "*tablejump.3byte-pc" + [(set (pc) + (unspec:HI [(reg:HI REG_Z)] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 0 "" ""))) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI 24))] + "AVR_HAVE_EIJMP_EICALL" + "clr r24\;subi r30,pm_lo8(-(%0))\;sbci r31,pm_hi8(-(%0))\;sbci r24,pm_hh8(-(%0))\;jmp __tablejump2__" + [(set_attr "length" "6") + (set_attr "isa" "eijmp") + (set_attr "cc" "clobber")]) + (define_expand "casesi" [(parallel [(set (match_dup 6) @@ -4958,15 +5064,31 @@ (label_ref (match_operand 4 "" "")) (pc))) - (set (match_dup 6) - (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + (set (match_dup 10) + (match_dup 7)) - (parallel [(set (pc) (unspec:HI [(match_dup 6)] 
UNSPEC_INDEX_JMP)) + (parallel [(set (pc) + (unspec:HI [(match_dup 10)] UNSPEC_INDEX_JMP)) (use (label_ref (match_dup 3))) - (clobber (match_dup 6))])] + (clobber (match_dup 10)) + (clobber (match_dup 8))])] "" { operands[6] = gen_reg_rtx (HImode); + + if (AVR_HAVE_EIJMP_EICALL) + { + operands[7] = operands[6]; + operands[8] = all_regs_rtx[24]; + operands[10] = gen_rtx_REG (HImode, REG_Z); + } + else + { + operands[7] = gen_rtx_PLUS (HImode, operands[6], + gen_rtx_LABEL_REF (VOIDmode, operands[3])); + operands[8] = const0_rtx; + operands[10] = operands[6]; + } }) @@ -6034,6 +6156,7 @@ emit_insn (gen_fmul_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmul_insn" @@ -6077,6 +6200,7 @@ emit_insn (gen_fmuls_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmuls_insn" @@ -6120,6 +6244,7 @@ emit_insn (gen_fmulsu_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmulsu_insn" diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c index 892ba3547..7fe4b1f2e 100644 --- a/gcc-4.9/gcc/config/darwin-c.c +++ b/gcc-4.9/gcc/config/darwin-c.c @@ -571,21 +571,34 @@ find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) } /* Return the value of darwin_macosx_version_min suitable for the - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, - so '10.4.2' becomes 1040. The lowest digit is always zero. + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, so '10.4.2' + becomes 1040 and '10.10.0' becomes 101000. The lowest digit is + always zero, as is the second lowest for '10.10.x' and above. Print a warning if the version number can't be understood. */ static const char * version_as_macro (void) { - static char result[] = "1000"; + static char result[7] = "1000"; + int minorDigitIdx; if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) goto fail; if (! ISDIGIT (darwin_macosx_version_min[3])) goto fail; - result[2] = darwin_macosx_version_min[3]; - if (darwin_macosx_version_min[4] != '\0' - && darwin_macosx_version_min[4] != '.') + + minorDigitIdx = 3; + result[2] = darwin_macosx_version_min[minorDigitIdx++]; + if (ISDIGIT (darwin_macosx_version_min[minorDigitIdx])) + { + /* Starting with OS X 10.10, the macro ends '00' rather than '0', + i.e. 10.10.x becomes 101000 rather than 10100. */ + result[3] = darwin_macosx_version_min[minorDigitIdx++]; + result[4] = '0'; + result[5] = '0'; + result[6] = '\0'; + } + if (darwin_macosx_version_min[minorDigitIdx] != '\0' + && darwin_macosx_version_min[minorDigitIdx] != '.') goto fail; return result; diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c index 8b6ae9391..541e10bc0 100644 --- a/gcc-4.9/gcc/config/darwin-driver.c +++ b/gcc-4.9/gcc/config/darwin-driver.c @@ -29,8 +29,8 @@ along with GCC; see the file COPYING3. If not see #include #include "xregex.h" -static bool -darwin_find_version_from_kernel (char *new_flag) +static char * +darwin_find_version_from_kernel (void) { char osversion[32]; size_t osversion_len = sizeof (osversion) - 1; @@ -39,6 +39,7 @@ darwin_find_version_from_kernel (char *new_flag) char minor_vers[6]; char * version_p; char * version_pend; + char * new_flag; /* Determine the version of the running OS. If we can't, warn user, and do nothing. 
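[As an aside, the version_as_macro() change in darwin-c.c above can be summarized by a small self-contained sketch; this is illustrative only, not the GCC function: "10.4.2" becomes "1040" while "10.10.0" becomes "101000", i.e. one trailing zero for single-digit minor versions and two for 10.10 and later.]

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Map a "10.M[.x]" Darwin version string to the numeric form used
       for the version-min macro; return NULL if not understood.  */
    static const char *
    version_macro_sketch (const char *ver)
    {
      static char result[7];
      int i;

      if (strncmp (ver, "10.", 3) != 0 || !isdigit ((unsigned char) ver[3]))
        return NULL;
      if (isdigit ((unsigned char) ver[4]))
        {
          /* Two-digit minor (10.10 and up): "10MM00".  */
          snprintf (result, sizeof result, "10%c%c00", ver[3], ver[4]);
          i = 5;
        }
      else
        {
          /* Single-digit minor: "10M0".  */
          snprintf (result, sizeof result, "10%c0", ver[3]);
          i = 4;
        }

      /* Anything after the minor must be '.' or end of string.  */
      return (ver[i] == '\0' || ver[i] == '.') ? result : NULL;
    }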
*/ @@ -46,7 +47,7 @@ darwin_find_version_from_kernel (char *new_flag) &osversion_len, NULL, 0) == -1) { warning (0, "sysctl for kern.osversion failed: %m"); - return false; + return NULL; } /* Try to parse the first two parts of the OS version number. Warn @@ -57,8 +58,6 @@ darwin_find_version_from_kernel (char *new_flag) version_p = osversion + 1; if (ISDIGIT (*version_p)) major_vers = major_vers * 10 + (*version_p++ - '0'); - if (major_vers > 4 + 9) - goto parse_failed; if (*version_p++ != '.') goto parse_failed; version_pend = strchr(version_p, '.'); @@ -74,17 +73,16 @@ darwin_find_version_from_kernel (char *new_flag) if (major_vers - 4 <= 4) /* On 10.4 and earlier, the old linker is used which does not support three-component system versions. */ - sprintf (new_flag, "10.%d", major_vers - 4); + asprintf (&new_flag, "10.%d", major_vers - 4); else - sprintf (new_flag, "10.%d.%s", major_vers - 4, - minor_vers); + asprintf (&new_flag, "10.%d.%s", major_vers - 4, minor_vers); - return true; + return new_flag; parse_failed: warning (0, "couldn%'t understand kern.osversion %q.*s", (int) osversion_len, osversion); - return false; + return NULL; } #endif @@ -105,7 +103,7 @@ darwin_default_min_version (unsigned int *decoded_options_count, const unsigned int argc = *decoded_options_count; struct cl_decoded_option *const argv = *decoded_options; unsigned int i; - static char new_flag[sizeof ("10.0.0") + 6]; + const char *new_flag; /* If the command-line is empty, just return. */ if (argc <= 1) @@ -142,16 +140,16 @@ darwin_default_min_version (unsigned int *decoded_options_count, #ifndef CROSS_DIRECTORY_STRUCTURE - /* Try to find the version from the kernel, if we fail - we print a message - and give up. */ - if (!darwin_find_version_from_kernel (new_flag)) - return; + /* Try to find the version from the kernel, if we fail - we print a message + and give up. */ + new_flag = darwin_find_version_from_kernel (); + if (!new_flag) + return; #else - /* For cross-compilers, default to the target OS version. */ - - strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag)); + /* For cross-compilers, default to the target OS version. */ + new_flag = DEF_MIN_OSX_VERSION; #endif /* CROSS_DIRECTORY_STRUCTURE */ @@ -165,7 +163,6 @@ darwin_default_min_version (unsigned int *decoded_options_count, memcpy (*decoded_options + 2, argv + 1, (argc - 1) * sizeof (struct cl_decoded_option)); return; - } /* Translate -filelist and -framework options in *DECODED_OPTIONS diff --git a/gcc-4.9/gcc/config/gnu-user.h b/gcc-4.9/gcc/config/gnu-user.h index 2af0a54ed..1a9a487a8 100644 --- a/gcc-4.9/gcc/config/gnu-user.h +++ b/gcc-4.9/gcc/config/gnu-user.h @@ -118,7 +118,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Link -lasan early on the command line. For -static-libasan, don't link it for -shared link, the executable should be compiled with -static-libasan in that case, and for executable link link with --{,no-}whole-archive around - it to force everything into the executable. And similarly for -ltsan. */ + it to force everything into the executable. And similarly for -ltsan + and -llsan. */ #if defined(HAVE_LD_STATIC_DYNAMIC) #undef LIBASAN_EARLY_SPEC #define LIBASAN_EARLY_SPEC "%{!shared:libasan_preinit%O%s} " \ @@ -129,4 +130,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define LIBTSAN_EARLY_SPEC "%{static-libtsan:%{!shared:" \ LD_STATIC_OPTION " --whole-archive -ltsan --no-whole-archive " \ LD_DYNAMIC_OPTION "}}%{!static-libtsan:-ltsan}" +#undef LIBLSAN_EARLY_SPEC +#define LIBLSAN_EARLY_SPEC "%{static-liblsan:%{!shared:" \ + LD_STATIC_OPTION " --whole-archive -llsan --no-whole-archive " \ + LD_DYNAMIC_OPTION "}}%{!static-liblsan:-llsan}" #endif diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index a598b8eef..54942d520 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -2465,7 +2465,7 @@ struct ptt const int align_func; }; -/* This table must be in sync with enum processor_type in i386.h. */ +/* This table must be in sync with enum processor_type in i386.h. */ static const struct ptt processor_target_table[PROCESSOR_max] = { {"generic", &generic_cost, 16, 10, 16, 10, 16}, @@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p, | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C - | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 - | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 - | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, @@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p, /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is on and OPTION_MASK_ABI_64 is off. We turn off OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by - -m64. */ - if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; #endif } @@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p, opts->x_target_flags |= MASK_NO_RED_ZONE; } + if (!global_options_set.x_flag_shrink_wrap_frame_pointer) + flag_shrink_wrap_frame_pointer = 1; + + /* -fshrink-wrap-frame-pointer is an optimization based on + -fno-omit-frame-pointer mode, so it is only effective when + flag_omit_frame_pointer is false. + Frame pointer shrinkwrap may increase code size, so disable + it when optimize_size is true. */ + if (flag_omit_frame_pointer + || optimize == 0 + || optimize_size) + flag_shrink_wrap_frame_pointer = 0; + + /* If only no -mno-omit-leaf-frame-pointer is explicitly specified, + -fshrink_wrap_frame_pointer will enable omitting leaf frame + pointer by default. */ + if (flag_shrink_wrap_frame_pointer + && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags) + && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))) + opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER; + /* Keep nonleaf frame pointers. 
*/ if (opts->x_flag_omit_frame_pointer) opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; - else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) - opts->x_flag_omit_frame_pointer = 1; /* If we're doing fast math, we don't care about comparison order wrt NaNs. This lets us use a shorter comparison sequence. */ @@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p, /* For all chips supporting SSE2, -mfpmath=sse performs better than fpmath=387. The second is however default at many targets since the extra 80bit precision of temporaries is considered to be part of ABI. - Overwrite the default at least for -ffast-math. + Overwrite the default at least for -ffast-math. TODO: -mfpmath=both seems to produce same performing code with bit smaller binaries. It is however not clear if register allocation is ready for this setting. @@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void) c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3) : TARGET_64BIT ? (1 << 2) : (1 << 1)); - + CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) @@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl, tree old_optimize = build_optimization_node (&global_options); - /* Get the optimization options of the current function. */ + /* Get the optimization options of the current function. */ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - + if (!func_optimize) func_optimize = old_optimize; @@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl, memset (&func_options, 0, sizeof (func_options)); init_options_struct (&func_options, NULL); lang_hooks.init_options_struct (&func_options); - + cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize)); @@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp) if (TREE_CODE (exp) == FUNCTION_DECL) return false; + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); @@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *sname = NULL; unsigned int flags = SECTION_WRITE; @@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) static void ATTRIBUTE_UNUSED x86_64_elf_unique_section (tree decl, int reloc) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *prefix = NULL; /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
*/ @@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file, { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) - fputs (".largecomm\t", file); + fputs ("\t.largecomm\t", file); else fputs (COMMON_ASM_OP, file); assemble_name (file, name); @@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype) if (abi == SYSV_ABI) { if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; + { + if (TARGET_X32) + { + static bool warned = false; + if (!warned) + { + error ("X32 does not support ms_abi attribute"); + warned = true; + } + } + abi = MS_ABI; + } } else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) abi = SYSV_ABI; @@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ The midde-end can't deal with the vector types > 16 bytes. In this case, we return the original mode and warn ABI change if CUM isn't - NULL. + NULL. If INT_RETURN is true, warn ABI change if the vector mode isn't available for function return value. */ @@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void) if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) return true; - /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER - turns off the frame pointer by default. Turn it back on now if - we've not got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER - && (!crtl->is_leaf - || ix86_current_function_calls_tls_descriptor)) - return true; - if (crtl->profile && !flag_fentry) return true; return false; } +/* Return true if the frame pointer of the function could be omitted. */ + +static bool +ix86_can_omit_leaf_frame_pointer (void) +{ + return TARGET_OMIT_LEAF_FRAME_POINTER + && (crtl->is_leaf + && !ix86_current_function_calls_tls_descriptor); +} + /* Record that the current function accesses previous call frames. */ void @@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += UNITS_PER_WORD; /* Skip saved base pointer. */ - if (frame_pointer_needed) + if (frame_pointer_needed || frame_pointer_partially_needed) offset += UNITS_PER_WORD; frame->hfp_save_offset = offset; @@ -10890,6 +10925,26 @@ ix86_expand_prologue (void) m->fs.fp_valid = true; } } + else if (frame_pointer_partially_needed) + { + insn = emit_insn (gen_push (hard_frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + if (fpset_needed_in_prologue) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for + pushes in prologue, so use fp as cfa_reg to reduce .eh_frame + size when possible. 
*/ + if (!any_fp_def) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.sp_offset; + m->fs.fp_valid = true; + } + } + } if (!int_registers_saved) { @@ -11067,6 +11122,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11080,6 +11139,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11231,6 +11294,34 @@ ix86_expand_prologue (void) emit_insn (gen_prologue_use (stack_pointer_rtx)); } +/* Get frame pointer setting insn based on frame state. */ +static rtx +ix86_set_fp_insn () +{ + rtx r, seq; + struct ix86_frame frame; + HOST_WIDE_INT offset; + + ix86_compute_frame_layout (&frame); + gcc_assert (frame_pointer_partially_needed); + offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset; + + if (TARGET_64BIT && (offset > 0x7fffffff)) + { + r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset)); + emit_insn (r); + r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + else + { + r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + emit_insn (r); + return r; +} + /* Emit code to restore REG using a POP insn. */ static void @@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style) || m->fs.sp_offset == frame.stack_pointer_offset); /* The FP must be valid if the frame pointer is present. */ - gcc_assert (frame_pointer_needed == m->fs.fp_valid); + if (!frame_pointer_partially_needed) + gcc_assert (frame_pointer_needed == m->fs.fp_valid); + else + gcc_assert (!(any_fp_def && m->fs.fp_valid)); + gcc_assert (!m->fs.fp_valid || m->fs.fp_offset == frame.hard_frame_pointer_offset); @@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style) /* If we used a stack pointer and haven't already got rid of it, then do so now. */ - if (m->fs.fp_valid) + if (m->fs.fp_valid || frame_pointer_partially_needed) { /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. */ @@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style) ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ - else if (TARGET_USE_LEAVE - || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) - || !cfun->machine->use_fast_prologue_epilogue) + else if (m->fs.fp_valid + && (TARGET_USE_LEAVE + || optimize_function_for_size_p (cfun) + || !cfun->machine->use_fast_prologue_epilogue)) ix86_emit_leave (); else { + rtx dest, offset; + dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx; + offset = (m->fs.fp_valid) ? 
const0_rtx : + GEN_INT (m->fs.sp_offset - frame.hfp_save_offset); pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); + dest, + offset, style, !using_drap); ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); } } @@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file, fprintf (file, "\n"); /* Switching back to text section. */ - switch_to_section (function_section (current_function_decl)); + switch_to_section (current_function_section ()); return true; } @@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) addr = XEXP (addr, 0); if (CONST_INT_P (addr)) return 0; - } + } else if (GET_CODE (addr) == AND && const_32bit_mask (XEXP (addr, 1), DImode)) { @@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && (SYMBOL_REF_LOCAL_P (op0) - || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !(SYMBOL_REF_WEAK (op0) + /* TODO:Temporary fix for weak defined symbols. Weak defined + symbols in an executable cannot be overridden even with + a non-weak symbol in a shared library. + Revert after fix is checked in here: + http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html*/ + && SYMBOL_REF_EXTERNAL_P (op0)) && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; @@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x, (reg:DI 2 cx)) This RTX is rejected from ix86_legitimate_address_p due to - non-strictness of base register 97. Following this rejection, + non-strictness of base register 97. Following this rejection, reload pushes all three components into separate registers, creating invalid memory address RTX. @@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x, rtx base, index; bool something_reloaded = false; - base = XEXP (XEXP (x, 0), 1); + base = XEXP (XEXP (x, 0), 1); if (!REG_OK_FOR_BASE_STRICT_P (base)) { push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL, @@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport) #ifdef SUB_TARGET_RECORD_STUB SUB_TARGET_RECORD_STUB (name); #endif - } + } rtl = gen_const_mem (Pmode, rtl); set_mem_alias_set (rtl, ix86_GOT_alias_set ()); @@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) return x; } -/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG +/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG is true if we require the result be a register. */ static rtx @@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, if (mode == CCmode) suffix = "b"; else if (mode == CCCmode) - suffix = "c"; + suffix = fp ? "b" : "c"; else gcc_unreachable (); break; @@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, break; case GEU: if (mode == CCmode) - suffix = fp ? "nb" : "ae"; + suffix = "nb"; else if (mode == CCCmode) - suffix = "nc"; + suffix = fp ? "nb" : "nc"; else gcc_unreachable (); break; @@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 2: putc ('w', file); break; - + case 4: putc ('l', file); break; @@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn) } /* Check if a 256bit AVX register is referenced in stores. 
*/ - + static void ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) { @@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) bool *used = (bool *) data; *used = true; } - } + } /* Calculate mode of upper 128bit AVX registers after the insn. */ @@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) t = gen_reg_rtx (V4SFmode); else t = op0; - + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) emit_move_insn (t, CONST0_RTX (V4SFmode)); else @@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode, op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); } @@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[]) if (TARGET_XOP) { - /* The XOP VPPERM insn supports three inputs. By ignoring the + /* The XOP VPPERM insn supports three inputs. By ignoring the one_operand_shuffle special case, we avoid creating another set of constant vectors in memory. */ one_operand_shuffle = false; @@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, DONE_LABEL is a label after the whole copying sequence. The label is created on demand if *DONE_LABEL is NULL. MIN_SIZE is minimal size of block copied. This value gets adjusted for new - bounds after the initial copies. + bounds after the initial copies. DESTMEM/SRCMEM are memory expressions pointing to the copies block, DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether @@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, return dst; } -/* Return true if ALG can be used in current context. +/* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool alg_usable_p (enum stringop_alg alg, bool memset) @@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *noalign = alg_noalign; return alg; } - break; + else if (!any_alg_usable_p) + break; } else if (alg_usable_p (candidate, memset)) { @@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; + else + gcc_assert (alg != libcall); return alg; } return (alg_usable_p (algs->unknown_size, memset) @@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, with specified algorithm. 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). + handled by main body (or up to size guarded by prologue guard). Misaligned move sequence @@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, /* Do the cheap promotion to allow better CSE across the main loop and epilogue (ie one load of the big constant in the - front of all code. + front of all code. For now the misaligned move sequences do not have fast path without broadcasting. 
*/ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) @@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } else { - /* Static functions and indirect calls don't need the pic register. */ + /* Static functions and indirect calls don't need the pic register. Also, + check if PLT was explicitly avoided via no-plt or "noplt" attribute, making + it an indirect call. */ if (flag_pic && (!TARGET_64BIT || (ix86_cmodel == CM_LARGE_PIC && DEFAULT_ABI != MS_ABI)) && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) + && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)) + && flag_plt + && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE + || !lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0)))))) use_reg (&use, pic_offset_table_rtx); } @@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } +/* Return true if the function being called was marked with attribute "noplt" + or using -fno-plt and we are compiling for non-PIC and x86_64. We need to + handle the non-PIC case in the backend because there is no easy interface + for the front-end to force non-PLT calls to use the GOT. This is currently + used only with 64-bit ELF targets to call the function marked "noplt" + indirectly. */ + +static bool +ix86_nopic_noplt_attribute_p (rtx call_op) +{ + if (flag_pic || ix86_cmodel == CM_LARGE + || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF + || SYMBOL_REF_LOCAL_P (call_op)) + return false; + + tree symbol_decl = SYMBOL_REF_DECL (call_op); + + if (!flag_plt + || (symbol_decl != NULL_TREE + && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) + return true; + + return false; +} + /* Output the assembly for a call instruction. */ const char * @@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) if (SIBLING_CALL_P (insn)) { - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "jmp\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "jmp\t%P0"; /* SEH epilogue detection requires the indirect branch case to include REX.W. */ @@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) seh_nop_p = true; } - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "call\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "call\t%P0"; else xasm = "call\t%A0"; @@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) using topological ordering in the region. */ if (rgn == CONTAINING_RGN (e->src->index) && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) - add_dependee_for_func_arg (first_arg, e->src); + add_dependee_for_func_arg (first_arg, e->src); } } insn = first_arg; @@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode, other unit can not rely on the alignment. Exclude va_list type. It is the common case of local array where - we can not benefit from the alignment. + we can not benefit from the alignment. TODO: Probably one should optimize for size only when var is not escaping. 
*/ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) @@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, convert_expr = build1 (CONVERT_EXPR, ptr_type_node, build_fold_addr_expr (version_decl)); result_var = create_tmp_var (ptr_type_node, NULL); - convert_stmt = gimple_build_assign (result_var, convert_expr); + convert_stmt = gimple_build_assign (result_var, convert_expr); return_stmt = gimple_build_return (result_var); if (predicate_chain == NULL_TREE) @@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_seq_add_stmt (&gseq, call_cond_stmt); predicate_chain = TREE_CHAIN (predicate_chain); - + if (and_expr_var == NULL) and_expr_var = cond_var; else @@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_set_bb (return_stmt, bb2); bb3 = e23->dest; - make_edge (bb1, bb3, EDGE_FALSE_VALUE); + make_edge (bb1, bb3, EDGE_FALSE_VALUE); remove_edge (e23); make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_FMA4, P_XOP, P_PROC_XOP, - P_FMA, + P_FMA, P_PROC_FMA, P_AVX2, P_PROC_AVX2 @@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) cl_target_option_save (&cur_target, &global_options); target_node = ix86_valid_target_attribute_tree (attrs, &global_options, &global_options_set); - + gcc_assert (target_node); new_target = TREE_TARGET_OPTION (target_node); gcc_assert (new_target); - + if (new_target->arch_specified && new_target->arch > 0) { switch (new_target->arch) @@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "bdver4"; priority = P_PROC_AVX2; break; - } - } - + } + } + cl_target_option_restore (&global_options, &cur_target); - + if (predicate_list && arg_str == NULL) { error_at (DECL_SOURCE_LOCATION (decl), "No dispatcher found for the versioning attributes"); return 0; } - + if (predicate_list) { predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; @@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) *predicate_list = predicate_chain; } - return priority; + return priority; } /* This compares the priority of target features in function DECL1 @@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2) /* V1 and V2 point to function versions with different priorities based on the target ISA. This function compares their priorities. */ - + static int feature_compare (const void *v1, const void *v2) { @@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2) result = true; XDELETEVEC (target1); - XDELETEVEC (target2); - + XDELETEVEC (target2); + return result; } -static tree +static tree ix86_mangle_decl_assembler_name (tree decl, tree id) { /* For function version, add the target suffix to the assembler name. */ @@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl) fn_type = TREE_TYPE (decl); func_type = build_function_type (TREE_TYPE (fn_type), TYPE_ARG_TYPES (fn_type)); - + func_decl = build_fn_decl (func_name, func_type); XDELETEVEC (func_name); TREE_USED (func_decl) = 1; @@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl) /* This will be of type IFUNCs have to be externally visible. 
*/ TREE_PUBLIC (func_decl) = 1; - return func_decl; + return func_decl; } #endif @@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl) tree dispatch_decl = NULL; struct cgraph_function_version_info *default_version_info = NULL; - + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); node = cgraph_get_node (fn); @@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl) node_v = get_cgraph_node_version (node); gcc_assert (node_v != NULL); - + if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; @@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl, gcc_assert (dispatch_decl != NULL); /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */ - DECL_ATTRIBUTES (dispatch_decl) + DECL_ATTRIBUTES (dispatch_decl) = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); /* Create the alias for dispatch to resolver here. */ @@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl, provide the code to dispatch the right function at run-time. NODE points to the dispatcher decl whose body will be created. */ -static tree +static tree ix86_generate_version_dispatcher_body (void *node_p) { tree resolver_decl; @@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - rebuild_cgraph_edges (); + rebuild_cgraph_edges (); pop_cfun (); return resolver_decl; } @@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM15H, M_INTEL_SILVERMONT, M_AMD_BTVER1, - M_AMD_BTVER2, + M_AMD_BTVER2, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args) {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, - {"btver1", M_AMD_BTVER1}, + {"btver1", M_AMD_BTVER1}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, {"bdver4", M_AMDFAM15H_BDVER4}, - {"btver2", M_AMD_BTVER2}, + {"btver2", M_AMD_BTVER2}, }; static struct _isa_names_table @@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, { /* Make it call __cpu_indicator_init in libgcc. */ tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); + type = build_function_type_list (integer_type_node, NULL_TREE); fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); + call_expr = build_call_expr (fndecl, 0); return expand_expr (call_expr, target, mode, EXPAND_NORMAL); } case IX86_BUILTIN_CPU_IS: @@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first) { default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) + || TREE_CODE(decl) == STRING_CST) && ix86_in_large_data_p (decl)) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; } @@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) op0 = gen_lowpart (V4DImode, d->op0); op1 = gen_lowpart (V4DImode, d->op1); rperm[0] - = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0) - || ((d->perm[nelt / 2] & (nelt / 2)) ? 
2 : 0)); + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); @@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_PROFILE_BEFORE_PROLOGUE #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue +#undef TARGET_SET_FP_INSN +#define TARGET_SET_FP_INSN ix86_set_fp_insn + #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name @@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required +#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER +#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE ix86_can_eliminate @@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p +#undef TARGET_STRICT_ALIGN +#define TARGET_STRICT_ALIGN true struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 39d395875..2369e4b40 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -12159,18 +12159,52 @@ (set_attr "mode" "")]) ;; BMI2 instructions. -(define_insn "bmi2_bzhi_3" +(define_expand "bmi2_bzhi_3" + [(parallel + [(set (match_operand:SWI48 0 "register_operand") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand") + (const_int 255)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_BMI2" + "operands[3] = GEN_INT ( * BITS_PER_UNIT);") + +(define_insn "*bmi2_bzhi_3" [(set (match_operand:SWI48 0 "register_operand" "=r") - (and:SWI48 (lshiftrt:SWI48 (const_int -1) - (match_operand:SWI48 2 "register_operand" "r")) - (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand" "r") + (const_int 255)) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) (clobber (reg:CC FLAGS_REG))] - "TARGET_BMI2" + "TARGET_BMI2 && INTVAL (operands[3]) == * BITS_PER_UNIT" "bzhi\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "bitmanip") (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_mode_attr k [(SI "k") (DI "q")]) +(define_insn "*bmi2_bzhi_3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r")) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && INTVAL (operands[3]) == * BITS_PER_UNIT" + "bzhi\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_insn "bmi2_pdep_3" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r") @@ -13518,7 +13552,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem" 
[(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13527,7 +13562,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13550,7 +13586,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); @@ -13589,7 +13626,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem1" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13598,7 +13636,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13621,7 +13660,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt index 1e00b660e..f64a9e1eb 100644 --- a/gcc-4.9/gcc/config/i386/i386.opt +++ b/gcc-4.9/gcc/config/i386/i386.opt @@ -108,10 +108,6 @@ int x_ix86_dump_tunes TargetSave int x_ix86_force_align_arg_pointer -;; -mcopyrelocs= -TargetSave -int x_ix86_copyrelocs - ;; -mforce-drap= TargetSave int x_ix86_force_drap @@ -295,10 +291,6 @@ mfancy-math-387 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU -mcopyrelocs -Target Report Var(ix86_pie_copyrelocs) Init(0) -Assume copy relocations support for pie builds. - mforce-drap Target Report Var(ix86_force_drap) Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h index 27d68b5db..bfc7746bc 100644 --- a/gcc-4.9/gcc/config/i386/linux.h +++ b/gcc-4.9/gcc/config/i386/linux.h @@ -24,18 +24,3 @@ along with GCC; see the file COPYING3. If not see #define RUNTIME_ROOT_PREFIX "" #endif #define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" - -/* These may be provided by config/linux-grtev*.h. */ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#ifndef SUBTARGET_EXTRA_SPECS_STR -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS -#else -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS \ - SUBTARGET_EXTRA_SPECS_STR -#endif diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h index 5124a341b..b71616fea 100644 --- a/gcc-4.9/gcc/config/i386/linux64.h +++ b/gcc-4.9/gcc/config/i386/linux64.h @@ -34,12 +34,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2" #define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2" -/* These may be provided by config/linux-grtev*.h. 
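The flag_finite_math_only condition added to the fprem/fprem1 patterns above reserves the inline x87 expansion of fmod/remainder for -ffinite-math-only compilations; otherwise the libm call is kept, presumably so the library handles the Inf/NaN corner cases. Illustrative only:

/* fmod-expand.c -- gcc -m32 -O2 -ffinite-math-only -S fmod-expand.c
   may expand this inline via fprem (TARGET_USE_FANCY_MATH_387);
   without -ffinite-math-only it stays "call fmod".  */
#include <math.h>

double
wrap (double x, double y)
{
  return fmod (x, y);
}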
*/ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS - diff --git a/gcc-4.9/gcc/config/i386/mmx.md b/gcc-4.9/gcc/config/i386/mmx.md index 214acde23..a7d2a7eec 100644 --- a/gcc-4.9/gcc/config/i386/mmx.md +++ b/gcc-4.9/gcc/config/i386/mmx.md @@ -600,20 +600,25 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2sf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o") + (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - unpckhps\t%0, %0 + %vmovshdup\t{%1, %0|%0, %1} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # # #" - [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov") - (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")]) + [(set_attr "isa" "*,sse3,noavx,*,*,*,*") + (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") + (set_attr "length_immediate" "*,*,1,*,*,*,*") + (set_attr "prefix_rep" "*,1,*,*,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig") + (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")]) (define_split [(set (match_operand:SF 0 "register_operand") @@ -1288,26 +1293,23 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,x,y,x,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,x,0,o,o,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - punpckhdq\t%0, %0 - pshufd\t{$85, %1, %0|%0, %1, 85} - unpckhps\t%0, %0 + %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # #" - [(set (attr "isa") - (if_then_else (eq_attr "alternative" "1,2") - (const_string "sse2") - (const_string "*"))) - (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov") - (set_attr "length_immediate" "*,*,1,*,*,*,*") - (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) + [(set_attr "isa" "*,sse2,noavx,*,*,*") + (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + (set_attr "length_immediate" "*,1,1,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig") + (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand") diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def index ddf1d21c9..215c71f4d 100644 --- a/gcc-4.9/gcc/config/i386/x86-tune.def +++ b/gcc-4.9/gcc/config/i386/x86-tune.def @@ -97,25 +97,25 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", conditional jump instruction for 32 bit TARGET. FIXME: revisit for generic. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", - m_CORE_ALL | m_BDVER) + m_GENERIC | m_CORE_ALL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent conditional jump instruction for TARGET_64BIT. FIXME: revisit for generic. 
*/ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a subsequent conditional jump instruction when the condition jump check sign flag (SF) or overflow flag (OF). */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by the conditional jump instruction. */ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", - m_SANDYBRIDGE | m_HASWELL) + m_GENERIC | m_SANDYBRIDGE | m_HASWELL) /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations during reassociation of integer computation. */ diff --git a/gcc-4.9/gcc/config/ia64/ia64.c b/gcc-4.9/gcc/config/ia64/ia64.c index 41adc4adc..4ec3e3abe 100644 --- a/gcc-4.9/gcc/config/ia64/ia64.c +++ b/gcc-4.9/gcc/config/ia64/ia64.c @@ -602,11 +602,6 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p -/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur - in an order different from the specified program order. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p #undef TARGET_LEGITIMATE_ADDRESS_P diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h index 31e8a94ce..e69de29bb 100644 --- a/gcc-4.9/gcc/config/linux-grte.h +++ b/gcc-4.9/gcc/config/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Overrides LIB_SPEC from gnu-user.h. */ -#undef LIB_SPEC -#define LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. 
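The m_GENERIC additions above turn on cmp/test-plus-branch fusion tuning for -mtune=generic. A toy case where that tuning matters, with the expected pairing described only loosely:

/* fuse.c -- gcc -O2 -mtune=generic -S fuse.c
   The tuning keeps the loop's cmp and its conditional jump adjacent
   so macro-fusion-capable cores can issue them as one uop.  */
int
find (const int *p, int n, int key)
{
  int i;
  for (i = 0; i < n; i++)
    if (p[i] == key)
      return i;
  return -1;
}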
*/ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c index 7c3c5a461..cdb2b5bdd 100644 --- a/gcc-4.9/gcc/config/linux.c +++ b/gcc-4.9/gcc/config/linux.c @@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "linux-protos.h" -/* Android does not support GNU indirect functions. */ - bool linux_has_ifunc_p (void) { diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md index 5e890eced..3f29d6d62 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.md +++ b/gcc-4.9/gcc/config/msp430/msp430.md @@ -559,7 +559,7 @@ [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (subreg:PSI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) 0))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #4, %0" + "RLAM.A #4, %0 { RRAM.A #4, %0" ) ;; Look for cases where integer/pointer conversions are suboptimal due @@ -587,7 +587,7 @@ (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 1)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #3, %0" + "RLAM.A #4, %0 { RRAM.A #3, %0" ) (define_insn "extend_and_shift2_hipsi2" [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 2)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #2, %0" + "RLAM.A #4, %0 { RRAM.A #2, %0" ) ; Nasty - we are sign-extending a 20-bit PSI value in one register into diff --git a/gcc-4.9/gcc/config/nios2/nios2.c b/gcc-4.9/gcc/config/nios2/nios2.c index cdd2e6bc9..047b615ba 100644 --- a/gcc-4.9/gcc/config/nios2/nios2.c +++ b/gcc-4.9/gcc/config/nios2/nios2.c @@ -2135,6 +2135,18 @@ nios2_output_dwarf_dtprel (FILE *file, int size, rtx x) fprintf (file, ")"); } +/* Implement TARGET_ASM_FILE_END. */ + +static void +nios2_asm_file_end (void) +{ + /* The Nios II Linux stack is mapped non-executable by default, so add a + .note.GNU-stack section for switching to executable stacks only when + trampolines are generated. */ + if (TARGET_LINUX_ABI && trampolines_created) + file_end_indicate_exec_stack (); +} + /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */ static void nios2_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) @@ -3313,6 +3325,9 @@ nios2_merge_decl_attributes (tree olddecl, tree newdecl) #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nios2_output_addr_const_extra +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END nios2_asm_file_end + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nios2_option_override diff --git a/gcc-4.9/gcc/config/pa/pa.c b/gcc-4.9/gcc/config/pa/pa.c index 5a7598ca7..801982068 100644 --- a/gcc-4.9/gcc/config/pa/pa.c +++ b/gcc-4.9/gcc/config/pa/pa.c @@ -3235,7 +3235,12 @@ pa_assemble_integer (rtx x, unsigned int size, int aligned_p) && aligned_p && function_label_operand (x, VOIDmode)) { - fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file); + fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); + + /* We don't want an OPD when generating fast indirect calls. */ + if (!TARGET_FAST_INDIRECT_CALLS) + fputs ("P%", asm_out_file); + output_addr_const (asm_out_file, x); fputc ('\n', asm_out_file); return true; @@ -4203,9 +4208,12 @@ pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { last_address = extra_nop ?
4 : 0; insn = get_last_nonnote_insn (); - last_address += INSN_ADDRESSES (INSN_UID (insn)); - if (INSN_P (insn)) - last_address += insn_default_length (insn); + if (insn) + { + last_address += INSN_ADDRESSES (INSN_UID (insn)); + if (INSN_P (insn)) + last_address += insn_default_length (insn); + } last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); } @@ -9308,6 +9316,12 @@ pa_function_value (const_tree valtype, || TREE_CODE (valtype) == COMPLEX_TYPE || TREE_CODE (valtype) == VECTOR_TYPE) { + HOST_WIDE_INT valsize = int_size_in_bytes (valtype); + + /* Handle aggregates that fit exactly in a word or double word. */ + if ((valsize & (UNITS_PER_WORD - 1)) == 0) + return gen_rtx_REG (TYPE_MODE (valtype), 28); + if (TARGET_64BIT) { /* Aggregates with a size less than or equal to 128 bits are @@ -9316,7 +9330,7 @@ pa_function_value (const_tree valtype, memory. */ rtx loc[2]; int i, offset = 0; - int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2; + int ub = valsize <= UNITS_PER_WORD ? 1 : 2; for (i = 0; i < ub; i++) { @@ -9328,7 +9342,7 @@ pa_function_value (const_tree valtype, return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); } - else if (int_size_in_bytes (valtype) > UNITS_PER_WORD) + else if (valsize > UNITS_PER_WORD) { /* Aggregates 5 to 8 bytes in size are returned in general registers r28-r29 in the same manner as other non diff --git a/gcc-4.9/gcc/config/pa/pa.md b/gcc-4.9/gcc/config/pa/pa.md index a9421ac2e..43b909e35 100644 --- a/gcc-4.9/gcc/config/pa/pa.md +++ b/gcc-4.9/gcc/config/pa/pa.md @@ -123,7 +123,7 @@ ;; type "binary" insns have two input operands (1,2) and one output (0) (define_attr "type" - "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload" + "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload,trap" (const_string "binary")) (define_attr "pa_combine_type" @@ -166,7 +166,7 @@ ;; For conditional branches. Frame related instructions are not allowed ;; because they confuse the unwind support. (define_attr "in_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -175,7 +175,7 @@ ;; Disallow instructions which use the FPU since they will tie up the FPU ;; even if the instruction is nullified. 
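The pa_function_value change above short-circuits aggregates whose size is an exact word multiple into %r28 with their natural machine mode. A case it covers, assuming 32-bit PA where UNITS_PER_WORD is 4 (file name illustrative):

/* pa-retval.c -- this 4-byte struct is now returned directly in
   %r28 (SImode) rather than through the BLKmode/PARALLEL paths.  */
struct pair { short a, b; };

struct pair
make_pair (short a, short b)
{
  struct pair p;
  p.a = a;
  p.b = b;
  return p;
}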
(define_attr "in_nullified_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -184,7 +184,7 @@ ;; For calls and millicode calls. Allow unconditional branches in the ;; delay slot. (define_attr "in_call_delay" "false,true" - (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -5331,6 +5331,15 @@ [(set_attr "type" "binary,binary") (set_attr "length" "4,4")]) +;; Trap instructions. + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "{addit|addi,tc},<> 1,%%r0,%%r0" + [(set_attr "type" "trap") + (set_attr "length" "4")]) + ;; Clobbering a "register_operand" instead of a match_scratch ;; in operand3 of millicode calls avoids spilling %r1 and ;; produces better code. @@ -8926,14 +8935,14 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" ;; strength reduction is used. It is actually created when the instruction ;; combination phase combines the special loop test. Since this insn ;; is both a jump insn and has an output, it must deal with its own -;; reloads, hence the `m' constraints. The `!' constraints direct reload +;; reloads, hence the `Q' constraints. The `!' constraints direct reload ;; to not choose the register alternatives in the event a reload is needed. (define_insn "decrement_and_branch_until_zero" [(set (pc) (if_then_else (match_operator 2 "comparison_operator" [(plus:SI - (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m") + (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*Q") (match_operand:SI 1 "int5_operand" "L,L,L")) (const_int 0)]) (label_ref (match_operand 3 "" "")) @@ -9022,7 +9031,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (label_ref (match_operand 3 "" "")) (pc))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 0); " @@ -9094,7 +9103,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (pc) (label_ref (match_operand 3 "" "")))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 1); " diff --git a/gcc-4.9/gcc/config/pa/predicates.md b/gcc-4.9/gcc/config/pa/predicates.md index 8dcfce0e9..405cf7f63 100644 --- a/gcc-4.9/gcc/config/pa/predicates.md +++ b/gcc-4.9/gcc/config/pa/predicates.md @@ -528,20 +528,29 @@ ;; This predicate is used for branch patterns that internally handle ;; register reloading. 
We need to accept non-symbolic memory operands ;; after reload to ensure that the pattern is still valid if reload -;; didn't find a hard register for the operand. +;; didn't find a hard register for the operand. We also reject index +;; and lo_sum DLT address as these are invalid for move destinations. (define_predicate "reg_before_reload_operand" (match_code "reg,mem") { + rtx op0; + if (register_operand (op, mode)) return true; - if (reload_completed - && memory_operand (op, mode) - && !symbolic_memory_operand (op, mode)) - return true; + if (!reload_in_progress && !reload_completed) + return false; - return false; + if (! MEM_P (op)) + return false; + + op0 = XEXP (op, 0); + + return (memory_address_p (mode, op0) + && !IS_INDEX_ADDR_P (op0) + && !IS_LO_SUM_DLT_ADDR_P (op0) + && !symbolic_memory_operand (op, mode)); }) ;; True iff OP is a register or const_0 operand for MODE. diff --git a/gcc-4.9/gcc/config/rs6000/altivec.h b/gcc-4.9/gcc/config/rs6000/altivec.h index 129cf6fa1..9ee0ae5ec 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.h +++ b/gcc-4.9/gcc/config/rs6000/altivec.h @@ -124,6 +124,7 @@ #define vec_vcfux __builtin_vec_vcfux #define vec_cts __builtin_vec_cts #define vec_ctu __builtin_vec_ctu +#define vec_cpsgn __builtin_vec_copysign #define vec_expte __builtin_vec_expte #define vec_floor __builtin_vec_floor #define vec_loge __builtin_vec_loge @@ -214,8 +215,10 @@ #define vec_lvsl __builtin_vec_lvsl #define vec_lvsr __builtin_vec_lvsr #define vec_max __builtin_vec_max +#define vec_mergee __builtin_vec_vmrgew #define vec_mergeh __builtin_vec_mergeh #define vec_mergel __builtin_vec_mergel +#define vec_mergeo __builtin_vec_vmrgow #define vec_min __builtin_vec_min #define vec_mladd __builtin_vec_mladd #define vec_msum __builtin_vec_msum @@ -319,6 +322,8 @@ #define vec_sqrt __builtin_vec_sqrt #define vec_vsx_ld __builtin_vec_vsx_ld #define vec_vsx_st __builtin_vec_vsx_st +#define vec_xl __builtin_vec_vsx_ld +#define vec_xst __builtin_vec_vsx_st /* Note, xxsldi and xxpermdi were added as __builtin_vsx_ functions instead of __builtin_vec_ */ @@ -336,6 +341,7 @@ #define vec_vadduqm __builtin_vec_vadduqm #define vec_vbpermq __builtin_vec_vbpermq #define vec_vclz __builtin_vec_vclz +#define vec_cntlz __builtin_vec_vclz #define vec_vclzb __builtin_vec_vclzb #define vec_vclzd __builtin_vec_vclzd #define vec_vclzh __builtin_vec_vclzh diff --git a/gcc-4.9/gcc/config/rs6000/altivec.md b/gcc-4.9/gcc/config/rs6000/altivec.md index a8cfcb739..02ea14237 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.md +++ b/gcc-4.9/gcc/config/rs6000/altivec.md @@ -67,7 +67,7 @@ UNSPEC_VCTSXS UNSPEC_VLOGEFP UNSPEC_VEXPTEFP - UNSPEC_VLSDOI + UNSPEC_VSLDOI UNSPEC_VUNPACK_HI_SIGN UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUNPACK_HI_SIGN_DIRECT @@ -2077,7 +2077,7 @@ (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:QI 3 "immediate_operand" "i")] - UNSPEC_VLSDOI))] + UNSPEC_VSLDOI))] "TARGET_ALTIVEC" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) @@ -2297,7 +2297,31 @@ "dststt %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_lvsl" +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for (i = 0; i < 
16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSL))] @@ -2305,7 +2329,31 @@ "lvsl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvsr" +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSR))] diff --git a/gcc-4.9/gcc/config/rs6000/darwin.h b/gcc-4.9/gcc/config/rs6000/darwin.h index 0329f3f62..dfd181e43 100644 --- a/gcc-4.9/gcc/config/rs6000/darwin.h +++ b/gcc-4.9/gcc/config/rs6000/darwin.h @@ -206,7 +206,11 @@ extern int darwin_emit_branch_islands; "vrsave", "vscr", \ "spe_acc", "spefscr", \ "sfp", \ - "tfhar", "tfiar", "texasr" \ + "tfhar", "tfiar", "texasr", \ + "rh0", "rh1", "rh2", "rh3", "rh4", "rh5", "rh6", "rh7", \ + "rh8", "rh9", "rh10", "rh11", "rh12", "rh13", "rh14", "rh15", \ + "rh16", "rh17", "rh18", "rh19", "rh20", "rh21", "rh22", "rh23", \ + "rh24", "rh25", "rh26", "rh27", "rh28", "rh29", "rh30", "rh31" \ } /* This outputs NAME to FILE. */ diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h index 53997f027..e69de29bb 100644 --- a/gcc-4.9/gcc/config/rs6000/linux-grte.h +++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -/* Overrides LIB_LINUX_SPEC from sysv4.h. 
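The altivec_lvsl/altivec_lvsr expanders above insert an extra vperm on little-endian so the classic unaligned-load idiom keeps selecting the right bytes. That idiom, for reference (pre-VSX style, file name illustrative):

/* lvsl-load.c -- misaligned vector load via lvsl/vperm.
   When VECTOR_ELT_ORDER_BIG the direct lvsl path is taken; otherwise
   the raw lvsl mask is corrected with the vperm generated above.  */
#include <altivec.h>

vector unsigned char
load_unaligned (const unsigned char *p)
{
  vector unsigned char mask = vec_lvsl (0, p);
  vector unsigned char lo   = vec_ld (0, p);
  vector unsigned char hi   = vec_ld (15, p);
  return vec_perm (lo, hi, mask);
}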
*/ -#undef LIB_LINUX_SPEC -#define LIB_LINUX_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/rs6000/predicates.md b/gcc-4.9/gcc/config/rs6000/predicates.md index 8c384b380..2f4046215 100644 --- a/gcc-4.9/gcc/config/rs6000/predicates.md +++ b/gcc-4.9/gcc/config/rs6000/predicates.md @@ -1783,7 +1783,7 @@ (define_predicate "fusion_gpr_mem_load" (match_code "mem,sign_extend,zero_extend") { - rtx addr; + rtx addr, base, offset; /* Handle sign/zero extend. */ if (GET_CODE (op) == ZERO_EXTEND @@ -1813,24 +1813,79 @@ } addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); - return (base_reg_operand (base, GET_MODE (base)) - && satisfies_constraint_I (offset)); + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); } - else if (GET_CODE (addr) == LO_SUM) + return 0; +}) + +;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the +;; memory field with both the addis and the memory offset. Sign extension +;; is not handled here, since lha and lwa are not fused. +(define_predicate "fusion_gpr_mem_combo" + (match_code "mem,zero_extend") +{ + rtx addr, base, offset; + + /* Handle zero extend. 
*/ + if (GET_CODE (op) == ZERO_EXTEND) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; - if (!base_reg_operand (base, GET_MODE (base))) + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) return 0; + break; - else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!fusion_gpr_addis (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) return small_toc_ref (offset, GET_MODE (offset)); else if (TARGET_ELF && !TARGET_POWERPC64) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def index 220d1e970..9bb870394 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def @@ -1258,6 +1258,16 @@ BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df) BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di) BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df) BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di) +BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df) +BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di) +BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di) +BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di) +BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di) + +BU_VSX_2 (XVCVSXDDP_SCALE, "xvcvsxddp_scale", CONST, vsx_xvcvsxddp_scale) +BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", CONST, vsx_xvcvuxddp_scale) +BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale) +BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale) /* VSX abs builtin functions. 
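The BU_VSX_2 entries above register 64-bit-element divide, multiply, splat and scaled-conversion builtins; the rs6000-c.c overload table that follows wires them into the generic vec_* intrinsics. A sketch of two of them, assuming -mvsx on POWER7 or later and the new overloads below (file name illustrative):

/* vsx-v2di.c -- vec_div resolves to VSX_BUILTIN_DIV_V2DI here, and
   vec_ctf to XVCVSXDDP_SCALE (convert, then divide by 2**3).  */
#include <altivec.h>

vector double
scaled_quotient (vector signed long long num, vector signed long long den)
{
  vector signed long long q = vec_div (num, den);
  return vec_ctf (q, 3);
}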
*/ BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-c.c b/gcc-4.9/gcc/config/rs6000/rs6000-c.c index 46c4a9d8c..8dedeec26 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-c.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000-c.c @@ -597,6 +597,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ROUND, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF, @@ -876,6 +878,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, @@ -930,6 +944,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, @@ -1118,18 +1144,30 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVSXDDP_SCALE, + RS6000_BTI_V2DF, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0}, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVUXDDP_SCALE, + RS6000_BTI_V2DF, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0}, { ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS, RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTS, VSX_BUILTIN_XVCVDPSXDS_SCALE, + RS6000_BTI_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTU, VSX_BUILTIN_XVCVDPUXDS_SCALE, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI, @@ -1595,6 +1633,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, @@ -1643,6 +1691,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, 
@@ -1771,6 +1829,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, @@ -1811,6 +1873,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, @@ -1841,6 +1915,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, @@ -1945,6 +2031,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSHUS, 
ALTIVEC_BUILTIN_VPKSHUS, @@ -2127,6 +2215,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, @@ -2518,6 +2614,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -2778,6 +2886,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, @@ -2818,6 +2928,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, @@ -3267,6 +3383,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, @@ -3321,6 +3439,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, @@ -3431,6 +3551,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, @@ -3889,12 +4021,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, @@ -4128,7 +4264,8 @@ altivec_build_resolved_builtin (tree *args, int n, argument) is reversed. Patch the arguments here before building the resolved CALL_EXPR. 
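A user-level sketch of what the overload rows added above enable, assuming a -mcpu=power8 -mvsx target (the function names here are illustrative, not part of the patch):

#include <altivec.h>

/* Each intrinsic resolves through one of the new table rows; before
   this change the doubleword (V2DI) forms were rejected during
   overload resolution.  */
vector unsigned long long
v2di_ops (vector unsigned long long a, vector unsigned long long b)
{
  vector unsigned long long h = vec_mergeh (a, b); /* VSX_BUILTIN_VEC_MERGEH_V2DI */
  vector unsigned long long q = vec_div (h, b);    /* VSX_BUILTIN_UDIV_V2DI */
  return vec_xor (q, a);                           /* ALTIVEC_BUILTIN_VXOR, unsigned V2DI */
}

/* The new ~RS6000_BTI_double rows likewise let vec_vsx_ld/vec_vsx_st
   accept a plain 'double *' rather than only 'vector double *'.  */
vector double
load_two (const double *p)
{
  return vec_vsx_ld (0, p);  /* VSX_BUILTIN_LXVD2X_V2DF */
}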
*/ if (desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P - && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P) + && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P + && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P) { tree t; t = args[2], args[2] = args[1], args[1] = t; @@ -4186,6 +4323,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */ + if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsl is deprecated for little endian; use \ +assignment for unaligned loads and stores"); + else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ +assignment for unaligned loads and stores"); /* For now treat vec_splats and vec_promote as the same. */ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h index 067a74aa6..aa8e76249 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h @@ -65,6 +65,7 @@ extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); +extern void rs6000_scale_v2df (rtx, rtx, int); extern void build_mask64_2_operands (rtx, rtx *); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); @@ -79,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); -extern bool fusion_gpr_load_p (rtx *, bool); +extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx); extern void expand_fusion_gpr_load (rtx *); -extern const char *emit_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c index 28ccf86df..730e6c8a6 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000.c @@ -79,6 +79,9 @@ #include "dumpfile.h" #include "cgraph.h" #include "target-globals.h" +#include "real.h" +#include "context.h" +#include "tree-pass.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif @@ -1171,6 +1174,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type, enum machine_mode, secondary_reload_info *, bool); +rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); /* Hash table stuff for keeping track of TOC entries. */ @@ -1541,17 +1545,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail #endif -/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors - The PowerPC architecture requires only weak consistency among - processors--that is, memory accesses between processors need not be - sequentially consistent and memory accesses among processors can occur - in any order. 
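The new -Wdeprecated diagnostics above target the classic lvsl/vperm unaligned-access idiom; a sketch of code that now warns when built for a little-endian target:

#include <altivec.h>

/* With VECTOR_ELT_ORDER_BIG false, this now produces:
   "vec_lvsl is deprecated for little endian; use assignment for
   unaligned loads and stores" (and similarly for vec_lvsr).  */
vector unsigned char
realign_mask (const unsigned char *p)
{
  return vec_lvsl (0, p);
}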
The ability to order memory accesses weakly provides - opportunities for more efficient use of the system bus. Unless a - dependency exists, the 604e allows read operations to precede store - operations. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #ifdef HAVE_AS_TLS #undef TARGET_ASM_OUTPUT_DWARF_DTPREL #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel @@ -4084,6 +4077,15 @@ static void rs6000_option_override (void) { (void) rs6000_option_override_internal (true); + + /* Register machine-specific passes. This needs to be done at start-up. + It's convenient to do it here (like i386 does). */ + opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g); + + static struct register_pass_info analyze_swaps_info + = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE }; + + register_pass (&analyze_swaps_info); } @@ -6896,24 +6898,6 @@ rs6000_delegitimize_address (rtx orig_x) if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL) { -#ifdef ENABLE_CHECKING - if (REG_P (XVECEXP (y, 0, 1)) - && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER) - { - /* All good. */ - } - else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR) - { - /* Weirdness alert. df_note_compute can replace r2 with a - debug_expr when this unspec is in a debug_insn. - Seen in gcc.dg/pr51957-1.c */ - } - else - { - debug_rtx (orig_x); - abort (); - } -#endif y = XVECEXP (y, 0, 0); #ifdef HAVE_AS_TLS @@ -13842,8 +13826,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case ALTIVEC_BUILTIN_MASK_FOR_LOAD: case ALTIVEC_BUILTIN_MASK_FOR_STORE: { - int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr - : (int) CODE_FOR_altivec_lvsl); + int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + : (int) CODE_FOR_altivec_lvsl_direct); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode = insn_data[icode].operand[1].mode; tree arg; @@ -13871,7 +13855,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); - /*pat = gen_altivec_lvsr (target, op);*/ pat = GEN_FCN (icode) (target, op); if (!pat) return 0; @@ -16654,10 +16637,13 @@ rs6000_secondary_reload (bool in_p, : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ && (offset & 3) != 0)) { + /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ if (in_p) - sri->icode = CODE_FOR_reload_di_load; + sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load + : CODE_FOR_reload_di_load); else - sri->icode = CODE_FOR_reload_di_store; + sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store + : CODE_FOR_reload_di_store); sri->extra_cost = 2; ret = NO_REGS; } @@ -30923,6 +30909,23 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); } +/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ +void +rs6000_scale_v2df (rtx tgt, rtx src, int scale) +{ + HOST_WIDE_INT hwi_scale (scale); + REAL_VALUE_TYPE r_pow; + rtvec v = rtvec_alloc (2); + rtx elt; + rtx scale_vec = gen_reg_rtx (V2DFmode); + (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); + elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode); + RTVEC_ELT (v, 0) = elt; + RTVEC_ELT (v, 1) = elt; + rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); + emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); +} + /* Return an RTX representing where to find the function value of a function returning MODE. */ static rtx @@ -32551,6 +32554,14 @@ rs6000_split_logical_inner (rtx dest, if (complement_op2_p) op2 = gen_rtx_NOT (mode, op2); + /* For canonical RTL, if only one arm is inverted it is the first. */ + if (!complement_op1_p && complement_op2_p) + { + rtx temp = op1; + op1 = op2; + op2 = temp; + } + bool_rtx = ((code == NOT) ? gen_rtx_NOT (mode, op1) : gen_rtx_fmt_ee (code, mode, op1, op2)); @@ -32755,25 +32766,14 @@ rs6000_split_logical (rtx operands[3], /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on - a power8. - - The operands are: - operands[0] register set with addis - operands[1] value set via addis - operands[2] target register being loaded - operands[3] D-form memory reference using operands[0]. - - In addition, we are passed a boolean that is true if this is a peephole2, - and we can use see if the addis_reg is dead after the insn and can be - replaced by the target register. */ + a power8. */ bool -fusion_gpr_load_p (rtx *operands, bool peep2_p) +fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ + rtx addis_value, /* addis value. */ + rtx target, /* target register that is loaded. */ + rtx mem) /* bottom part of the memory addr. */ { - rtx addis_reg = operands[0]; - rtx addis_value = operands[1]; - rtx target = operands[2]; - rtx mem = operands[3]; rtx addr; rtx base_reg; @@ -32787,9 +32787,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p) if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) return false; - if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) - return false; - /* Allow sign/zero extension. */ if (GET_CODE (mem) == ZERO_EXTEND || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) @@ -32798,22 +32795,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p) if (!MEM_P (mem)) return false; + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) return false; /* Validate that the register used to load the high value is either the - register being loaded, or we can safely replace its use in a peephole2. + register being loaded, or we can safely replace its use. - If this is a peephole2, we assume that there are 2 instructions in the - peephole (addis and load), so we want to check if the target register was - not used in the memory address and the register to hold the addis result - is dead after the peephole. */ + This function is only called from the peephole2 pass and we assume that + there are 2 instructions in the peephole (addis and load), so we want to + check if the target register was not used in the memory address and the + register to hold the addis result is dead after the peephole. 
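For readers unfamiliar with real_powi, the semantics of rs6000_scale_v2df above can be modeled in plain C; this is a sketch only, with ldexp standing in for the 2**scale constant that the real code splats into a vector register and feeds to gen_mulv2df3:

#include <math.h>

/* Each lane of the V2DF source is scaled by 2**scale; the emitted
   sequence is a constant splat plus one vector multiply (xvmuldp).  */
static void
scale_v2df_model (double tgt[2], const double src[2], int scale)
{
  double f = ldexp (1.0, scale);  /* 2**scale */
  tgt[0] = src[0] * f;
  tgt[1] = src[1] * f;
}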
*/ if (REGNO (addis_reg) != REGNO (target)) { - if (!peep2_p) - return false; - if (reg_mentioned_p (target, mem)) return false; @@ -32854,9 +32851,6 @@ expand_fusion_gpr_load (rtx *operands) enum machine_mode extend_mode = target_mode; enum machine_mode ptr_mode = Pmode; enum rtx_code extend = UNKNOWN; - rtx addis_reg = ((ptr_mode == target_mode) - ? target - : simplify_subreg (ptr_mode, target, target_mode, 0)); if (GET_CODE (orig_mem) == ZERO_EXTEND || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) @@ -32873,13 +32867,14 @@ expand_fusion_gpr_load (rtx *operands) gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); offset = XEXP (orig_addr, 1); - new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset); - new_mem = change_address (orig_mem, target_mode, new_addr); + new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); + new_mem = replace_equiv_address_nv (orig_mem, new_addr); if (extend != UNKNOWN) new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); - emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value)); + new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), + UNSPEC_FUSION_GPR); emit_insn (gen_rtx_SET (VOIDmode, target, new_mem)); if (extend == SIGN_EXTEND) @@ -32898,55 +32893,40 @@ expand_fusion_gpr_load (rtx *operands) } /* Return a string to fuse an addis instruction with a gpr load to the same - register that we loaded up the addis instruction. The code is complicated, - so we call output_asm_insn directly, and just return "". + register that we loaded up the addis instruction. The address that is used + is the logical address that was formed during peephole2: + (lo_sum (high) (low-part)) - The operands are: - operands[0] register set with addis (must be same reg as target). - operands[1] value set via addis - operands[2] target register being loaded - operands[3] D-form memory reference using operands[0]. */ + The code is complicated, so we call output_asm_insn directly, and just + return "". */ const char * -emit_fusion_gpr_load (rtx *operands) +emit_fusion_gpr_load (rtx target, rtx mem) { - rtx addis_reg = operands[0]; - rtx addis_value = operands[1]; - rtx target = operands[2]; - rtx mem = operands[3]; + rtx addis_value; rtx fuse_ops[10]; rtx addr; rtx load_offset; const char *addis_str = NULL; const char *load_str = NULL; - const char *extend_insn = NULL; const char *mode_name = NULL; char insn_template[80]; enum machine_mode mode; const char *comment_str = ASM_COMMENT_START; - bool sign_p = false; - gcc_assert (REG_P (addis_reg) && REG_P (target)); - gcc_assert (REGNO (addis_reg) == REGNO (target)); - - if (*comment_str == ' ') - comment_str++; - - /* Allow sign/zero extension. */ if (GET_CODE (mem) == ZERO_EXTEND) mem = XEXP (mem, 0); - else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN) - { - sign_p = true; - mem = XEXP (mem, 0); - } + gcc_assert (REG_P (target) && MEM_P (mem)); + + if (*comment_str == ' ') + comment_str++; - gcc_assert (MEM_P (mem)); addr = XEXP (mem, 0); if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) gcc_unreachable (); + addis_value = XEXP (addr, 0); load_offset = XEXP (addr, 1); /* Now emit the load instruction to the same register. 
*/ @@ -32956,29 +32936,22 @@ emit_fusion_gpr_load (rtx *operands) case QImode: mode_name = "char"; load_str = "lbz"; - extend_insn = "extsb %0,%0"; break; case HImode: mode_name = "short"; load_str = "lhz"; - extend_insn = "extsh %0,%0"; break; case SImode: mode_name = "int"; load_str = "lwz"; - extend_insn = "extsw %0,%0"; break; case DImode: - if (TARGET_POWERPC64) - { - mode_name = "long"; - load_str = "ld"; - } - else - gcc_unreachable (); + gcc_assert (TARGET_POWERPC64); + mode_name = "long"; + load_str = "ld"; break; default: @@ -33122,17 +33095,1191 @@ emit_fusion_gpr_load (rtx *operands) else fatal_insn ("Unable to generate load offset for fusion", load_offset); - /* Handle sign extension. The peephole2 pass generates this as a separate - insn, but we handle it just in case it got reattached. */ - if (sign_p) + return ""; +} + +/* Analyze vector computations and remove unnecessary doubleword + swaps (xxswapdi instructions). This pass is performed only + for little-endian VSX code generation. + + For this specific case, loads and stores of 4x32 and 2x64 vectors + are inefficient. These are implemented using the lvx2dx and + stvx2dx instructions, which invert the order of doublewords in + a vector register. Thus the code generation inserts an xxswapdi + after each such load, and prior to each such store. (For spill + code after register assignment, an additional xxswapdi is inserted + following each store in order to return a hard register to its + unpermuted value.) + + The extra xxswapdi instructions reduce performance. This can be + particularly bad for vectorized code. The purpose of this pass + is to reduce the number of xxswapdi instructions required for + correctness. + + The primary insight is that much code that operates on vectors + does not care about the relative order of elements in a register, + so long as the correct memory order is preserved. If we have + a computation where all input values are provided by lvxd2x/xxswapdi + sequences, all outputs are stored using xxswapdi/stvxd2x sequences, + and all intermediate computations are pure SIMD (independent of + element order), then all the xxswapdi's associated with the loads + and stores may be removed. + + This pass uses some of the infrastructure and logical ideas from + the "web" pass in web.c. We create maximal webs of computations + fitting the description above using union-find. Each such web is + then optimized by removing its unnecessary xxswapdi instructions. + + The pass is placed prior to global optimization so that we can + perform the optimization in the safest and simplest way possible; + that is, by replacing each xxswapdi insn with a register copy insn. + Subsequent forward propagation will remove copies where possible. + + There are some operations sensitive to element order for which we + can still allow the operation, provided we modify those operations. + These include CONST_VECTORs, for which we must swap the first and + second halves of the constant vector; and SUBREGs, for which we + must adjust the byte offset to account for the swapped doublewords. + A remaining opportunity would be non-immediate-form splats, for + which we should adjust the selected lane of the input. We should + also make code generation adjustments for sum-across operations, + since this is a common vectorizer reduction. + + Because we run prior to the first split, we can see loads and stores + here that match *vsx_le_perm_{load,store}_. 
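A concrete instance of the webs described above, as a sketch assuming -O2 -mcpu=power8 on little endian. Before this pass, each vector load is followed by an xxswapdi and each vector store preceded by one; because element-wise addition is order-independent, the whole web reduces to permuting loads and stores with no swaps at all:

#include <altivec.h>

/* All inputs arrive via vector loads, all outputs leave via vector
   stores, and the intermediate computation is pure SIMD, so every
   xxswapdi in the loop body is removable.  */
void
vadd (vector int *restrict a, const vector int *restrict b,
      const vector int *restrict c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] + c[i];
}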
These are vanilla + vector loads and stores that have not yet been split into a permuting + load/store and a swap. (One way this can happen is with a builtin + call to vec_vsx_{ld,st}.) We can handle these as well, but rather + than deleting a swap, we convert the load/store into a permuting + load/store (which effectively removes the swap). */ + +/* Notes on Permutes + + We do not currently handle computations that contain permutes. There + is a general transformation that can be performed correctly, but it + may introduce more expensive code than it replaces. To handle these + would require a cost model to determine when to perform the optimization. + This commentary records how this could be done if desired. + + The most general permute is something like this (example for V16QI): + + (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) + (parallel [(const_int a0) (const_int a1) + ... + (const_int a14) (const_int a15)])) + + where a0,...,a15 are in [0,31] and select elements from op1 and op2 + to produce in the result. + + Regardless of mode, we can convert the PARALLEL to a mask of 16 + byte-element selectors. Let's call this M, with M[i] representing + the ith byte-element selector value. Then if we swap doublewords + throughout the computation, we can get correct behavior by replacing + M with M' as follows: + + { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23] + M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31] + { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23] + { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31] + + This seems promising at first, since we are just replacing one mask + with another. But certain masks are preferable to others. If M + is a mask that matches a vmrghh pattern, for example, M' certainly + will not. Instead of a single vmrghh, we would generate a load of + M' and a vperm. So we would need to know how many xxswapd's we can + remove as a result of this transformation to determine if it's + profitable; and preferably the logic would need to be aware of all + the special preferable masks. + + Another form of permute is an UNSPEC_VPERM, in which the mask is + already in a register. In some cases, this mask may be a constant + that we can discover with ud-chains, in which case the above + transformation is ok. However, the common usage here is for the + mask to be produced by an UNSPEC_LVSL, in which case the mask + cannot be known at compile time. In such a case we would have to + generate several instructions to compute M' as above at run time, + and a cost model is needed again. */ + +/* This is based on the union-find logic in web.c. web_entry_base is + defined in df.h. */ +class swap_web_entry : public web_entry_base +{ + public: + /* Pointer to the insn. */ + rtx insn; + /* Set if insn contains a mention of a vector register. All other + fields are undefined if this field is unset. */ + unsigned int is_relevant : 1; + /* Set if insn is a load. */ + unsigned int is_load : 1; + /* Set if insn is a store. */ + unsigned int is_store : 1; + /* Set if insn is a doubleword swap. This can either be a register swap + or a permuting load or store (test is_load and is_store for this). */ + unsigned int is_swap : 1; + /* Set if the insn has a live-in use of a parameter register. */ + unsigned int is_live_in : 1; + /* Set if the insn has a live-out def of a return register. */ + unsigned int is_live_out : 1; + /* Set if the insn contains a subreg reference of a vector register. 
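The M-to-M' rewrite quoted above is mechanical; isolated here as a C sketch for reference (not part of the patch):

/* Remap one byte selector of a V16QI permute so the same bytes are
   chosen once both inputs and the output have swapped doublewords.
   Selectors 0-7 and 16-23 name a high doubleword, 8-15 and 24-31 a
   low one, so (s & 8) distinguishes the two cases in the formula.  */
static unsigned char
swap_mask_elt (const unsigned char m[16], int i)
{
  unsigned char s = (i < 8) ? m[i + 8] : m[i - 8];
  return (s & 8) == 0 ? s + 8 : s - 8;
}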
*/ + unsigned int contains_subreg : 1; + /* Set if the insn contains a 128-bit integer operand. */ + unsigned int is_128_int : 1; + /* Set if this is a call-insn. */ + unsigned int is_call : 1; + /* Set if this insn does not perform a vector operation for which + element order matters, or if we know how to fix it up if it does. + Undefined if is_swap is set. */ + unsigned int is_swappable : 1; + /* A nonzero value indicates what kind of special handling for this + insn is required if doublewords are swapped. Undefined if + is_swappable is not set. */ + unsigned int special_handling : 3; + /* Set if the web represented by this entry cannot be optimized. */ + unsigned int web_not_optimizable : 1; + /* Set if this insn should be deleted. */ + unsigned int will_delete : 1; +}; + +enum special_handling_values { + SH_NONE = 0, + SH_CONST_VECTOR, + SH_SUBREG, + SH_NOSWAP_LD, + SH_NOSWAP_ST, + SH_EXTRACT, + SH_SPLAT +}; + +/* Union INSN with all insns containing definitions that reach USE. + Detect whether USE is live-in to the current function. */ +static void +union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) +{ + struct df_link *link = DF_REF_CHAIN (use); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + while (link) { - gcc_assert (extend_insn != NULL); - output_asm_insn (extend_insn, fuse_ops); + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_in = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx def_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (def_insn)); + } + + link = link->next; } +} - return ""; +/* Union INSN with all insns containing uses reached from DEF. + Detect whether DEF is live-out from the current function. */ +static void +union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) +{ + struct df_link *link = DF_REF_CHAIN (def); + + if (!link) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + while (link) + { + /* This could be an eh use or some other artificial use; + we treat these all the same (killing the optimization). */ + if (DF_REF_IS_ARTIFICIAL (link->ref)) + insn_entry[INSN_UID (insn)].is_live_out = 1; + + if (DF_REF_INSN_INFO (link->ref)) + { + rtx use_insn = DF_REF_INSN (link->ref); + (void)unionfind_union (insn_entry + INSN_UID (insn), + insn_entry + INSN_UID (use_insn)); + } + + link = link->next; + } +} + +/* Return 1 iff INSN is a load insn, including permuting loads that + represent an lvxd2x instruction; else return 0. */ +static unsigned int +insn_is_load_p (rtx insn) +{ + rtx body = PATTERN (insn); + + if (GET_CODE (body) == SET) + { + if (GET_CODE (SET_SRC (body)) == MEM) + return 1; + + if (GET_CODE (SET_SRC (body)) == VEC_SELECT + && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM) + return 1; + + return 0; + } + + if (GET_CODE (body) != PARALLEL) + return 0; + + rtx set = XVECEXP (body, 0, 0); + + if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM) + return 1; + + return 0; +} + +/* Return 1 iff INSN is a store insn, including permuting stores that + represent an stvxd2x instruction; else return 0. */ +static unsigned int +insn_is_store_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM) + return 1; + if (GET_CODE (body) != PARALLEL) + return 0; + rtx set = XVECEXP (body, 0, 0); + if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM) + return 1; + return 0; } +/* Return 1 iff INSN swaps doublewords. 
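insn_is_swap_p below accepts exactly the selector pattern a doubleword swap produces at any element width; restated on a plain index array as a sketch:

/* True iff sel[] is the doubleword-swap permutation for len
   elements: the second half followed by the first half, e.g.
   {1,0} for V2DI or {2,3,0,1} for V4SI.  */
static int
is_swap_selector (const int *sel, int len)
{
  int i;
  for (i = 0; i < len / 2; i++)
    if (sel[i] != len / 2 + i || sel[len / 2 + i] != i)
      return 0;
  return 1;
}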
This may be a reg-reg swap, + a permuting load, or a permuting store. */ +static unsigned int +insn_is_swap_p (rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) != SET) + return 0; + rtx rhs = SET_SRC (body); + if (GET_CODE (rhs) != VEC_SELECT) + return 0; + rtx parallel = XEXP (rhs, 1); + if (GET_CODE (parallel) != PARALLEL) + return 0; + unsigned int len = XVECLEN (parallel, 0); + if (len != 2 && len != 4 && len != 8 && len != 16) + return 0; + for (unsigned int i = 0; i < len / 2; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i) + return 0; + } + for (unsigned int i = len / 2; i < len; ++i) + { + rtx op = XVECEXP (parallel, 0, i); + if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2) + return 0; + } + return 1; +} + +/* Return 1 iff OP is an operand that will not be affected by having + vector doublewords swapped in memory. */ +static unsigned int +rtx_is_swappable_p (rtx op, unsigned int *special) +{ + enum rtx_code code = GET_CODE (op); + int i, j; + rtx parallel; + + switch (code) + { + case LABEL_REF: + case SYMBOL_REF: + case CLOBBER: + case REG: + return 1; + + case VEC_CONCAT: + case ASM_INPUT: + case ASM_OPERANDS: + return 0; + + case CONST_VECTOR: + { + *special = SH_CONST_VECTOR; + return 1; + } + + case VEC_DUPLICATE: + /* Opportunity: If XEXP (op, 0) has the same mode as the result, + and XEXP (op, 1) is a PARALLEL with a single QImode const int, + it represents a vector splat for which we can do special + handling. */ + if (GET_CODE (XEXP (op, 0)) == CONST_INT) + return 1; + else if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0))) + /* This catches V2DF and V2DI splat, at a minimum. */ + return 1; + else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT) + /* If the duplicated item is from a select, defer to the select + processing to see if we can change the lane for the splat. */ + return rtx_is_swappable_p (XEXP (op, 0), special); + else + return 0; + + case VEC_SELECT: + /* A vec_extract operation is ok if we change the lane. */ + if (GET_CODE (XEXP (op, 0)) == REG + && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 1 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT) + { + *special = SH_EXTRACT; + return 1; + } + else + return 0; + + case UNSPEC: + { + /* Various operations are unsafe for this optimization, at least + without significant additional work. Permutes are obviously + problematic, as both the permute control vector and the ordering + of the target values are invalidated by doubleword swapping. + Vector pack and unpack modify the number of vector lanes. + Merge-high/low will not operate correctly on swapped operands. + Vector shifts across element boundaries are clearly uncool, + as are vector select and concatenate operations. Vector + sum-across instructions define one operand with a specific + order-dependent element, so additional fixup code would be + needed to make those work. Vector set and non-immediate-form + vector splat are element-order sensitive. A few of these + cases might be workable with special handling if required. 
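The distinction drawn by the UNSPEC list below is visible at source level; a sketch (the intrinsic choices are illustrative):

#include <altivec.h>

/* Order-independent: lane i of the result depends only on lane i of
   the inputs, so swapped doublewords remain mutually consistent.  */
vector int
web_safe (vector int a, vector int b)
{
  return vec_add (a, b);
}

/* Order-sensitive: the control vector indexes absolute byte
   positions (UNSPEC_VPERM), so a swapped register layout would
   select the wrong bytes; this makes the web unswappable.  */
vector int
web_unsafe (vector int a, vector int b, vector unsigned char sel)
{
  return vec_perm (a, b, sel);
}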
*/ + int val = XINT (op, 1); + switch (val) + { + default: + break; + case UNSPEC_VMRGH_DIRECT: + case UNSPEC_VMRGL_DIRECT: + case UNSPEC_VPACK_SIGN_SIGN_SAT: + case UNSPEC_VPACK_SIGN_UNS_SAT: + case UNSPEC_VPACK_UNS_UNS_MOD: + case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT: + case UNSPEC_VPACK_UNS_UNS_SAT: + case UNSPEC_VPERM: + case UNSPEC_VPERM_UNS: + case UNSPEC_VPERMHI: + case UNSPEC_VPERMSI: + case UNSPEC_VPKPX: + case UNSPEC_VSLDOI: + case UNSPEC_VSLO: + case UNSPEC_VSRO: + case UNSPEC_VSUM2SWS: + case UNSPEC_VSUM4S: + case UNSPEC_VSUM4UBS: + case UNSPEC_VSUMSWS: + case UNSPEC_VSUMSWS_DIRECT: + case UNSPEC_VSX_CONCAT: + case UNSPEC_VSX_SET: + case UNSPEC_VSX_SLDWI: + case UNSPEC_VUNPACK_HI_SIGN: + case UNSPEC_VUNPACK_HI_SIGN_DIRECT: + case UNSPEC_VUNPACK_LO_SIGN: + case UNSPEC_VUNPACK_LO_SIGN_DIRECT: + case UNSPEC_VUPKHPX: + case UNSPEC_VUPKHS_V4SF: + case UNSPEC_VUPKHU_V4SF: + case UNSPEC_VUPKLPX: + case UNSPEC_VUPKLS_V4SF: + case UNSPEC_VUPKLU_V4SF: + /* The following could be handled as an idiom with XXSPLTW. + These place a scalar in BE element zero, but the XXSPLTW + will currently expect it in BE element 2 in a swapped + region. When one of these feeds an XXSPLTW with no other + defs/uses either way, we can avoid the lane change for + XXSPLTW and things will be correct. TBD. */ + case UNSPEC_VSX_CVDPSPN: + case UNSPEC_VSX_CVSPDP: + case UNSPEC_VSX_CVSPDPN: + return 0; + case UNSPEC_VSPLT_DIRECT: + *special = SH_SPLAT; + return 1; + } + } + + default: + break; + } + + const char *fmt = GET_RTX_FORMAT (code); + int ok = 1; + + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XEXP (op, i), &special_op); + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && special_op != SH_NONE + && *special != special_op) + return 0; + *special = special_op; + } + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + { + unsigned int special_op = SH_NONE; + ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op); + /* Ensure we never have two kinds of special handling + for the same insn. */ + if (*special != SH_NONE && special_op != SH_NONE + && *special != special_op) + return 0; + *special = special_op; + } + + return ok; +} + +/* Return 1 iff INSN is an operand that will not be affected by + having vector doublewords swapped in memory (in which case + *SPECIAL is unchanged), or that can be modified to be correct + if vector doublewords are swapped in memory (in which case + *SPECIAL is changed to a value indicating how). */ +static unsigned int +insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, + unsigned int *special) +{ + /* Calls are always bad. */ + if (GET_CODE (insn) == CALL_INSN) + return 0; + + /* Loads and stores seen here are not permuting, but we can still + fix them up by converting them to permuting ones. Exceptions: + UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL + body instead of a SET; and UNSPEC_STVE, which has an UNSPEC + for the SET source. */ + rtx body = PATTERN (insn); + int i = INSN_UID (insn); + + if (insn_entry[i].is_load) + { + if (GET_CODE (body) == SET) + { + *special = SH_NOSWAP_LD; + return 1; + } + else + return 0; + } + + if (insn_entry[i].is_store) + { + if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC) + { + *special = SH_NOSWAP_ST; + return 1; + } + else + return 0; + } + + /* Otherwise check the operands for vector lane violations. 
*/ + return rtx_is_swappable_p (body, special); +} + +enum chain_purpose { FOR_LOADS, FOR_STORES }; + +/* Return true if the UD or DU chain headed by LINK is non-empty, + and every entry on the chain references an insn that is a + register swap. Furthermore, if PURPOSE is FOR_LOADS, each such + register swap must have only permuting loads as reaching defs. + If PURPOSE is FOR_STORES, each such register swap must have only + register swaps or permuting stores as reached uses. */ +static bool +chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, + enum chain_purpose purpose) +{ + if (!link) + return false; + + for (; link; link = link->next) + { + if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref)))) + continue; + + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx reached_insn = DF_REF_INSN (link->ref); + unsigned uid = INSN_UID (reached_insn); + + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load + || insn_entry[uid].is_store) + return false; + + if (purpose == FOR_LOADS) + { + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + struct df_link *swap_link = DF_REF_CHAIN (use); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_def_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_def_insn); + + /* Only permuting loads are allowed. */ + if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + else if (purpose == FOR_STORES) + { + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *swap_link = DF_REF_CHAIN (def); + + while (swap_link) + { + if (DF_REF_IS_ARTIFICIAL (link->ref)) + return false; + + rtx swap_use_insn = DF_REF_INSN (swap_link->ref); + unsigned uid2 = INSN_UID (swap_use_insn); + + /* Permuting stores or register swaps are allowed. */ + if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) + return false; + + swap_link = swap_link->next; + } + } + } + } + + return true; +} + +/* Mark the xxswapdi instructions associated with permuting loads and + stores for removal. Note that we only flag them for deletion here, + as there is a possibility of a swap being reached from multiple + loads, etc. */ +static void +mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) +{ + rtx insn = insn_entry[i].insn; + unsigned uid = INSN_UID (insn); + + if (insn_entry[i].is_load) + { + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *link = DF_REF_CHAIN (def); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx use_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (use_insn)].will_delete = 1; + link = link->next; + } + } + } + else if (insn_entry[i].is_store) + { + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + /* Ignore uses for addressability. */ + machine_mode mode = GET_MODE (DF_REF_REG (use)); + if (!VECTOR_MODE_P (mode)) + continue; + + struct df_link *link = DF_REF_CHAIN (use); + + /* We know by now that these are swaps, so we can delete + them confidently. */ + while (link) + { + rtx def_insn = DF_REF_INSN (link->ref); + insn_entry[INSN_UID (def_insn)].will_delete = 1; + link = link->next; + } + } + } +} + +/* OP is either a CONST_VECTOR or an expression containing one. 
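The constant-vector fixup below is a straight half-swap; modeled on a plain array for clarity (a sketch, not GCC API):

/* {0,1,2,3} -> {2,3,0,1}: the halves trade places but order within
   each half is preserved, matching what xxswapdi does to a register
   image.  */
static void
swap_halves_model (int v[], int nunits)
{
  int i, t;
  for (i = 0; i < nunits / 2; i++)
    {
      t = v[i];
      v[i] = v[i + nunits / 2];
      v[i + nunits / 2] = t;
    }
}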
+ Swap the first half of the vector with the second in the first + case. Recurse to find it in the second. */ +static void +swap_const_vector_halves (rtx op) +{ + int i; + enum rtx_code code = GET_CODE (op); + if (GET_CODE (op) == CONST_VECTOR) + { + int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2; + for (i = 0; i < half_units; ++i) + { + rtx temp = CONST_VECTOR_ELT (op, i); + CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units); + CONST_VECTOR_ELT (op, i + half_units) = temp; + } + } + else + { + int j; + const char *fmt = GET_RTX_FORMAT (code); + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + swap_const_vector_halves (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + swap_const_vector_halves (XVECEXP (op, i, j)); + } +} + +/* Find all subregs of a vector expression that perform a narrowing, + and adjust the subreg index to account for doubleword swapping. */ +static void +adjust_subreg_index (rtx op) +{ + enum rtx_code code = GET_CODE (op); + if (code == SUBREG + && (GET_MODE_SIZE (GET_MODE (op)) + < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) + { + unsigned int index = SUBREG_BYTE (op); + if (index < 8) + index += 8; + else + index -= 8; + SUBREG_BYTE (op) = index; + } + + const char *fmt = GET_RTX_FORMAT (code); + int i,j; + for (i = 0; i < GET_RTX_LENGTH (code); ++i) + if (fmt[i] == 'e' || fmt[i] == 'u') + adjust_subreg_index (XEXP (op, i)); + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (op, i); ++j) + adjust_subreg_index (XVECEXP (op, i, j)); +} + +/* Convert the non-permuting load INSN to a permuting one. */ +static void +permute_load (rtx insn) +{ + rtx body = PATTERN (insn); + rtx mem_op = SET_SRC (body); + rtx tgt_reg = SET_DEST (body); + machine_mode mode = GET_MODE (tgt_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing load %d with permuted load\n", + INSN_UID (insn)); +} + +/* Convert the non-permuting store INSN to a permuting one. */ +static void +permute_store (rtx insn) +{ + rtx body = PATTERN (insn); + rtx src_reg = SET_SRC (body); + machine_mode mode = GET_MODE (src_reg); + int n_elts = GET_MODE_NUNITS (mode); + int half_elts = n_elts / 2; + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); + int i, j; + for (i = 0, j = half_elts; i < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + for (i = half_elts, j = 0; j < half_elts; ++i, ++j) + XVECEXP (par, 0, i) = GEN_INT (j); + rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); + SET_SRC (body) = sel; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Replacing store %d with permuted store\n", + INSN_UID (insn)); +} + +/* Given OP that contains a vector extract operation, adjust the index + of the extracted lane to account for the doubleword swap. */ +static void +adjust_extract (rtx insn) +{ + rtx src = SET_SRC (PATTERN (insn)); + /* The vec_select may be wrapped in a vec_duplicate for a splat, so + account for that. */ + rtx sel = GET_CODE (src) == VEC_DUPLICATE ? 
XEXP (src, 0) : src; + rtx par = XEXP (sel, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; + int lane = INTVAL (XVECEXP (par, 0, 0)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (par, 0, 0) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a vector direct-splat operation, adjust the index + of the source lane to account for the doubleword swap. */ +static void +adjust_splat (rtx insn) +{ + rtx body = PATTERN (insn); + rtx unspec = XEXP (body, 1); + int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; + int lane = INTVAL (XVECEXP (unspec, 0, 1)); + lane = lane >= half_elts ? lane - half_elts : lane + half_elts; + XVECEXP (unspec, 0, 1) = GEN_INT (lane); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); +} + +/* The insn described by INSN_ENTRY[I] can be swapped, but only + with special handling. Take care of that here. */ +static void +handle_special_swappables (swap_web_entry *insn_entry, unsigned i) +{ + rtx insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + + switch (insn_entry[i].special_handling) + { + default: + gcc_unreachable (); + case SH_CONST_VECTOR: + { + /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */ + gcc_assert (GET_CODE (body) == SET); + rtx rhs = SET_SRC (body); + swap_const_vector_halves (rhs); + if (dump_file) + fprintf (dump_file, "Swapping constant halves in insn %d\n", i); + break; + } + case SH_SUBREG: + /* A subreg of the same size is already safe. For subregs that + select a smaller portion of a reg, adjust the index for + swapped doublewords. */ + adjust_subreg_index (body); + if (dump_file) + fprintf (dump_file, "Adjusting subreg in insn %d\n", i); + break; + case SH_NOSWAP_LD: + /* Convert a non-permuting load to a permuting one. */ + permute_load (insn); + break; + case SH_NOSWAP_ST: + /* Convert a non-permuting store to a permuting one. */ + permute_store (insn); + break; + case SH_EXTRACT: + /* Change the lane on an extract operation. */ + adjust_extract (insn); + break; + case SH_SPLAT: + /* Change the lane on a direct-splat operation. */ + adjust_splat (insn); + break; + } +} + +/* Find the insn from the Ith table entry, which is known to be a + register swap Y = SWAP(X). Replace it with a copy Y = X. */ +static void +replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) +{ + rtx insn = insn_entry[i].insn; + rtx body = PATTERN (insn); + rtx src_reg = XEXP (SET_SRC (body), 0); + rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg); + rtx new_insn = emit_insn_before (copy, insn); + set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); + df_insn_rescan (new_insn); + + if (dump_file) + { + unsigned int new_uid = INSN_UID (new_insn); + fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); + } + + df_insn_delete (insn); + remove_insn (insn); + INSN_DELETED_P (insn) = 1; +} + +/* Dump the swap table to DUMP_FILE. 
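adjust_extract and adjust_splat above share a single lane remapping, isolated here as a sketch:

/* A lane in the high half of a register maps to the low half and
   vice versa once doublewords are swapped; for V4SI (half_elts == 2)
   this exchanges lanes 0<->2 and 1<->3.  */
static int
remap_lane (int lane, int half_elts)
{
  return lane >= half_elts ? lane - half_elts : lane + half_elts;
}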
*/ +static void +dump_swap_insn_table (swap_web_entry *insn_entry) +{ + int e = get_max_uid (); + fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); + + for (int i = 0; i < e; ++i) + if (insn_entry[i].is_relevant) + { + swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); + fprintf (dump_file, "%6d %6d ", i, + pred_entry && pred_entry->insn + ? INSN_UID (pred_entry->insn) : 0); + if (insn_entry[i].is_load) + fputs ("load ", dump_file); + if (insn_entry[i].is_store) + fputs ("store ", dump_file); + if (insn_entry[i].is_swap) + fputs ("swap ", dump_file); + if (insn_entry[i].is_live_in) + fputs ("live-in ", dump_file); + if (insn_entry[i].is_live_out) + fputs ("live-out ", dump_file); + if (insn_entry[i].contains_subreg) + fputs ("subreg ", dump_file); + if (insn_entry[i].is_128_int) + fputs ("int128 ", dump_file); + if (insn_entry[i].is_call) + fputs ("call ", dump_file); + if (insn_entry[i].is_swappable) + { + fputs ("swappable ", dump_file); + if (insn_entry[i].special_handling == SH_CONST_VECTOR) + fputs ("special:constvec ", dump_file); + else if (insn_entry[i].special_handling == SH_SUBREG) + fputs ("special:subreg ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_LD) + fputs ("special:load ", dump_file); + else if (insn_entry[i].special_handling == SH_NOSWAP_ST) + fputs ("special:store ", dump_file); + else if (insn_entry[i].special_handling == SH_EXTRACT) + fputs ("special:extract ", dump_file); + else if (insn_entry[i].special_handling == SH_SPLAT) + fputs ("special:splat ", dump_file); + } + if (insn_entry[i].web_not_optimizable) + fputs ("unoptimizable ", dump_file); + if (insn_entry[i].will_delete) + fputs ("delete ", dump_file); + fputs ("\n", dump_file); + } + fputs ("\n", dump_file); +} + +/* Main entry point for this pass. */ +unsigned int +rs6000_analyze_swaps (function *fun) +{ + swap_web_entry *insn_entry; + basic_block bb; + rtx insn; + + /* Dataflow analysis for use-def chains. */ + df_set_flags (DF_RD_PRUNE_DEAD_DEFS); + df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); + df_analyze (); + df_set_flags (DF_DEFER_INSN_RESCAN); + + /* Allocate structure to represent webs of insns. */ + insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); + + /* Walk the insns to gather basic data. */ + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS (bb, insn) + { + unsigned int uid = INSN_UID (insn); + if (NONDEBUG_INSN_P (insn)) + { + insn_entry[uid].insn = insn; + + if (GET_CODE (insn) == CALL_INSN) + insn_entry[uid].is_call = 1; + + /* Walk the uses and defs to see if we mention vector regs. + Record any constraints on optimization of such mentions. */ + df_ref *use_rec; + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref mention = *use_rec; + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If a use gets its value from a call insn, it will be + a hard register and will look like (reg:V4SI 3 3). + The df analysis creates two mentions for GPR3 and GPR4, + both DImode. We must recognize this and treat it as a + vector mention to ensure the call is unioned with this + use. */ + if (mode == DImode && DF_REF_INSN_INFO (mention)) + { + rtx feeder = DF_REF_INSN (mention); + /* FIXME: It is pretty hard to get from the df mention + to the mode of the use in the insn. We arbitrarily + pick a vector mode here, even though the use might + be a real DImode. 
We can be too conservative + (create a web larger than necessary) because of + this, so consider eventually fixing this. */ + if (GET_CODE (feeder) == CALL_INSN) + mode = V4SImode; + } + + if (VECTOR_MODE_P (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + union_defs (insn_entry, insn, mention); + } + } + df_ref *def_rec; + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref mention = *def_rec; + /* We use DF_REF_REAL_REG here to get inside any subregs. */ + machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); + + /* If we're loading up a hard vector register for a call, + it looks like (set (reg:V4SI 9 9) (...)). The df + analysis creates two mentions for GPR9 and GPR10, both + DImode. So relying on the mode from the mentions + isn't sufficient to ensure we union the call into the + web with the parameter setup code. */ + if (mode == DImode && GET_CODE (insn) == SET + && VECTOR_MODE_P (GET_MODE (SET_DEST (insn)))) + mode = GET_MODE (SET_DEST (insn)); + + if (VECTOR_MODE_P (mode) || mode == TImode) + { + insn_entry[uid].is_relevant = 1; + if (mode == TImode || mode == V1TImode) + insn_entry[uid].is_128_int = 1; + if (DF_REF_INSN_INFO (mention)) + insn_entry[uid].contains_subreg + = !rtx_equal_p (DF_REF_REG (mention), + DF_REF_REAL_REG (mention)); + /* REG_FUNCTION_VALUE_P is not valid for subregs. */ + else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention))) + insn_entry[uid].is_live_out = 1; + union_uses (insn_entry, insn, mention); + } + } + + if (insn_entry[uid].is_relevant) + { + /* Determine if this is a load or store. */ + insn_entry[uid].is_load = insn_is_load_p (insn); + insn_entry[uid].is_store = insn_is_store_p (insn); + + /* Determine if this is a doubleword swap. If not, + determine whether it can legally be swapped. */ + if (insn_is_swap_p (insn)) + insn_entry[uid].is_swap = 1; + else + { + unsigned int special = SH_NONE; + insn_entry[uid].is_swappable + = insn_is_swappable_p (insn_entry, insn, &special); + if (special != SH_NONE && insn_entry[uid].contains_subreg) + insn_entry[uid].is_swappable = 0; + else if (special != SH_NONE) + insn_entry[uid].special_handling = special; + else if (insn_entry[uid].contains_subreg) + insn_entry[uid].special_handling = SH_SUBREG; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table when first built\n"); + dump_swap_insn_table (insn_entry); + } + + /* Record unoptimizable webs. */ + unsigned e = get_max_uid (), i; + for (i = 0; i < e; ++i) + { + if (!insn_entry[i].is_relevant) + continue; + + swap_web_entry *root + = (swap_web_entry*)(&insn_entry[i])->unionfind_root (); + unsigned uid = INSN_UID (insn_entry[i].insn); + + if (insn_entry[i].is_live_in || insn_entry[i].is_live_out + || (insn_entry[i].contains_subreg + && insn_entry[i].special_handling != SH_SUBREG) + || insn_entry[i].is_128_int || insn_entry[i].is_call + || !(insn_entry[i].is_swappable || insn_entry[i].is_swap)) + root->web_not_optimizable = 1; + + /* If we have loads or stores that aren't permuting then the + optimization isn't appropriate. 
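The live-in/live-out restrictions recorded above exist because values crossing the function boundary sit in registers with architectural (unswapped) element order; a sketch of a web the pass must therefore reject:

#include <altivec.h>

/* Both parameters are live-in (defined by the caller) and the result
   is live-out, so the containing web is marked web_not_optimizable
   even though vec_add itself is order-independent.  */
vector int
callee (vector int a, vector int b)
{
  return vec_add (a, b);
}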
*/ + else if ((insn_entry[i].is_load || insn_entry[i].is_store) + && !insn_entry[i].is_swap && !insn_entry[i].is_swappable) + root->web_not_optimizable = 1; + + /* If we have permuting loads or stores that are not accompanied + by a register swap, the optimization isn't appropriate. */ + else if (insn_entry[i].is_load && insn_entry[i].is_swap) + { + df_ref *def_rec; + + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + struct df_link *link = DF_REF_CHAIN (def); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS)) + { + root->web_not_optimizable = 1; + break; + } + } + } + else if (insn_entry[i].is_store && insn_entry[i].is_swap) + { + df_ref *use_rec; + + for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) + { + df_ref use = *use_rec; + struct df_link *link = DF_REF_CHAIN (use); + + if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES)) + { + root->web_not_optimizable = 1; + break; + } + } + } + } + + if (dump_file) + { + fprintf (dump_file, "\nSwap insn entry table after web analysis\n"); + dump_swap_insn_table (insn_entry); + } + + /* For each load and store in an optimizable web (which implies + the loads and stores are permuting), find the associated + register swaps and mark them for removal. Due to various + optimizations we may mark the same swap more than once. Also + perform special handling for swappable insns that require it. */ + for (i = 0; i < e; ++i) + if ((insn_entry[i].is_load || insn_entry[i].is_store) + && insn_entry[i].is_swap) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + mark_swaps_for_removal (insn_entry, i); + } + else if (insn_entry[i].is_swappable && insn_entry[i].special_handling) + { + swap_web_entry* root_entry + = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); + if (!root_entry->web_not_optimizable) + handle_special_swappables (insn_entry, i); + } + + /* Now delete the swaps marked for removal. */ + for (i = 0; i < e; ++i) + if (insn_entry[i].will_delete) + replace_swap_with_copy (insn_entry, i); + + /* Clean up. 
*/ + free (insn_entry); + return 0; +} + +const pass_data pass_data_analyze_swaps = +{ + RTL_PASS, /* type */ + "swaps", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_analyze_swaps : public rtl_opt_pass +{ +public: + pass_analyze_swaps(gcc::context *ctxt) + : rtl_opt_pass(pass_data_analyze_swaps, ctxt) + {} + + /* opt_pass methods: */ + bool gate () + { + return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX + && rs6000_optimize_swaps); + } + + unsigned int execute () + { + return rs6000_analyze_swaps (cfun); + } + +}; // class pass_analyze_swaps + +rtl_opt_pass * +make_pass_analyze_swaps (gcc::context *ctxt) +{ + return new pass_analyze_swaps (ctxt); +} struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.md b/gcc-4.9/gcc/config/rs6000/rs6000.md index d078491e1..f77754aa1 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.md +++ b/gcc-4.9/gcc/config/rs6000/rs6000.md @@ -137,6 +137,7 @@ UNSPEC_UNPACK_128BIT UNSPEC_PACK_128BIT UNSPEC_LSQ + UNSPEC_FUSION_GPR ]) ;; @@ -328,8 +329,25 @@ (define_mode_attr f32_sv [(SF "stxsspx %x1,%y0") (SD "stxsiwzx %x1,%y0")]) ; Definitions for 32-bit fpr direct move +; At present, the decimal modes are not allowed in the traditional altivec +; registers, so restrict the constraints to just the traditional FPRs. (define_mode_attr f32_dm [(SF "wn") (SD "wh")]) +; Definitions for 32-bit VSX +(define_mode_attr f32_vsx [(SF "ww") (SD "wn")]) + +; Definitions for 32-bit use of altivec registers +(define_mode_attr f32_av [(SF "wu") (SD "wn")]) + +; Definitions for 64-bit VSX +(define_mode_attr f64_vsx [(DF "ws") (DD "wn")]) + +; Definitions for 64-bit direct move +(define_mode_attr f64_dm [(DF "wk") (DD "wh")]) + +; Definitions for 64-bit use of altivec registers +(define_mode_attr f64_av [(DF "wv") (DD "wn")]) + ; These modes do not fit in integer registers in 32-bit mode. ; but on e500v2, the gpr are 64 bit registers (define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD]) @@ -435,7 +453,7 @@ ;; either. ;; Mode attribute for boolean operation register constraints for output -(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wa,v") +(define_mode_attr BOOL_REGS_OUTPUT [(TI "&r,r,r,wt,v") (PTI "&r,r,r") (V16QI "wa,v,&?r,?r,?r") (V8HI "wa,v,&?r,?r,?r") @@ -446,7 +464,7 @@ (V1TI "wa,v,&?r,?r,?r")]) ;; Mode attribute for boolean operation register constraints for operand1 -(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v") +(define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wt,v") (PTI "r,0,r") (V16QI "wa,v,r,0,r") (V8HI "wa,v,r,0,r") @@ -457,7 +475,7 @@ (V1TI "wa,v,r,0,r")]) ;; Mode attribute for boolean operation register constraints for operand2 -(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v") +(define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wt,v") (PTI "r,r,0") (V16QI "wa,v,r,r,0") (V8HI "wa,v,r,r,0") @@ -470,7 +488,7 @@ ;; Mode attribute for boolean operation register constraints for operand1 ;; for one_cmpl. 
To simplify things, we repeat the constraint where 0 ;; is used for operand1 or operand2 -(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wa,v") +(define_mode_attr BOOL_REGS_UNARY [(TI "r,0,0,wt,v") (PTI "r,0,0") (V16QI "wa,v,r,0,0") (V8HI "wa,v,r,0,0") @@ -8582,8 +8600,8 @@ [(set (match_operand:BOOL_128 0 "vlogical_operand" "=") (match_operator:BOOL_128 3 "boolean_operator" [(not:BOOL_128 - (match_operand:BOOL_128 2 "vlogical_operand" "")) - (match_operand:BOOL_128 1 "vlogical_operand" "")]))] + (match_operand:BOOL_128 2 "vlogical_operand" "")) + (match_operand:BOOL_128 1 "vlogical_operand" "")]))] "TARGET_P8_VECTOR || (GET_CODE (operands[3]) == AND)" { if (TARGET_VSX && vsx_register_operand (operands[0], mode)) @@ -8598,7 +8616,7 @@ && reload_completed && int_reg_operand (operands[0], mode)" [(const_int 0)] { - rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false, + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true, NULL_RTX); DONE; } @@ -8620,14 +8638,14 @@ [(set (match_operand:TI2 0 "int_reg_operand" "=&r,r,r") (match_operator:TI2 3 "boolean_operator" [(not:TI2 - (match_operand:TI2 1 "int_reg_operand" "r,0,r")) - (match_operand:TI2 2 "int_reg_operand" "r,r,0")]))] + (match_operand:TI2 2 "int_reg_operand" "r,0,r")) + (match_operand:TI2 1 "int_reg_operand" "r,r,0")]))] "!TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" "#" "reload_completed && !TARGET_P8_VECTOR && (GET_CODE (operands[3]) != AND)" [(const_int 0)] { - rs6000_split_logical (operands, GET_CODE (operands[3]), false, true, false, + rs6000_split_logical (operands, GET_CODE (operands[3]), false, false, true, NULL_RTX); DONE; } @@ -9188,8 +9206,8 @@ }") (define_insn "mov_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,,,wu,Z,?,?r,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,,,Z,wu,r,,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,,,,,,Z,?,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,,j,,,Z,,r,,r, h, 0, G,Fn"))] "(gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -9390,8 +9408,8 @@ ;; reloading. (define_insn "*mov_hardfloat32" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,!r,!r,!r") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,G,H,F"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,,Z,,,Y,r,!r,!r,!r,!r") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,,,j,r,Y,r,G,H,F"))] "! TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -9459,8 +9477,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,wv,Z,wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wk") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,wv,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wk,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,,Z,,,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,,,j,r,Y,r,r,h,0,G,H,F,wg,r,,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -15714,22 +15732,9 @@ ;; a GPR. 
The addis instruction must be adjacent to the load, and use the same ;; register that is being loaded. The fused ops must be physically adjacent. -;; We use define_peephole for the actual addis/load, and the register used to -;; hold the addis value must be the same as the register being loaded. We use -;; define_peephole2 to change the register used for addis to be the register -;; being loaded, since we can look at whether it is dead after the load insn. - -(define_peephole - [(set (match_operand:P 0 "base_reg_operand" "") - (match_operand:P 1 "fusion_gpr_addis" "")) - (set (match_operand:INT1 2 "base_reg_operand" "") - (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] - "TARGET_P8_FUSION && fusion_gpr_load_p (operands, false)" -{ - return emit_fusion_gpr_load (operands); -} - [(set_attr "type" "load") - (set_attr "length" "8")]) +;; Find cases where the addis that feeds into a load instruction is either used +;; once or is the same as the target register, and replace it with the fusion +;; insn (define_peephole2 [(set (match_operand:P 0 "base_reg_operand" "") @@ -15737,15 +15742,28 @@ (set (match_operand:INT1 2 "base_reg_operand" "") (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] "TARGET_P8_FUSION - && (REGNO (operands[0]) != REGNO (operands[2]) - || GET_CODE (operands[3]) == SIGN_EXTEND) - && fusion_gpr_load_p (operands, true)" + && fusion_gpr_load_p (operands[0], operands[1], operands[2], + operands[3])" [(const_int 0)] { expand_fusion_gpr_load (operands); DONE; }) +;; Fusion insn, created by the define_peephole2 above (and eventually by +;; reload) + +(define_insn "fusion_gpr_load_" + [(set (match_operand:INT1 0 "base_reg_operand" "=&b") + (unspec:INT1 [(match_operand:INT1 1 "fusion_gpr_mem_combo" "")] + UNSPEC_FUSION_GPR))] + "TARGET_P8_FUSION" +{ + return emit_fusion_gpr_load (operands[0], operands[1]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + ;; Miscellaneous ISA 2.06 (power7) instructions (define_insn "addg6s" diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.opt b/gcc-4.9/gcc/config/rs6000/rs6000.opt index 4c1a02a52..4d0d5e73d 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.opt +++ b/gcc-4.9/gcc/config/rs6000/rs6000.opt @@ -588,3 +588,7 @@ Allow double variables in upper registers with -mcpu=power7 or -mvsx mupper-regs-sf Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) Allow float variables in upper registers with -mcpu=power8 or -mp8-vector + +moptimize-swaps +Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save +Analyze and remove doubleword swaps from VSX computations. diff --git a/gcc-4.9/gcc/config/rs6000/rtems.h b/gcc-4.9/gcc/config/rs6000/rtems.h index 2402d5336..046488034 100644 --- a/gcc-4.9/gcc/config/rs6000/rtems.h +++ b/gcc-4.9/gcc/config/rs6000/rtems.h @@ -52,7 +52,8 @@ %{mcpu=750: %{!Dppc*: %{!Dmpc*: -Dmpc750} } } \ %{mcpu=821: %{!Dppc*: %{!Dmpc*: -Dmpc821} } } \ %{mcpu=860: %{!Dppc*: %{!Dmpc*: -Dmpc860} } } \ -%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } }" +%{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } } \ +%{mcpu=e6500: -D__PPC_CPU_E6500__}" #undef SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS \ diff --git a/gcc-4.9/gcc/config/rs6000/sysv4.h b/gcc-4.9/gcc/config/rs6000/sysv4.h index 7cc543319..9d456ddec 100644 --- a/gcc-4.9/gcc/config/rs6000/sysv4.h +++ b/gcc-4.9/gcc/config/rs6000/sysv4.h @@ -846,11 +846,6 @@ ncrtn.o%s" #define CPP_OS_OPENBSD_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_POSIX_THREADS}" #endif -/* These may be provided by rs6000/linux-grtev2.h. 
*/ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - /* Define any extra SPECS that the compiler needs to generate. */ /* Override rs6000.h definition. */ #undef SUBTARGET_EXTRA_SPECS @@ -916,7 +911,6 @@ ncrtn.o%s" { "cpp_os_openbsd", CPP_OS_OPENBSD_SPEC }, \ { "cpp_os_default", CPP_OS_DEFAULT_SPEC }, \ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }, \ - LINUX_GRTE_EXTRA_SPECS \ SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/rs6000/t-rtems b/gcc-4.9/gcc/config/rs6000/t-rtems index 426f75ac5..eadda0d20 100644 --- a/gcc-4.9/gcc/config/rs6000/t-rtems +++ b/gcc-4.9/gcc/config/rs6000/t-rtems @@ -18,16 +18,24 @@ # along with GCC; see the file COPYING3. If not see # . -MULTILIB_OPTIONS = \ -mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540 \ -msoft-float/mfloat-gprs=double +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_MATCHES = +MULTILIB_EXCEPTIONS = +MULTILIB_REQUIRED = + +MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500 +MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500 + +MULTILIB_OPTIONS += m32 +MULTILIB_DIRNAMES += m32 -MULTILIB_DIRNAMES = \ -m403 m505 m603e m604 m860 m7400 m8540 \ -nof gprsdouble +MULTILIB_OPTIONS += msoft-float/mfloat-gprs=double +MULTILIB_DIRNAMES += nof gprsdouble + +MULTILIB_OPTIONS += mno-spe/mno-altivec +MULTILIB_DIRNAMES += nospe noaltivec -# MULTILIB_MATCHES = ${MULTILIB_MATCHES_FLOAT} -MULTILIB_MATCHES = MULTILIB_MATCHES += ${MULTILIB_MATCHES_ENDIAN} MULTILIB_MATCHES += ${MULTILIB_MATCHES_SYSV} # Map 405 to 403 @@ -52,37 +60,20 @@ MULTILIB_MATCHES += mcpu?8540=mcpu?8548 # (mfloat-gprs=single is implicit default) MULTILIB_MATCHES += mcpu?8540=mcpu?8540/mfloat-gprs?single -# Soft-float only, default implies msoft-float -# NOTE: Must match with MULTILIB_MATCHES_FLOAT and MULTILIB_MATCHES -MULTILIB_SOFTFLOAT_ONLY = \ -*mcpu=401/*msoft-float* \ -*mcpu=403/*msoft-float* \ -*mcpu=405/*msoft-float* \ -*mcpu=801/*msoft-float* \ -*mcpu=821/*msoft-float* \ -*mcpu=823/*msoft-float* \ -*mcpu=860/*msoft-float* - -# Hard-float only, take out msoft-float -MULTILIB_HARDFLOAT_ONLY = \ -*mcpu=505/*msoft-float* - -# Targets which do not support gprs -MULTILIB_NOGPRS = \ -mfloat-gprs=* \ -*mcpu=403/*mfloat-gprs=* \ -*mcpu=505/*mfloat-gprs=* \ -*mcpu=603e/*mfloat-gprs=* \ -*mcpu=604/*mfloat-gprs=* \ -*mcpu=860/*mfloat-gprs=* \ -*mcpu=7400/*mfloat-gprs=* - -MULTILIB_EXCEPTIONS = - -# Disallow -Dppc and -Dmpc without other options -MULTILIB_EXCEPTIONS += Dppc* Dmpc* +# Enumeration of multilibs -MULTILIB_EXCEPTIONS += \ -${MULTILIB_SOFTFLOAT_ONLY} \ -${MULTILIB_HARDFLOAT_ONLY} \ -${MULTILIB_NOGPRS} +MULTILIB_REQUIRED += msoft-float +MULTILIB_REQUIRED += mcpu=403 +MULTILIB_REQUIRED += mcpu=505 +MULTILIB_REQUIRED += mcpu=603e +MULTILIB_REQUIRED += mcpu=603e/msoft-float +MULTILIB_REQUIRED += mcpu=604 +MULTILIB_REQUIRED += mcpu=604/msoft-float +MULTILIB_REQUIRED += mcpu=7400 +MULTILIB_REQUIRED += mcpu=7400/msoft-float +MULTILIB_REQUIRED += mcpu=8540 +MULTILIB_REQUIRED += mcpu=8540/msoft-float/mno-spe +MULTILIB_REQUIRED += mcpu=8540/mfloat-gprs=double +MULTILIB_REQUIRED += mcpu=860 +MULTILIB_REQUIRED += mcpu=e6500/m32 +MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec diff --git a/gcc-4.9/gcc/config/rs6000/vsx.md b/gcc-4.9/gcc/config/rs6000/vsx.md index 2cf5e7a94..9aaf06428 100644 --- a/gcc-4.9/gcc/config/rs6000/vsx.md +++ b/gcc-4.9/gcc/config/rs6000/vsx.md @@ -260,6 +260,14 @@ UNSPEC_VSX_ROUND_IC UNSPEC_VSX_SLDWI UNSPEC_VSX_XXSPLTW + 
UNSPEC_VSX_XXSPLTD + UNSPEC_VSX_DIVSD + UNSPEC_VSX_DIVUD + UNSPEC_VSX_MULSD + UNSPEC_VSX_XVCVSXDDP + UNSPEC_VSX_XVCVUXDDP + UNSPEC_VSX_XVCVDPSXDS + UNSPEC_VSX_XVCVDPUXDS ]) ;; VSX moves @@ -746,6 +754,34 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) +; Emulate vector with scalar for vec_mul in V2DImode +(define_insn_and_split "vsx_mul_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_MULSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_muldi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_muldi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdouble")]) + (define_insn "*vsx_div3" [(set (match_operand:VSX_F 0 "vsx_register_operand" "=,?") (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" ",") @@ -755,6 +791,61 @@ [(set_attr "type" "") (set_attr "fp_type" "")]) +; Emulate vector with scalar for vec_div in V2DImode +(define_insn_and_split "vsx_div_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVSD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_divdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_divdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdiv")]) + +(define_insn_and_split "vsx_udiv_v2di" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") + (match_operand:V2DI 2 "vsx_register_operand" "wa")] + UNSPEC_VSX_DIVUD))] + "VECTOR_MEM_VSX_P (V2DImode)" + "#" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + [(const_int 0)] + " +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op3 = gen_reg_rtx (DImode); + rtx op4 = gen_reg_rtx (DImode); + rtx op5 = gen_reg_rtx (DImode); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); + emit_insn (gen_udivdi3 (op5, op3, op4)); + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); + emit_insn (gen_udivdi3 (op3, op3, op4)); + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); +}" + [(set_attr "type" "vecdiv")]) + ;; *tdiv* instruction returning the FG flag 
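The vsx_mul_v2di, vsx_div_v2di and vsx_udiv_v2di splitters above all follow the same shape: the machine has no V2DI multiply or divide instruction, so each doubleword lane is pulled out with vsx_extract_v2di, computed with the scalar DImode pattern, and the two results are glued back together with vsx_concat_v2di. A minimal C sketch of that extract/compute/concat idea, assuming a plain two-lane struct (v2di and v2di_mul are illustrative names, not GCC's):

    #include <stdint.h>

    typedef struct { int64_t lane[2]; } v2di;

    /* Emulate a 2 x 64-bit vector multiply with two scalar multiplies,
       mirroring the vsx_extract_v2di -> muldi3 -> vsx_concat_v2di
       sequence emitted by the splitter.  */
    static v2di
    v2di_mul (v2di a, v2di b)
    {
      v2di r;
      r.lane[0] = a.lane[0] * b.lane[0];  /* extract lane 0 of each, muldi3 */
      r.lane[1] = a.lane[1] * b.lane[1];  /* extract lane 1 of each, muldi3 */
      return r;
    }

The signed and unsigned divide splitters differ only in calling gen_divdi3 or gen_udivdi3 for the per-lane operation.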
(define_expand "vsx_tdiv3_fg" [(set (match_dup 3) @@ -904,11 +995,11 @@ ;; multiply. (define_insn "*vsx_fmav4sf4" - [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v") + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") (fma:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v") - (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v") - (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))] + (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") + (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") + (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))] "VECTOR_UNIT_VSX_P (V4SFmode)" "@ xvmaddasp %x0,%x1,%x2 @@ -919,11 +1010,11 @@ [(set_attr "type" "vecfloat")]) (define_insn "*vsx_fmav2df4" - [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa") + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") (fma:V2DF - (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa") - (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0") - (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))] + (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") + (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") + (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))] "VECTOR_UNIT_VSX_P (V2DFmode)" "@ xvmaddadp %x0,%x1,%x2 @@ -1268,6 +1359,102 @@ "xscvspdpn %x0,%x1" [(set_attr "type" "fp")]) +;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) + +(define_expand "vsx_xvcvsxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvsxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvsxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVSXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvsxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvuxddp_scale" + [(match_operand:V2DF 0 "vsx_register_operand" "") + (match_operand:V2DI 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + int scale = INTVAL(operands[2]); + emit_insn (gen_vsx_xvcvuxddp (op0, op1)); + if (scale != 0) + rs6000_scale_v2df (op0, op0, -scale); + DONE; +}) + +(define_insn "vsx_xvcvuxddp" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") + (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVUXDDP))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvuxddp %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpsxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = gen_reg_rtx (V2DFmode); + int scale = INTVAL(operands[2]); + if (scale != 0) + rs6000_scale_v2df (tmp, op1, scale); + emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpsxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + 
UNSPEC_VSX_XVCVDPSXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpsxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + +(define_expand "vsx_xvcvdpuxds_scale" + [(match_operand:V2DI 0 "vsx_register_operand" "") + (match_operand:V2DF 1 "vsx_register_operand" "") + (match_operand:QI 2 "immediate_operand" "")] + "VECTOR_UNIT_VSX_P (V2DFmode)" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp = gen_reg_rtx (V2DFmode); + int scale = INTVAL(operands[2]); + if (scale != 0) + rs6000_scale_v2df (tmp, op1, scale); + emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); + DONE; +}) + +(define_insn "vsx_xvcvdpuxds" + [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") + (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVDPUXDS))] + "VECTOR_UNIT_VSX_P (V2DFmode)" + "xvcvdpuxds %x0,%x1" + [(set_attr "type" "vecdouble")]) + ;; Convert from 64-bit to 32-bit types ;; Note, favor the Altivec registers since the usual use of these instructions ;; is in vector converts and we need to use the Altivec vperm instruction. @@ -1359,8 +1546,8 @@ (define_insn "vsx_concat_" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=,?") (vec_concat:VSX_D - (match_operand: 1 "vsx_register_operand" "ws,") - (match_operand: 2 "vsx_register_operand" "ws,")))] + (match_operand: 1 "vsx_register_operand" ",") + (match_operand: 2 "vsx_register_operand" ",")))] "VECTOR_MEM_VSX_P (mode)" { if (BYTES_BIG_ENDIAN) @@ -1647,7 +1834,7 @@ [(set (match_operand: 0 "register_operand" "=d,wv,wr") (vec_select: (match_operand:VSX_D 1 "memory_operand" "m,Z,m") - (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))] + (parallel [(const_int 0)])))] "VECTOR_MEM_VSX_P (mode)" "@ lfd%U1%X1 %0,%1 @@ -1921,6 +2108,22 @@ "xxspltw %x0,%x1,%2" [(set_attr "type" "vecperm")]) +;; V2DF/V2DI splat for use by vec_splat builtin +(define_insn "vsx_xxspltd_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") + (match_operand:QI 2 "u5bit_cint_operand" "i")] + UNSPEC_VSX_XXSPLTD))] + "VECTOR_MEM_VSX_P (mode)" +{ + if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0) + || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1)) + return "xxpermdi %x0,%x1,%x1,0"; + else + return "xxpermdi %x0,%x1,%x1,3"; +} + [(set_attr "type" "vecperm")]) + ;; V4SF/V4SI interleave (define_insn "vsx_xxmrghw_" [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?") @@ -2041,7 +2244,7 @@ ;; to the top element of the V2DF array without doing an extract. 
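The vsx_xxspltd_ pattern a little earlier in this hunk encodes the endian twist behind vec_splat on V2DF/V2DI: replicating element 0 needs xxpermdi immediate 0 under big-endian element ordering but immediate 3 under little-endian ordering, and vice versa for element 1. A hedged C model of just that immediate selection (function and parameter names are invented for illustration):

    #include <stdbool.h>

    /* Return the xxpermdi immediate that splats element ELT (0 or 1) of
       a V2DF/V2DI register: immediate 0 replicates the register's first
       doubleword, immediate 3 its second.  With little-endian element
       ordering, element 0 lives in the register's second doubleword,
       hence the swap.  */
    static int
    xxspltd_immediate (int elt, bool big_endian_element_order)
    {
      return (big_endian_element_order ? elt == 0 : elt == 1) ? 0 : 3;
    }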
(define_insn_and_split "*vsx_reduc__v2df_scalar" - [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa") + [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws") (vec_select:DF (VEC_reduc:V2DF (vec_concat:V2DF diff --git a/gcc-4.9/gcc/config/rs6000/xcoff.h b/gcc-4.9/gcc/config/rs6000/xcoff.h index f2b7bd07a..10123313f 100644 --- a/gcc-4.9/gcc/config/rs6000/xcoff.h +++ b/gcc-4.9/gcc/config/rs6000/xcoff.h @@ -304,14 +304,15 @@ do { fputs (LOCAL_COMMON_ASM_OP, (FILE)); \ RS6000_OUTPUT_BASENAME ((FILE), (NAME)); \ if ((ALIGN) > 32) \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,%u\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s%u_,%u\n", \ (SIZE), xcoff_bss_section_name, \ + floor_log2 ((ALIGN) / BITS_PER_UNIT), \ floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ else if ((SIZE) > 4) \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,3\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s3_,3\n", \ (SIZE), xcoff_bss_section_name); \ else \ - fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s\n", \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%s,2\n", \ (SIZE), xcoff_bss_section_name); \ } while (0) #endif diff --git a/gcc-4.9/gcc/config/rx/rx.c b/gcc-4.9/gcc/config/rx/rx.c index 4242c1a97..11a825cfa 100644 --- a/gcc-4.9/gcc/config/rx/rx.c +++ b/gcc-4.9/gcc/config/rx/rx.c @@ -733,7 +733,7 @@ rx_print_operand (FILE * file, rtx op, int letter) break; case 'R': - gcc_assert (GET_MODE_SIZE (GET_MODE (op)) < 4); + gcc_assert (GET_MODE_SIZE (GET_MODE (op)) <= 4); unsigned_load = true; /* Fall through. */ case 'Q': diff --git a/gcc-4.9/gcc/config/rx/rx.h b/gcc-4.9/gcc/config/rx/rx.h index d99b19ad2..06a0ae850 100644 --- a/gcc-4.9/gcc/config/rx/rx.h +++ b/gcc-4.9/gcc/config/rx/rx.h @@ -433,9 +433,9 @@ typedef unsigned int CUMULATIVE_ARGS; /* Compute the alignment needed for label X in various situations. If the user has specified an alignment then honour that, otherwise use rx_align_for_label. */ -#define JUMP_ALIGN(x) (align_jumps ? align_jumps : rx_align_for_label (x, 0)) -#define LABEL_ALIGN(x) (align_labels ? align_labels : rx_align_for_label (x, 3)) -#define LOOP_ALIGN(x) (align_loops ? align_loops : rx_align_for_label (x, 2)) +#define JUMP_ALIGN(x) (align_jumps > 1 ? align_jumps_log : rx_align_for_label (x, 0)) +#define LABEL_ALIGN(x) (align_labels > 1 ? align_labels_log : rx_align_for_label (x, 3)) +#define LOOP_ALIGN(x) (align_loops > 1 ? align_loops_log : rx_align_for_label (x, 2)) #define LABEL_ALIGN_AFTER_BARRIER(x) rx_align_for_label (x, 0) #define ASM_OUTPUT_MAX_SKIP_ALIGN(STREAM, LOG, MAX_SKIP) \ diff --git a/gcc-4.9/gcc/config/s390/s390.c b/gcc-4.9/gcc/config/s390/s390.c index aac8de848..866de858c 100644 --- a/gcc-4.9/gcc/config/s390/s390.c +++ b/gcc-4.9/gcc/config/s390/s390.c @@ -9130,11 +9130,14 @@ s390_emit_epilogue (bool sibcall) if (! sibcall) { /* Fetch return address from stack before load multiple, - this will do good for scheduling. */ - - if (cfun_frame_layout.save_return_addr_p - || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM - && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM)) + this will do good for scheduling. + + Only do this if we already decided that r14 needs to be + saved to a stack slot. (And not just because r14 happens to + be in between two GPRs which need saving.) Otherwise it + would be difficult to take that decision back in + s390_optimize_prologue. 
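The check below therefore asks whether r14 already has a stack save slot (cfun_gpr_save_slot (RETURN_REGNUM) == -1) instead of consulting the frame-layout flags.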
*/ + if (cfun_gpr_save_slot (RETURN_REGNUM) == -1) { int return_regnum = find_unused_clobbered_reg(); if (!return_regnum) @@ -9149,6 +9152,12 @@ s390_emit_epilogue (bool sibcall) addr = gen_rtx_MEM (Pmode, addr); set_mem_alias_set (addr, get_frame_alias_set ()); emit_move_insn (return_reg, addr); + + /* Once we did that optimization we have to make sure + s390_optimize_prologue does not try to remove the + store of r14 since we will not be able to find the + load issued here. */ + cfun_frame_layout.save_return_addr_p = true; } } diff --git a/gcc-4.9/gcc/config/s390/s390.md b/gcc-4.9/gcc/config/s390/s390.md index b17c1fac8..10d7a5a6d 100644 --- a/gcc-4.9/gcc/config/s390/s390.md +++ b/gcc-4.9/gcc/config/s390/s390.md @@ -460,7 +460,7 @@ ;; This iterator and attribute allow to combine most atomic operations. (define_code_iterator ATOMIC [and ior xor plus minus mult]) (define_code_iterator ATOMIC_Z196 [and ior xor plus]) -(define_code_attr atomic [(and "and") (ior "ior") (xor "xor") +(define_code_attr atomic [(and "and") (ior "or") (xor "xor") (plus "add") (minus "sub") (mult "nand")]) (define_code_attr noxa [(and "n") (ior "o") (xor "x") (plus "a")]) diff --git a/gcc-4.9/gcc/config/sh/predicates.md b/gcc-4.9/gcc/config/sh/predicates.md index 73bb880d6..3af1f8a14 100644 --- a/gcc-4.9/gcc/config/sh/predicates.md +++ b/gcc-4.9/gcc/config/sh/predicates.md @@ -398,7 +398,7 @@ (define_predicate "general_extend_operand" (match_code "subreg,reg,mem,truncate") { - if (GET_CODE (op) == TRUNCATE) + if (reload_completed && GET_CODE (op) == TRUNCATE) return arith_operand (op, mode); if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))) diff --git a/gcc-4.9/gcc/config/sh/sh-mem.cc b/gcc-4.9/gcc/config/sh/sh-mem.cc index 45af23acb..e5ef165cf 100644 --- a/gcc-4.9/gcc/config/sh/sh-mem.cc +++ b/gcc-4.9/gcc/config/sh/sh-mem.cc @@ -1,5 +1,5 @@ /* Helper routines for memory move and comparison insns. - Copyright (C) 2013-2014 Free Software Foundation, Inc. + Copyright (C) 2013-2015 Free Software Foundation, Inc. This file is part of GCC. @@ -226,7 +226,7 @@ sh_expand_cmpstr (rtx *operands) emit_move_insn (tmp3, addr2); emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); - /*start long loop. */ + /* start long loop. */ emit_label (L_loop_long); emit_move_insn (tmp2, tmp3); @@ -335,7 +335,7 @@ sh_expand_cmpnstr (rtx *operands) rtx len = force_reg (SImode, operands[3]); int constp = CONST_INT_P (operands[3]); - /* Loop on a register count. */ + /* Loop on a register count. */ if (constp) { rtx tmp0 = gen_reg_rtx (SImode); @@ -364,7 +364,7 @@ sh_expand_cmpnstr (rtx *operands) add_int_reg_note (jump, REG_BR_PROB, prob_likely); } - /* word count. Do we have iterations ? */ + /* word count. Do we have iterations ? */ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); /*start long loop. */ @@ -407,6 +407,7 @@ sh_expand_cmpnstr (rtx *operands) /* end loop. Reached max iterations. */ if (! 
sbytes) { + emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); jump = emit_jump_insn (gen_jump_compact (L_return)); emit_barrier_after (jump); } @@ -482,6 +483,13 @@ sh_expand_cmpnstr (rtx *operands) jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); emit_barrier_after (jump); } + else + { + emit_insn (gen_cmpeqsi_t (len, const0_rtx)); + emit_move_insn (operands[0], const0_rtx); + jump = emit_jump_insn (gen_branch_true (L_return)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + } addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); @@ -522,14 +530,14 @@ sh_expand_cmpnstr (rtx *operands) emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1))); - emit_label (L_return); - emit_insn (gen_subsi3 (operands[0], tmp1, tmp2)); + emit_label (L_return); + return true; } -/* Emit code to perform a strlen +/* Emit code to perform a strlen. OPERANDS[0] is the destination. OPERANDS[1] is the string. @@ -568,7 +576,7 @@ sh_expand_strlen (rtx *operands) addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); - /*start long loop. */ + /* start long loop. */ emit_label (L_loop_long); /* tmp1 is aligned, OK to load. */ diff --git a/gcc-4.9/gcc/config/sh/sh.c b/gcc-4.9/gcc/config/sh/sh.c index 3d4553a6e..06798181a 100644 --- a/gcc-4.9/gcc/config/sh/sh.c +++ b/gcc-4.9/gcc/config/sh/sh.c @@ -2957,7 +2957,7 @@ enum struct ashl_lshr_sequence { char insn_count; - char amount[6]; + signed char amount[6]; char clobbers_t; }; diff --git a/gcc-4.9/gcc/config/sh/sh.md b/gcc-4.9/gcc/config/sh/sh.md index ab1f0a51c..d957e5540 100644 --- a/gcc-4.9/gcc/config/sh/sh.md +++ b/gcc-4.9/gcc/config/sh/sh.md @@ -868,9 +868,9 @@ (define_insn "*cmp_div0s_0" [(set (reg:SI T_REG) - (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand" "%r") (const_int 31)) - (ge:SI (match_operand:SI 1 "arith_reg_operand") + (ge:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0))))] "TARGET_SH1" "div0s %0,%1" @@ -4563,6 +4563,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4803,6 +4809,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4896,6 +4908,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -4995,6 +5013,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -5069,6 +5093,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P 
(operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -5263,6 +5293,12 @@ label: { if (TARGET_SHMEDIA) { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); + DONE; + } emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2])); DONE; } @@ -6295,10 +6331,9 @@ label: }) (define_expand "extendqihi2" - [(set (match_operand:HI 0 "arith_reg_dest" "") - (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))] - "" - "") + [(set (match_operand:HI 0 "arith_reg_dest") + (sign_extend:HI (match_operand:QI 1 "arith_reg_operand")))] + "TARGET_SH1") (define_insn "*extendqihi2_compact_reg" [(set (match_operand:HI 0 "arith_reg_dest" "=r") diff --git a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc index 313e5b5f4..3791cc76c 100644 --- a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc +++ b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc @@ -111,7 +111,7 @@ private: // Given a start insn and its basic block, recursively determine all // possible ccreg values in all basic block paths that can lead to the // start insn. - void find_last_ccreg_values (rtx start_insn, basic_block bb, + bool find_last_ccreg_values (rtx start_insn, basic_block bb, std::vector& values_out, std::vector& prev_visited_bb) const; @@ -226,8 +226,8 @@ sh_optimize_sett_clrt::execute (void) ccreg_values.clear (); visited_bbs.clear (); - find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values, - visited_bbs); + bool ok = find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values, + visited_bbs); log_msg ("number of ccreg values collected: %u\n", (unsigned int)ccreg_values.size ()); @@ -235,7 +235,7 @@ sh_optimize_sett_clrt::execute (void) // If all the collected values are equal and are equal to the // constant value of the setcc insn, the setcc insn can be // removed. 
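The sett/clrt pass changes above give find_last_ccreg_values a bool result so the walk can bail out instead of silently collecting a partial set: only when every predecessor path is analyzable (the EDGE_COMPLEX abort added below) and every collected value matches the setcc constant may the insn be removed. A simplified stand-alone sketch of that all-paths check, assuming an invented block structure (cycle handling via the visited-block list is omitted here):

    #include <stdbool.h>
    #include <stddef.h>

    struct block
    {
      struct block **preds;  /* predecessor blocks */
      size_t n_preds;
      int known_cc;          /* 0 or 1 if set in this block, else -1 */
      bool complex_edge;     /* reached through an abnormal edge */
    };

    /* Return true only if every path into BB is analyzable and leaves
       the condition register equal to WANT -- the shape of
       find_last_ccreg_values combined with the all-values-equal test.  */
    static bool
    all_paths_set_cc (const struct block *bb, int want)
    {
      if (bb->complex_edge)
        return false;                 /* like the EDGE_COMPLEX log_return */
      if (bb->known_cc != -1)
        return bb->known_cc == want;  /* value determined in this block */
      if (bb->n_preds == 0)
        return false;                 /* function entry: value unknown */
      for (size_t i = 0; i < bb->n_preds; i++)
        if (!all_paths_set_cc (bb->preds[i], want))
          return false;
      return true;
    }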
- if (all_ccreg_values_equal (ccreg_values) + if (ok && all_ccreg_values_equal (ccreg_values) && rtx_equal_p (ccreg_values.front ().value, setcc_val)) { log_msg ("all values are "); @@ -309,7 +309,7 @@ sh_optimize_sett_clrt gcc_unreachable (); } -void +bool sh_optimize_sett_clrt ::find_last_ccreg_values (rtx start_insn, basic_block bb, std::vector& values_out, @@ -348,7 +348,7 @@ sh_optimize_sett_clrt log_msg ("\n"); values_out.push_back (v); - return; + return true; } if (any_condjump_p (i) && onlyjump_p (i) && !prev_visited_bb.empty ()) @@ -372,7 +372,7 @@ sh_optimize_sett_clrt log_msg ("\n"); values_out.push_back (v); - return; + return true; } } @@ -393,10 +393,14 @@ sh_optimize_sett_clrt for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei); ei_next (&ei)) { + if (ei_edge (ei)->flags & EDGE_COMPLEX) + log_return (false, "aborting due to complex edge\n"); + basic_block pred_bb = ei_edge (ei)->src; pred_bb_count += 1; - find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out, - prev_visited_bb); + if (!find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out, + prev_visited_bb)) + return false; } prev_visited_bb.pop_back (); @@ -419,6 +423,8 @@ sh_optimize_sett_clrt values_out.push_back (v); } + + return true; } bool diff --git a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc index e73604022..38e28038d 100644 --- a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc +++ b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc @@ -78,14 +78,17 @@ Example 1) In [bb 4] elimination of the comparison would require inversion of the branch condition and compensation of other BBs. -Instead an inverting reg-move can be used: +Instead the comparison in [bb 3] can be replaced with the comparison in [bb 5] +by using a reg-reg move. In [bb 4] a logical not is used to compensate the +inverted condition. [bb 3] (set (reg:SI 167) (reg:SI 173)) -> bb 5 [BB 4] -(set (reg:SI 167) (not:SI (reg:SI 177))) +(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0))) +(set (reg:SI 167) (reg:SI 147 t)) -> bb 5 [bb 5] @@ -214,9 +217,9 @@ In order to handle cases such as above the RTL pass does the following: and replace the comparisons in the BBs with reg-reg copies to get the operands in place (create new pseudo regs). - - If the cstores differ, try to apply the special case - (eq (reg) (const_int 0)) -> inverted = (not (reg)). - for the subordinate cstore types and eliminate the dominating ones. + - If the cstores differ and the comparison is a test against zero, + use reg-reg copies for the dominating cstores and logical not cstores + for the subordinate cstores. - If the comparison types in the BBs are not the same, or the first approach doesn't work out for some reason, try to eliminate the comparison before the @@ -558,7 +561,8 @@ private: bool can_extend_ccreg_usage (const bb_entry& e, const cbranch_trace& trace) const; - // Create an insn rtx that is a negating reg move (not operation). + // Create an insn rtx that performs a logical not (test != 0) on the src_reg + // and stores the result in dst_reg. rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const; // Create an insn rtx that inverts the ccreg. @@ -892,12 +896,32 @@ sh_treg_combine::can_remove_comparison (const bb_entry& e, rtx sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const { - // This will to go through expanders and may output multiple insns - // for multi-word regs. + // On SH we can do only SImode and DImode comparisons. + if (! 
(GET_MODE (src_reg) == SImode || GET_MODE (src_reg) == DImode)) + return NULL; + + // On SH we can store the ccreg into an SImode or DImode reg only. + if (! (GET_MODE (dst_reg) == SImode || GET_MODE (dst_reg) == DImode)) + return NULL; + start_sequence (); - expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0); + + emit_insn (gen_rtx_SET (VOIDmode, m_ccreg, + gen_rtx_fmt_ee (EQ, SImode, src_reg, const0_rtx))); + + if (GET_MODE (dst_reg) == SImode) + emit_move_insn (dst_reg, m_ccreg); + else if (GET_MODE (dst_reg) == DImode) + { + emit_move_insn (gen_lowpart (SImode, dst_reg), m_ccreg); + emit_move_insn (gen_highpart (SImode, dst_reg), const0_rtx); + } + else + gcc_unreachable (); + rtx i = get_insns (); end_sequence (); + return i; } @@ -1080,7 +1104,12 @@ sh_treg_combine::try_combine_comparisons (cbranch_trace& trace, // There is one special case though, where an integer comparison // (eq (reg) (const_int 0)) // can be inverted with a sequence - // (eq (not (reg)) (const_int 0)) + // (set (t) (eq (reg) (const_int 0)) + // (set (reg) (t)) + // (eq (reg) (const_int 0)) + // + // FIXME: On SH2A it might be better to use the nott insn in this case, + // i.e. do the try_eliminate_cstores approach instead. if (inv_cstore_count != 0 && cstore_count != 0) { if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX) diff --git a/gcc-4.9/gcc/config/sh/sync.md b/gcc-4.9/gcc/config/sh/sync.md index a0a22a1f5..a3acaac56 100644 --- a/gcc-4.9/gcc/config/sh/sync.md +++ b/gcc-4.9/gcc/config/sh/sync.md @@ -466,6 +466,7 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(match_operand:SI 2 "arith_operand" "rI08")] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -484,6 +485,7 @@ (set (mem:QIHI (match_dup 1)) (unspec:QIHI [(match_operand:QIHI 2 "register_operand" "r")] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -617,6 +619,7 @@ [(FETCHOP:SI (mem:SI (match_dup 1)) (match_operand:SI 2 "" ""))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -637,6 +640,7 @@ [(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_operand:QIHI 2 "" ""))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -784,6 +788,7 @@ [(not:SI (and:SI (mem:SI (match_dup 1)) (match_operand:SI 2 "logical_operand" "rK08")))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" @@ -805,6 +810,7 @@ [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_operand:QIHI 2 "logical_operand" "rK08")))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -903,7 +909,7 @@ " and %0,%3" "\n" " not %3,%3" "\n" " mov. 
%3,@%1" "\n" - " stc %4,sr"; + " ldc %4,sr"; } [(set_attr "length" "20")]) @@ -960,7 +966,8 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2))] - UNSPEC_ATOMIC))] + UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" { @@ -980,6 +987,7 @@ (unspec:QIHI [(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_dup 2))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -1124,7 +1132,8 @@ (set (mem:SI (match_dup 1)) (unspec:SI [(not:SI (and:SI (mem:SI (match_dup 1)) (match_dup 2)))] - UNSPEC_ATOMIC))] + UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1))] "TARGET_ATOMIC_HARD_LLCS || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)" { @@ -1145,6 +1154,7 @@ (unspec:QIHI [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_dup 2)))] UNSPEC_ATOMIC)) + (set (reg:SI T_REG) (const_int 1)) (clobber (reg:SI R0_REG)) (clobber (match_scratch:SI 3 "=&r")) (clobber (match_scratch:SI 4 "=1"))] @@ -1353,7 +1363,7 @@ " ldc r0,sr" "\n" " mov.b @%0,r0" "\n" " mov.b %1,@%0" "\n" - " stc %2,sr" "\n" + " ldc %2,sr" "\n" " tst r0,r0"; } [(set_attr "length" "16")]) diff --git a/gcc-4.9/gcc/config/sparc/leon.md b/gcc-4.9/gcc/config/sparc/leon.md index 82b6a0d96..ad22e3b59 100644 --- a/gcc-4.9/gcc/config/sparc/leon.md +++ b/gcc-4.9/gcc/config/sparc/leon.md @@ -29,11 +29,11 @@ ;; Use a double reservation to work around the load pipeline hazard on UT699. (define_insn_reservation "leon3_load" 1 - (and (eq_attr "cpu" "leon3") (eq_attr "type" "load,sload")) + (and (eq_attr "cpu" "leon3,leon3v7") (eq_attr "type" "load,sload")) "leon_memory*2") (define_insn_reservation "leon_store" 2 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "store")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "store")) "leon_memory*2") ;; This describes Gaisler Research's FPU @@ -44,21 +44,21 @@ (define_cpu_unit "grfpu_ds" "grfpu") (define_insn_reservation "leon_fp_alu" 4 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fp,fpcmp,fpmul")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fp,fpcmp,fpmul")) "grfpu_alu, nothing*3") (define_insn_reservation "leon_fp_divs" 16 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivs")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivs")) "grfpu_ds*14, nothing*2") (define_insn_reservation "leon_fp_divd" 17 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpdivd")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpdivd")) "grfpu_ds*15, nothing*2") (define_insn_reservation "leon_fp_sqrts" 24 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrts")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrts")) "grfpu_ds*22, nothing*2") (define_insn_reservation "leon_fp_sqrtd" 25 - (and (eq_attr "cpu" "leon,leon3") (eq_attr "type" "fpsqrtd")) + (and (eq_attr "cpu" "leon,leon3,leon3v7") (eq_attr "type" "fpsqrtd")) "grfpu_ds*23, nothing*2") diff --git a/gcc-4.9/gcc/config/sparc/linux.h b/gcc-4.9/gcc/config/sparc/linux.h index c54ba2cb5..c40bb0b78 100644 --- a/gcc-4.9/gcc/config/sparc/linux.h +++ b/gcc-4.9/gcc/config/sparc/linux.h @@ -147,12 +147,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. 
But they reserve the right to - change their minds. */ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/sparc/linux64.h b/gcc-4.9/gcc/config/sparc/linux64.h index f00fb42ff..12bb3780b 100644 --- a/gcc-4.9/gcc/config/sparc/linux64.h +++ b/gcc-4.9/gcc/config/sparc/linux64.h @@ -261,12 +261,6 @@ do { \ /* Static stack checking is supported by means of probes. */ #define STACK_CHECK_STATIC_BUILTIN 1 -/* Linux currently uses RMO in uniprocessor mode, which is equivalent to - TMO, and TMO in multiprocessor mode. But they reserve the right to - change their minds. */ -#undef SPARC_RELAXED_ORDERING -#define SPARC_RELAXED_ORDERING true - #undef NEED_INDICATE_EXEC_STACK #define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/sparc/sparc-opts.h b/gcc-4.9/gcc/config/sparc/sparc-opts.h index 13b375ae1..26017edc0 100644 --- a/gcc-4.9/gcc/config/sparc/sparc-opts.h +++ b/gcc-4.9/gcc/config/sparc/sparc-opts.h @@ -31,6 +31,7 @@ enum processor_type { PROCESSOR_HYPERSPARC, PROCESSOR_LEON, PROCESSOR_LEON3, + PROCESSOR_LEON3V7, PROCESSOR_SPARCLITE, PROCESSOR_F930, PROCESSOR_F934, diff --git a/gcc-4.9/gcc/config/sparc/sparc.c b/gcc-4.9/gcc/config/sparc/sparc.c index 5b00cca47..f7fc957b4 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.c +++ b/gcc-4.9/gcc/config/sparc/sparc.c @@ -786,9 +786,6 @@ char sparc_hard_reg_printed[8]; #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table #endif -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE sparc_option_override @@ -1246,6 +1243,7 @@ sparc_option_override (void) { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, { TARGET_CPU_leon, PROCESSOR_LEON }, { TARGET_CPU_leon3, PROCESSOR_LEON3 }, + { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, { TARGET_CPU_sparclite, PROCESSOR_F930 }, { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, { TARGET_CPU_sparclet, PROCESSOR_TSC701 }, @@ -1274,6 +1272,7 @@ sparc_option_override (void) { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU }, { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU }, { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU }, + { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU }, { "sparclite", MASK_ISA, MASK_SPARCLITE }, /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, @@ -1526,6 +1525,7 @@ sparc_option_override (void) sparc_costs = &leon_costs; break; case PROCESSOR_LEON3: + case PROCESSOR_LEON3V7: sparc_costs = &leon3_costs; break; case PROCESSOR_SPARCLET: @@ -6801,28 +6801,30 @@ function_arg_union_value (int size, enum machine_mode mode, int slotno, } /* Used by function_arg and sparc_function_value_1 to implement the conventions - for passing and returning large (BLKmode) vectors. + for passing and returning BLKmode vectors. Return an expression valid as a return value for the FUNCTION_ARG and TARGET_FUNCTION_VALUE. - SIZE is the size in bytes of the vector (at least 8 bytes). + SIZE is the size in bytes of the vector. REGNO is the FP hard register the vector will be passed in. 
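A vector smaller than 8 bytes now occupies a single SImode register at offset 0; anything larger is split into DImode registers at consecutive 8-byte offsets, as before.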
*/ static rtx function_arg_vector_value (int size, int regno) { - int i, nregs = size / 8; - rtx regs; - - regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); + const int nregs = MAX (1, size / 8); + rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); - for (i = 0; i < nregs; i++) - { + if (size < 8) + XVECEXP (regs, 0, 0) + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (SImode, regno), + const0_rtx); + else + for (int i = 0; i < nregs; i++) XVECEXP (regs, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (DImode, regno + 2*i), GEN_INT (i*8)); - } return regs; } @@ -6868,10 +6870,9 @@ sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode, || (TARGET_ARCH64 && size <= 16)); if (mode == BLKmode) - return function_arg_vector_value (size, - SPARC_FP_ARG_FIRST + 2*slotno); - else - mclass = MODE_FLOAT; + return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno); + + mclass = MODE_FLOAT; } if (TARGET_ARCH32) @@ -7315,10 +7316,9 @@ sparc_function_value_1 (const_tree type, enum machine_mode mode, || (TARGET_ARCH64 && size <= 32)); if (mode == BLKmode) - return function_arg_vector_value (size, - SPARC_FP_ARG_FIRST); - else - mclass = MODE_FLOAT; + return function_arg_vector_value (size, SPARC_FP_ARG_FIRST); + + mclass = MODE_FLOAT; } if (TARGET_ARCH64 && type) diff --git a/gcc-4.9/gcc/config/sparc/sparc.h b/gcc-4.9/gcc/config/sparc/sparc.h index dd2b5ad9c..87f1d82d6 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.h +++ b/gcc-4.9/gcc/config/sparc/sparc.h @@ -106,17 +106,6 @@ extern enum cmodel sparc_cmodel; #define SPARC_DEFAULT_CMODEL CM_32 -/* The SPARC-V9 architecture defines a relaxed memory ordering model (RMO) - which requires the following macro to be true if enabled. Prior to V9, - there are no instructions to even talk about memory synchronization. - Note that the UltraSPARC III processors don't implement RMO, unlike the - UltraSPARC II processors. Niagara, Niagara-2, and Niagara-3 do not - implement RMO either. - - Default to false; for example, Solaris never enables RMO, only ever uses - total memory ordering (TMO). */ -#define SPARC_RELAXED_ORDERING false - /* Do not use the .note.GNU-stack convention by default. 
*/ #define NEED_INDICATE_EXEC_STACK 0 @@ -137,21 +126,22 @@ extern enum cmodel sparc_cmodel; #define TARGET_CPU_hypersparc 3 #define TARGET_CPU_leon 4 #define TARGET_CPU_leon3 5 -#define TARGET_CPU_sparclite 6 -#define TARGET_CPU_f930 6 /* alias */ -#define TARGET_CPU_f934 6 /* alias */ -#define TARGET_CPU_sparclite86x 7 -#define TARGET_CPU_sparclet 8 -#define TARGET_CPU_tsc701 8 /* alias */ -#define TARGET_CPU_v9 9 /* generic v9 implementation */ -#define TARGET_CPU_sparcv9 9 /* alias */ -#define TARGET_CPU_sparc64 9 /* alias */ -#define TARGET_CPU_ultrasparc 10 -#define TARGET_CPU_ultrasparc3 11 -#define TARGET_CPU_niagara 12 -#define TARGET_CPU_niagara2 13 -#define TARGET_CPU_niagara3 14 -#define TARGET_CPU_niagara4 15 +#define TARGET_CPU_leon3v7 6 +#define TARGET_CPU_sparclite 7 +#define TARGET_CPU_f930 7 /* alias */ +#define TARGET_CPU_f934 7 /* alias */ +#define TARGET_CPU_sparclite86x 8 +#define TARGET_CPU_sparclet 9 +#define TARGET_CPU_tsc701 9 /* alias */ +#define TARGET_CPU_v9 10 /* generic v9 implementation */ +#define TARGET_CPU_sparcv9 10 /* alias */ +#define TARGET_CPU_sparc64 10 /* alias */ +#define TARGET_CPU_ultrasparc 11 +#define TARGET_CPU_ultrasparc3 12 +#define TARGET_CPU_niagara 13 +#define TARGET_CPU_niagara2 14 +#define TARGET_CPU_niagara3 15 +#define TARGET_CPU_niagara4 16 #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \ @@ -239,6 +229,11 @@ extern enum cmodel sparc_cmodel; #define ASM_CPU32_DEFAULT_SPEC AS_LEON_FLAG #endif +#if TARGET_CPU_DEFAULT == TARGET_CPU_leon3v7 +#define CPP_CPU32_DEFAULT_SPEC "-D__leon__" +#define ASM_CPU32_DEFAULT_SPEC AS_LEONV7_FLAG +#endif + #endif #if !defined(CPP_CPU32_DEFAULT_SPEC) || !defined(CPP_CPU64_DEFAULT_SPEC) @@ -285,6 +280,7 @@ extern enum cmodel sparc_cmodel; %{mcpu=hypersparc:-D__hypersparc__ -D__sparc_v8__} \ %{mcpu=leon:-D__leon__ -D__sparc_v8__} \ %{mcpu=leon3:-D__leon__ -D__sparc_v8__} \ +%{mcpu=leon3v7:-D__leon__} \ %{mcpu=v9:-D__sparc_v9__} \ %{mcpu=ultrasparc:-D__sparc_v9__} \ %{mcpu=ultrasparc3:-D__sparc_v9__} \ @@ -334,6 +330,7 @@ extern enum cmodel sparc_cmodel; %{mcpu=hypersparc:-Av8} \ %{mcpu=leon:" AS_LEON_FLAG "} \ %{mcpu=leon3:" AS_LEON_FLAG "} \ +%{mcpu=leon3v7:" AS_LEONV7_FLAG "} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ @@ -1760,8 +1757,10 @@ extern int sparc_indent_opcode; #ifdef HAVE_AS_LEON #define AS_LEON_FLAG "-Aleon" +#define AS_LEONV7_FLAG "-Aleon" #else #define AS_LEON_FLAG "-Av8" +#define AS_LEONV7_FLAG "-Av7" #endif /* We use gcc _mcount for profiling. 
*/ diff --git a/gcc-4.9/gcc/config/sparc/sparc.md b/gcc-4.9/gcc/config/sparc/sparc.md index 76c331597..954c297fd 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.md +++ b/gcc-4.9/gcc/config/sparc/sparc.md @@ -221,6 +221,7 @@ hypersparc, leon, leon3, + leon3v7, sparclite, f930, f934, diff --git a/gcc-4.9/gcc/config/sparc/sparc.opt b/gcc-4.9/gcc/config/sparc/sparc.opt index 64e40955a..3cd2b603a 100644 --- a/gcc-4.9/gcc/config/sparc/sparc.opt +++ b/gcc-4.9/gcc/config/sparc/sparc.opt @@ -152,6 +152,9 @@ Enum(sparc_processor_type) String(leon) Value(PROCESSOR_LEON) EnumValue Enum(sparc_processor_type) String(leon3) Value(PROCESSOR_LEON3) +EnumValue +Enum(sparc_processor_type) String(leon3v7) Value(PROCESSOR_LEON3V7) + EnumValue Enum(sparc_processor_type) String(sparclite) Value(PROCESSOR_SPARCLITE) diff --git a/gcc-4.9/gcc/config/sparc/t-rtems b/gcc-4.9/gcc/config/sparc/t-rtems index 86a230261..ae7a33ab4 100644 --- a/gcc-4.9/gcc/config/sparc/t-rtems +++ b/gcc-4.9/gcc/config/sparc/t-rtems @@ -17,6 +17,15 @@ # . # -MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3 -MULTILIB_DIRNAMES = soft v8 leon3 +MULTILIB_OPTIONS = msoft-float mcpu=v8/mcpu=leon3/mcpu=leon3v7 muser-mode +MULTILIB_DIRNAMES = soft v8 leon3 leon3v7 user-mode MULTILIB_MATCHES = msoft-float=mno-fpu + +MULTILIB_EXCEPTIONS = muser-mode +MULTILIB_EXCEPTIONS += mcpu=leon3 +MULTILIB_EXCEPTIONS += mcpu=leon3v7 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3 +MULTILIB_EXCEPTIONS += msoft-float/mcpu=leon3v7 +MULTILIB_EXCEPTIONS += msoft-float/muser-mode +MULTILIB_EXCEPTIONS += msoft-float/mcpu=v8/muser-mode +MULTILIB_EXCEPTIONS += mcpu=v8/muser-mode