author    Yiran Wang <yiran@google.com>    2015-06-23 15:33:17 -0700
committer Yiran Wang <yiran@google.com>    2015-06-29 10:56:28 -0700
commit    1d9fec7937f45dde5e04cac966a2d9a12f2fc15a (patch)
tree      3fbcd18a379a05fd6d43491a107e1f36bc61b185 /gcc-4.9/gcc
parent    f378ebf14df0952eae870c9865bab8326aa8f137 (diff)
Synchronize with google/gcc-4_9 to r224707 (from r214835)
Change-Id: I3d6f06fc613c8f8b6a82143dc44b7338483aac5d
Diffstat (limited to 'gcc-4.9/gcc')
604 files changed, 20027 insertions, 3500 deletions
diff --git a/gcc-4.9/gcc/ChangeLog b/gcc-4.9/gcc/ChangeLog index b97054b2c..a6c51d36b 100644 --- a/gcc-4.9/gcc/ChangeLog +++ b/gcc-4.9/gcc/ChangeLog @@ -1,9 +1,887 @@ +2015-03-26 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport of r214242, r214254, and bug fix patches from mainline + * config/rs6000/rs6000.c (context.h): New #include. + (tree-pass.h): Likewise. + (make_pass_analyze_swaps): New declaration. + (rs6000_option_override): Register swap-optimization pass. + (swap_web_entry): New class. + (special_handling_values): New enum. + (union_defs): New function. + (union_uses): Likewise. + (insn_is_load_p): Likewise. + (insn_is_store_p): Likewise. + (insn_is_swap_p): Likewise. + (rtx_is_swappable_p): Likewise. + (insn_is_swappable_p): Likewise. + (chain_purpose): New enum. + (chain_contains_only_swaps): New function. + (mark_swaps_for_removal): Likewise. + (swap_const_vector_halves): Likewise. + (adjust_subreg_index): Likewise. + (permute_load): Likewise. + (permute_store): Likewise. + (adjust_extract): Likewise. + (adjust_splat): Likewise. + (handle_special_swappables): Likewise. + (replace_swap_with_copy): Likewise. + (dump_swap_insn_table): Likewise. + (rs6000_analyze_swaps): Likewise. + (pass_data_analyze_swaps): New pass_data. + (pass_analyze_swaps): New class. + (pass_analyze_swaps::gate): New method. + (pass_analyze_swaps::execute): New method. + (make_pass_analyze_swaps): New function. + * config/rs6000/rs6000.opt (moptimize-swaps): New option. + * df.h (web_entry_base): New class, replacing struct web_entry. + (web_entry_base::pred): New method. + (web_entry_base::set_pred): Likewise. + (web_entry_base::unionfind_root): Likewise. + (web_entry_base::unionfind_union): Likewise. + (unionfind_root): Delete external reference. + (unionfind_union): Likewise. + (union_defs): Likewise. + * web.c (web_entry_base::unionfind_root): Convert to method. + (web_entry_base::unionfind_union): Likewise. + (web_entry): New class. + (union_match_dups): Convert to use class structure. + (union_defs): Likewise. + (entry_register): Likewise. + (web_main): Likewise. + +2015-01-23 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/64734 + * omp-low.c (scan_sharing_clauses): Don't ignore + OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION GOMP_MAP_POINTER clauses + on target data/update constructs. + +2015-01-23 Wei Mi <wmi@google.com> + + Backported from trunk. + 2015-01-22 Wei Mi <wmi@google.com> + + PR rtl-optimization/64557 + * dse.c (record_store): Call get_addr for mem_addr. + (check_mem_read_rtx): Likewise. + +2015-01-22 Andreas Krebbel <Andreas.Krebbel@de.ibm.com> + + * config/s390/s390.md (atomic code attribute): Fix typo "ior" -> + "or". + +2015-01-21 Wei Mi <wmi@google.com> + + Backported from trunk. + 2014-11-22 Jan Hubicka <hubicka@ucw.cz> + + PR ipa/63970 + * ipa.c (symbol_table::remove_unreachable_nodes): Mark all inline clones + as having abstract origin used. + * ipa-inline-transform.c (can_remove_node_now_p_1): Drop abstract origin check. + (clone_inlined_nodes): Copy abstract origin flag. + * lto-cgraph.c (compute_ltrans_boundary): Use get_create to get abstract origin node. + +2015-01-20 Chung-Lin Tang <cltang@codesourcery.com> + + Backport from mainline + * config/nios2/nios2.c (nios2_asm_file_end): Implement + TARGET_ASM_FILE_END hook for adding .note.GNU-stack section when + needed. + (TARGET_ASM_FILE_END): Define.
+ +2015-01-15 Martin Liska <mliska@suse.cz> + + Backport from mainline + 2014-11-27 Richard Biener <rguenther@suse.de> + + PR middle-end/63704 + * alias.c (mems_in_disjoint_alias_sets_p): Remove assert + and instead return false when !fstrict-aliasing. + +2015-01-15 Eric Botcazou <ebotcazou@adacore.com> + + * expr.c (expand_expr_real_1) <normal_inner_ref>: Use the expression to + set the memory attributes in all cases but clear MEM_EXPR if need be. + +2015-01-14 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2015-01-12 Jakub Jelinek <jakub@redhat.com> + + PR target/64513 + * config/i386/i386.c (ix86_expand_prologue): Add + REG_FRAME_RELATED_EXPR to %rax and %r10 pushes. + + 2015-01-13 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/64286 + * ree.c (combine_reaching_defs): Move part of comment earlier, + remove !SCALAR_INT_MODE_P check. + (add_removable_extension): Don't add vector mode + extensions if all uses of the source register aren't the same + vector extensions. + + 2015-01-12 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/64563 + * tree-vrp.c (vrp_evaluate_conditional): Check for VR_RANGE + instead of != VR_VARYING. + +2015-01-14 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2015-01-13 Marek Polacek <polacek@redhat.com> + + PR middle-end/64391 + * trans-mem.c (get_attrs_for): Return NULL_TREE if X is NULL_TREE. + +2015-01-13 Marc Glisse <marc.glisse@inria.fr> + + PR c++/54442 + * tree.c (build_qualified_type): Use a canonical type for + TYPE_CANONICAL. + +2015-01-13 Pat Haugen <pthaugen@us.ibm.com> + + Backport from mainline + 2014-12-20 Segher Boessenkool <segher@kernel.crashing.org> + + PR target/64358 + * config/rs6000/rs6000.c (rs6000_split_logical_inner): Swap the + input operands if only the second is inverted. + * config/rs6000/rs6000.md (*boolc<mode>3_internal1 for BOOL_128): + Swap BOOL_REGS_OP1 and BOOL_REGS_OP2. Correct arguments to + rs6000_split_logical. + (*boolc<mode>3_internal2 for TI2): Swap operands[1] and operands[2]. + +2015-01-13 Renlin Li <renlin.li@arm.com> + + Backport from mainline: + 2014-11-19 Renlin Li <renlin.li@arm.com> + + PR target/63424 + * config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): New. + +2015-01-13 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2015-01-13 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/64479 + * rtlanal.c (set_reg_p): Handle SEQUENCE constructs. + +2015-01-09 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/64536 + * cfgrtl.c (rtl_tidy_fallthru_edge): Handle removal of degenerate + tablejumps. + +2015-01-09 Michael Meissner <meissner@linux.vnet.ibm.com> + + Backport from mainline: + 2015-01-06 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/64505 + * config/rs6000/rs6000.c (rs6000_secondary_reload): Return the + correct reload handler if -m32 -mpowerpc64 is used. + +2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + Backport from mainline: + 2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + * config/rs6000/rtems.h (CPP_OS_RTEMS_SPEC): Define __PPC_CPU_E6500__ + for -mcpu=e6500. + * config/rs6000/t-rtems: Add e6500 multilibs. + +2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + Backport from mainline: + 2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + * config/rs6000/t-rtems: Add -mno-spe to soft-float multilib for + MPC8540.
+ +2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + Backport from mainline: + 2015-01-09 Sebastian Huber <sebastian.huber@embedded-brains.de> + + * config/rs6000/t-rtems: Use MULTILIB_REQUIRED instead of + MULTILIB_EXCEPTIONS. + +2015-01-09 Renlin Li <renlin.li@arm.com> + + Backport from mainline: + 2014-08-12 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + PR target/61413 + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix definition + of __ARM_SIZEOF_WCHAR_T. + +2015-01-08 Christian Bruel <christian.bruel@st.com> + + PR target/64507 + * config/sh/sh-mem.cc (sh_expand_cmpnstr): Check 0 length. + +2015-01-03 John David Anglin <danglin@gcc.gnu.org> + + * config/pa/pa.md (decrement_and_branch_until_zero): Use `Q' constraint + instead of `m' constraint. Likewise for unnamed movb comparison + patterns using reg_before_reload_operand predicate. + * config/pa/predicates.md (reg_before_reload_operand): Tighten + predicate to reject register index and LO_SUM DLT memory forms + after reload. + +2014-12-27 H.J. Lu <hongjiu.lu@intel.com> + + Backport from mainline: + 2014-12-27 H.J. Lu <hongjiu.lu@intel.com> + + PR target/64409 + * config/i386/i386.c (ix86_function_type_abi): Issue an error + when ms_abi attribute is used with x32. + +2014-12-27 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/mmx.md (*vec_extractv2sf_1): Do not emit unpckhps. + Emit movshdup for SSE3 and shufps otherwise. + (*vec_extractv2si_1): Do not emit punpckhdq and unpckhps. + Emit pshufd for SSE2 and shufps otherwise. + +2014-12-24 Nick Clifton <nickc@redhat.com> + + Backport from mainline: + 2014-06-13 Nick Clifton <nickc@redhat.com> + + * config/rx/rx.h (JUMP_ALIGN): Return the log value if user + requested alignment is active. + (LABEL_ALIGN): Likewise. + (LOOP_ALIGN): Likewise. + + 2014-03-25 Nick Clifton <nickc@redhat.com> + + * config/rx/rx.c (rx_print_operand): Allow R operator to accept + SImode values. + +2014-12-17 Ulrich Weigand <Ulrich.Weigand@de.ibm.com> + + Backport from mainline + 2014-12-03 Ulrich Weigand <Ulrich.Weigand@de.ibm.com> + + PR rtl-optimization/64010 + * reload.c (push_reload): Before reusing a register contained + in an operand as input reload register, ensure that it is not + used in CALL_INSN_FUNCTION_USAGE. + +2014-12-15 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/64265 + * tsan.c (instrument_func_entry): Insert __tsan_func_entry + call on edge from entry block to single succ instead + of after labels of single succ of entry block. + +2014-12-14 H.J. Lu <hongjiu.lu@intel.com> + + Backported from mainline + 2014-12-14 H.J. Lu <hongjiu.lu@intel.com> + + PR rtl-optimization/64037 + * combine.c (setup_incoming_promotions): Pass the argument + before any promotions happen to promote_function_mode. + +2014-12-14 H.J. Lu <hongjiu.lu@intel.com> + + Backported from mainline + 2014-12-06 H.J. Lu <hongjiu.lu@intel.com> + + PR target/64200 + * config/i386/i386.c (decide_alg): Don't assert "alg != libcall" + for TARGET_INLINE_STRINGOPS_DYNAMICALLY. + +2014-12-13 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-12-12 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/64269 + * tree-ssa-forwprop.c (simplify_builtin_call): Bail out if + len2 or diff are too large. + +2014-12-11 Eric Botcazou <ebotcazou@adacore.com> + + * doc/md.texi (Insn Lengths): Fix description of (pc). 
+ +2014-12-11 Renlin Li <renlin.li@arm.com> + + Backport from mainline + 2014-12-11 Renlin Li <renlin.li@arm.com> + + * config/aarch64/aarch64.c (aarch64_parse_cpu): Don't define + selected_tune. + (aarch64_override_options): Use selected_cpu's tuning. + +2014-12-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline + 2014-09-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/rs6000-builtin.def (XVCVSXDDP_SCALE): New + built-in definition. + (XVCVUXDDP_SCALE): Likewise. + (XVCVDPSXDS_SCALE): Likewise. + (XVCVDPUXDS_SCALE): Likewise. + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + entries for VSX_BUILTIN_XVCVSXDDP_SCALE, + VSX_BUILTIN_XVCVUXDDP_SCALE, VSX_BUILTIN_XVCVDPSXDS_SCALE, and + VSX_BUILTIN_XVCVDPUXDS_SCALE. + * config/rs6000/rs6000-protos.h (rs6000_scale_v2df): New + prototype. + * config/rs6000/rs6000.c (real.h): New include. + (rs6000_scale_v2df): New function. + * config/rs6000/vsx.md (UNSPEC_VSX_XVCVSXDDP): New unspec. + (UNSPEC_VSX_XVCVUXDDP): Likewise. + (UNSPEC_VSX_XVCVDPSXDS): Likewise. + (UNSPEC_VSX_XVCVDPUXDS): Likewise. + (vsx_xvcvsxddp_scale): New define_expand. + (vsx_xvcvsxddp): New define_insn. + (vsx_xvcvuxddp_scale): New define_expand. + (vsx_xvcvuxddp): New define_insn. + (vsx_xvcvdpsxds_scale): New define_expand. + (vsx_xvcvdpsxds): New define_insn. + (vsx_xvcvdpuxds_scale): New define_expand. + (vsx_xvcvdpuxds): New define_insn. + * doc/extend.texi (vec_ctf): Add new prototypes. + (vec_cts): Likewise. + (vec_ctu): Likewise. + (vec_splat): Likewise. + (vec_div): Likewise. + (vec_mul): Likewise. + + Backport from mainline + 2014-08-28 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/altivec.h (vec_xl): New #define. + (vec_xst): Likewise. + * config/rs6000/rs6000-builtin.def (XXSPLTD_V2DF): New built-in. + (XXSPLTD_V2DI): Likewise. + (DIV_V2DI): Likewise. + (UDIV_V2DI): Likewise. + (MUL_V2DI): Likewise. + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add + entries for VSX_BUILTIN_XVRDPI, VSX_BUILTIN_DIV_V2DI, + VSX_BUILTIN_UDIV_V2DI, VSX_BUILTIN_MUL_V2DI, + VSX_BUILTIN_XXSPLTD_V2DF, and VSX_BUILTIN_XXSPLTD_V2DI. + * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): New unspec. + (UNSPEC_VSX_DIVSD): Likewise. + (UNSPEC_VSX_DIVUD): Likewise. + (UNSPEC_VSX_MULSD): Likewise. + (vsx_mul_v2di): New insn-and-split. + (vsx_div_v2di): Likewise. + (vsx_udiv_v2di): Likewise. + (vsx_xxspltd_<mode>): New insn. + + Backport from mainline + 2014-08-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/altivec.h (vec_cpsgn): New #define. + (vec_mergee): Likewise. + (vec_mergeo): Likewise. + (vec_cntlz): Likewise. + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add new + entries for VEC_AND, VEC_ANDC, VEC_MERGEH, VEC_MERGEL, VEC_NOR, + VEC_OR, VEC_PACKSU, VEC_XOR, VEC_PERM, VEC_SEL, VEC_VCMPGT_P, + VMRGEW, and VMRGOW. + * doc/extend.texi: Document various forms of vec_cpsgn, + vec_splats, vec_and, vec_andc, vec_mergeh, vec_mergel, vec_nor, + vec_or, vec_perm, vec_sel, vec_sub, vec_xor, vec_all_eq, + vec_all_ge, vec_all_gt, vec_all_le, vec_all_lt, vec_all_ne, + vec_any_eq, vec_any_ge, vec_any_gt, vec_any_le, vec_any_lt, + vec_any_ne, vec_mergee, vec_mergeo, vec_packsu, and vec_cntlz. + + Backport from mainline + 2014-07-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/altivec.md (unspec enum): Fix typo in UNSPEC_VSLDOI. + (altivec_vsldoi_<mode>): Likewise.
+ + +2014-12-10 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/62021 + * omp-low.c (simd_clone_adjust_return_type): Use + vector of pointer_sized_int_node types instead of vector of pointer + types. + (simd_clone_adjust_argument_types): Likewise. + +2014-12-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline: + 2014-12-09 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + PR middle-end/64225 + * tree-ssa-reassoc.c (acceptable_pow_call): Disable transformation + for BUILT_IN_POW when flag_errno_math is present. + +2014-12-10 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-12-10 Marek Polacek <polacek@redhat.com> + + PR tree-optimization/61686 + * tree-ssa-reassoc.c (range_entry_cmp): Use q->high instead of + p->high. + +2014-12-09 David Edelsohn <dje.gcc@gmail.com> + + Backport from mainline + 2014-12-05 David Edelsohn <dje.gcc@gmail.com> + + * config/rs6000/xcoff.h (ASM_OUTPUT_ALIGNED_LOCAL): Append + alignment to section name. Increase default alignment to + word. + +2014-12-09 Uros Bizjak <ubizjak@gmail.com> + + PR bootstrap/64213 + Revert: + 2014-11-28 H.J. Lu <hongjiu.lu@intel.com> + + PR rtl-optimization/64037 + * combine.c (setup_incoming_promotions): Pass the argument + before any promotions happen to promote_function_mode. + +2014-12-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/64191 + * tree-vect-stmts.c (vect_stmt_relevant_p): Clobbers are + not relevant (nor are their uses). + +2014-12-07 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-12-07 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/50751 + * config/sh/sh.md (extendqihi2): Allow only for TARGET_SH1. + +2014-12-05 H.J. Lu <hongjiu.lu@intel.com> + + Backport from mainline + 2014-12-02 H.J. Lu <hongjiu.lu@intel.com> + + PR target/64108 + * config/i386/i386.c (decide_alg): Stop only if there aren't + any usable algorithms. + +2014-12-05 H.J. Lu <hongjiu.lu@intel.com> + + Backport from mainline + 2014-11-28 H.J. Lu <hongjiu.lu@intel.com> + + PR rtl-optimization/64037 + * combine.c (setup_incoming_promotions): Pass the argument + before any promotions happen to promote_function_mode. + +2014-12-04 Tobias Burnus <burnus@net-b.de> + + * configure.ac + (ac_has_isl_schedule_constraints_compute_schedule): + New check. + * graphite-clast-to-gimple.c: For ISL 0.14, include deprecated headers. + * graphite-interchange.c: Ditto. + * graphite-poly.c: Ditto. + * graphite-sese-to-poly.c: Ditto. + * graphite-optimize-isl.c (getScheduleForBandList): Ditto. + Conditionally use ISL 0.13+ functions. + * config.in: Regenerate. + * configure: Regenerate. + +2014-12-04 Jakub Jelinek <jakub@redhat.com> + + PR c++/56493 + * convert.c (convert_to_real, convert_to_expr, convert_to_complex): + Handle COMPOUND_EXPR. + +2014-12-03 Jakub Jelinek <jakub@redhat.com> + + PR c/59708 + * expmed.c (expand_widening_mult): Return const0_rtx if + coeff is 0. + +2014-12-03 Martin Jambor <mjambor@suse.cz> + + PR ipa/64153 + * ipa-inline-analysis.c (evaluate_conditions_for_known_args): Check + type sizes before view_converting. + +2014-12-03 Shanyao Chen <chenshanyao@huawei.com> + + Backport from mainline + 2014-11-20 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + PR target/59593 + * config/arm/arm.md (*movhi_insn): Use right formatting + for immediate. + + 2014-11-19 Felix Yang <felix.yang@huawei.com> + Shanyao Chen <chenshanyao@huawei.com> + + PR target/59593 + * config/arm/arm.md (define_attr "arch"): Add v6t2. + (define_attr "arch_enabled"): Add test for the above.
+ (*movhi_insn_arch4): Add new alternative. + +2014-12-03 Renlin Li <Renlin.Li@arm.com> + + Backported from mainline + 2014-12-03 Renlin Li <Renlin.Li@arm.com> + + PR middle-end/63762 + PR target/63661 + * ira.c (ira): Update preferred class. + +2014-12-02 Uros Bizjak <ubizjak@gmail.com> + + PR target/64113 + * config/alpha/alpha.md (call_value_osf_tlsgd): Do not split insn + using post-reload splitter. Use peephole2 pass instead. + (call_value_osf_tlsldm): Ditto. + (TLS_CALL): New int iterator. + (tls): New int attribute. + (call_value_osf_<tls>): Merge insn pattern from call_value_osf_tlsgd + and call_value_osf_tlsldm using TLS_CALL int iterator. + +2014-12-02 Ulrich Weigand <Ulrich.Weigand@de.ibm.com> + + PR target/64115 + * config/rs6000/rs6000.c (rs6000_delegitimize_address): Remove + invalid UNSPEC_TOCREL sanity check under ENABLE_CHECKING. + +2014-12-01 Richard Biener <rguenther@suse.de> + + PR middle-end/64111 + * tree.c (int_cst_hash_hash): Use TYPE_UID instead of + htab_hash_pointer to not break PCH. + +2014-12-01 Martin Jambor <mjambor@suse.cz> + + PR ipa/63551 + * ipa-inline-analysis.c (evaluate_conditions_for_known_args): Convert + value of the argument to the type of the value in the condition. + +2014-11-28 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-11-27 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/64067 + * expr.c (expand_expr_addr_expr_1) <case COMPOUND_LITERAL_EXPR>: + Handle it by returning address of COMPOUND_LITERAL_EXPR_DECL + not only if modifier is EXPAND_INITIALIZER, but whenever + COMPOUND_LITERAL_EXPR_DECL is non-NULL and TREE_STATIC. + + 2014-11-19 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/63915 + * tree-vect-stmts.c (vectorizable_simd_clone_call): Pass + true instead of false as last argument to gsi_replace. + + PR sanitizer/63913 + * ubsan.c: Include tree-eh.h. + (instrument_bool_enum_load): Handle loads that can throw. + + 2014-10-31 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/63659 + * ree.c (update_reg_equal_equiv_notes): New function. + (combine_set_extension, transform_ifelse): Use it. + +2014-11-28 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + Backport from mainline. + 2014-11-28 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + * config/arm/t-aprofile (MULTILIB_MATCHES): New entry for + -march=armv8-a+crc. + +2014-11-26 Richard Biener <rguenther@suse.de> + + PR middle-end/63738 + * tree-data-ref.c (split_constant_offset_1): Do not follow + SSA edges for SSA names with SSA_NAME_OCCURS_IN_ABNORMAL_PHI. + +2014-11-26 Richard Biener <rguenther@suse.de> + + Backport from mainline + 2014-11-26 Richard Biener <rguenther@suse.de> + + PR tree-optimization/62238 + * tree-predcom.c (ref_at_iteration): Unshare the expression + before gimplifying it. + + 2014-11-25 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61927 + * tree-vect-loop.c (vect_analyze_loop_2): Revert ordering + of group and pattern analysis to the one in GCC 4.8. + + 2014-11-07 Richard Biener <rguenther@suse.de> + + PR tree-optimization/63605 + * fold-const.c (fold_binary_loc): Properly use element_precision + for types that may not be scalar. + + 2014-10-28 Richard Biener <rguenther@suse.de> + + PR middle-end/63665 + * fold-const.c (fold_comparison): Properly guard simplifying + against INT_MAX/INT_MIN with !TYPE_OVERFLOW_WRAPS.
+ +2014-11-25 Rohit <rohitarulraj@freescale.com> + + PR bootstrap/63703 + * config/rs6000/darwin.h (REGISTER_NAMES): Update based on 32 newly + added GCC hard register numbers for SPE high registers. + +2014-11-23 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-11-23 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/53976 + * config/sh/sh_optimize_sett_clrt.cc + (sh_optimize_sett_clrt::find_last_ccreg_values): Return bool instead + of void. Abort at complex edges. + (sh_optimize_sett_clrt::execute): Do nothing if find_last_ccreg_values + returned false. + 2014-11-22 Uros Bizjak <ubizjak@gmail.com> * params.def (PARAM_MAX_COMPLETELY_PEELED_INSNS): Increase to 200. * config/i386/i386.c (ix86_option_override_internal): Do not increase PARAM_MAX_COMPLETELY_PEELED_INSNS. +2014-11-22 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-11-22 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/63783 + PR target/51244 + * config/sh/sh_treg_combine.cc (sh_treg_combine::make_not_reg_insn): + Do not emit bitwise not insn. Emit logical not insn sequence instead. + Adjust related comments throughout the file. + +2014-11-22 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-11-20 Segher Boessenkool <segher@kernel.crashing.org> + + PR target/60111 + * config/sh/sh.c: Use signed char for signed field. + +2014-11-21 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + PR target/63673 + * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Allow + the base pointer of vec_vsx_ld and vec_vsx_st to take a pointer to + double. + +2014-11-21 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61750 + * tree-ssa-forwprop.c (simplify_vce): Verify type sizes + match for the resulting VIEW_CONVERT_EXPR. + +2014-11-19 Uros Bizjak <ubizjak@gmail.com> + + PR target/63947 + * config/i386/i386.c (put_condition_code) <case LTU, case GEU>: + Output "b" and "nb" suffix for FP mode. + +2014-11-19 Tom de Vries <tom@codesourcery.com> + + Backport from mainline + PR tree-optimization/62167 + * tree-ssa-tail-merge.c (stmt_local_def): Handle statements with vuse + conservatively. + (gimple_equal_p): Don't use vn_valueize to compare for lhs equality of + assigns. + +2014-11-16 Eric Botcazou <ebotcazou@adacore.com> + + * doc/tm.texi.in (TARGET_FLAGS_REGNUM): Move around. + * doc/tm.texi: Regenerate. + +2014-11-14 Felix Yang <felix.yang@huawei.com> + + Backport from mainline + 2014-11-14 Felix Yang <felix.yang@huawei.com> + Jiji Jiang <jiangjiji@huawei.com> + + * config/aarch64/aarch64-simd.md (*aarch64_simd_ld1r<mode>): Use + VALL mode iterator instead of VALLDI. + +2014-11-13 Teresa Johnson <tejohnson@google.com> + + PR tree-optimization/63841 + * tree-ssa-strlen.c (strlen_optimize_stmt): Ignore clobbers. + +2014-11-13 Christophe Lyon <christophe.lyon@linaro.org> + + Backport from mainline + 2014-11-02 Michael Collison <michael.collison@linaro.org> + + * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO) : Update + to support vector modes. + (CTZ_DEFINED_VALUE_AT_ZERO): Ditto. + +2014-11-13 Eric Botcazou <ebotcazou@adacore.com> + + * doc/tm.texi.in (SELECT_CC_MODE): Update example. + (REVERSIBLE_CC_MODE): Fix example. + (REVERSE_CONDITION): Fix typo. + * doc/tm.texi: Regenerate. + +2014-11-12 Jakub Jelinek <jakub@redhat.com> + + PR ipa/63838 + * ipa-pure-const.c (propagate_nothrow): Walk w->indirect_calls + chain instead of node->indirect_calls. 
+ +2014-11-11 Eric Botcazou <ebotcazou@adacore.com> + + PR target/61535 + * config/sparc/sparc.c (function_arg_vector_value): Deal with vectors + smaller than 8 bytes. + (sparc_function_arg_1): Tweak. + (sparc_function_value_1): Tweak. + +2014-11-08 Eric Botcazou <ebotcazou@adacore.com> + + * config/arm/arm.c (arm_set_return_address): Mark the store as frame + related, if any. + (thumb_set_return_address): Likewise. + +2014-11-07 Daniel Hellstrom <daniel@gaisler.com> + + * config.gcc (sparc-*-rtems*): Clean away unused t-elf. + * config/sparc/t-rtems: Add leon3v7 and muser-mode multilibs. + +2014-11-07 Marek Polacek <polacek@redhat.com> + + Backported from mainline + 2014-10-23 Marek Polacek <polacek@redhat.com> + + * c-ubsan.c (ubsan_instrument_shift): Perform the MINUS_EXPR + in unsigned type. + +2014-11-06 John David Anglin <danglin@gcc.gnu.org> + + * config/pa/pa.md (trap): New insn. Add "trap" to attribute type. + Don't allow trap insn in in_branch_delay, in_nullified_branch_delay + or in_call_delay. + +2014-11-06 Daniel Hellstrom <daniel@gaisler.com> + + * config.gcc (sparc*-*-*): Accept mcpu=leon3v7 processor. + * doc/invoke.texi (SPARC options): Add mcpu=leon3v7 comment. + * config/sparc/leon.md (leon3_load, leon_store, leon_fp_*): Handle + leon3v7 as leon3. + * config/sparc/sparc-opts.h (enum processor_type): Add LEON3V7. + * config/sparc/sparc.c (sparc_option_override): Add leon3v7 support. + * config/sparc/sparc.h (TARGET_CPU_leon3v7): New define. + * config/sparc/sparc.md (cpu): Add leon3v7. + * config/sparc/sparc.opt (enum processor_type): Add leon3v7. + +2014-11-05 Uros Bizjak <ubizjak@gmail.com> + + PR target/63538 + * config/i386/i386.c (in_large_data_p): Reject automatic variables. + (ix86_encode_section_info): Do not check for non-automatic variables + when setting SYMBOL_FLAG_FAR_ADDR flag. + (x86_64_elf_select_section): Do not check ix86_cmodel here. + (x86_64_elf_unique_section): Ditto. + (x86_elf_aligned_common): Emit tab before .largecomm. + +2014-11-05 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline: + 2014-10-20 Uros Bizjak <ubizjak@gmail.com> + + * varasm.c (const_alias_set): Remove. + (init_varasm_once): Remove initialization of const_alias_set. + (build_constant_desc): Do not set alias set to const_alias_set. + + Backport from mainline: + 2014-10-14 Uros Bizjak <ubizjak@gmail.com> + + PR rtl-optimization/63475 + * alias.c (true_dependence_1): Always use get_addr to extract + true address operands from x_addr and mem_addr. Use extracted + address operands to check for references with alignment ANDs. + Use extracted address operands with find_base_term and + base_alias_check. For noncanonicalized operands call canon_rtx with + extracted address operand. + (write_dependence_1): Ditto. + (may_alias_p): Ditto. Remove unused calls to canon_rtx. + + Backport from mainline: + 2014-10-10 Uros Bizjak <ubizjak@gmail.com> + + PR rtl-optimization/63483 + * alias.c (true_dependence_1): Do not exit early for MEM_READONLY_P + references when alignment ANDs are involved. + (write_dependence_p): Ditto. + (may_alias_p): Ditto. + +2014-10-31 DJ Delorie <dj@redhat.com> + + * expmed.c (strict_volatile_bitfield_p): Fix off-by-one error. + +2014-10-31 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/aarch64/aarch64-elf-raw.h (CA53_ERR_835769_SPEC): Define. + (LINK_SPEC): Include CA53_ERR_835769_SPEC. + * config/aarch64/aarch64-linux.h (CA53_ERR_835769_SPEC): Define. + (LINK_SPEC): Include CA53_ERR_835769_SPEC.
+ +2014-10-31 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/63697 + * tree-vrp.c (simplify_internal_call_using_ranges): For subcode == + MINUS_EXPR, check overflow on vr0.min - vr1.max and vr0.max - vr1.min + instead of vr0.min - vr1.min and vr0.max - vr1.max. + +2014-10-30 Georg-Johann Lay <avr@gjlay.de> + + PR63633 + * config/avr/avr-protos.h (regmask): New inline function. + (avr_fix_inputs, avr_emit3_fix_outputs): New protos. + * config/avr/avr.c (avr_fix_operands, avr_move_fixed_operands) + (avr_fix_inputs, avr_emit3_fix_outputs): New functions. + * config/avr/avr-fixed.md (mulqq3_nomul, muluqq3_nomul) + (mul<ALL2QA>3, mul<ALL4A>3, <usdiv><ALL1Q>3, <usdiv><ALL2QA>3) + (<usdiv><ALL4A>3, round<ALL124QA>3): Fix input operands. + * config/avr/avr-dimode.md (add<ALL8>3, sub<ALL8>3) + (<ss_addsub><ALL8S>3, <us_addsub><ALL8U>3, cbranch<ALL8>4) + (<di_shifts><ALL8>3, <any_extend>mulsidi3): Fix input operands. + * config/avr/avr.md (mulqi3_call, mulhi3_call, mulsi3, mulpsi3) + (mulu<QIHI>si3, muls<QIHI>si3, mulohisi3, <any_extend>mulhisi3) + (usmulhisi3, <any_extend>mulhi3_highpart, mulsqipsi3) + (fmul, fmuls, fmulsu): Fix operands. Turn insn into expander as + needed. + +2014-10-30 Jakub Jelinek <jakub@redhat.com> + + * BASE-VER: Set to 4.9.3. + * DEV-PHASE: Set to prerelease. + +2014-10-30 Release Manager + + * GCC 4.9.2 released. + 2014-10-29 Kyrylo Tkachov <kyrylo.tkachov@arm.com> * config/aarch64/aarch64.c (aarch64_madd_needs_nop): Restore @@ -15,6 +893,11 @@ tree-vect-data-refs.c (vect_create_addr_base_for_vector_ref): Set pointer alignment according to DR_MISALIGNMENT. +2014-10-25 Yury Gribov <y.gribov@samsung.com> + + PR sanitizer/63638 + * asan.c (enum asan_check_flags): Fixed ASAN_CHECK_LAST. + 2014-10-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> * config/aarch64/aarch64-elf-raw.h (CA53_ERR_835769_SPEC): Define. @@ -22,12 +905,521 @@ * config/aarch64/aarch64-linux.h (CA53_ERR_835769_SPEC): Define. (LINK_SPEC): Include CA53_ERR_835769_SPEC. +2014-10-24 Markus Trippelsdorf <markus@trippelsdorf.de> + + PR bootstrap/63632 + * collect2.c (main): Filter out -fno-lto. + +2014-10-22 Richard Biener <rguenther@suse.de> + Tobias Burnus <burnus@net-b.de> + + PR lto/63603 + * gcc.c (LINK_COMMAND_SPEC): Add %{fno-lto}. + 2014-10-21 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/63563 * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Bail out if either dra or drb stmts are not normal loads/stores. +2014-10-17 Jakub Jelinek <jakub@redhat.com> + + * asan.c (instrument_derefs): Allow instrumentation of odd-sized + accesses even for -fsanitize=address. + (execute_sanopt): Only allow use_calls for -fsanitize=kernel-address. + + PR tree-optimization/63302 + * tree-ssa-reassoc.c (optimize_range_tests_xor, + optimize_range_tests_diff): Use !integer_pow2p () instead of + tree_log2 () < 0. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + * asan.c (instrument_derefs): Enable unaligned path for KASan. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-10-03 Yury Gribov <y.gribov@samsung.com> + + * asan.c (asan_finish_file): Disable __asan_init calls for KASan; + don't emit empty ctors. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-09-01 Yury Gribov <y.gribov@samsung.com> + + PR sanitizer/61897 + PR sanitizer/62140 + + * asan.c (asan_mem_ref_get_end): Handle non-ptroff_t lengths. + (build_check_stmt): Likewise. + (instrument_strlen_call): Likewise. + (asan_expand_check_ifn): Likewise and fix types. 
+ (maybe_cast_to_ptrmode): New function. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-08-18 Yury Gribov <y.gribov@samsung.com> + + PR sanitizer/62089 + + * asan.c (instrument_derefs): Fix bitfield check. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-08-11 Yury Gribov <y.gribov@samsung.com> + + * asan.c (asan_check_flags): New enum. + (build_check_stmt_with_calls): Removed function. + (build_check_stmt): Split inlining logic to + asan_expand_check_ifn. + (instrument_derefs): Rename parameter. + (instrument_mem_region_access): Rename parameter. + (instrument_strlen_call): Likewise. + (asan_expand_check_ifn): New function. + (asan_instrument): Remove old code. + (pass_sanopt::execute): Change handling of + asan-instrumentation-with-call-threshold. + (asan_clear_shadow): Fix formatting. + (asan_function_start): Likewise. + (asan_emit_stack_protection): Likewise. + * doc/invoke.texi (asan-instrumentation-with-call-threshold): + Update description. + * internal-fn.c (expand_ASAN_CHECK): New function. + * internal-fn.def (ASAN_CHECK): New internal function. + * params.def (PARAM_ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD): + Update description. + (PARAM_ASAN_USE_AFTER_RETURN): Likewise. + * tree.c: Small comment fix. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-08-11 Yury Gribov <y.gribov@samsung.com> + + * gimple.c (gimple_call_fnspec): Support internal functions. + (gimple_call_return_flags): Use const. + * Makefile.in (GTFILES): Add internal-fn.h to list of GC files. + * internal-fn.def: Add fnspec information. + * internal-fn.h (internal_fn_fnspec): New function. + (init_internal_fns): Declare new function. + * internal-fn.c (internal_fn_fnspec_array): New global variable. + (init_internal_fns): New function. + * tree-core.h: Update macro call. + * tree.c (build_common_builtin_nodes): Initialize internal fns. + + Backport from mainline + 2014-08-12 Yury Gribov <y.gribov@samsung.com> + + * internal-fn.c (init_internal_fns): Fix off-by-one. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-07-31 Yury Gribov <y.gribov@samsung.com> + + * doc/cpp.texi (__SANITIZE_ADDRESS__): Updated description. + * doc/invoke.texi (-fsanitize=kernel-address): Describe new option. + * flag-types.h (SANITIZE_USER_ADDRESS, SANITIZE_KERNEL_ADDRESS): + New enums. + * gcc.c (sanitize_spec_function): Support new option. + (SANITIZER_SPEC): Remove now redundant check. + * opts.c (common_handle_option): Support new option. + (finish_options): Check for incompatibilities. + * toplev.c (process_options): Split userspace-specific checks. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-06-24 Max Ostapenko <m.ostapenko@partner.samsung.com> + + * asan.c (instrument_strlen_call): Do not instrument first byte in + strlen if already instrumented. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-06-16 Yury Gribov <y.gribov@samsung.com> + + * asan.c (check_func): New function. + (maybe_create_ssa_name): Likewise. + (build_check_stmt_with_calls): Likewise. + (use_calls_p): Likewise. + (report_error_func): Change interface. + (build_check_stmt): Allow non-integer lengths; add support + for new parameter. + (asan_instrument): Likewise. + (instrument_mem_region_access): Moved code to + build_check_stmt. + (instrument_derefs): Likewise. + (instrument_strlen_call): Likewise. 
+ * cfgcleanup.c (old_insns_match_p): Add support for new + functions. + * doc/invoke.texi: Describe new parameter. + * params.def: Define new parameter. + * params.h: Likewise. + * sanitizer.def: Describe new builtins. + + Backport from mainline + 2014-06-16 Yury Gribov <y.gribov@samsung.com> + + * asan.c (build_check_stmt): Fix maybe-uninitialized warning. + + Backport from mainline + 2014-06-18 Yury Gribov <y.gribov@samsung.com> + + PR sanitizer/61530 + + * asan.c (build_check_stmt): Add condition. + + Backport from mainline + 2014-06-18 Yury Gribov <y.gribov@samsung.com> + + PR sanitizer/61547 + + * asan.c (instrument_strlen_call): Fixed instrumentation of + trailing byte. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-05-30 Jakub Jelinek <jakub@redhat.com> + + * asan.c (report_error_func): Add SLOW_P argument, use + BUILT_IN_ASAN_*_N if set. + (build_check_stmt): Likewise. + (instrument_derefs): If T has insufficient alignment, + force same handling as for odd sizes. + +2014-10-16 Yury Gribov <y.gribov@samsung.com> + + Backport from mainline + 2014-05-30 Jakub Jelinek <jakub@redhat.com> + + * sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N, + BUILT_IN_ASAN_REPORT_STORE_N): New. + * asan.c (struct asan_mem_ref): Change access_size type to + HOST_WIDE_INT. + (asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call, + update_mem_ref_hash_table): Likewise. + (asan_mem_ref_hasher::hash): Hash in a HWI. + (report_error_func): Change size_in_bytes argument to HWI. + Use *_N builtins if size_in_bytes is larger than 16 or not power of + two. + (build_shadow_mem_access): New function. + (build_check_stmt): Use it. Change size_in_bytes argument to HWI. + Handle size_in_bytes not power of two or larger than 16. + (instrument_derefs): Don't give up if size_in_bytes is not + power of two or is larger than 16. + +2014-10-15 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/63448 + * lra-int.h (LRA_MAX_CONSTRAINT_ITERATION_NUMBER): Remove. + (LRA_MAX_ASSIGNMENT_ITERATION_NUMBER): New. + (LRA_MAX_INHERITANCE_PASSES): Use it. + (lra_constraint_iter_after_spill): Remove. + (lra_assignment_iter): New. + (lra_assignment_iter_after_spill): New. + * lra-assigns.c (lra_assignment_iter): New. + (lra_assignment_iter_after_spill): New. + (former_reload_pseudo_spill_p): New. + (spill_for): Set up former_reload_pseudo_spill_p. + (setup_live_pseudos_and_spill_after_risky): Ditto. + (assign_by_spills): Ditto. + (lra_assign): Increment lra_assignment_iter. Print the iteration + number. Reset former_reload_pseudo_spill_p. Check + lra_assignment_iter_after_spill. + * lra.c (lra): Remove lra_constraint_iter_after_spill. Initialize + lra_assignment_iter and lra_assignment_iter_after_spill. + * lra-constraints.c (lra_constraint_iter_after_spill): Remove. + (lra_constraints): Remove code with + lra_assignment_iter_after_spill. + +2014-10-15 Eric Botcazou <ebotcazou@adacore.com> + + * stor-layout.c (self_referential_size): Do not promote arguments. + +2014-10-15 Richard Biener <rguenther@suse.de> + + Backport from mainline + 2014-08-15 Richard Biener <rguenther@suse.de> + + PR tree-optimization/62031 + * tree-data-ref.c (dr_analyze_indices): Do not set + DR_UNCONSTRAINED_BASE. + (dr_may_alias_p): All indirect accesses have to go the + formerly DR_UNCONSTRAINED_BASE path. + * tree-data-ref.h (struct indices): Remove + unconstrained_base member. + (DR_UNCONSTRAINED_BASE): Remove. 
+ +2014-10-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline r215880 + 2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): + Issue a warning message when vec_lvsl or vec_lvsr is used with a + little endian target. + + Backport from mainline r215882 + 2014-10-03 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * altivec.md (altivec_lvsl): New define_expand. + (altivec_lvsl_direct): Rename define_insn from altivec_lvsl. + (altivec_lvsr): New define_expand. + (altivec_lvsr_direct): Rename define_insn from altivec_lvsr. + * rs6000.c (rs6000_expand_builtin): Change to use + altivec_lvs[lr]_direct; remove commented-out code. + +2014-10-10 Richard Biener <rguenther@suse.de> + + PR tree-optimization/63379 + * tree-vect-slp.c (vect_get_constant_vectors): Do not compute + a neutral operand for min/max when it is not a reduction chain. + +2014-10-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + Backport from mainline + 2014-10-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * configure.ac: Add --enable-fix-cortex-a53-835769 option. + * configure: Regenerate. + * config/aarch64/aarch64.c (aarch64_override_options): Handle + TARGET_FIX_ERR_A53_835769_DEFAULT. + * config/aarch64/aarch64.opt (mfix-cortex-a53-835769): Set Init + value to 2. + * doc/install.texi (aarch64*-*-*): Document new + --enable-fix-cortex-a53-835769 option. + +2014-10-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + Backport from mainline + 2014-10-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define. + (ADJUST_INSN_LENGTH): Define. + * config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option. + * config/aarch64/aarch64.c (is_mem_p): New function. + (is_memory_op): Likewise. + (aarch64_prev_real_insn): Likewise. + (is_madd_op): Likewise. + (dep_between_memop_and_curr): Likewise. + (aarch64_madd_needs_nop): Likewise. + (aarch64_final_prescan_insn): Likewise. + * doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769 + and -mno-fix-cortex-a53-835769 options. + +2014-10-10 Richard Biener <rguenther@suse.de> + + PR tree-optimization/63380 + * tree-ssa-tail-merge.c (stmt_local_def): Exclude stmts that + may trap. + +2014-10-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61969 + * tree-nrv.c (pass_nrv::execute): Properly test for automatic + variables. + +2014-10-09 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-10-09 Uros Bizjak <ubizjak@gmail.com> + + PR rtl-optimization/57003 + * regcprop.c (copyprop_hardreg_forward_1): If ksvd.ignore_set_reg, + also check CALL_INSN_FUNCTION_USAGE for clobbers again after + killing regs_invalidated_by_call. + +2014-10-08 Rong Xu <xur@google.com> + + * gcov-tool.c (profile_overlap): New driver function + to compute profile overlap. + (print_overlap_usage_message): New. + (overlap_usage): New. + (do_overlap): New. + (print_usage): Add calls to overlap function. + (main): Ditto. + * doc/gcov-tool.texi: Add documentation. 
+ +2014-10-08 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-10-08 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/52941 + * config/sh/sync.md (atomic_exchangesi_hard, atomic_exchange<mode>_hard, + atomic_fetch_<fetchop_name>si_hard, + atomic_fetch_<fetchop_name><mode>_hard, atomic_fetch_nandsi_hard, + atomic_fetch_nand<mode>_hard, atomic_<fetchop_name>_fetchsi_hard, + atomic_<fetchop_name>_fetch<mode>_hard, atomic_nand_fetchsi_hard, + atomic_nand_fetch<mode>_hard): Add missing set of T_REG. + +2014-10-03 Jan Hubicka <hubicka@ucw.cz> + + PR ipa/61144 + * varpool.c (ctor_for_folding): Do not fold WEAK symbols. + +2014-10-03 Jan Hubicka <hubicka@ucw.cz> + + PR ipa/62121 + * ipa-devirt.c (restrict_to_inner_class): Do not ICE when type is + unknown. + +2014-10-03 Jan Hubicka <hubicka@ucw.cz> + + PR lto/62026 + * lto-streamer-out.c (lto_output): Handle thunks correctly. + * cgraphclones.c (duplicate_thunk_for_node): Get thunk's arguments. + +2014-10-03 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/61200 + * omp-low.c (taskreg_contexts): New variable. + (scan_omp_parallel): Push newly created context into taskreg_contexts + vector and move record layout code to finish_taskreg_scan. + (scan_omp_task): Likewise. + (finish_taskreg_scan): New function. + (execute_lower_omp): Call finish_taskreg_scan on all taskreg_contexts + vector elements and release it. + +2014-10-02 Martin Jambor <mjambor@suse.cz> + + PR tree-optimization/63375 + * tree-sra.c (build_access_from_expr_1): Disqualify volatile + references. + +2014-10-01 Jakub Jelinek <jakub@redhat.com> + + PR debug/63342 + * dwarf2out.c (loc_list_from_tree): Handle TARGET_MEM_REF and + SSA_NAME. + + PR target/63428 + * config/i386/i386.c (expand_vec_perm_pshufb): Fix up rperm[0] + argument to avx2_permv2ti. + + PR c++/63306 + Backported from mainline + 2014-08-01 James Greenhalgh <james.greenhalgh@arm.com> + + PR regression/61510 + * cgraphunit.c (analyze_functions): Use get_create rather than get + for decls which are clones of abstract functions. + +2014-10-01 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-09-18 Vladimir Makarov <vmakarov@redhat.com> + + PR debug/63285 + * haifa-sched.c (schedule_block): Advance cycle at the end of BB + if advance != 0. + + 2014-09-10 Jan Hubicka <hubicka@ucw.cz> + + PR tree-optimization/63186 + * ipa-split.c (test_nonssa_use): Skip nonforced labels. + (mark_nonssa_use): Likewise. + (verify_non_ssa_vars): Verify all header blocks for label + definitions. + +2014-10-01 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + Backport from mainline + 2014-10-01 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/arm/arm.md (*store_minmaxsi): Disable for arm_restrict_it. + +2014-10-01 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-09-30 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.md (fmodxf3): Enable for flag_finite_math_only only. + (fmod<mode>3): Ditto. + (fpremxf4_i387): Ditto. + (remainderxf3): Ditto. + (remainder<mode>3): Ditto. + (fprem1xf4_i387): Ditto. + +2014-09-30 David Malcolm <dmalcolm@redhat.com> + + PR plugins/63410 + * Makefile.in (PLUGIN_HEADERS): Add pass-instances.def. + +2014-09-30 Jakub Jelinek <jakub@redhat.com> + + PR inline-asm/63282 + * ifcvt.c (dead_or_predicable): Don't call redirect_jump_1 + or invert_jump_1 if jump isn't any_condjump_p.
+ +2014-09-29 James Clarke <jrtc27@jrtc27.com> + Francois-Xavier Coudert <fxcoudert@gcc.gnu.org> + + PR target/61407 + * config/darwin-c.c (version_as_macro): Added extra 0 for OS X 10.10 + and above. + * config/darwin-driver.c (darwin_find_version_from_kernel): Removed + kernel version check to avoid incrementing it after every major OS X + release. + (darwin_default_min_version): Avoid static memory buffer. + +2014-09-29 Charles Baylis <charles.baylis@linaro.org> + + Backport from mainline r212303 + PR target/49423 + * config/arm/arm-protos.h (arm_legitimate_address_p, + arm_is_constant_pool_ref): Add prototypes. + * config/arm/arm.c (arm_legitimate_address_p): Remove static. + (arm_is_constant_pool_ref): New function. + * config/arm/arm.md (unaligned_loadhis, arm_zero_extendhisi2_v6, + arm_zero_extendqisi2_v6): Use Uh constraint for memory operand. + (arm_extendhisi2, arm_extendhisi2_v6): Use Uh constraint for memory + operand and remove pool_range and neg_pool_range attributes. + (arm_extendqihi_insn, arm_extendqisi, arm_extendqisi_v6): Remove + pool_range and neg_pool_range attributes. + * config/arm/constraints.md (Uh): New constraint. (Uq): Don't allow + constant pool references. + +2014-09-29 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/63247 + * omp-low.c (lower_omp_target): For OMP_CLAUSE_MAP_POINTER + of ARRAY_TYPE, if not OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION + use the alignment of avar rather than ovar. + +2014-09-28 John David Anglin <danglin@gcc.gnu.org> + + * config/pa/pa.c (pa_output_function_epilogue): Only update + last_address when a nonnote insn is found. + +2014-09-25 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-09-25 Nick Clifton <nickc@redhat.com> + 2014-09-25 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/62218 + * config/sh/sync.md (atomic_fetch_nand<mode>_soft_imask, + atomic_test_and_set_soft_imask): Fix typo in instruction sequence. + +2014-09-25 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline r215559 + 2014-09-25 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + PR target/63335 + * config/rs6000/rs6000-c.c (altivec_build_resolved_builtin): + Exclude VSX_BUILTIN_XVCMPGEDP_P from special handling. + 2014-09-25 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/63341 @@ -43,6 +1435,374 @@ - 1 instead of setting offset, pass byte_offset down to vect_create_data_ref_ptr. +2014-09-23 Michael Meissner <meissner@linux.vnet.ibm.com> + + Backport from mainline + 2014-09-23 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.md (f32_vsx): New mode attributes to + refine the constraints used on 32/64-bit floating point moves. + (f32_av): Likewise. + (f64_vsx): Likewise. + (f64_dm): Likewise. + (f64_av): Likewise. + (BOOL_REGS_OUTPUT): Use wt constraint for TImode instead of wa. + (BOOL_REGS_OP1): Likewise. + (BOOL_REGS_OP2): Likewise. + (BOOL_REGS_UNARY): Likewise. + (mov<mode>_hardfloat, SFmode/SDmode): Tighten down constraints for + 32/64-bit floating point moves. Do not use wa, instead use ww/ws + for moves involving VSX registers. Do not use constraints that + target VSX registers for decimal types. + (mov<mode>_hardfloat32, DFmode/DDmode): Likewise. + (mov<mode>_hardfloat64, DFmode/DDmode): Likewise. + +2014-09-22 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-05-21 Marek Polacek <polacek@redhat.com> + + PR sanitizer/61272 + * ubsan.c (is_ubsan_builtin_p): Turn assert into a condition.
+ +2014-09-22 Jakub Jelinek <jakub@redhat.com> + + PR debug/63328 + * omp-low.c (ipa_simd_modify_stmt_ops): For debug stmts + insert a debug source bind stmt setting DEBUG_EXPR_DECL + instead of a normal gimple assignment stmt. + +2014-09-19 Michael Meissner <meissner@linux.vnet.ibm.com> + + Back port from trunk: + 2014-09-19 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/predicates.md (fusion_gpr_mem_load): Move testing + for base_reg_operand to be common between LO_SUM and PLUS. + (fusion_gpr_mem_combo): New predicate to match a fused address + that combines the addis and memory offset address. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): Change + calling signature. + (emit_fusion_gpr_load): Likewise. + + * config/rs6000/rs6000.c (fusion_gpr_load_p): Change calling + signature to pass each argument separately, rather than + using an operands array. Rewrite the insns found by peephole2 to + be a single insn, rather than hoping the insns will still be + together when the peephole pass is done. Drop being called via a + normal peephole. + (emit_fusion_gpr_load): Change calling signature to be called from + the fusion_gpr_load_<mode> insns with a combined memory address + instead of the peephole pass passing the addis and offset + separately. + + * config/rs6000/rs6000.md (UNSPEC_FUSION_GPR): New unspec for GPR + fusion. + (power8 fusion peephole): Drop support for doing power8 via a + normal peephole that was created by the peephole2 pass. + (power8 fusion peephole2): Create a new insn with the fused + address, so that the fused operation is kept together after + register allocation is done. + (fusion_gpr_load_<mode>): Likewise. + +2014-09-18 Jakub Jelinek <jakub@redhat.com> + + PR c++/62017 + * asan.c (transform_statements): Don't instrument clobber statements. + +2014-09-17 Jakub Jelinek <jakub@redhat.com> + + PR debug/63284 + * tree-cfgcleanup.c (fixup_noreturn_call): Don't split block + if there are only debug stmts after the noreturn call, instead + remove the debug stmts. + +2014-09-17 Sebastian Huber <sebastian.huber@embedded-brains.de> + + * config.gcc (*-*-rtems*): Default to 'rtems' thread model. + Enable selection of 'posix' or no thread model. + +2014-09-16 John David Anglin <danglin@gcc.gnu.org> + + PR target/61853 + * config/pa/pa.c (pa_function_value): Directly handle aggregates + that fit exactly in a word or double word. + +2014-09-15 Sharad Singhai <singhai@google.com> + + Google Ref b/17114943 + + * l-ipo.c (promote_static_var_func): Update RTL with the unique name. + +2014-09-15 Markus Trippelsdorf <markus@trippelsdorf.de> + + * doc/install.texi (Options specification): add + --disable-libsanitizer item. + +2014-09-12 DJ Delorie <dj@redhat.com> + + * config/msp430/msp430.md (extendhipsi2): Use 20-bit form of RLAM/RRAM. + (extend_and_shift1_hipsi2): Likewise. + (extend_and_shift2_hipsi2): Likewise. + +2014-09-12 Martin Jambor <mjambor@suse.cz> + + PR ipa/61654 + * cgraph.h (cgraph_analyze_function): Declare. + * cgraphunit.c: (analyze_function): Remove forward declaration, + rename to cgraph_analyze_function, made external. + * cgraphclones.c (duplicate_thunk_for_node): Copy arguments of the + new decl properly. Analyze the new thunk if it is expanded. + +2014-09-11 H.J. Lu <hongjiu.lu@intel.com> + + Backport from mainline + 2014-09-11 H.J. Lu <hongjiu.lu@intel.com> + + PR target/63228 + * config/i386/i386.c (ix86_option_override_internal): Also turn + off OPTION_MASK_ABI_X32 for -m16. 
+ +2014-09-11 James Greenhalgh <james.greenhalgh@arm.com> + + Backport from mainline. + 2014-09-11 James Greenhalgh <james.greenhalgh@arm.com> + + * config/aarch64/arm_neon.h (vmull_high_lane_s16): Fix argument + types. + (vmull_high_lane_s32): Likewise. + (vmull_high_lane_u16): Likewise. + (vmull_high_lane_u32): Likewise. + +2014-09-11 Alan Lawrence <alan.lawrence@arm.com> + + Backport r214946 from mainline + 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + + * config/aarch64/aarch64.md (adddi3_aarch64): Set type to neon_add. + +2014-09-11 Alan Lawrence <alan.lawrence@arm.com> + + Backport r214953 from mainline + 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + + * config/aarch64/arm_neon.h (int32x1_t, int16x1_t, int8x1_t, + uint32x1_t, uint16x1_t, uint8x1_t): Remove typedefs. + + (vqabsb_s8, vqabsh_s16, vqabss_s32, vqaddb_s8, vqaddh_s16, vqadds_s32, + vqaddb_u8, vqaddh_u16, vqadds_u32, vqdmlalh_s16, vqdmlalh_lane_s16, + vqdmlals_s32, vqdmlslh_s16, vqdmlslh_lane_s16, vqdmlsls_s32, + vqdmulhh_s16, vqdmulhh_lane_s16, vqdmulhs_s32, vqdmulhs_lane_s32, + vqdmullh_s16, vqdmullh_lane_s16, vqdmulls_s32, vqdmulls_lane_s32, + vqmovnh_s16, vqmovns_s32, vqmovnd_s64, vqmovnh_u16, vqmovns_u32, + vqmovnd_u64, vqmovunh_s16, vqmovuns_s32, vqmovund_s64, vqnegb_s8, + vqnegh_s16, vqnegs_s32, vqrdmulhh_s16, vqrdmulhh_lane_s16, + vqrdmulhs_s32, vqrdmulhs_lane_s32, vqrshlb_s8, vqrshlh_s16, + vqrshls_s32, vqrshlb_u8, vqrshlh_u16, vqrshls_u32, vqrshrnh_n_s16, + vqrshrns_n_s32, vqrshrnd_n_s64, vqrshrnh_n_u16, vqrshrns_n_u32, + vqrshrnd_n_u64, vqrshrunh_n_s16, vqrshruns_n_s32, vqrshrund_n_s64, + vqshlb_s8, vqshlh_s16, vqshls_s32, vqshlb_u8, vqshlh_u16, vqshls_u32, + vqshlb_n_s8, vqshlh_n_s16, vqshls_n_s32, vqshlb_n_u8, vqshlh_n_u16, + vqshls_n_u32, vqshlub_n_s8, vqshluh_n_s16, vqshlus_n_s32, + vqshrnh_n_s16, vqshrns_n_s32, vqshrnd_n_s64, vqshrnh_n_u16, + vqshrns_n_u32, vqshrnd_n_u64, vqshrunh_n_s16, vqshruns_n_s32, + vqshrund_n_s64, vqsubb_s8, vqsubh_s16, vqsubs_s32, vqsubb_u8, + vqsubh_u16, vqsubs_u32, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, + vuqaddb_s8, vuqaddh_s16, vuqadds_s32): Replace all int{32,16,8}x1_t + with int{32,16,8}_t. + +2014-09-11 Jason Merrill <jason@redhat.com> + + PR c++/58678 + * ipa-devirt.c (ipa_devirt): Don't check DECL_COMDAT. + +2014-09-11 Georg-Johann Lay <avr@gjlay.de> + + Backport from 2014-09-11 trunk r215152. + + PR target/63223 + * config/avr/avr.md (*tablejump.3byte-pc): New insn. + (*tablejump): Restrict to !AVR_HAVE_EIJMP_EICALL. Add void clobber. + (casesi): Expand to *tablejump.3byte-pc if AVR_HAVE_EIJMP_EICALL. + +2014-09-10 Michael Meissner <meissner@linux.vnet.ibm.com> + + Backport from mainline + 2014-09-10 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/vsx.md (vsx_fmav4sf4): Use correct constraints for + V2DF, V4SF, DF, and DI modes. + (vsx_fmav2df2): Likewise. + (vsx_float_fix_<mode>2): Likewise. + (vsx_reduc_<VEC_reduc_name>_v2df_scalar): Likewise. + +2014-09-10 Xinliang David Li <davidxl@google.com> + + Backport from mainline + PR target/63209 + * config/arm/arm.md (movcond_addsi): Handle case where source + and target operands are the same. + +2014-09-10 Alan Modra <amodra@gmail.com> + + PR debug/60655 + * dwarf2out.c (mem_loc_descriptor <PLUS>): Return NULL if addend + can't be output. + +2014-09-09 Bill Schmidt <wschmidt@us.ibm.com> + + Backported from mainline + 2014-09-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + * config/rs6000/vsx.md (*vsx_extract_<mode>_load): Always match + selection of 0th memory doubleword, regardless of endianness. 
+ +2014-09-09 James Greenhalgh <james.greenhalgh@arm.com> + + Backport from mainline + 2014-09-09 James Greenhalgh <james.greenhalgh@arm.com> + + * doc/invoke.texi (-march): Use GNU/Linux rather than Linux. + (-mtune): Likewise. + (-mcpu): Likewise. + +2014-09-09 Jason Merrill <jason@redhat.com> + + PR c++/61214 + PR c++/62224 + * gimple-fold.c (can_refer_decl_in_current_unit_p): Don't allow + reference to a DECL_EXTERNAL COMDAT. + +2014-09-09 Richard Biener <rguenther@suse.de> + + Backport from mainline + 2014-08-05 Richard Biener <rguenther@suse.de> + + PR rtl-optimization/61672 + * emit-rtl.h (mem_attrs_eq_p): Declare. + * emit-rtl.c (mem_attrs_eq_p): Export. Handle NULL mem-attrs. + * cse.c (exp_equiv_p): Use mem_attrs_eq_p. + * cfgcleanup.c (merge_memattrs): Likewise. + Include emit-rtl.h. + + 2014-08-11 Richard Biener <rguenther@suse.de> + + PR tree-optimization/62075 + * tree-vect-slp.c (vect_detect_hybrid_slp_stmts): Properly + handle uses in patterns. + + 2014-08-14 Richard Biener <rguenther@suse.de> + + PR rtl-optimization/62079 + * recog.c (peephole2_optimize): If peep2_do_cleanup_cfg + run cleanup_cfg. + + 2014-08-26 Richard Biener <rguenther@suse.de> + + PR tree-optimization/62175 + * tree-ssa-loop-niter.c (expand_simple_operations): Do not + expand possibly trapping operations. + +2014-09-08 DJ Delorie <dj@redhat.com> + + * doc/invoke.texi (MSP430 Options): Add -minrt. + +2014-09-05 Easwaran Raman <eraman@google.com> + + Backport from mainline + PR rtl-optimization/62146 + * ifcvt.c (dead_or_predicable): Make removal of REG_EQUAL note of + hoisted instruction unconditional. + +2014-09-04 Guozhi Wei <carrot@google.com> + + PR target/62040 + * config/aarch64/iterators.md (VQ_NO2E, VQ_2E): New iterators. + * config/aarch64/aarch64-simd.md (move_lo_quad_internal_<mode>): Split + it into two patterns. + (move_lo_quad_internal_be_<mode>): Likewise. + +2014-09-03 Martin Jambor <mjambor@suse.cz> + + PR ipa/62015 + * ipa-cp.c (intersect_aggregates_with_edge): Handle impermissible + pass-trough jump functions correctly. + +2014-09-03 Martin Jambor <mjambor@suse.cz> + + PR ipa/61986 + * ipa-cp.c (find_aggregate_values_for_callers_subset): Chain + created replacements in ascending order of offsets. + (known_aggs_to_agg_replacement_list): Likewise. + +2014-09-02 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2014-08-27 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/62261 + * config/sh/sh.md (ashlsi3): Handle negative shift count for + TARGET_SHMEDIA. + (ashldi3, ashrsi3, ashrdi3, lshrsi3, lshrdi3): Likewise. + +2014-09-02 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2014-08-25 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/62111 + * config/sh/predicates.md (general_extend_operand): Disable + TRUNCATE before reload completes. + +2014-09-01 Oleg Endo <olegendo@gcc.gnu.org> + + Backport from mainline + 2014-09-01 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/62312 + * config/sh/sh.md (*cmp_div0s_0): Add missing constraints. + +2014-09-01 Jakub Jelinek <jakub@redhat.com> + + PR target/62025 + * sched-deps.c (add_or_update_dep_1): If ask_dependency_caches + returned DEP_PRESENT, make sure to set DEP_MULTIPLE on present_dep. + (find_inc): Revert 2014-08-12 change. + + * config/gnu-user.h (LIBLSAN_EARLY_SPEC): Define. + * gcc.c (LIBLSAN_SPEC, LIBLSAN_EARLY_SPEC): Follow LIBTSAN*_SPEC. + (SANITIZER_EARLY_SPEC): Include LIBLSAN_EARLY_SPEC for -fsanitize=leak. 
+ +2014-09-01 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-08-21 Marek Polacek <polacek@redhat.com> + + PR c/61271 + * expr.c (is_aligning_offset): Remove logical not. + +2014-09-01 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-08-19 Marek Polacek <polacek@redhat.com> + + PR c/61271 + * cgraphunit.c (handle_alias_pairs): Fix condition. + +2014-08-30 John David Anglin <danglin@gcc.gnu.org> + + * config/pa/pa.c (pa_assemble_integer): Don't add PLABEL relocation + prefix to function labels when generating fast indirect calls. + +2014-08-29 Yvan Roux <yvan.roux@linaro.org> + + Backport from mainline + 2014-08-27 Yvan Roux <yvan.roux@linaro.org> + + PR other/62248 + * config.gcc (arm*-*-*): Check --with-fpu against arm-fpus.def. + 2014-08-27 Guozhi Wei <carrot@google.com> PR target/62262 @@ -208,7 +1968,6 @@ OMP_CLAUSE_SHARED for global vars if the global var is mentioned in OMP_CLAUSE_MAP in some outer target region. ->>>>>>> .r214216 2014-08-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com> Backport from mainline @@ -376,7 +2135,7 @@ 2014-08-12 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> Backport from mainline - 2014-08-04 Ganesh Gopalasubramanian + 2014-08-04 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> * config/i386/driver-i386.c (host_detect_local_cpu): Handle AMD's extended @@ -385,7 +2144,7 @@ 2014-08-12 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> Backport from mainline - 2014-06-16 Ganesh Gopalasubramanian + 2014-06-16 Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com> * config/i386/i386.c (ix86_expand_sse2_mulvxdi3): Issue @@ -649,7 +2408,7 @@ * omp-low.c (create_omp_child_function): Don't set DECL_NAMELESS on the FUNCTION_DECL. - * BASE-VER: Set to 4.9.1. + * BASE-VER: Set to 4.9.2. * DEV-PHASE: Set to prerelease. 2014-07-16 Release Manager diff --git a/gcc-4.9/gcc/DATESTAMP b/gcc-4.9/gcc/DATESTAMP index 2cd35d408..b5129ed0d 100644 --- a/gcc-4.9/gcc/DATESTAMP +++ b/gcc-4.9/gcc/DATESTAMP @@ -1 +1 @@ -20140827 +20150123 diff --git a/gcc-4.9/gcc/Makefile.in b/gcc-4.9/gcc/Makefile.in index 868054981..0309b3701 100644 --- a/gcc-4.9/gcc/Makefile.in +++ b/gcc-4.9/gcc/Makefile.in @@ -2294,7 +2294,9 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/vtable-verify.c \ $(srcdir)/asan.c \ $(srcdir)/ubsan.c \ - $(srcdir)/tsan.c $(srcdir)/ipa-devirt.c \ + $(srcdir)/tsan.c \ + $(srcdir)/ipa-devirt.c \ + $(srcdir)/internal-fn.h \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, @@ -3169,7 +3171,7 @@ PLUGIN_HEADERS = $(TREE_H) $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ tree-parloops.h tree-ssa-address.h tree-ssa-coalesce.h tree-ssa-dom.h \ tree-ssa-loop.h tree-ssa-loop-ivopts.h tree-ssa-loop-manip.h \ tree-ssa-loop-niter.h tree-ssa-ter.h tree-ssa-threadedge.h \ - tree-ssa-threadupdate.h + tree-ssa-threadupdate.h pass-instances.def # generate the 'build fragment' b-header-vars s-header-vars: Makefile diff --git a/gcc-4.9/gcc/ada/ChangeLog b/gcc-4.9/gcc/ada/ChangeLog index c8c4d807b..ba21f435e 100644 --- a/gcc-4.9/gcc/ada/ChangeLog +++ b/gcc-4.9/gcc/ada/ChangeLog @@ -1,3 +1,51 @@ +2015-01-05 Eric Botcazou <ebotcazou@adacore.com> + + PR ada/64492 + * gcc-interface/Makefile.in (../stamp-tools): Reinstate dropped code. + +2014-11-24 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/trans.c (push_range_check_info): Replace early test + with assertion. 
+ (Raise_Error_to_gnu): Do not call push_range_check_info if the loop + stack is empty. + * gcc-interface/utils.c (convert_to_fat_pointer): Fix formatting. + * gcc-interface/utils2.c (gnat_invariant_expr): Deal with padded types + and revert latest change. + +2014-11-22 Eric Botcazou <ebotcazou@adacore.com> + + Backport from mainline + 2014-11-20 Vincent Celier <celier@adacore.com> + + PR ada/47500 + * back_end.adb (Scan_Back_End_Switches): Skip switch -G and + its argument. + +2014-11-11 Simon Wright <simon@pushface.org> + + PR ada/42978 + * mlib-utl.adb (ar): Output the options passed to ranlib. + +2014-10-30 Release Manager + + * GCC 4.9.2 released. + +2014-10-27 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (gnat_to_gnu_entity) <E_Array_Type>: Remove + superfluous computation for the max size. + <E_Array_Subtype>: Likewise. Make sure that the max size calculation + does not overflow at compile time. + +2014-10-13 Eric Botcazou <ebotcazou@adacore.com> + Alan Modra <amodra@gmail.com> + + PR ada/63225 + * uintp.adb (Vector_To_Uint): Move from here to... + * uintp.ads (UI_Vector): Make public. + (Vector_To_Uint): ...here. + 2014-08-12 Joel Sherrill <joel.sherrill@oarcorp.com> * socket.c: For RTEMS, use correct prototype of gethostbyname_r(). diff --git a/gcc-4.9/gcc/ada/back_end.adb b/gcc-4.9/gcc/ada/back_end.adb index bb442ad5e..53146c891 100644 --- a/gcc-4.9/gcc/ada/back_end.adb +++ b/gcc-4.9/gcc/ada/back_end.adb @@ -210,9 +210,10 @@ package body Back_End is Last : constant Natural := Switch_Last (Switch_Chars); begin - -- Skip -o or internal GCC switches together with their argument + -- Skip -o, -G or internal GCC switches together with their argument. if Switch_Chars (First .. Last) = "o" + or else Switch_Chars (First .. Last) = "G" or else Is_Internal_GCC_Switch (Switch_Chars) then Next_Arg := Next_Arg + 1; diff --git a/gcc-4.9/gcc/ada/gcc-interface/Makefile.in b/gcc-4.9/gcc/ada/gcc-interface/Makefile.in index 5c36962ef..e2cc4a9e0 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/Makefile.in +++ b/gcc-4.9/gcc/ada/gcc-interface/Makefile.in @@ -2510,9 +2510,16 @@ GCC_LINK=$(CXX) $(GCC_LINK_FLAGS) $(ADA_INCLUDES) $(LDFLAGS) # Build directory for the tools. Let's copy the target-dependent # sources using the same mechanism as for gnatlib. The other sources are # accessed using the vpath directive below -# Note: dummy target, stamp-tools is mainly handled by gnattools. ../stamp-tools: + -$(RM) tools/* + -$(RMDIR) tools + -$(MKDIR) tools + -(cd tools; $(LN_S) ../sdefault.adb ../snames.ads ../snames.adb .) 
+ -$(foreach PAIR,$(TOOLS_TARGET_PAIRS), \ + $(RM) tools/$(word 1,$(subst <, ,$(PAIR)));\ + $(LN_S) $(fsrcpfx)ada/$(word 2,$(subst <, ,$(PAIR))) \ + tools/$(word 1,$(subst <, ,$(PAIR)));) touch ../stamp-tools # when compiling the tools, the runtime has to be first on the path so that diff --git a/gcc-4.9/gcc/ada/gcc-interface/decl.c b/gcc-4.9/gcc/ada/gcc-interface/decl.c index 52452ce79..95bc778b4 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/decl.c +++ b/gcc-4.9/gcc/ada/gcc-interface/decl.c @@ -2200,11 +2200,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, int definition) tree gnu_max = convert (sizetype, TYPE_MAX_VALUE (gnu_index_type)); tree gnu_this_max - = size_binop (MAX_EXPR, - size_binop (PLUS_EXPR, size_one_node, - size_binop (MINUS_EXPR, - gnu_max, gnu_min)), - size_zero_node); + = size_binop (PLUS_EXPR, size_one_node, + size_binop (MINUS_EXPR, gnu_max, gnu_min)); if (TREE_CODE (gnu_this_max) == INTEGER_CST && TREE_OVERFLOW (gnu_this_max)) @@ -2525,20 +2522,26 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, int definition) gnu_max_size = NULL_TREE; else { - tree gnu_this_max - = size_binop (MAX_EXPR, - size_binop (PLUS_EXPR, size_one_node, - size_binop (MINUS_EXPR, + tree gnu_this_max; + + /* Use int_const_binop if the bounds are constant to + avoid any unwanted overflow. */ + if (TREE_CODE (gnu_base_min) == INTEGER_CST + && TREE_CODE (gnu_base_max) == INTEGER_CST) + gnu_this_max + = int_const_binop (PLUS_EXPR, size_one_node, + int_const_binop (MINUS_EXPR, gnu_base_max, - gnu_base_min)), - size_zero_node); - - if (TREE_CODE (gnu_this_max) == INTEGER_CST - && TREE_OVERFLOW (gnu_this_max)) - gnu_max_size = NULL_TREE; + gnu_base_min)); else - gnu_max_size - = size_binop (MULT_EXPR, gnu_max_size, gnu_this_max); + gnu_this_max + = size_binop (PLUS_EXPR, size_one_node, + size_binop (MINUS_EXPR, + gnu_base_max, + gnu_base_min)); + + gnu_max_size + = size_binop (MULT_EXPR, gnu_max_size, gnu_this_max); } } diff --git a/gcc-4.9/gcc/ada/gcc-interface/trans.c b/gcc-4.9/gcc/ada/gcc-interface/trans.c index 03bf098b0..30a166611 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/trans.c +++ b/gcc-4.9/gcc/ada/gcc-interface/trans.c @@ -2424,9 +2424,6 @@ push_range_check_info (tree var) struct loop_info_d *iter = NULL; unsigned int i; - if (vec_safe_is_empty (gnu_loop_stack)) - return NULL; - var = remove_conversions (var, false); if (TREE_CODE (var) != VAR_DECL) @@ -2435,6 +2432,8 @@ push_range_check_info (tree var) if (decl_function_context (var) != current_function_decl) return NULL; + gcc_assert (vec_safe_length (gnu_loop_stack) > 0); + for (i = vec_safe_length (gnu_loop_stack) - 1; vec_safe_iterate (gnu_loop_stack, i, &iter); i--) @@ -5165,6 +5164,7 @@ Raise_Error_to_gnu (Node_Id gnat_node, tree *gnu_result_type_p) the original checks reinstated, and a run time selection. The former loop will be suitable for vectorization. */ if (flag_unswitch_loops + && !vec_safe_is_empty (gnu_loop_stack) && (!gnu_low_bound || (gnu_low_bound = gnat_invariant_expr (gnu_low_bound))) && (!gnu_high_bound diff --git a/gcc-4.9/gcc/ada/gcc-interface/utils.c b/gcc-4.9/gcc/ada/gcc-interface/utils.c index 15b72366b..820d2cec9 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/utils.c +++ b/gcc-4.9/gcc/ada/gcc-interface/utils.c @@ -4371,8 +4371,7 @@ convert_to_fat_pointer (tree type, tree expr) { /* The template type can still be dummy at this point so we build an empty constructor. The middle-end will fill it in with zeros. 
*/ - t = build_constructor (template_type, - NULL); + t = build_constructor (template_type, NULL); TREE_CONSTANT (t) = TREE_STATIC (t) = 1; null_bounds = build_unary_op (ADDR_EXPR, NULL_TREE, t); SET_TYPE_NULL_BOUNDS (ptr_template_type, null_bounds); diff --git a/gcc-4.9/gcc/ada/gcc-interface/utils2.c b/gcc-4.9/gcc/ada/gcc-interface/utils2.c index dd4151b5b..da52f4185 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/utils2.c +++ b/gcc-4.9/gcc/ada/gcc-interface/utils2.c @@ -2784,7 +2784,13 @@ gnat_invariant_expr (tree expr) || (TREE_CODE (expr) == VAR_DECL && TREE_READONLY (expr))) && decl_function_context (expr) == current_function_decl && DECL_INITIAL (expr)) - expr = remove_conversions (DECL_INITIAL (expr), false); + { + expr = DECL_INITIAL (expr); + /* Look into CONSTRUCTORs built to initialize padded types. */ + if (TYPE_IS_PADDING_P (TREE_TYPE (expr))) + expr = convert (TREE_TYPE (TYPE_FIELDS (TREE_TYPE (expr))), expr); + expr = remove_conversions (expr, false); + } if (TREE_CONSTANT (expr)) return fold_convert (type, expr); @@ -2840,7 +2846,7 @@ object: if (!TREE_READONLY (t)) return NULL_TREE; - if (TREE_CODE (t) == CONSTRUCTOR || TREE_CODE (t) == PARM_DECL) + if (TREE_CODE (t) == PARM_DECL) return fold_convert (type, expr); if (TREE_CODE (t) == VAR_DECL diff --git a/gcc-4.9/gcc/ada/mlib-utl.adb b/gcc-4.9/gcc/ada/mlib-utl.adb index 756add1d4..7e2d56b75 100644 --- a/gcc-4.9/gcc/ada/mlib-utl.adb +++ b/gcc-4.9/gcc/ada/mlib-utl.adb @@ -282,6 +282,10 @@ package body MLib.Utl is if not Opt.Quiet_Output then Write_Str (Ranlib_Name.all); Write_Char (' '); + for J in Ranlib_Options'Range loop + Write_Str (Ranlib_Options (J).all); + Write_Char (' '); + end loop; Write_Line (Arguments (Ar_Options'Length + 1).all); end if; diff --git a/gcc-4.9/gcc/ada/uintp.adb b/gcc-4.9/gcc/ada/uintp.adb index f418b56ce..226c1877f 100644 --- a/gcc-4.9/gcc/ada/uintp.adb +++ b/gcc-4.9/gcc/ada/uintp.adb @@ -171,22 +171,6 @@ package body Uintp is -- If Discard_Quotient is True, Quotient is set to No_Uint -- If Discard_Remainder is True, Remainder is set to No_Uint - function Vector_To_Uint - (In_Vec : UI_Vector; - Negative : Boolean) return Uint; - -- Functions that calculate values in UI_Vectors, call this function to - -- create and return the Uint value. In_Vec contains the multiple precision - -- (Base) representation of a non-negative value. Leading zeroes are - -- permitted. Negative is set if the desired result is the negative of the - -- given value. The result will be either the appropriate directly - -- represented value, or a table entry in the proper canonical format is - -- created and returned. - -- - -- Note that Init_Operand puts a signed value in the result vector, but - -- Vector_To_Uint is always presented with a non-negative value. The - -- processing of signs is something that is done by the caller before - -- calling Vector_To_Uint. - ------------ -- Direct -- ------------ diff --git a/gcc-4.9/gcc/ada/uintp.ads b/gcc-4.9/gcc/ada/uintp.ads index dcf85a07f..d76d28527 100644 --- a/gcc-4.9/gcc/ada/uintp.ads +++ b/gcc-4.9/gcc/ada/uintp.ads @@ -90,6 +90,18 @@ package Uintp is Uint_Minus_80 : constant Uint; Uint_Minus_128 : constant Uint; + type UI_Vector is array (Pos range <>) of Int; + -- Vector containing the integer values of a Uint value + + -- Note: An earlier version of this package used pointers of arrays of Ints + -- (dynamically allocated) for the Uint type. 
The change leads to a few + -- less natural idioms used throughout this code, but eliminates all uses + -- of the heap except for the table package itself. For example, Uint + -- parameters are often converted to UI_Vectors for internal manipulation. + -- This is done by creating the local UI_Vector using the function N_Digits + -- on the Uint to find the size needed for the vector, and then calling + -- Init_Operand to copy the values out of the table into the vector. + ----------------- -- Subprograms -- ----------------- @@ -252,6 +264,22 @@ package Uintp is -- function is used for capacity checks, and it can be one bit off -- without affecting its usage. + function Vector_To_Uint + (In_Vec : UI_Vector; + Negative : Boolean) return Uint; + -- Functions that calculate values in UI_Vectors, call this function to + -- create and return the Uint value. In_Vec contains the multiple precision + -- (Base) representation of a non-negative value. Leading zeroes are + -- permitted. Negative is set if the desired result is the negative of the + -- given value. The result will be either the appropriate directly + -- represented value, or a table entry in the proper canonical format is + -- created and returned. + -- + -- Note that Init_Operand puts a signed value in the result vector, but + -- Vector_To_Uint is always presented with a non-negative value. The + -- processing of signs is something that is done by the caller before + -- calling Vector_To_Uint. + --------------------- -- Output Routines -- --------------------- @@ -494,18 +522,6 @@ private -- UI_Vector is defined for this purpose and some internal subprograms -- used for converting from one to the other are defined. - type UI_Vector is array (Pos range <>) of Int; - -- Vector containing the integer values of a Uint value - - -- Note: An earlier version of this package used pointers of arrays of Ints - -- (dynamically allocated) for the Uint type. The change leads to a few - -- less natural idioms used throughout this code, but eliminates all uses - -- of the heap except for the table package itself. For example, Uint - -- parameters are often converted to UI_Vectors for internal manipulation. - -- This is done by creating the local UI_Vector using the function N_Digits - -- on the Uint to find the size needed for the vector, and then calling - -- Init_Operand to copy the values out of the table into the vector. - type Uint_Entry is record Length : Pos; -- Length of entry in Udigits table in digits (i.e. in words) diff --git a/gcc-4.9/gcc/alias.c b/gcc-4.9/gcc/alias.c index e5406123c..b81ea28c6 100644 --- a/gcc-4.9/gcc/alias.c +++ b/gcc-4.9/gcc/alias.c @@ -382,17 +382,9 @@ get_alias_set_entry (alias_set_type alias_set) static inline int mems_in_disjoint_alias_sets_p (const_rtx mem1, const_rtx mem2) { -/* Perform a basic sanity check. Namely, that there are no alias sets - if we're not using strict aliasing. This helps to catch bugs - whereby someone uses PUT_CODE, but doesn't clear MEM_ALIAS_SET, or - where a MEM is allocated in some way other than by the use of - gen_rtx_MEM, and the MEM_ALIAS_SET is not cleared. If we begin to - use alias sets to indicate that spilled registers cannot alias each - other, we might need to remove this check. */ - gcc_assert (flag_strict_aliasing - || (!MEM_ALIAS_SET (mem1) && !MEM_ALIAS_SET (mem2))); - - return ! alias_sets_conflict_p (MEM_ALIAS_SET (mem1), MEM_ALIAS_SET (mem2)); + return (flag_strict_aliasing + && ! 
alias_sets_conflict_p (MEM_ALIAS_SET (mem1), + MEM_ALIAS_SET (mem2))); } /* Insert the NODE into the splay tree given by DATA. Used by @@ -2454,6 +2446,7 @@ static int true_dependence_1 (const_rtx mem, enum machine_mode mem_mode, rtx mem_addr, const_rtx x, rtx x_addr, bool mem_canonicalized) { + rtx true_mem_addr; rtx base; int ret; @@ -2473,17 +2466,9 @@ true_dependence_1 (const_rtx mem, enum machine_mode mem_mode, rtx mem_addr, || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; - /* Read-only memory is by definition never modified, and therefore can't - conflict with anything. We don't expect to find read-only set on MEM, - but stupid user tricks can produce them, so don't die. */ - if (MEM_READONLY_P (x)) - return 0; - - /* If we have MEMs referring to different address spaces (which can - potentially overlap), we cannot easily tell from the addresses - whether the references overlap. */ - if (MEM_ADDR_SPACE (mem) != MEM_ADDR_SPACE (x)) - return 1; + if (! x_addr) + x_addr = XEXP (x, 0); + x_addr = get_addr (x_addr); if (! mem_addr) { @@ -2491,22 +2476,23 @@ true_dependence_1 (const_rtx mem, enum machine_mode mem_mode, rtx mem_addr, if (mem_mode == VOIDmode) mem_mode = GET_MODE (mem); } + true_mem_addr = get_addr (mem_addr); - if (! x_addr) - { - x_addr = XEXP (x, 0); - if (!((GET_CODE (x_addr) == VALUE - && GET_CODE (mem_addr) != VALUE - && reg_mentioned_p (x_addr, mem_addr)) - || (GET_CODE (x_addr) != VALUE - && GET_CODE (mem_addr) == VALUE - && reg_mentioned_p (mem_addr, x_addr)))) - { - x_addr = get_addr (x_addr); - if (! mem_canonicalized) - mem_addr = get_addr (mem_addr); - } - } + /* Read-only memory is by definition never modified, and therefore can't + conflict with anything. However, don't assume anything when AND + addresses are involved and leave to the code below to determine + dependence. We don't expect to find read-only set on MEM, but + stupid user tricks can produce them, so don't die. */ + if (MEM_READONLY_P (x) + && GET_CODE (x_addr) != AND + && GET_CODE (true_mem_addr) != AND) + return 0; + + /* If we have MEMs referring to different address spaces (which can + potentially overlap), we cannot easily tell from the addresses + whether the references overlap. */ + if (MEM_ADDR_SPACE (mem) != MEM_ADDR_SPACE (x)) + return 1; base = find_base_term (x_addr); if (base && (GET_CODE (base) == LABEL_REF @@ -2514,14 +2500,14 @@ true_dependence_1 (const_rtx mem, enum machine_mode mem_mode, rtx mem_addr, && CONSTANT_POOL_ADDRESS_P (base)))) return 0; - rtx mem_base = find_base_term (mem_addr); - if (! base_alias_check (x_addr, base, mem_addr, mem_base, + rtx mem_base = find_base_term (true_mem_addr); + if (! base_alias_check (x_addr, base, true_mem_addr, mem_base, GET_MODE (x), mem_mode)) return 0; x_addr = canon_rtx (x_addr); if (!mem_canonicalized) - mem_addr = canon_rtx (mem_addr); + mem_addr = canon_rtx (true_mem_addr); if ((ret = memrefs_conflict_p (GET_MODE_SIZE (mem_mode), mem_addr, SIZE_FOR_MODE (x), x_addr, 0)) != -1) @@ -2571,6 +2557,7 @@ write_dependence_p (const_rtx mem, bool mem_canonicalized, bool x_canonicalized, bool writep) { rtx mem_addr; + rtx true_mem_addr, true_x_addr; rtx base; int ret; @@ -2591,8 +2578,20 @@ write_dependence_p (const_rtx mem, || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; - /* A read from read-only memory can't conflict with read-write memory. 
*/ - if (!writep && MEM_READONLY_P (mem)) + if (!x_addr) + x_addr = XEXP (x, 0); + true_x_addr = get_addr (x_addr); + + mem_addr = XEXP (mem, 0); + true_mem_addr = get_addr (mem_addr); + + /* A read from read-only memory can't conflict with read-write memory. + Don't assume anything when AND addresses are involved and leave to + the code below to determine dependence. */ + if (!writep + && MEM_READONLY_P (mem) + && GET_CODE (true_x_addr) != AND + && GET_CODE (true_mem_addr) != AND) return 0; /* If we have MEMs referring to different address spaces (which can @@ -2601,24 +2600,7 @@ write_dependence_p (const_rtx mem, if (MEM_ADDR_SPACE (mem) != MEM_ADDR_SPACE (x)) return 1; - mem_addr = XEXP (mem, 0); - if (!x_addr) - { - x_addr = XEXP (x, 0); - if (!((GET_CODE (x_addr) == VALUE - && GET_CODE (mem_addr) != VALUE - && reg_mentioned_p (x_addr, mem_addr)) - || (GET_CODE (x_addr) != VALUE - && GET_CODE (mem_addr) == VALUE - && reg_mentioned_p (mem_addr, x_addr)))) - { - x_addr = get_addr (x_addr); - if (!mem_canonicalized) - mem_addr = get_addr (mem_addr); - } - } - - base = find_base_term (mem_addr); + base = find_base_term (true_mem_addr); if (! writep && base && (GET_CODE (base) == LABEL_REF @@ -2626,18 +2608,18 @@ write_dependence_p (const_rtx mem, && CONSTANT_POOL_ADDRESS_P (base)))) return 0; - rtx x_base = find_base_term (x_addr); - if (! base_alias_check (x_addr, x_base, mem_addr, base, GET_MODE (x), - GET_MODE (mem))) + rtx x_base = find_base_term (true_x_addr); + if (! base_alias_check (true_x_addr, x_base, true_mem_addr, base, + GET_MODE (x), GET_MODE (mem))) return 0; if (!x_canonicalized) { - x_addr = canon_rtx (x_addr); + x_addr = canon_rtx (true_x_addr); x_mode = GET_MODE (x); } if (!mem_canonicalized) - mem_addr = canon_rtx (mem_addr); + mem_addr = canon_rtx (true_mem_addr); if ((ret = memrefs_conflict_p (SIZE_FOR_MODE (mem), mem_addr, GET_MODE_SIZE (x_mode), x_addr, 0)) != -1) @@ -2705,10 +2687,20 @@ may_alias_p (const_rtx mem, const_rtx x) || MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER) return 1; + x_addr = XEXP (x, 0); + x_addr = get_addr (x_addr); + + mem_addr = XEXP (mem, 0); + mem_addr = get_addr (mem_addr); + /* Read-only memory is by definition never modified, and therefore can't - conflict with anything. We don't expect to find read-only set on MEM, - but stupid user tricks can produce them, so don't die. */ - if (MEM_READONLY_P (x)) + conflict with anything. However, don't assume anything when AND + addresses are involved and leave to the code below to determine + dependence. We don't expect to find read-only set on MEM, but + stupid user tricks can produce them, so don't die. */ + if (MEM_READONLY_P (x) + && GET_CODE (x_addr) != AND + && GET_CODE (mem_addr) != AND) return 0; /* If we have MEMs referring to different address spaces (which can @@ -2717,28 +2709,12 @@ may_alias_p (const_rtx mem, const_rtx x) if (MEM_ADDR_SPACE (mem) != MEM_ADDR_SPACE (x)) return 1; - x_addr = XEXP (x, 0); - mem_addr = XEXP (mem, 0); - if (!((GET_CODE (x_addr) == VALUE - && GET_CODE (mem_addr) != VALUE - && reg_mentioned_p (x_addr, mem_addr)) - || (GET_CODE (x_addr) != VALUE - && GET_CODE (mem_addr) == VALUE - && reg_mentioned_p (mem_addr, x_addr)))) - { - x_addr = get_addr (x_addr); - mem_addr = get_addr (mem_addr); - } - rtx x_base = find_base_term (x_addr); rtx mem_base = find_base_term (mem_addr); if (! 
base_alias_check (x_addr, x_base, mem_addr, mem_base, GET_MODE (x), GET_MODE (mem_addr))) return 0; - x_addr = canon_rtx (x_addr); - mem_addr = canon_rtx (mem_addr); - if (nonoverlapping_memrefs_p (mem, x, true)) return 0; diff --git a/gcc-4.9/gcc/asan.c b/gcc-4.9/gcc/asan.c index 28a476fe4..f6c42a1d0 100644 --- a/gcc-4.9/gcc/asan.c +++ b/gcc-4.9/gcc/asan.c @@ -242,6 +242,17 @@ static GTY(()) tree shadow_ptr_types[2]; /* Decl for __asan_option_detect_stack_use_after_return. */ static GTY(()) tree asan_detect_stack_use_after_return; +/* Various flags for Asan builtins. */ +enum asan_check_flags +{ + ASAN_CHECK_STORE = 1 << 0, + ASAN_CHECK_SCALAR_ACCESS = 1 << 1, + ASAN_CHECK_NON_ZERO_LEN = 1 << 2, + ASAN_CHECK_START_INSTRUMENTED = 1 << 3, + ASAN_CHECK_END_INSTRUMENTED = 1 << 4, + ASAN_CHECK_LAST = 1 << 5 +}; + /* Hashtable support for memory references used by gimple statements. */ @@ -251,8 +262,8 @@ struct asan_mem_ref /* The expression of the beginning of the memory region. */ tree start; - /* The size of the access (can be 1, 2, 4, 8, 16 for now). */ - char access_size; + /* The size of the access. */ + HOST_WIDE_INT access_size; }; static alloc_pool asan_mem_ref_alloc_pool; @@ -274,7 +285,7 @@ asan_mem_ref_get_alloc_pool () /* Initializes an instance of asan_mem_ref. */ static void -asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size) +asan_mem_ref_init (asan_mem_ref *ref, tree start, HOST_WIDE_INT access_size) { ref->start = start; ref->access_size = access_size; @@ -287,7 +298,7 @@ asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size) access to the referenced memory. */ static asan_mem_ref* -asan_mem_ref_new (tree start, char access_size) +asan_mem_ref_new (tree start, HOST_WIDE_INT access_size) { asan_mem_ref *ref = (asan_mem_ref *) pool_alloc (asan_mem_ref_get_alloc_pool ()); @@ -305,6 +316,9 @@ asan_mem_ref_get_end (tree start, tree len) if (len == NULL_TREE || integer_zerop (len)) return start; + if (!ptrofftype_p (len)) + len = convert_to_ptrofftype (len); + return fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (start), start, len); } @@ -334,7 +348,7 @@ inline hashval_t asan_mem_ref_hasher::hash (const asan_mem_ref *mem_ref) { hashval_t h = iterative_hash_expr (mem_ref->start, 0); - h = iterative_hash_hashval_t (h, mem_ref->access_size); + h = iterative_hash_host_wide_int (mem_ref->access_size, h); return h; } @@ -392,7 +406,7 @@ free_mem_ref_resources () /* Return true iff the memory reference REF has been instrumented. */ static bool -has_mem_ref_been_instrumented (tree ref, char access_size) +has_mem_ref_been_instrumented (tree ref, HOST_WIDE_INT access_size) { asan_mem_ref r; asan_mem_ref_init (&r, ref, access_size); @@ -480,7 +494,7 @@ get_mem_refs_of_builtin_call (const gimple call, tree source0 = NULL_TREE, source1 = NULL_TREE, dest = NULL_TREE, len = NULL_TREE; bool is_store = true, got_reference_p = false; - char access_size = 1; + HOST_WIDE_INT access_size = 1; switch (DECL_FUNCTION_CODE (callee)) { @@ -842,7 +856,7 @@ has_stmt_been_instrumented_p (gimple stmt) /* Insert a memory reference into the hash table. 
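Accesses recorded here are found again by has_mem_ref_been_instrumented, so the same location is not instrumented twice.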
*/ static void -update_mem_ref_hash_table (tree ref, char access_size) +update_mem_ref_hash_table (tree ref, HOST_WIDE_INT access_size) { hash_table <asan_mem_ref_hasher> ht = get_mem_ref_hash_table (); @@ -929,7 +943,7 @@ asan_clear_shadow (rtx shadow_mem, HOST_WIDE_INT len) emit_move_insn (shadow_mem, const0_rtx); tmp = expand_simple_binop (Pmode, PLUS, addr, gen_int_mode (4, Pmode), addr, - true, OPTAB_LIB_WIDEN); + true, OPTAB_LIB_WIDEN); if (tmp != addr) emit_move_insn (addr, tmp); emit_cmp_and_jump_insns (addr, end, LT, NULL_RTX, Pmode, true, top_label); @@ -944,7 +958,7 @@ asan_function_start (void) section *fnsec = function_section (current_function_decl); switch_to_section (fnsec); ASM_OUTPUT_DEBUG_LABEL (asm_out_file, "LASANPC", - current_function_funcdef_no); + current_function_funcdef_no); } /* Insert code to protect stack vars. The prologue sequence should be emitted @@ -1009,7 +1023,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, { use_after_return_class = floor_log2 (asan_frame_size - 1) - 5; /* __asan_stack_malloc_N guarantees alignment - N < 6 ? (64 << N) : 4096 bytes. */ + N < 6 ? (64 << N) : 4096 bytes. */ if (alignb > (use_after_return_class < 6 ? (64U << use_after_return_class) : 4096U)) use_after_return_class = -1; @@ -1082,7 +1096,7 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb, ASM_GENERATE_INTERNAL_LABEL (buf, "LASANPC", current_function_funcdef_no); id = get_identifier (buf); decl = build_decl (DECL_SOURCE_LOCATION (current_function_decl), - VAR_DECL, id, char_type_node); + VAR_DECL, id, char_type_node); SET_DECL_ASSEMBLER_NAME (decl, id); TREE_ADDRESSABLE (decl) = 1; TREE_READONLY (decl) = 1; @@ -1315,23 +1329,50 @@ asan_protect_global (tree decl) return true; } -/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}. - IS_STORE is either 1 (for a store) or 0 (for a load). - SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */ +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16,_n}. + IS_STORE is either 1 (for a store) or 0 (for a load). */ static tree -report_error_func (bool is_store, int size_in_bytes) +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes, int *nargs) { - static enum built_in_function report[2][5] + static enum built_in_function report[2][6] = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2, BUILT_IN_ASAN_REPORT_LOAD4, BUILT_IN_ASAN_REPORT_LOAD8, - BUILT_IN_ASAN_REPORT_LOAD16 }, + BUILT_IN_ASAN_REPORT_LOAD16, BUILT_IN_ASAN_REPORT_LOAD_N }, { BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2, BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8, - BUILT_IN_ASAN_REPORT_STORE16 } }; + BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } }; + if (size_in_bytes == -1) + { + *nargs = 2; + return builtin_decl_implicit (report[is_store][5]); + } + *nargs = 1; return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]); } +/* Construct a function tree for __asan_{load,store}{1,2,4,8,16,_n}. + IS_STORE is either 1 (for a store) or 0 (for a load). 
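+ A SIZE_IN_BYTES of -1 selects the _N variant, which takes the access + length as a second argument; *NARGS is set to the resulting argument + count.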
*/ + +static tree +check_func (bool is_store, int size_in_bytes, int *nargs) +{ + static enum built_in_function check[2][6] + = { { BUILT_IN_ASAN_LOAD1, BUILT_IN_ASAN_LOAD2, + BUILT_IN_ASAN_LOAD4, BUILT_IN_ASAN_LOAD8, + BUILT_IN_ASAN_LOAD16, BUILT_IN_ASAN_LOADN }, + { BUILT_IN_ASAN_STORE1, BUILT_IN_ASAN_STORE2, + BUILT_IN_ASAN_STORE4, BUILT_IN_ASAN_STORE8, + BUILT_IN_ASAN_STORE16, BUILT_IN_ASAN_STOREN } }; + if (size_in_bytes == -1) + { + *nargs = 2; + return builtin_decl_implicit (check[is_store][5]); + } + *nargs = 1; + return builtin_decl_implicit (check[is_store][exact_log2 (size_in_bytes)]); +} + /* Split the current basic block and create a condition statement insertion point right before or after the statement pointed to by ITER. Return an iterator to the point at which the caller might @@ -1450,85 +1491,36 @@ insert_if_then_before_iter (gimple cond, gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT); } -/* Instrument the memory access instruction BASE. Insert new - statements before or after ITER. - - Note that the memory access represented by BASE can be either an - SSA_NAME, or a non-SSA expression. LOCATION is the source code - location. IS_STORE is TRUE for a store, FALSE for a load. - BEFORE_P is TRUE for inserting the instrumentation code before - ITER, FALSE for inserting it after ITER. SIZE_IN_BYTES is one of - 1, 2, 4, 8, 16. - - If BEFORE_P is TRUE, *ITER is arranged to still point to the - statement it was pointing to prior to calling this function, - otherwise, it points to the statement logically following it. */ +/* Build + (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset (). */ -static void -build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, - bool before_p, bool is_store, int size_in_bytes) +static tree +build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location, + tree base_addr, tree shadow_ptr_type) { - gimple_stmt_iterator gsi; - basic_block then_bb, else_bb; - tree t, base_addr, shadow; - gimple g; - tree shadow_ptr_type = shadow_ptr_types[size_in_bytes == 16 ? 1 : 0]; + tree t, uintptr_type = TREE_TYPE (base_addr); tree shadow_type = TREE_TYPE (shadow_ptr_type); - tree uintptr_type - = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (base)), 1); - tree base_ssa = base; - - /* Get an iterator on the point where we can add the condition - statement for the instrumentation. */ - gsi = create_cond_insert_point (iter, before_p, - /*then_more_likely_p=*/false, - /*create_then_fallthru_edge=*/false, - &then_bb, - &else_bb); - - base = unshare_expr (base); - - /* BASE can already be an SSA_NAME; in that case, do not create a - new SSA_NAME for it. */ - if (TREE_CODE (base) != SSA_NAME) - { - g = gimple_build_assign_with_ops (TREE_CODE (base), - make_ssa_name (TREE_TYPE (base), NULL), - base, NULL_TREE); - gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - base_ssa = gimple_assign_lhs (g); - } - - g = gimple_build_assign_with_ops (NOP_EXPR, - make_ssa_name (uintptr_type, NULL), - base_ssa, NULL_TREE); - gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - base_addr = gimple_assign_lhs (g); - - /* Build - (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset (). 
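+ This is the address of the shadow byte guarding BASE_ADDR: one shadow + byte covers 2**ASAN_SHADOW_SHIFT (here 8) application bytes, which is + why the slow path below masks the address with 7.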
*/ + gimple g; t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT); g = gimple_build_assign_with_ops (RSHIFT_EXPR, make_ssa_name (uintptr_type, NULL), base_addr, t); gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + gsi_insert_after (gsi, g, GSI_NEW_STMT); t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ()); g = gimple_build_assign_with_ops (PLUS_EXPR, make_ssa_name (uintptr_type, NULL), gimple_assign_lhs (g), t); gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + gsi_insert_after (gsi, g, GSI_NEW_STMT); g = gimple_build_assign_with_ops (NOP_EXPR, make_ssa_name (shadow_ptr_type, NULL), gimple_assign_lhs (g), NULL_TREE); gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + gsi_insert_after (gsi, g, GSI_NEW_STMT); t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g), build_int_cst (shadow_ptr_type, 0)); @@ -1536,48 +1528,150 @@ build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter, make_ssa_name (shadow_type, NULL), t, NULL_TREE); gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - shadow = gimple_assign_lhs (g); + gsi_insert_after (gsi, g, GSI_NEW_STMT); + return gimple_assign_lhs (g); +} + +/* BASE can already be an SSA_NAME; in that case, do not create a + new SSA_NAME for it. */ + +static tree +maybe_create_ssa_name (location_t loc, tree base, gimple_stmt_iterator *iter, + bool before_p) +{ + if (TREE_CODE (base) == SSA_NAME) + return base; + gimple g + = gimple_build_assign_with_ops (TREE_CODE (base), + make_ssa_name (TREE_TYPE (base), NULL), + base, NULL_TREE); + gimple_set_location (g, loc); + if (before_p) + gsi_insert_before (iter, g, GSI_SAME_STMT); + else + gsi_insert_after (iter, g, GSI_NEW_STMT); + return gimple_assign_lhs (g); +} + +/* LEN can already have necessary size and precision; + in that case, do not create a new variable. */ + +tree +maybe_cast_to_ptrmode (location_t loc, tree len, gimple_stmt_iterator *iter, + bool before_p) +{ + if (ptrofftype_p (len)) + return len; + gimple g + = gimple_build_assign_with_ops (NOP_EXPR, + make_ssa_name (pointer_sized_int_node, NULL), + len, NULL); + gimple_set_location (g, loc); + if (before_p) + gsi_insert_before (iter, g, GSI_SAME_STMT); + else + gsi_insert_after (iter, g, GSI_NEW_STMT); + return gimple_assign_lhs (g); +} + +/* Instrument the memory access instruction BASE. Insert new + statements before or after ITER. - if (size_in_bytes < 8) + Note that the memory access represented by BASE can be either an + SSA_NAME, or a non-SSA expression. LOCATION is the source code + location. IS_STORE is TRUE for a store, FALSE for a load. + BEFORE_P is TRUE for inserting the instrumentation code before + ITER, FALSE for inserting it after ITER. IS_SCALAR_ACCESS is TRUE + for a scalar memory access and FALSE for memory region access. + NON_ZERO_P is TRUE if memory region is guaranteed to have non-zero + length. ALIGN tells alignment of accessed memory object. + + START_INSTRUMENTED and END_INSTRUMENTED are TRUE if start/end of + memory region have already been instrumented. + + If BEFORE_P is TRUE, *ITER is arranged to still point to the + statement it was pointing to prior to calling this function, + otherwise, it points to the statement logically following it. 
*/ + +static void +build_check_stmt (location_t loc, tree base, tree len, + HOST_WIDE_INT size_in_bytes, gimple_stmt_iterator *iter, + bool is_non_zero_len, bool before_p, bool is_store, + bool is_scalar_access, unsigned int align = 0, + bool start_instrumented = false, + bool end_instrumented = false) +{ + gimple_stmt_iterator gsi = *iter; + gimple g; + + gcc_assert (!(size_in_bytes > 0 && !is_non_zero_len)); + + if (start_instrumented && end_instrumented) { - /* Slow path for 1, 2 and 4 byte accesses. - Test (shadow != 0) - & ((base_addr & 7) + (size_in_bytes - 1)) >= shadow). */ - gimple_seq seq = NULL; - gimple shadow_test = build_assign (NE_EXPR, shadow, 0); - gimple_seq_add_stmt (&seq, shadow_test); - gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7)); - gimple_seq_add_stmt (&seq, build_type_cast (shadow_type, - gimple_seq_last (seq))); - if (size_in_bytes > 1) - gimple_seq_add_stmt (&seq, - build_assign (PLUS_EXPR, gimple_seq_last (seq), - size_in_bytes - 1)); - gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, gimple_seq_last (seq), - shadow)); - gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test, - gimple_seq_last (seq))); - t = gimple_assign_lhs (gimple_seq_last (seq)); - gimple_seq_set_location (seq, location); - gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + if (!before_p) + gsi_next (iter); + return; } - else - t = shadow; - g = gimple_build_cond (NE_EXPR, t, build_int_cst (TREE_TYPE (t), 0), - NULL_TREE, NULL_TREE); - gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + gsi = *iter; - /* Generate call to the run-time library (e.g. __asan_report_load8). */ - gsi = gsi_start_bb (then_bb); - g = gimple_build_call (report_error_func (is_store, size_in_bytes), - 1, base_addr); - gimple_set_location (g, location); - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + base = unshare_expr (base); + base = maybe_create_ssa_name (loc, base, &gsi, before_p); - *iter = gsi_start_bb (else_bb); + if (len) + { + len = unshare_expr (len); + len = maybe_cast_to_ptrmode (loc, len, iter, before_p); + } + else + { + gcc_assert (size_in_bytes != -1); + len = build_int_cst (pointer_sized_int_node, size_in_bytes); + } + + if (size_in_bytes > 1) + { + if ((size_in_bytes & (size_in_bytes - 1)) != 0 + || size_in_bytes > 16) + is_scalar_access = false; + else if (align && align < size_in_bytes * BITS_PER_UNIT) + { + /* On non-strict alignment targets, if + 16-byte access is just 8-byte aligned, + this will result in misaligned shadow + memory 2 byte load, but otherwise can + be handled using one read. */ + if (size_in_bytes != 16 + || STRICT_ALIGNMENT + || align < 8 * BITS_PER_UNIT) + is_scalar_access = false; + } + } + + HOST_WIDE_INT flags = 0; + if (is_store) + flags |= ASAN_CHECK_STORE; + if (is_non_zero_len) + flags |= ASAN_CHECK_NON_ZERO_LEN; + if (is_scalar_access) + flags |= ASAN_CHECK_SCALAR_ACCESS; + if (start_instrumented) + flags |= ASAN_CHECK_START_INSTRUMENTED; + if (end_instrumented) + flags |= ASAN_CHECK_END_INSTRUMENTED; + + g = gimple_build_call_internal (IFN_ASAN_CHECK, 3, + build_int_cst (integer_type_node, flags), + base, len); + gimple_set_location (g, loc); + if (before_p) + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + else + { + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + gsi_next (&gsi); + *iter = gsi; + } } /* If T represents a memory access, add instrumentation code before ITER. 
@@ -1611,8 +1705,7 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, } size_in_bytes = int_size_in_bytes (type); - if ((size_in_bytes & (size_in_bytes - 1)) != 0 - || (unsigned HOST_WIDE_INT) size_in_bytes - 1 >= 16) + if (size_in_bytes <= 0) return; HOST_WIDE_INT bitsize, bitpos; @@ -1621,20 +1714,21 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, int volatilep = 0, unsignedp = 0; tree inner = get_inner_reference (t, &bitsize, &bitpos, &offset, &mode, &unsignedp, &volatilep, false); - if (bitpos % (size_in_bytes * BITS_PER_UNIT) - || bitsize != size_in_bytes * BITS_PER_UNIT) + + if (TREE_CODE (t) == COMPONENT_REF + && DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)) != NULL_TREE) { - if (TREE_CODE (t) == COMPONENT_REF - && DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)) != NULL_TREE) - { - tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)); - instrument_derefs (iter, build3 (COMPONENT_REF, TREE_TYPE (repr), - TREE_OPERAND (t, 0), repr, - NULL_TREE), location, is_store); - } + tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (t, 1)); + instrument_derefs (iter, build3 (COMPONENT_REF, TREE_TYPE (repr), + TREE_OPERAND (t, 0), repr, + NULL_TREE), location, is_store); return; } + if (bitpos % BITS_PER_UNIT + || bitsize != size_in_bytes * BITS_PER_UNIT) + return; + if (TREE_CODE (inner) == VAR_DECL && offset == NULL_TREE && bitpos >= 0 @@ -1666,8 +1760,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, base = build_fold_addr_expr (t); if (!has_mem_ref_been_instrumented (base, size_in_bytes)) { - build_check_stmt (location, base, iter, /*before_p=*/true, - is_store, size_in_bytes); + unsigned int align = get_object_alignment (t); + build_check_stmt (location, base, NULL_TREE, size_in_bytes, iter, + /*is_non_zero_len*/size_in_bytes > 0, /*before_p=*/true, + is_store, /*is_scalar_access*/true, align); update_mem_ref_hash_table (base, size_in_bytes); update_mem_ref_hash_table (t, size_in_bytes); } @@ -1692,142 +1788,24 @@ instrument_mem_region_access (tree base, tree len, || integer_zerop (len)) return; - gimple_stmt_iterator gsi = *iter; - - basic_block fallthrough_bb = NULL, then_bb = NULL; - /* If the beginning of the memory region has already been instrumented, do not instrument it. */ bool start_instrumented = has_mem_ref_been_instrumented (base, 1); /* If the end of the memory region has already been instrumented, do - not instrument it. */ + not instrument it. */ tree end = asan_mem_ref_get_end (base, len); bool end_instrumented = has_mem_ref_been_instrumented (end, 1); - if (start_instrumented && end_instrumented) - return; - - if (!is_gimple_constant (len)) - { - /* So, the length of the memory area to asan-protect is - non-constant. Let's guard the generated instrumentation code - like: - - if (len != 0) - { - //asan instrumentation code goes here. - } - // falltrough instructions, starting with *ITER. */ + HOST_WIDE_INT size_in_bytes = tree_fits_shwi_p (len) ? tree_to_shwi (len) : -1; - gimple g = gimple_build_cond (NE_EXPR, - len, - build_int_cst (TREE_TYPE (len), 0), - NULL_TREE, NULL_TREE); - gimple_set_location (g, location); - insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true, - &then_bb, &fallthrough_bb); - /* Note that fallthrough_bb starts with the statement that was - pointed to by ITER. */ - - /* The 'then block' of the 'if (len != 0) condition is where - we'll generate the asan instrumentation code now. 
*/ - gsi = gsi_last_bb (then_bb); - } - - if (!start_instrumented) - { - /* Instrument the beginning of the memory region to be accessed, - and arrange for the rest of the intrumentation code to be - inserted in the then block *after* the current gsi. */ - build_check_stmt (location, base, &gsi, /*before_p=*/true, is_store, 1); - - if (then_bb) - /* We are in the case where the length of the region is not - constant; so instrumentation code is being generated in the - 'then block' of the 'if (len != 0) condition. Let's arrange - for the subsequent instrumentation statements to go in the - 'then block'. */ - gsi = gsi_last_bb (then_bb); - else - { - *iter = gsi; - /* Don't remember this access as instrumented, if length - is unknown. It might be zero and not being actually - instrumented, so we can't rely on it being instrumented. */ - update_mem_ref_hash_table (base, 1); - } - } - - if (end_instrumented) - return; - - /* We want to instrument the access at the end of the memory region, - which is at (base + len - 1). */ - - /* offset = len - 1; */ - len = unshare_expr (len); - tree offset; - gimple_seq seq = NULL; - if (TREE_CODE (len) == INTEGER_CST) - offset = fold_build2 (MINUS_EXPR, size_type_node, - fold_convert (size_type_node, len), - build_int_cst (size_type_node, 1)); - else - { - gimple g; - tree t; - - if (TREE_CODE (len) != SSA_NAME) - { - t = make_ssa_name (TREE_TYPE (len), NULL); - g = gimple_build_assign_with_ops (TREE_CODE (len), t, len, NULL); - gimple_set_location (g, location); - gimple_seq_add_stmt_without_update (&seq, g); - len = t; - } - if (!useless_type_conversion_p (size_type_node, TREE_TYPE (len))) - { - t = make_ssa_name (size_type_node, NULL); - g = gimple_build_assign_with_ops (NOP_EXPR, t, len, NULL); - gimple_set_location (g, location); - gimple_seq_add_stmt_without_update (&seq, g); - len = t; - } - - t = make_ssa_name (size_type_node, NULL); - g = gimple_build_assign_with_ops (MINUS_EXPR, t, len, - build_int_cst (size_type_node, 1)); - gimple_set_location (g, location); - gimple_seq_add_stmt_without_update (&seq, g); - offset = gimple_assign_lhs (g); - } + build_check_stmt (location, base, len, size_in_bytes, iter, + /*is_non_zero_len*/size_in_bytes > 0, /*before_p*/true, + is_store, /*is_scalar_access*/false, /*align*/0, + start_instrumented, end_instrumented); - /* _1 = base; */ - base = unshare_expr (base); - gimple region_end = - gimple_build_assign_with_ops (TREE_CODE (base), - make_ssa_name (TREE_TYPE (base), NULL), - base, NULL); - gimple_set_location (region_end, location); - gimple_seq_add_stmt_without_update (&seq, region_end); - - /* _2 = _1 + offset; */ - region_end = - gimple_build_assign_with_ops (POINTER_PLUS_EXPR, - make_ssa_name (TREE_TYPE (base), NULL), - gimple_assign_lhs (region_end), - offset); - gimple_set_location (region_end, location); - gimple_seq_add_stmt_without_update (&seq, region_end); - gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); - - /* instrument access at _2; */ - gsi = gsi_for_stmt (region_end); - build_check_stmt (location, gimple_assign_lhs (region_end), - &gsi, /*before_p=*/false, is_store, 1); - - if (then_bb == NULL) + update_mem_ref_hash_table (base, 1); + if (size_in_bytes != -1) update_mem_ref_hash_table (end, 1); *iter = gsi_for_stmt (gsi_stmt (*iter)); @@ -1850,6 +1828,7 @@ instrument_mem_region_access (tree base, tree len, static bool instrument_strlen_call (gimple_stmt_iterator *iter) { + gimple g; gimple call = gsi_stmt (*iter); gcc_assert (is_gimple_call (call)); @@ -1858,6 +1837,8 @@ 
instrument_strlen_call (gimple_stmt_iterator *iter) && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL && DECL_FUNCTION_CODE (callee) == BUILT_IN_STRLEN); + location_t loc = gimple_location (call); + tree len = gimple_call_lhs (call); if (len == NULL) /* Some passes might clear the return value of the strlen call; @@ -1866,50 +1847,35 @@ instrument_strlen_call (gimple_stmt_iterator *iter) return false; gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (len))); - location_t loc = gimple_location (call); - tree str_arg = gimple_call_arg (call, 0); + len = maybe_cast_to_ptrmode (loc, len, iter, /*before_p*/false); - /* Instrument the access to the first byte of str_arg. i.e: + tree str_arg = gimple_call_arg (call, 0); + bool start_instrumented = has_mem_ref_been_instrumented (str_arg, 1); - _1 = str_arg; instrument (_1); */ tree cptr_type = build_pointer_type (char_type_node); - gimple str_arg_ssa = - gimple_build_assign_with_ops (NOP_EXPR, - make_ssa_name (cptr_type, NULL), - str_arg, NULL); - gimple_set_location (str_arg_ssa, loc); - gimple_stmt_iterator gsi = *iter; - gsi_insert_before (&gsi, str_arg_ssa, GSI_NEW_STMT); - build_check_stmt (loc, gimple_assign_lhs (str_arg_ssa), &gsi, - /*before_p=*/false, /*is_store=*/false, 1); - - /* If we initially had an instruction like: - - int n = strlen (str) - - we now want to instrument the access to str[n], after the - instruction above.*/ - - /* So let's build the access to str[n] that is, access through the - pointer_plus expr: (_1 + len). */ - gimple stmt = - gimple_build_assign_with_ops (POINTER_PLUS_EXPR, - make_ssa_name (cptr_type, NULL), - gimple_assign_lhs (str_arg_ssa), - len); - gimple_set_location (stmt, loc); - gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); - - build_check_stmt (loc, gimple_assign_lhs (stmt), &gsi, - /*before_p=*/false, /*is_store=*/false, 1); - - /* Ensure that iter points to the statement logically following the - one it was initially pointing to. */ - *iter = gsi; - /* As *ITER has been advanced to point to the next statement, let's - return true to inform transform_statements that it shouldn't - advance *ITER anymore; otherwises it will skip that next - statement, which wouldn't be instrumented. */ + g = gimple_build_assign_with_ops (NOP_EXPR, + make_ssa_name (cptr_type, NULL), + str_arg, NULL); + gimple_set_location (g, loc); + gsi_insert_before (iter, g, GSI_SAME_STMT); + str_arg = gimple_assign_lhs (g); + + build_check_stmt (loc, str_arg, NULL_TREE, 1, iter, + /*is_non_zero_len*/true, /*before_p=*/true, + /*is_store=*/false, /*is_scalar_access*/true, /*align*/0, + start_instrumented, start_instrumented); + + g = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, + make_ssa_name (cptr_type, NULL), + str_arg, + len); + gimple_set_location (g, loc); + gsi_insert_after (iter, g, GSI_NEW_STMT); + + build_check_stmt (loc, gimple_assign_lhs (g), NULL_TREE, 1, iter, + /*is_non_zero_len*/true, /*before_p=*/false, + /*is_store=*/false, /*is_scalar_access*/true, /*align*/0); + return true; } @@ -2099,6 +2065,7 @@ transform_statements (void) if (has_stmt_been_instrumented_p (s)) gsi_next (&i); else if (gimple_assign_single_p (s) + && !gimple_clobber_p (s) && maybe_instrument_assignment (&i)) /* Nothing to do as maybe_instrument_assignment advanced the iterator I. */; @@ -2416,8 +2383,11 @@ asan_finish_file (void) nor after .LASAN* array. 
*/ flag_sanitize &= ~SANITIZE_ADDRESS; - tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT); - append_to_statement_list (build_call_expr (fn, 0), &asan_ctor_statements); + if (flag_sanitize & SANITIZE_USER_ADDRESS) + { + tree fn = builtin_decl_implicit (BUILT_IN_ASAN_INIT); + append_to_statement_list (build_call_expr (fn, 0), &asan_ctor_statements); + } FOR_EACH_DEFINED_VARIABLE (vnode) if (TREE_ASM_WRITTEN (vnode->decl) && asan_protect_global (vnode->decl)) @@ -2454,7 +2424,7 @@ asan_finish_file (void) DECL_INITIAL (var) = ctor; varpool_assemble_decl (varpool_node_for_decl (var)); - fn = builtin_decl_implicit (BUILT_IN_ASAN_REGISTER_GLOBALS); + tree fn = builtin_decl_implicit (BUILT_IN_ASAN_REGISTER_GLOBALS); tree gcount_tree = build_int_cst (pointer_sized_int_node, gcount); append_to_statement_list (build_call_expr (fn, 2, build_fold_addr_expr (var), @@ -2469,11 +2439,218 @@ asan_finish_file (void) cgraph_build_static_cdtor ('D', dtor_statements, MAX_RESERVED_INIT_PRIORITY - 1); } - cgraph_build_static_cdtor ('I', asan_ctor_statements, - MAX_RESERVED_INIT_PRIORITY - 1); + if (asan_ctor_statements) + cgraph_build_static_cdtor ('I', asan_ctor_statements, + MAX_RESERVED_INIT_PRIORITY - 1); flag_sanitize |= SANITIZE_ADDRESS; } +/* Expand the ASAN_{LOAD,STORE} builtins. */ + +static bool +asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls) +{ + gimple g = gsi_stmt (*iter); + location_t loc = gimple_location (g); + + HOST_WIDE_INT flags = tree_to_shwi (gimple_call_arg (g, 0)); + gcc_assert (flags < ASAN_CHECK_LAST); + bool is_scalar_access = (flags & ASAN_CHECK_SCALAR_ACCESS) != 0; + bool is_store = (flags & ASAN_CHECK_STORE) != 0; + bool is_non_zero_len = (flags & ASAN_CHECK_NON_ZERO_LEN) != 0; + bool start_instrumented = (flags & ASAN_CHECK_START_INSTRUMENTED) != 0; + bool end_instrumented = (flags & ASAN_CHECK_END_INSTRUMENTED) != 0; + + tree base = gimple_call_arg (g, 1); + tree len = gimple_call_arg (g, 2); + + HOST_WIDE_INT size_in_bytes + = is_scalar_access && tree_fits_shwi_p (len) ? tree_to_shwi (len) : -1; + + if (use_calls) + { + /* Instrument using callbacks. */ + gimple g + = gimple_build_assign_with_ops (NOP_EXPR, + make_ssa_name (pointer_sized_int_node, + NULL), + base, NULL_TREE); + gimple_set_location (g, loc); + gsi_insert_before (iter, g, GSI_SAME_STMT); + tree base_addr = gimple_assign_lhs (g); + + int nargs; + tree fun = check_func (is_store, size_in_bytes, &nargs); + if (nargs == 1) + g = gimple_build_call (fun, 1, base_addr); + else + { + gcc_assert (nargs == 2); + g = gimple_build_assign_with_ops (NOP_EXPR, + make_ssa_name (pointer_sized_int_node, + NULL), + len, NULL_TREE); + gimple_set_location (g, loc); + gsi_insert_before (iter, g, GSI_SAME_STMT); + tree sz_arg = gimple_assign_lhs (g); + g = gimple_build_call (fun, nargs, base_addr, sz_arg); + } + gimple_set_location (g, loc); + gsi_replace (iter, g, false); + return false; + } + + HOST_WIDE_INT real_size_in_bytes = size_in_bytes == -1 ? 1 : size_in_bytes; + + tree shadow_ptr_type = shadow_ptr_types[real_size_in_bytes == 16 ? 1 : 0]; + tree shadow_type = TREE_TYPE (shadow_ptr_type); + + gimple_stmt_iterator gsi = *iter; + + if (!is_non_zero_len) + { + /* So, the length of the memory area to asan-protect is + non-constant. Let's guard the generated instrumentation code + like: + + if (len != 0) + { + //asan instrumentation code goes here. + } + // fallthrough instructions, starting with *ITER. */ + + g = gimple_build_cond (NE_EXPR, + len, + build_int_cst (TREE_TYPE (len), 0), + NULL_TREE, NULL_TREE); + gimple_set_location (g, loc); + + basic_block then_bb, fallthrough_bb; + insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true, + &then_bb, &fallthrough_bb); + /* Note that fallthrough_bb starts with the statement that was + pointed to by ITER. */ + + /* The 'then block' of the 'if (len != 0)' condition is where + we'll generate the asan instrumentation code now. */ + gsi = gsi_last_bb (then_bb); + } + + /* Get an iterator on the point where we can add the condition + statement for the instrumentation. */ + basic_block then_bb, else_bb; + gsi = create_cond_insert_point (&gsi, /*before_p*/false, + /*then_more_likely_p=*/false, + /*create_then_fallthru_edge=*/false, + &then_bb, + &else_bb); + + g = gimple_build_assign_with_ops (NOP_EXPR, + make_ssa_name (pointer_sized_int_node, + NULL), + base, NULL_TREE); + gimple_set_location (g, loc); + gsi_insert_before (&gsi, g, GSI_NEW_STMT); + tree base_addr = gimple_assign_lhs (g); + + tree t = NULL_TREE; + if (real_size_in_bytes >= 8) + { + tree shadow = build_shadow_mem_access (&gsi, loc, base_addr, + shadow_ptr_type); + t = shadow; + } + else + { + /* Slow path for 1, 2 and 4 byte accesses. */ + + if (!start_instrumented) + { + /* Test (shadow != 0) + & (((base_addr & 7) + (real_size_in_bytes - 1)) >= shadow). */ + tree shadow = build_shadow_mem_access (&gsi, loc, base_addr, + shadow_ptr_type); + gimple shadow_test = build_assign (NE_EXPR, shadow, 0); + gimple_seq seq = NULL; + gimple_seq_add_stmt (&seq, shadow_test); + gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7)); + gimple_seq_add_stmt (&seq, build_type_cast (shadow_type, + gimple_seq_last (seq))); + if (real_size_in_bytes > 1) + gimple_seq_add_stmt (&seq, + build_assign (PLUS_EXPR, gimple_seq_last (seq), + real_size_in_bytes - 1)); + gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, + gimple_seq_last (seq), + shadow)); + gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test, + gimple_seq_last (seq))); + t = gimple_assign_lhs (gimple_seq_last (seq)); + gimple_seq_set_location (seq, loc); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + } + + /* For non-constant, misaligned or otherwise weird access sizes, + check first and last byte.
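+ Each byte is checked like the scalar slow path above: its shadow byte + must be zero, or the byte's offset within its 8-byte granule must stay + below the shadow value.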
*/ + if (size_in_bytes == -1 && !end_instrumented) + { + g = gimple_build_assign_with_ops (MINUS_EXPR, + make_ssa_name (pointer_sized_int_node, NULL), + len, + build_int_cst (pointer_sized_int_node, 1)); + gimple_set_location (g, loc); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + tree last = gimple_assign_lhs (g); + g = gimple_build_assign_with_ops (PLUS_EXPR, + make_ssa_name (pointer_sized_int_node, NULL), + base_addr, + last); + gimple_set_location (g, loc); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + tree base_end_addr = gimple_assign_lhs (g); + + tree shadow = build_shadow_mem_access (&gsi, loc, base_end_addr, + shadow_ptr_type); + gimple shadow_test = build_assign (NE_EXPR, shadow, 0); + gimple_seq seq = NULL; + gimple_seq_add_stmt (&seq, shadow_test); + gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, + base_end_addr, 7)); + gimple_seq_add_stmt (&seq, build_type_cast (shadow_type, + gimple_seq_last (seq))); + gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, + gimple_seq_last (seq), + shadow)); + gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test, + gimple_seq_last (seq))); + if (!start_instrumented) + gimple_seq_add_stmt (&seq, build_assign (BIT_IOR_EXPR, t, + gimple_seq_last (seq))); + t = gimple_assign_lhs (gimple_seq_last (seq)); + gimple_seq_set_location (seq, loc); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + } + } + + g = gimple_build_cond (NE_EXPR, t, build_int_cst (TREE_TYPE (t), 0), + NULL_TREE, NULL_TREE); + gimple_set_location (g, loc); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + + /* Generate call to the run-time library (e.g. __asan_report_load8). */ + gsi = gsi_start_bb (then_bb); + int nargs; + tree fun = report_error_func (is_store, size_in_bytes, &nargs); + g = gimple_build_call (fun, nargs, base_addr, len); + gimple_set_location (g, loc); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + + gsi_remove (iter, true); + *iter = gsi_start_bb (else_bb); + + return true; +} + /* Instrument the current function. 
*/ static unsigned int @@ -2585,25 +2762,55 @@ execute_sanopt (void) { basic_block bb; + int asan_num_accesses = 0; + if (flag_sanitize & SANITIZE_ADDRESS) + { + gimple_stmt_iterator gsi; + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) && gimple_call_internal_p (stmt) + && gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK) + ++asan_num_accesses; + } + } + + bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX + && (flag_sanitize & SANITIZE_KERNEL_ADDRESS) + && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD; + FOR_EACH_BB_FN (bb, cfun) { gimple_stmt_iterator gsi; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) { gimple stmt = gsi_stmt (gsi); + bool no_next = false; if (!is_gimple_call (stmt)) - continue; + { + gsi_next (&gsi); + continue; + } if (gimple_call_internal_p (stmt)) - switch (gimple_call_internal_fn (stmt)) - { - case IFN_UBSAN_NULL: - ubsan_expand_null_ifn (gsi); - break; - default: - break; - } + { + enum internal_fn ifn = gimple_call_internal_fn (stmt); + switch (ifn) + { + case IFN_UBSAN_NULL: + ubsan_expand_null_ifn (gsi); + break; + case IFN_ASAN_CHECK: + { + no_next = asan_expand_check_ifn (&gsi, use_calls); + break; + } + default: + break; + } + } if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -2611,6 +2818,9 @@ execute_sanopt (void) print_gimple_stmt (dump_file, stmt, 0, dump_flags); fprintf (dump_file, "\n"); } + + if (!no_next) + gsi_next (&gsi); } } return 0; diff --git a/gcc-4.9/gcc/auto-profile.c b/gcc-4.9/gcc/auto-profile.c index c69c1e68c..88115840c 100644 --- a/gcc-4.9/gcc/auto-profile.c +++ b/gcc-4.9/gcc/auto-profile.c @@ -1,5 +1,5 @@ -/* Calculate branch probabilities, and basic block execution counts. - Copyright (C) 2012. Free Software Foundation, Inc. +/* Read and annotate call graph profile from the auto profile data file. + Copyright (C) 2014. Free Software Foundation, Inc. Contributed by Dehao Chen (dehao@google.com) This file is part of GCC. @@ -18,19 +18,17 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* Read and annotate call graph profile from the auto profile data - file. */ +#include "config.h" +#include "system.h" #include <string.h> #include <map> -#include <vector> #include <set> -#include "config.h" -#include "system.h" #include "coretypes.h" #include "tree.h" #include "flags.h" +#include "vec.h" #include "basic-block.h" #include "diagnostic-core.h" #include "gcov-io.h" @@ -73,26 +71,34 @@ along with GCC; see the file COPYING3. If not see Phase 1: Read profile from the profile data file. The following info is read from the profile datafile: - * string_table: a map between function name and its index. - * autofdo_source_profile: a map from function_instance name to - function_instance. This is represented as a forest of - function_instances. - * autofdo_module_profile: a map from module name to its - compilation/aux-module info. - * WorkingSet: a histogram of how many instructions are covered for a - given percentage of total cycles. - - Phase 2: Early inline. + * string_table: a map between function name and its index. + * autofdo_source_profile: a map from function_instance name to + function_instance. This is represented as a forest of + function_instances. 
+     * WorkingSet: a histogram of how many instructions are covered for a
+       given percentage of total cycles. This describes binary-level
+       information (not source level). This info is used to help decide
+       if we want aggressive optimizations that could increase code
+       footprint (e.g. loop unrolling, etc.)
+     A function instance is an instance of function that could either be a
+     standalone symbol, or a clone of a function that is inlined into another
+     function.
+
+   Phase 2: Early inline + value profile transformation.
     Early inline uses autofdo_source_profile to find if a callsite is:
-       * inlined in the profiled binary.
-       * callee body is hot in the profiling run.
+        * inlined in the profiled binary.
+        * callee body is hot in the profiling run.
     If both conditions are satisfied, early inline will inline the callsite
     regardless of the code growth.
+     Phase 2 is an iterative process. During each iteration, we also check
+     if an indirect callsite is promoted and inlined in the profiling run.
+     If so, vpt forcibly promotes it, and in the next iteration einline
+     inlines the promoted callsite.

   Phase 3: Annotate control flow graph.
     AutoFDO uses a separate pass to:
-       * Annotate basic block count
-       * Estimate branch probability
+        * Annotate basic block count
+        * Estimate branch probability

   After the above 3 phases, all profile is readily annotated on the GCC IR.
   AutoFDO tries to reuse all FDO infrastructure as much as possible to make
@@ -102,16 +108,17 @@ along with GCC; see the file COPYING3. If not see

 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"

-namespace autofdo {
+namespace autofdo
+{

 /* Represent a source location: (function_decl, lineno). */
 typedef std::pair<tree, unsigned> decl_lineno;

 /* Represent an inline stack. vector[0] is the leaf node. */
-typedef std::vector<decl_lineno> inline_stack;
+typedef auto_vec<decl_lineno> inline_stack;

 /* String array that stores function names. */
-typedef std::vector<const char *> string_vector;
+typedef auto_vec<char *> string_vector;

 /* Map from function name's index in string_table to target's
    execution count. */
@@ -130,7 +137,7 @@ struct count_info
   /* Map from indirect call target to its sample count. */
   icall_target_map targets;

-  /* Whether this inline stack is already used in annotation.
+  /* Whether this inline stack is already used in annotation.

      Each inline stack should only be used to annotate IR once.
      This will be enforced when instruction-level discriminator
@@ -141,14 +148,20 @@ struct count_info
 /* operator< for "const char *". */
 struct string_compare
 {
-  bool operator() (const char *a, const char *b) const
-  { return strcmp (a, b) < 0; }
+  bool operator()(const char *a, const char *b) const
+  {
+    return strcmp (a, b) < 0;
+  }
 };

 /* Store a string array, indexed by string position in the array. */
-class string_table {
+class string_table
+{
 public:
-  static string_table *create ();
+  string_table ()
+  {}
+
+  ~string_table ();

   /* For a given string, returns its index. */
   int get_index (const char *name) const;
@@ -159,10 +172,10 @@ public:
   /* For a given index, returns the string. */
   const char *get_name (int index) const;

-private:
-  string_table () {}
+  /* Read profile, return TRUE on success. */
   bool read ();

+private:
   typedef std::map<const char *, unsigned, string_compare> string_index_map;
   string_vector vector_;
   string_index_map map_;
@@ -170,34 +183,47 @@ private:

 /* Profile of a function instance:
    1. total_count of the function.
-   2.
head_count of the function (only valid when function is a top-level - function_instance, i.e. it is the original copy instead of the - inlined copy). - 3. map from source location (decl_lineno) of the inlined callsite to - profile (count_info). + 2. head_count (entry basic block count) of the function (only valid when + function is a top-level function_instance, i.e. it is the original copy + instead of the inlined copy). + 3. map from source location (decl_lineno) to profile (count_info). 4. map from callsite to callee function_instance. */ -class function_instance { +class function_instance +{ public: - typedef std::vector<function_instance *> function_instance_stack; + typedef auto_vec<function_instance *> function_instance_stack; /* Read the profile and return a function_instance with head count as HEAD_COUNT. Recursively read callsites to create nested function_instances too. STACK is used to track the recursive creation process. */ - static function_instance *read_function_instance ( - function_instance_stack *stack, gcov_type head_count); + static function_instance * + read_function_instance (function_instance_stack *stack, + gcov_type head_count); /* Recursively deallocate all callsites (nested function_instances). */ ~function_instance (); /* Accessors. */ - int name () const { return name_; } - gcov_type total_count () const { return total_count_; } - gcov_type head_count () const { return head_count_; } + int + name () const + { + return name_; + } + gcov_type + total_count () const + { + return total_count_; + } + gcov_type + head_count () const + { + return head_count_; + } - /* Recursively traverse STACK starting from LEVEL to find the corresponding - function_instance. */ - function_instance *get_function_instance (const inline_stack &stack, - unsigned level); + /* Traverse callsites of the current function_instance to find one at the + location of LINENO. */ + function_instance *get_function_instance_by_decl (unsigned lineno, + tree decl) const; /* Store the profile info for LOC in INFO. Return TRUE if profile info is found. */ @@ -214,12 +240,17 @@ public: void mark_annotated (location_t loc); private: + /* Callsite, represented as (decl_lineno, callee_function_name_index). */ + typedef std::pair<unsigned, unsigned> callsite; + + /* Map from callsite to callee function_instance. */ + typedef std::map<callsite, function_instance *> callsite_map; + function_instance (unsigned name, gcov_type head_count) - : name_(name), total_count_(0), head_count_(head_count) {} + : name_ (name), total_count_ (0), head_count_ (head_count) + { + } - /* Map from callsite decl_lineno (lineno in higher 16 bits, discriminator - in lower 16 bits) to callee function_instance. */ - typedef std::map<unsigned, function_instance *> callsite_map; /* Map from source location (decl_lineno) to profile (count_info). */ typedef std::map<unsigned, count_info> position_count_map; @@ -240,16 +271,19 @@ private: }; /* Profile for all functions. 
*/ -class autofdo_source_profile { +class autofdo_source_profile +{ public: - static autofdo_source_profile *create () - { - autofdo_source_profile *map = new autofdo_source_profile (); - if (map->read ()) - return map; - delete map; - return NULL; - } + static autofdo_source_profile * + create () + { + autofdo_source_profile *map = new autofdo_source_profile (); + + if (map->read ()) + return map; + delete map; + return NULL; + } ~autofdo_source_profile (); @@ -277,8 +311,7 @@ public: private: /* Map from function_instance name index (in string_table) to function_instance. */ - typedef std::map<unsigned, function_instance *> - name_function_instance_map; + typedef std::map<unsigned, function_instance *> name_function_instance_map; autofdo_source_profile () {} @@ -287,8 +320,8 @@ private: /* Return the function_instance in the profile that correspond to the inline STACK. */ - function_instance *get_function_instance_by_inline_stack ( - const inline_stack &stack) const; + function_instance * + get_function_instance_by_inline_stack (const inline_stack &stack) const; name_function_instance_map map_; }; @@ -300,7 +333,7 @@ public: { autofdo_module_profile *map = new autofdo_module_profile (); if (map->read ()) - return map; + return map; delete map; return NULL; } @@ -332,6 +365,7 @@ private: /* Store the strings read from the profile data file. */ static string_table *afdo_string_table; + /* Store the AutoFDO source profile. */ static autofdo_source_profile *afdo_source_profile; @@ -344,9 +378,10 @@ static struct gcov_ctr_summary *afdo_profile_info; /* Helper functions. */ /* Return the original name of NAME: strip the suffix that starts - with '.' */ + with '.' Caller is responsible for freeing RET. */ -static const char *get_original_name (const char *name) +static char * +get_original_name (const char *name) { char *ret = xstrdup (name); char *find = strchr (ret, '.'); @@ -362,8 +397,9 @@ static const char *get_original_name (const char *name) static unsigned get_combined_location (location_t loc, tree decl) { + /* TODO: allow more bits for line and less bits for discriminator. */ return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16) - | get_discriminator_from_locus (loc); + | get_discriminator_from_locus (loc); } /* Return the function decl of a given lexical BLOCK. 
*/ @@ -397,23 +433,23 @@ get_inline_stack (location_t locus, inline_stack *stack) { int level = 0; for (block = BLOCK_SUPERCONTEXT (block); - block && (TREE_CODE (block) == BLOCK); - block = BLOCK_SUPERCONTEXT (block)) - { - location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); - if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) - continue; - - tree decl = get_function_decl_from_block (block); - stack->push_back (std::make_pair ( - decl, get_combined_location (locus, decl))); - locus = tmp_locus; - level++; - } + block && (TREE_CODE (block) == BLOCK); + block = BLOCK_SUPERCONTEXT (block)) + { + location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); + if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) + continue; + + tree decl = get_function_decl_from_block (block); + stack->safe_push ( + std::make_pair (decl, get_combined_location (locus, decl))); + locus = tmp_locus; + level++; + } } - stack->push_back (std::make_pair ( - current_function_decl, - get_combined_location (locus, current_function_decl))); + stack->safe_push ( + std::make_pair (current_function_decl, + get_combined_location (locus, current_function_decl))); } /* Return STMT's combined location, which is a 32bit integer in which @@ -427,12 +463,11 @@ get_relative_location_for_stmt (gimple stmt) if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) return UNKNOWN_LOCATION; - for (tree block = gimple_block (stmt); - block && (TREE_CODE (block) == BLOCK); + for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); block = BLOCK_SUPERCONTEXT (block)) if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) - return get_combined_location ( - locus, get_function_decl_from_block (block)); + return get_combined_location (locus, + get_function_decl_from_block (block)); return get_combined_location (locus, current_function_decl); } @@ -446,44 +481,50 @@ has_indirect_call (basic_block bb) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_CALL - && (gimple_call_fn (stmt) == NULL - || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) - return true; + if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) + && (gimple_call_fn (stmt) == NULL + || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) + return true; } return false; } /* Member functions for string_table. */ -string_table * -string_table::create () +/* Deconstructor. */ + +string_table::~string_table () { - string_table *map = new string_table(); - if (map->read ()) - return map; - delete map; - return NULL; + for (unsigned i = 0; i < vector_.length (); i++) + free (vector_[i]); } + +/* Return the index of a given function NAME. Return -1 if NAME is not + found in string table. */ + int string_table::get_index (const char *name) const { if (name == NULL) return -1; string_index_map::const_iterator iter = map_.find (name); - if (iter == map_.end()) + if (iter == map_.end ()) return -1; else return iter->second; } +/* Return the index of a given function DECL. Return -1 if DECL is not + found in string table. 
*/ + int string_table::get_index_by_decl (tree decl) const { - const char *name = get_original_name ( - IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); + char *name + = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); int ret = get_index (name); + free (name); if (ret != -1) return ret; ret = get_index (lang_hooks.dwarf_name (decl, 0)); @@ -495,13 +536,17 @@ string_table::get_index_by_decl (tree decl) const return -1; } +/* Return the function name of a given INDEX. */ + const char * string_table::get_name (int index) const { - gcc_assert (index > 0 && index < (int) vector_.size()); + gcc_assert (index > 0 && index < (int)vector_.length ()); return vector_[index]; } +/* Read the string table. Return TRUE if reading is successful. */ + bool string_table::read () { @@ -513,34 +558,47 @@ string_table::read () unsigned string_num = gcov_read_unsigned (); for (unsigned i = 0; i < string_num; i++) { - vector_.push_back (get_original_name (gcov_read_string ())); - map_[vector_.back()] = i; + vector_.safe_push (get_original_name (gcov_read_string ())); + map_[vector_.last ()] = i; } return true; } - /* Member functions for function_instance. */ function_instance::~function_instance () { - for (callsite_map::iterator iter = callsites.begin(); - iter != callsites.end(); ++iter) + for (callsite_map::iterator iter = callsites.begin (); + iter != callsites.end (); ++iter) delete iter->second; } -/* Recursively traverse STACK starting from LEVEL to find the corresponding - function_instance. */ +/* Traverse callsites of the current function_instance to find one at the + location of LINENO and callee name represented in DECL. */ function_instance * -function_instance::get_function_instance ( - const inline_stack &stack, unsigned level) +function_instance::get_function_instance_by_decl (unsigned lineno, + tree decl) const { - if (level == 0) - return this; - callsite_map::const_iterator ret = callsites.find (stack[level].second); - if (ret != callsites.end () && ret->second != NULL) - return ret->second->get_function_instance (stack, level - 1); + int func_name_idx = afdo_string_table->get_index_by_decl (decl); + if (func_name_idx != -1) + { + callsite_map::const_iterator ret + = callsites.find (std::make_pair (lineno, func_name_idx)); + if (ret != callsites.end ()) + return ret->second; + } + func_name_idx + = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); + if (func_name_idx != -1) + { + callsite_map::const_iterator ret + = callsites.find (std::make_pair (lineno, func_name_idx)); + if (ret != callsites.end ()) + return ret->second; + } + if (DECL_ABSTRACT_ORIGIN (decl)) + return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); else return NULL; } @@ -573,25 +631,27 @@ function_instance::mark_annotated (location_t loc) MAP, return the total count for all inlined indirect calls. */ gcov_type -function_instance::find_icall_target_map ( - gimple stmt, icall_target_map *map) const +function_instance::find_icall_target_map (gimple stmt, + icall_target_map *map) const { gcov_type ret = 0; unsigned stmt_offset = get_relative_location_for_stmt (stmt); - for (callsite_map::const_iterator iter = callsites.begin(); - iter != callsites.end(); ++iter) + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end (); ++iter) { - unsigned callee = iter->second->name(); + unsigned callee = iter->second->name (); /* Check if callsite location match the stmt. 
*/ - if (iter->first != stmt_offset) - continue; + if (iter->first.first != stmt_offset) + continue; struct cgraph_node *node = find_func_by_global_id ( - (unsigned long long) afdo_string_table->get_name (callee), true); + (unsigned long long) afdo_string_table->get_name (callee), true); if (node == NULL) - continue; + continue; if (!check_ic_target (stmt, node)) - continue; + continue; + if (!node->definition) + continue; (*map)[callee] = iter->second->total_count (); ret += iter->second->total_count (); } @@ -602,15 +662,42 @@ function_instance::find_icall_target_map ( HEAD_COUNT. Recursively read callsites to create nested function_instances too. STACK is used to track the recursive creation process. */ +/* function instance profile format: + + ENTRY_COUNT: 8 bytes + NAME_INDEX: 4 bytes + NUM_POS_COUNTS: 4 bytes + NUM_CALLSITES: 4 byte + POS_COUNT_1: + POS_1_OFFSET: 4 bytes + NUM_TARGETS: 4 bytes + COUNT: 8 bytes + TARGET_1: + VALUE_PROFILE_TYPE: 4 bytes + TARGET_IDX: 8 bytes + COUNT: 8 bytes + TARGET_2 + ... + TARGET_n + POS_COUNT_2 + ... + POS_COUNT_N + CALLSITE_1: + CALLSITE_1_OFFSET: 4 bytes + FUNCTION_INSTANCE_PROFILE (nested) + CALLSITE_2 + ... + CALLSITE_n. */ + function_instance * -function_instance::read_function_instance ( - function_instance_stack *stack, gcov_type head_count) +function_instance::read_function_instance (function_instance_stack *stack, + gcov_type head_count) { unsigned name = gcov_read_unsigned (); unsigned num_pos_counts = gcov_read_unsigned (); unsigned num_callsites = gcov_read_unsigned (); function_instance *s = new function_instance (name, head_count); - stack->push_back(s); + stack->safe_push (s); for (unsigned i = 0; i < num_pos_counts; i++) { @@ -618,22 +705,25 @@ function_instance::read_function_instance ( unsigned num_targets = gcov_read_unsigned (); gcov_type count = gcov_read_counter (); s->pos_counts[offset].count = count; - for (unsigned j = 0; j < stack->size(); j++) - (*stack)[j]->total_count_ += count; + for (unsigned j = 0; j < stack->length (); j++) + (*stack)[j]->total_count_ += count; for (unsigned j = 0; j < num_targets; j++) - { - /* Only indirect call target histogram is supported now. */ - gcov_read_unsigned (); - gcov_type target_idx = gcov_read_counter (); - s->pos_counts[offset].targets[target_idx] = - gcov_read_counter (); - } + { + /* Only indirect call target histogram is supported now. 
*/ + gcov_read_unsigned (); + gcov_type target_idx = gcov_read_counter (); + s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); + } } - for (unsigned i = 0; i < num_callsites; i++) { - unsigned offset = gcov_read_unsigned (); - s->callsites[offset] = read_function_instance (stack, 0); - } - stack->pop_back(); + for (unsigned i = 0; i < num_callsites; i++) + { + unsigned offset = gcov_read_unsigned (); + function_instance *callee_function_instance + = read_function_instance (stack, 0); + s->callsites[std::make_pair (offset, callee_function_instance->name ())] + = callee_function_instance; + } + stack->pop (); return s; } @@ -643,11 +733,11 @@ gcov_type function_instance::total_annotated_count () const { gcov_type ret = 0; - for (callsite_map::const_iterator iter = callsites.begin(); - iter != callsites.end(); ++iter) + for (callsite_map::const_iterator iter = callsites.begin (); + iter != callsites.end (); ++iter) ret += iter->second->total_annotated_count (); - for (position_count_map::const_iterator iter = pos_counts.begin(); - iter != pos_counts.end(); ++iter) + for (position_count_map::const_iterator iter = pos_counts.begin (); + iter != pos_counts.end (); ++iter) if (iter->second.annotated) ret += iter->second.count; return ret; @@ -671,13 +761,13 @@ autofdo_source_profile::write_annotated_count () const iter != map_.end (); ++iter) if (iter->second->total_count () > 0) { - char buf[1024]; - snprintf (buf, 1024, - "%s:"HOST_WIDEST_INT_PRINT_DEC":"HOST_WIDEST_INT_PRINT_DEC, - afdo_string_table->get_name (iter->first), - iter->second->total_count (), - iter->second->total_annotated_count ()); - dw2_asm_output_nstring (buf, (size_t)-1, NULL); + char buf[1024]; + snprintf (buf, 1024, + "%s:"HOST_WIDEST_INT_PRINT_DEC":"HOST_WIDEST_INT_PRINT_DEC, + afdo_string_table->get_name (iter->first), + iter->second->total_count (), + iter->second->total_annotated_count ()); + dw2_asm_output_nstring (buf, (size_t)-1, NULL); } } @@ -700,7 +790,7 @@ autofdo_source_profile::get_function_instance_by_decl (tree decl) const if (index == -1) return NULL; name_function_instance_map::const_iterator ret = map_.find (index); - return ret == map_.end() ? NULL : ret->second; + return ret == map_.end () ? NULL : ret->second; } /* Find count_info for a given gimple STMT. If found, store the count_info @@ -714,19 +804,22 @@ autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const inline_stack stack; get_inline_stack (gimple_location (stmt), &stack); - if (stack.size () == 0) + if (stack.length () == 0) return false; - const function_instance *s = get_function_instance_by_inline_stack (stack); + function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL) return false; return s->get_count_info (stack[0].second, info); } +/* Mark LOC as annotated. */ + void -autofdo_source_profile::mark_annotated (location_t loc) { +autofdo_source_profile::mark_annotated (location_t loc) +{ inline_stack stack; get_inline_stack (loc, &stack); - if (stack.size () == 0) + if (stack.length () == 0) return; function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL) @@ -738,8 +831,8 @@ autofdo_source_profile::mark_annotated (location_t loc) { Return true if INFO is updated. 
*/ bool -autofdo_source_profile::update_inlined_ind_target ( - gimple stmt, count_info *info) +autofdo_source_profile::update_inlined_ind_target (gimple stmt, + count_info *info) { if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) return false; @@ -747,8 +840,8 @@ autofdo_source_profile::update_inlined_ind_target ( count_info old_info; get_count_info (stmt, &old_info); gcov_type total = 0; - for (icall_target_map::const_iterator iter = old_info.targets.begin(); - iter != old_info.targets.end(); ++iter) + for (icall_target_map::const_iterator iter = old_info.targets.begin (); + iter != old_info.targets.end (); ++iter) total += iter->second; /* Program behavior changed, original promoted (and inlined) target is not @@ -758,21 +851,21 @@ autofdo_source_profile::update_inlined_ind_target ( count of the unpromoted targets (stored in old_info). If it is no less than half of the callsite count (stored in INFO), the original promoted target is considered not hot any more. */ - if (total >= info->count * 0.5) + if (total >= info->count / 2) return false; inline_stack stack; get_inline_stack (gimple_location (stmt), &stack); - if (stack.size () == 0) + if (stack.length () == 0) return false; - const function_instance *s = get_function_instance_by_inline_stack (stack); + function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL) return false; icall_target_map map; if (s->find_icall_target_map (stmt, &map) == 0) return false; - for (icall_target_map::const_iterator iter = map.begin(); - iter != map.end(); ++iter) + for (icall_target_map::const_iterator iter = map.begin (); + iter != map.end (); ++iter) info->targets[iter->first] = iter->second; return true; } @@ -784,10 +877,10 @@ autofdo_source_profile::get_callsite_total_count ( struct cgraph_edge *edge) const { inline_stack stack; - stack.push_back (std::make_pair(edge->callee->decl, 0)); + stack.safe_push (std::make_pair (edge->callee->decl, 0)); get_inline_stack (gimple_location (edge->call_stmt), &stack); - const function_instance *s = get_function_instance_by_inline_stack (stack); + function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL) return 0; else @@ -796,6 +889,16 @@ autofdo_source_profile::get_callsite_total_count ( /* Read AutoFDO profile and returns TRUE on success. */ +/* source profile format: + + GCOV_TAG_AFDO_FUNCTION: 4 bytes + LENGTH: 4 bytes + NUM_FUNCTIONS: 4 bytes + FUNCTION_INSTANCE_1 + FUNCTION_INSTANCE_2 + ... + FUNCTION_INSTANCE_N. */ + bool autofdo_source_profile::read () { @@ -815,7 +918,7 @@ autofdo_source_profile::read () { function_instance::function_instance_stack stack; function_instance *s = function_instance::read_function_instance ( - &stack, gcov_read_counter ()); + &stack, gcov_read_counter ()); afdo_profile_info->sum_all += s->total_count (); map_[s->name ()] = s; } @@ -830,11 +933,18 @@ autofdo_source_profile::get_function_instance_by_inline_stack ( const inline_stack &stack) const { name_function_instance_map::const_iterator iter = map_.find ( - afdo_string_table->get_index_by_decl ( - stack[stack.size() - 1].first)); - return iter == map_.end() - ? 
NULL - : iter->second->get_function_instance (stack, stack.size() - 1); + afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); + if (iter == map_.end()) + return NULL; + function_instance *s = iter->second; + for (unsigned i = stack.length() - 1; i > 0; i--) + { + s = s->get_function_instance_by_decl ( + stack[i].second, stack[i - 1].first); + if (s == NULL) + return NULL; + } + return s; } @@ -863,16 +973,16 @@ autofdo_module_profile::read () unsigned lang = gcov_read_unsigned (); unsigned ggc_memory = gcov_read_unsigned (); for (unsigned j = 0; j < 7; j++) - { - num_array[j] = gcov_read_unsigned (); - total_num += num_array[j]; - } + { + num_array[j] = gcov_read_unsigned (); + total_num += num_array[j]; + } gcov_module_info *module = XCNEWVAR ( - gcov_module_info, - sizeof (gcov_module_info) + sizeof (char *) * total_num); + gcov_module_info, + sizeof (gcov_module_info) + sizeof (char *) * total_num); std::pair<name_target_map::iterator, bool> ret = map_.insert( - name_target_map::value_type (name, AuxInfo())); + name_target_map::value_type (name, AuxInfo())); gcc_assert (ret.second); ret.first->second.second = module; module->ident = i + 1; @@ -888,14 +998,14 @@ autofdo_module_profile::read () module->is_primary = strcmp (name, in_fnames[0]) == 0; module->flags = module->is_primary ? exported : 1; for (unsigned j = 0; j < num_array[0]; j++) - ret.first->second.first.push_back (xstrdup (gcov_read_string ())); + ret.first->second.first.safe_push (xstrdup (gcov_read_string ())); for (unsigned j = 0; j < total_num - num_array[0]; j++) - module->string_array[j] = xstrdup (gcov_read_string ()); + module->string_array[j] = xstrdup (gcov_read_string ()); } return true; } -/* Read the profile from the profile file. */ +/* Read data from profile data file. */ static void read_profile (void) @@ -913,8 +1023,8 @@ read_profile (void) gcov_read_unsigned (); /* string_table. */ - afdo_string_table = string_table::create (); - if (afdo_string_table == NULL) + afdo_string_table = new string_table (); + if (!afdo_string_table->read()) error ("Cannot read string table from %s.", auto_profile_file); /* autofdo_source_profile. */ @@ -953,7 +1063,7 @@ read_aux_modules (void) const string_vector *aux_modules = afdo_module_profile->get_aux_modules (in_fnames[0]); - unsigned num_aux_modules = aux_modules ? aux_modules->size() : 0; + unsigned num_aux_modules = aux_modules ? 
aux_modules->length() : 0;
   module_infos = XCNEWVEC (gcov_module_info *, num_aux_modules + 1);
   module_infos[0] = module;
@@ -962,55 +1072,61 @@ read_aux_modules (void)
   if (aux_modules == NULL)
     return;
   unsigned curr_module = 1, max_group = PARAM_VALUE (PARAM_MAX_LIPO_GROUP);
-  for (string_vector::const_iterator iter = aux_modules->begin();
-       iter != aux_modules->end(); ++iter)
-    {
-      gcov_module_info *aux_module = afdo_module_profile->get_module (*iter);
-      if (aux_module == module)
-        continue;
-      if (aux_module == NULL)
-        {
-          if (flag_opt_info)
-            inform (0, "aux module %s cannot be found.", *iter);
-          continue;
-        }
-      if ((aux_module->lang & GCOV_MODULE_LANG_MASK) !=
-          (module->lang & GCOV_MODULE_LANG_MASK))
-        {
-          if (flag_opt_info)
-            inform (0, "Not importing %s: source language"
-                    " different from primary module's source language", *iter);
-          continue;
-        }
-      if ((aux_module->lang & GCOV_MODULE_ASM_STMTS)
-          && flag_ripa_disallow_asm_modules)
-        {
-          if (flag_opt_info)
-            inform (0, "Not importing %s: contains "
-                    "assembler statements", *iter);
-          continue;
-        }
-      if (max_group != 0 && curr_module >= max_group)
-        {
-          if (flag_opt_info)
-            inform (0, "Not importing %s: maximum group size reached", *iter);
-          continue;
-        }
-      if (incompatible_cl_args (module, aux_module))
-        {
-          if (flag_opt_info)
-            inform (0, "Not importing %s: command-line"
-                    " arguments not compatible with primary module", *iter);
-          continue;
-        }
-      module_infos[curr_module++] = aux_module;
-      add_input_filename (*iter);
-      record_module_name (aux_module->ident, lbasename (*iter));
-    }
+  int i;
+  char *str;
+  FOR_EACH_VEC_ELT (*aux_modules, i, str)
+    {
+      gcov_module_info *aux_module = afdo_module_profile->get_module (str);
+      if (aux_module == module)
+        continue;
+      if (aux_module == NULL)
+        {
+          if (flag_opt_info)
+            inform (0, "aux module %s cannot be found.", str);
+          continue;
+        }
+      if ((aux_module->lang & GCOV_MODULE_LANG_MASK) !=
+          (module->lang & GCOV_MODULE_LANG_MASK))
+        {
+          if (flag_opt_info)
+            inform (0, "Not importing %s: source language"
+                    " different from primary module's source language", str);
+          continue;
+        }
+      if ((aux_module->lang & GCOV_MODULE_ASM_STMTS)
+          && flag_ripa_disallow_asm_modules)
+        {
+          if (flag_opt_info)
+            inform (0, "Not importing %s: contains "
+                    "assembler statements", str);
+          continue;
+        }
+      if (max_group != 0 && curr_module >= max_group)
+        {
+          if (flag_opt_info)
+            inform (0, "Not importing %s: maximum group size reached", str);
+          continue;
+        }
+      if (incompatible_cl_args (module, aux_module))
+        {
+          if (flag_opt_info)
+            inform (0, "Not importing %s: command-line"
+                    " arguments not compatible with primary module", str);
+          continue;
+        }
+      module_infos[curr_module++] = aux_module;
+      add_input_filename (str);
+      record_module_name (aux_module->ident, lbasename (str));
+    }
 }

 /* From AutoFDO profiles, find values inside STMT for which we want to measure
-   histograms for indirect-call optimization. */
+   histograms for indirect-call optimization.
+
+   This function actually serves two purposes:
+     * before annotation, we need to mark the histogram, promote and inline;
+     * after annotation, we just need to mark, and let the follow-up logic
+       decide if it needs to promote and inline.
*/ static void afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map) @@ -1018,7 +1134,7 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map) gimple stmt = gsi_stmt (*gsi); tree callee; - if (map.size() == 0 || gimple_code (stmt) != GIMPLE_CALL + if (map.size () == 0 || gimple_code (stmt) != GIMPLE_CALL || gimple_call_fndecl (stmt) != NULL_TREE) return; @@ -1027,24 +1143,24 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map) histogram_value hist = gimple_alloc_histogram_value ( cfun, HIST_TYPE_INDIR_CALL_TOPN, stmt, callee); hist->n_counters = (GCOV_ICALL_TOPN_VAL << 2) + 1; - hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); + hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); gimple_add_histogram_value (cfun, stmt, hist); gcov_type total = 0; - icall_target_map::const_iterator max_iter1 = map.end(); - icall_target_map::const_iterator max_iter2 = map.end(); + icall_target_map::const_iterator max_iter1 = map.end (); + icall_target_map::const_iterator max_iter2 = map.end (); - for (icall_target_map::const_iterator iter = map.begin(); - iter != map.end(); ++iter) + for (icall_target_map::const_iterator iter = map.begin (); + iter != map.end (); ++iter) { total += iter->second; - if (max_iter1 == map.end() || max_iter1->second < iter->second) - { - max_iter2 = max_iter1; - max_iter1 = iter; - } - else if (max_iter2 == map.end() || max_iter2->second < iter->second) - max_iter2 = iter; + if (max_iter1 == map.end () || max_iter1->second < iter->second) + { + max_iter2 = max_iter1; + max_iter1 = iter; + } + else if (max_iter2 == map.end () || max_iter2->second < iter->second) + max_iter2 = iter; } hist->hvalue.counters[0] = total; @@ -1054,7 +1170,7 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map) if (max_iter2 != map.end()) { hist->hvalue.counters[3] = (unsigned long long) - afdo_string_table->get_name (max_iter2->first); + afdo_string_table->get_name (max_iter2->first); hist->hvalue.counters[4] = max_iter2->second; } else @@ -1073,12 +1189,39 @@ afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map) afdo_indirect_call (gsi, map); } -/* For a given BB, return its execution count. Add the location of annotated - stmt to ANNOTATED. Attach value profile if a stmt is not in PROMOTED, - because we only want to promot an indirect call once. */ +typedef std::set<basic_block> bb_set; +typedef std::set<edge> edge_set; + +static bool +is_bb_annotated (const basic_block bb, const bb_set &annotated) +{ + return annotated.find (bb) != annotated.end (); +} + +static void +set_bb_annotated (basic_block bb, bb_set *annotated) +{ + annotated->insert (bb); +} -static gcov_type -afdo_get_bb_count (basic_block bb, const stmt_set &promoted) +static bool +is_edge_annotated (const edge e, const edge_set &annotated) +{ + return annotated.find (e) != annotated.end (); +} + +static void +set_edge_annotated (edge e, edge_set *annotated) +{ + annotated->insert (e); +} + +/* For a given BB, set its execution count. Attach value profile if a stmt + is not in PROMOTED, because we only want to promot an indirect call once. + Return TRUE if BB is annotated. 
*/ + +static bool +afdo_set_bb_count (basic_block bb, const stmt_set &promoted) { gimple_stmt_iterator gsi; edge e; @@ -1090,22 +1233,23 @@ afdo_get_bb_count (basic_block bb, const stmt_set &promoted) { count_info info; gimple stmt = gsi_stmt (gsi); - if (stmt->code == GIMPLE_DEBUG) - continue; + if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) + continue; if (afdo_source_profile->get_count_info (stmt, &info)) - { - if (info.annotated) - continue; - if (info.count > max_count) - max_count = info.count; - has_annotated = true; - if (info.targets.size() > 0 && promoted.find (stmt) == promoted.end ()) - afdo_vpt (&gsi, info.targets); - } + { + if (info.annotated) + continue; + if (info.count > max_count) + max_count = info.count; + has_annotated = true; + if (info.targets.size () > 0 + && promoted.find (stmt) == promoted.end ()) + afdo_vpt (&gsi, info.targets); + } } if (!has_annotated) - return 0; + return false; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); @@ -1114,13 +1258,13 @@ afdo_get_bb_count (basic_block bb, const stmt_set &promoted) gimple phi = gsi_stmt (gsi); size_t i; for (i = 0; i < gimple_phi_num_args (phi); i++) - afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); + afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); } FOR_EACH_EDGE (e, ei, bb->succs) afdo_source_profile->mark_annotated (e->goto_locus); - bb->flags |= BB_ANNOTATED; - return max_count; + bb->count = max_count; + return true; } /* BB1 and BB2 are in an equivalent class iff: @@ -1129,104 +1273,104 @@ afdo_get_bb_count (basic_block bb, const stmt_set &promoted) 3. BB1 and BB2 are in the same loop nest. This function finds the equivalent class for each basic block, and stores a pointer to the first BB in its equivalent class. Meanwhile, - set bb counts for the same equivalent class to be idenical. */ + set bb counts for the same equivalent class to be idenical. Update + ANNOTATED_BB for the first BB in its equivalent class. 
*/ static void -afdo_find_equiv_class (void) +afdo_find_equiv_class (bb_set *annotated_bb) { basic_block bb; FOR_ALL_BB_FN (bb, cfun) - bb->aux = NULL; + bb->aux = NULL; FOR_ALL_BB_FN (bb, cfun) - { - vec<basic_block> dom_bbs; - basic_block bb1; - int i; - - if (bb->aux != NULL) - continue; - bb->aux = bb; - dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS, bb); - FOR_EACH_VEC_ELT (dom_bbs, i, bb1) - if (bb1->aux == NULL - && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) - && bb1->loop_father == bb->loop_father) - { - bb1->aux = bb; - if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) - { - bb->count = MAX (bb->count, bb1->count); - bb->flags |= BB_ANNOTATED; - } - } - dom_bbs = get_all_dominated_blocks (CDI_POST_DOMINATORS, bb); - FOR_EACH_VEC_ELT (dom_bbs, i, bb1) - if (bb1->aux == NULL - && dominated_by_p (CDI_DOMINATORS, bb, bb1) - && bb1->loop_father == bb->loop_father) - { - bb1->aux = bb; - if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) - { - bb->count = MAX (bb->count, bb1->count); - bb->flags |= BB_ANNOTATED; - } - } - } + { + vec<basic_block> dom_bbs; + basic_block bb1; + int i; + + if (bb->aux != NULL) + continue; + bb->aux = bb; + dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS, bb); + FOR_EACH_VEC_ELT (dom_bbs, i, bb1) + if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) + { + bb->count = MAX (bb->count, bb1->count); + set_bb_annotated (bb, annotated_bb); + } + } + dom_bbs = get_all_dominated_blocks (CDI_POST_DOMINATORS, bb); + FOR_EACH_VEC_ELT (dom_bbs, i, bb1) + if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) + { + bb->count = MAX (bb->count, bb1->count); + set_bb_annotated (bb, annotated_bb); + } + } + } } /* If a basic block's count is known, and only one of its in/out edges' count - is unknown, its count can be calculated. - Meanwhile, if all of the in/out edges' counts are known, then the basic - block's unknown count can also be calculated. + is unknown, its count can be calculated. Meanwhile, if all of the in/out + edges' counts are known, then the basic block's unknown count can also be + calculated. IS_SUCC is true if out edges of a basic blocks are examined. + Update ANNOTATED_BB and ANNOTATED_EDGE accordingly. Return TRUE if any basic block/edge count is changed. */ static bool -afdo_propagate_edge (bool is_succ) +afdo_propagate_edge (bool is_succ, bb_set *annotated_bb, + edge_set *annotated_edge) { basic_block bb; bool changed = false; FOR_EACH_BB_FN (bb, cfun) - { - edge e, unknown_edge = NULL; - edge_iterator ei; - int num_unknown_edge = 0; - gcov_type total_known_count = 0; - - FOR_EACH_EDGE (e, ei, is_succ ? 
bb->succs : bb->preds) - if ((e->flags & EDGE_ANNOTATED) == 0) - num_unknown_edge ++, unknown_edge = e; - else - total_known_count += e->count; - - if (num_unknown_edge == 0) - { - if (total_known_count > bb->count) - { - bb->count = total_known_count; - changed = true; - } - if ((bb->flags & BB_ANNOTATED) == 0) - { - bb->flags |= BB_ANNOTATED; - changed = true; - } - } - else if (num_unknown_edge == 1 - && (bb->flags & BB_ANNOTATED) != 0) - { - if (bb->count >= total_known_count) - unknown_edge->count = bb->count - total_known_count; - else - unknown_edge->count = 0; - unknown_edge->flags |= EDGE_ANNOTATED; - changed = true; - } - } + { + edge e, unknown_edge = NULL; + edge_iterator ei; + int num_unknown_edge = 0; + gcov_type total_known_count = 0; + + FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) + if (!is_edge_annotated (e, *annotated_edge)) + num_unknown_edge++, unknown_edge = e; + else + total_known_count += e->count; + + if (num_unknown_edge == 0) + { + if (total_known_count > bb->count) + { + bb->count = total_known_count; + changed = true; + } + if (!is_bb_annotated (bb, *annotated_bb)) + { + set_bb_annotated (bb, annotated_bb); + changed = true; + } + } + else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) + { + if (bb->count >= total_known_count) + unknown_edge->count = bb->count - total_known_count; + else + unknown_edge->count = 0; + set_edge_annotated (unknown_edge, annotated_edge); + changed = true; + } + } return changed; } @@ -1260,95 +1404,103 @@ afdo_propagate_edge (bool is_succ) goto BB3 In this case, we need to propagate through PHI to determine the edge - count of BB1->BB.t1, BB.t1->BB.t2. */ + count of BB1->BB.t1, BB.t1->BB.t2. + Update ANNOTATED_EDGE accordingly. */ static void -afdo_propagate_circuit (void) +afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge) { basic_block bb; FOR_ALL_BB_FN (bb, cfun) + { + gimple phi_stmt; + tree cmp_rhs, cmp_lhs; + gimple cmp_stmt = last_stmt (bb); + edge e; + edge_iterator ei; + + if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) + continue; + cmp_rhs = gimple_cond_rhs (cmp_stmt); + cmp_lhs = gimple_cond_lhs (cmp_stmt); + if (!TREE_CONSTANT (cmp_rhs) + || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) + continue; + if (TREE_CODE (cmp_lhs) != SSA_NAME) + continue; + if (!is_bb_annotated (bb, annotated_bb)) + continue; + phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs); + while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN + && gimple_assign_single_p (phi_stmt) + && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME) + phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt)); + if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI) + continue; + FOR_EACH_EDGE (e, ei, bb->succs) { - gimple phi_stmt; - tree cmp_rhs, cmp_lhs; - gimple cmp_stmt = last_stmt (bb); - edge e; - edge_iterator ei; - - if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) - continue; - cmp_rhs = gimple_cond_rhs (cmp_stmt); - cmp_lhs = gimple_cond_lhs (cmp_stmt); - if (!TREE_CONSTANT (cmp_rhs) - || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) - continue; - if (TREE_CODE (cmp_lhs) != SSA_NAME) - continue; - if ((bb->flags & BB_ANNOTATED) == 0) - continue; - phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs); - while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN - && gimple_assign_single_p (phi_stmt) - && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME) - phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt)); - if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI) - 
continue; - FOR_EACH_EDGE (e, ei, bb->succs) - { - unsigned i, total = 0; - edge only_one; - bool check_value_one = (((integer_onep (cmp_rhs)) - ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) - ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); - if ((e->flags & EDGE_ANNOTATED) == 0) - continue; - for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) - { - tree val = gimple_phi_arg_def (phi_stmt, i); - edge ep = gimple_phi_arg_edge (phi_stmt, i); - - if (!TREE_CONSTANT (val) || !(integer_zerop (val) - || integer_onep (val))) - continue; - if (check_value_one ^ integer_onep (val)) - continue; - total++; - only_one = ep; - } - if (total == 1 && (only_one->flags & EDGE_ANNOTATED) == 0) - { - only_one->count = e->count; - only_one->flags |= EDGE_ANNOTATED; - } - } + unsigned i, total = 0; + edge only_one; + bool check_value_one = (((integer_onep (cmp_rhs)) + ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) + ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); + if (!is_edge_annotated (e, *annotated_edge)) + continue; + for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) + { + tree val = gimple_phi_arg_def (phi_stmt, i); + edge ep = gimple_phi_arg_edge (phi_stmt, i); + + if (!TREE_CONSTANT (val) + || !(integer_zerop (val) || integer_onep (val))) + continue; + if (check_value_one ^ integer_onep (val)) + continue; + total++; + only_one = ep; + if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge)) + { + ep->probability = 0; + ep->count = 0; + set_edge_annotated (ep, annotated_edge); + } + } + if (total == 1 && !is_edge_annotated (only_one, *annotated_edge)) + { + only_one->probability = e->probability; + only_one->count = e->count; + set_edge_annotated (only_one, annotated_edge); + } } + } } /* Propagate the basic block count and edge count on the control flow graph. We do the propagation iteratively until stablize. */ static void -afdo_propagate (void) +afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge) { basic_block bb; bool changed = true; int i = 0; FOR_ALL_BB_FN (bb, cfun) - { - bb->count = ((basic_block) bb->aux)->count; - if ((((basic_block) bb->aux)->flags & BB_ANNOTATED) != 0) - bb->flags |= BB_ANNOTATED; - } + { + bb->count = ((basic_block)bb->aux)->count; + if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb)) + set_bb_annotated (bb, annotated_bb); + } while (changed && i++ < PARAM_VALUE (PARAM_AUTOFDO_MAX_PROPAGATE_ITERATIONS)) { changed = false; - if (afdo_propagate_edge (true)) - changed = true; - if (afdo_propagate_edge (false)) - changed = true; - afdo_propagate_circuit (); + if (afdo_propagate_edge (true, annotated_bb, annotated_edge)) + changed = true; + if (afdo_propagate_edge (false, annotated_bb, annotated_edge)) + changed = true; + afdo_propagate_circuit (*annotated_bb, annotated_edge); } } @@ -1379,7 +1531,7 @@ get_locus_information (location_t locus, locus_information_t* li) { inline_stack stack; get_inline_stack (locus, &stack); - if (stack.empty ()) + if (stack.is_empty ()) return false; tree function_decl = stack[0].first; @@ -1393,7 +1545,7 @@ get_locus_information (location_t locus, locus_information_t* li) { LOCATION_LINE (DECL_SOURCE_LOCATION (function_decl)); function *f = DECL_STRUCT_FUNCTION (function_decl); unsigned function_length = f? 
LOCATION_LINE (f->function_end_locus) - - function_lineno : 0; + function_lineno : 0; unsigned branch_offset = li->lineno - function_lineno; int discriminator = get_discriminator_from_locus (locus); @@ -1432,40 +1584,40 @@ record_branch_prediction_results (edge e, int probability) { gimple last = NULL; for (gsi = gsi_last_nondebug_bb (bb); - !gsi_end_p (gsi); - gsi_prev_nondebug (&gsi)) - { - last = gsi_stmt (gsi); + !gsi_end_p (gsi); + gsi_prev_nondebug (&gsi)) + { + last = gsi_stmt (gsi); - if (gimple_has_location (last)) - break; - } + if (gimple_has_location (last)) + break; + } struct locus_information_t li; bool annotated; if (e->flags & EDGE_PREDICTED_BY_EXPECT) - annotated = true; + annotated = true; else - annotated = false; + annotated = false; if (get_locus_information (e->goto_locus, &li)) - ; /* Intentionally do nothing. */ + ; /* Intentionally do nothing. */ else if (get_locus_information (gimple_location (last), &li)) - ; /* Intentionally do nothing. */ + ; /* Intentionally do nothing. */ else - return; /* Can't get locus information, return. */ + return; /* Can't get locus information, return. */ switch_to_section (get_section ( - ".gnu.switches.text.branch.annotation", - SECTION_DEBUG | SECTION_MERGE | - SECTION_STRINGS | (SECTION_ENTSIZE & 1), - NULL)); + ".gnu.switches.text.branch.annotation", + SECTION_DEBUG | SECTION_MERGE | + SECTION_STRINGS | (SECTION_ENTSIZE & 1), + NULL)); char buf[1024]; snprintf (buf, 1024, "%s;%u;" - HOST_WIDEST_INT_PRINT_DEC";%d;%d;%d;%s", - li.filename, li.lineno, bb->count, annotated?1:0, - probability, e->probability, li.hash); + HOST_WIDEST_INT_PRINT_DEC";%d;%d;%d;%s", + li.filename, li.lineno, bb->count, annotated?1:0, + probability, e->probability, li.hash); dw2_asm_output_nstring (buf, (size_t)-1, NULL); } } @@ -1474,14 +1626,14 @@ record_branch_prediction_results (edge e, int probability) { probabilities. 
*/ static void -afdo_calculate_branch_prob (void) +afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge) { basic_block bb; bool has_sample = false; FOR_EACH_BB_FN (bb, cfun) - if (bb->count > 0) - has_sample = true; + if (bb->count > 0) + has_sample = true; if (!has_sample) return; @@ -1490,59 +1642,55 @@ afdo_calculate_branch_prob (void) calculate_dominance_info (CDI_DOMINATORS); loop_optimizer_init (0); - afdo_find_equiv_class (); - afdo_propagate (); + afdo_find_equiv_class (annotated_bb); + afdo_propagate (annotated_bb, annotated_edge); FOR_EACH_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + int num_unknown_succ = 0; + gcov_type total_count = 0; + + FOR_EACH_EDGE (e, ei, bb->succs) { - edge e; - edge_iterator ei; - int num_unknown_succ = 0; - gcov_type total_count = 0; - - FOR_EACH_EDGE (e, ei, bb->succs) - { - if ((e->flags & EDGE_ANNOTATED) == 0) - num_unknown_succ ++; - else - total_count += e->count; - } - if (num_unknown_succ == 0 && total_count > 0) - { - bool first_edge = true; - - FOR_EACH_EDGE (e, ei, bb->succs) - { - double probability = - (double) e->count * REG_BR_PROB_BASE / total_count; - - if (first_edge && flag_check_branch_annotation) - { - record_branch_prediction_results ( - e, static_cast<int> (probability + 0.5)); - first_edge = false; - } - - e->probability = probability; - } - } + if (!is_edge_annotated (e, *annotated_edge)) + num_unknown_succ++; + else + total_count += e->count; + } + if (num_unknown_succ == 0 && total_count > 0) + { + bool first_edge = true; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + int probability = (double) e->count * REG_BR_PROB_BASE / total_count; + + if (first_edge && flag_check_branch_annotation) + { + record_branch_prediction_results (e, probability); + first_edge = false; + } + e->probability = probability; + } } + } FOR_ALL_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->succs) - { - e->count = - (double) bb->count * e->probability / REG_BR_PROB_BASE; - if (flag_check_branch_annotation) - { - e->flags &= ~EDGE_PREDICTED_BY_EXPECT; - } - } - bb->aux = NULL; + e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE; + if (flag_check_branch_annotation) + { + e->flags &= ~EDGE_PREDICTED_BY_EXPECT; + } } + bb->aux = NULL; + } loop_optimizer_finalize (); free_dominance_info (CDI_DOMINATORS); @@ -1558,49 +1706,48 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts) { basic_block bb; if (afdo_source_profile->get_function_instance_by_decl ( - current_function_decl) == NULL) + current_function_decl) == NULL) return false; bool has_vpt = false; FOR_EACH_BB_FN (bb, cfun) - { - if (!has_indirect_call (bb)) - continue; - gimple_stmt_iterator gsi; + { + if (!has_indirect_call (bb)) + continue; + gimple_stmt_iterator gsi; - gcov_type bb_count = 0; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - count_info info; - gimple stmt = gsi_stmt (gsi); - if (afdo_source_profile->get_count_info (stmt, &info)) - bb_count = MAX (bb_count, info.count); - } - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - /* IC_promotion and early_inline_2 is done in multiple iterations. - No need to promoted the stmt if its in promoted_stmts (means - it is already been promoted in the previous iterations). 
*/ - if (gimple_code (stmt) != GIMPLE_CALL - || (gimple_call_fn (stmt) != NULL - && TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL) - || promoted_stmts->find (stmt) != promoted_stmts->end ()) - continue; - - count_info info; - afdo_source_profile->get_count_info (stmt, &info); - info.count = bb_count; - if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) - { - /* Promote the indirect call and update the promoted_stmts. */ - promoted_stmts->insert (stmt); - afdo_vpt (&gsi, info.targets); - has_vpt = true; - } - } - } + gcov_type bb_count = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + count_info info; + gimple stmt = gsi_stmt (gsi); + if (afdo_source_profile->get_count_info (stmt, &info)) + bb_count = MAX (bb_count, info.count); + } + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + /* IC_promotion and early_inline_2 is done in multiple iterations. + No need to promoted the stmt if its in promoted_stmts (means + it is already been promoted in the previous iterations). */ + if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fn (stmt) == NULL + || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL + || promoted_stmts->find (stmt) != promoted_stmts->end ()) + continue; + + count_info info; + afdo_source_profile->get_count_info (stmt, &info); + info.count = bb_count; + if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) + { + /* Promote the indirect call and update the promoted_stmts. */ + promoted_stmts->insert (stmt); + afdo_vpt (&gsi, info.targets); + has_vpt = true; + } + } + } if (has_vpt && gimple_value_profile_transformations ()) { free_dominance_info (CDI_DOMINATORS); @@ -1622,9 +1769,11 @@ static void afdo_annotate_cfg (const stmt_set &promoted_stmts) { basic_block bb; - const function_instance *s = - afdo_source_profile->get_function_instance_by_decl ( - current_function_decl); + bb_set annotated_bb; + edge_set annotated_edge; + const function_instance *s + = afdo_source_profile->get_function_instance_by_decl ( + current_function_decl); if (s == NULL) return; @@ -1633,35 +1782,32 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; FOR_EACH_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + + bb->count = 0; + FOR_EACH_EDGE (e, ei, bb->succs) + e->count = 0; + + if (afdo_set_bb_count (bb, promoted_stmts)) + set_bb_annotated (bb, &annotated_bb); + if (bb->count > max_count) + max_count = bb->count; + } + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) { - edge e; - edge_iterator ei; - - bb->count = 0; - bb->flags &= (~BB_ANNOTATED); - FOR_EACH_EDGE (e, ei, bb->succs) - { - e->count = 0; - e->flags &= (~EDGE_ANNOTATED); - } - - bb->count = afdo_get_bb_count (bb, promoted_stmts); - if (bb->count > max_count) - max_count = bb->count; - } - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count > - ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) - { - ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count = - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->flags |= BB_ANNOTATED; + ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count + = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); } - if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count > - EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count + > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) { - EXIT_BLOCK_PTR_FOR_FN 
(cfun)->prev_bb->count = - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->flags |= BB_ANNOTATED; + EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count + = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); } afdo_source_profile->mark_annotated ( DECL_SOURCE_LOCATION (current_function_decl)); @@ -1669,9 +1815,9 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) afdo_source_profile->mark_annotated (cfun->function_end_locus); if (max_count > 0) { - profile_status_for_fn (cfun) = PROFILE_READ; - afdo_calculate_branch_prob (); + afdo_calculate_branch_prob (&annotated_bb, &annotated_edge); counts_to_freqs (); + profile_status_for_fn (cfun) = PROFILE_READ; } if (flag_value_profile_transformations) { @@ -1686,7 +1832,8 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts) /* Wrapper function to invoke early inliner. */ -static void early_inline () +static void +early_inline () { compute_inline_parameters (cgraph_get_node (current_function_decl), true); unsigned todo = early_inliner (); @@ -1708,77 +1855,78 @@ auto_profile (void) if (!flag_auto_profile) return 0; - profile_info = autofdo::afdo_profile_info; if (L_IPO_COMP_MODE) lipo_link_and_fixup (); init_node_map (true); + profile_info = autofdo::afdo_profile_info; FOR_EACH_FUNCTION (node) - { - if (!gimple_has_body_p (node->decl)) - continue; - - /* Don't profile functions produced for builtin stuff. */ - if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) - continue; - - push_cfun (DECL_STRUCT_FUNCTION (node->decl)); - - /* First do indirect call promotion and early inline to make the - IR match the profiled binary before actual annotation. - - This is needed because an indirect call might have been promoted - and inlined in the profiled binary. If we do not promote and - inline these indirect calls before annotation, the profile for - these promoted functions will be lost. - - e.g. foo() --indirect_call--> bar() - In profiled binary, the callsite is promoted and inlined, making - the profile look like: - - foo: { - loc_foo_1: count_1 - bar@loc_foo_2: { - loc_bar_1: count_2 - loc_bar_2: count_3 - } - } - - Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. - If we perform annotation on it, the profile inside bar@loc_foo2 - will be wasted. - - To avoid this, we promote loc_foo_2 and inline the promoted bar - function before annotation, so the profile inside bar@loc_foo2 - will be useful. */ - autofdo::stmt_set promoted_stmts; - for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) - { - if (!flag_value_profile_transformations - || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) - break; - early_inline (); - } - - early_inline (); - autofdo::afdo_annotate_cfg (promoted_stmts); - compute_function_frequency (); - - /* Local pure-const may imply need to fixup the cfg. */ - if (execute_fixup_cfg () & TODO_cleanup_cfg) - cleanup_tree_cfg (); + { + if (!gimple_has_body_p (node->decl)) + continue; + + /* Don't profile functions produced for builtin stuff. */ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) + continue; + + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + + /* First do indirect call promotion and early inline to make the + IR match the profiled binary before actual annotation. + + This is needed because an indirect call might have been promoted + and inlined in the profiled binary. 
If we do not promote and + inline these indirect calls before annotation, the profile for + these promoted functions will be lost. + + e.g. foo() --indirect_call--> bar() + In profiled binary, the callsite is promoted and inlined, making + the profile look like: + + foo: { + loc_foo_1: count_1 + bar@loc_foo_2: { + loc_bar_1: count_2 + loc_bar_2: count_3 + } + } + + Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. + If we perform annotation on it, the profile inside bar@loc_foo2 + will be wasted. + + To avoid this, we promote loc_foo_2 and inline the promoted bar + function before annotation, so the profile inside bar@loc_foo2 + will be useful. */ + autofdo::stmt_set promoted_stmts; + for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) + { + if (!flag_value_profile_transformations + || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) + break; + early_inline (); + } - free_dominance_info (CDI_DOMINATORS); - free_dominance_info (CDI_POST_DOMINATORS); - rebuild_cgraph_edges (); - pop_cfun (); - } + early_inline (); + autofdo::afdo_annotate_cfg (promoted_stmts); + compute_function_frequency (); + + /* Local pure-const may imply need to fixup the cfg. */ + if (execute_fixup_cfg () & TODO_cleanup_cfg) + cleanup_tree_cfg (); + + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + rebuild_cgraph_edges (); + compute_inline_parameters (cgraph_get_node (current_function_decl), true); + pop_cfun (); + } if (flag_auto_profile_record_coverage_in_elf) autofdo::afdo_source_profile->write_annotated_count (); return TODO_rebuild_cgraph_edges; } -} /* namespace autofdo. */ +} /* namespace autofdo. */ /* Read the profile from the profile data file. */ @@ -1788,8 +1936,8 @@ init_auto_profile (void) if (auto_profile_file == NULL) auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; - autofdo::afdo_profile_info = (struct gcov_ctr_summary *) - xcalloc (1, sizeof (struct gcov_ctr_summary)); + autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc ( + 1, sizeof (struct gcov_ctr_summary)); autofdo::afdo_profile_info->runs = 1; autofdo::afdo_profile_info->sum_max = 0; autofdo::afdo_profile_info->sum_all = 0; @@ -1817,14 +1965,14 @@ end_auto_profile (void) bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) { - gcov_type count = - autofdo::afdo_source_profile->get_callsite_total_count (edge); + gcov_type count + = autofdo::afdo_source_profile->get_callsite_total_count (edge); if (count > 0) { bool is_hot; const struct gcov_ctr_summary *saved_profile_info = profile_info; /* At earling inline stage, profile_info is not set yet. We need to - temporarily set it to afdo_profile_info to calculate hotness. */ + temporarily set it to afdo_profile_info to calculate hotness. */ profile_info = autofdo::afdo_profile_info; is_hot = maybe_hot_count_p (NULL, count); profile_info = saved_profile_info; diff --git a/gcc-4.9/gcc/c-family/ChangeLog b/gcc-4.9/gcc/c-family/ChangeLog index e3c8c8300..701d1cef7 100644 --- a/gcc-4.9/gcc/c-family/ChangeLog +++ b/gcc-4.9/gcc/c-family/ChangeLog @@ -1,3 +1,29 @@ +2015-01-20 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-06-23 Marek Polacek <polacek@redhat.com> + + PR c/61553 + * c-common.c (get_atomic_generic_size): Don't segfault if the + type doesn't have a size. + +2014-10-30 Release Manager + + * GCC 4.9.2 released. 
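For context on the PR c/61553 entry above: the crash came from taking the size of a type that has none, so TYPE_SIZE_UNIT is a null tree. A minimal reproducer might look like the following (an illustration, not part of the commit; the actual guard appears in the get_atomic_generic_size hunk further down):

    /* `struct S' is deliberately incomplete, so the compiler cannot
       compute its size; before the fix, get_atomic_generic_size
       dereferenced the size tree unconditionally.  */
    struct S;
    void f (struct S *p, struct S *q)
    {
      __atomic_load (p, q, 0);
    }

With the guard (size = type_size ? tree_to_uhwi (type_size) : 0) the size falls back to 0 and the front end emits its normal diagnostics instead of segfaulting.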
+ +2014-10-10 Jakub Jelinek <jakub@redhat.com> + + PR c/63495 + * c-common.c (min_align_of_type): Don't decrease alignment + through BIGGEST_FIELD_ALIGNMENT or ADJUST_FIELD_ALIGN if + TYPE_USER_ALIGN is set. + +2014-10-08 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement SD-6: SG10 Feature Test Recommendations + * c-cppbuiltin.c (c_cpp_builtins()): Define language feature + macros and the __has_header macro. + 2014-08-12 Igor Zamyatin <igor.zamyatin@intel.com> PR other/61962 diff --git a/gcc-4.9/gcc/c-family/c-common.c b/gcc-4.9/gcc/c-family/c-common.c index 9923928fe..41f81221f 100644 --- a/gcc-4.9/gcc/c-family/c-common.c +++ b/gcc-4.9/gcc/c-family/c-common.c @@ -336,6 +336,7 @@ static tree handle_mode_attribute (tree *, tree, tree, int, bool *); static tree handle_section_attribute (tree *, tree, tree, int, bool *); static tree handle_aligned_attribute (tree *, tree, tree, int, bool *); static tree handle_weak_attribute (tree *, tree, tree, int, bool *) ; +static tree handle_noplt_attribute (tree *, tree, tree, int, bool *) ; static tree handle_alias_ifunc_attribute (bool, tree *, tree, tree, bool *); static tree handle_ifunc_attribute (tree *, tree, tree, int, bool *); static tree handle_alias_attribute (tree *, tree, tree, int, bool *); @@ -673,6 +674,8 @@ const struct attribute_spec c_common_attribute_table[] = handle_aligned_attribute, false }, { "weak", 0, 0, true, false, false, handle_weak_attribute, false }, + { "noplt", 0, 0, true, false, false, + handle_noplt_attribute, false }, { "ifunc", 1, 1, true, false, false, handle_ifunc_attribute, false }, { "alias", 1, 1, true, false, false, @@ -4948,16 +4951,18 @@ min_align_of_type (tree type) { unsigned int align = TYPE_ALIGN (type); align = MIN (align, BIGGEST_ALIGNMENT); + if (!TYPE_USER_ALIGN (type)) + { #ifdef BIGGEST_FIELD_ALIGNMENT - align = MIN (align, BIGGEST_FIELD_ALIGNMENT); + align = MIN (align, BIGGEST_FIELD_ALIGNMENT); #endif - unsigned int field_align = align; + unsigned int field_align = align; #ifdef ADJUST_FIELD_ALIGN - tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, - type); - field_align = ADJUST_FIELD_ALIGN (field, field_align); + tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, type); + field_align = ADJUST_FIELD_ALIGN (field, field_align); #endif - align = MIN (align, field_align); + align = MIN (align, field_align); + } return align / BITS_PER_UNIT; } @@ -7666,6 +7671,25 @@ handle_weak_attribute (tree *node, tree name, return NULL_TREE; } +/* Handle a "noplt" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +handle_noplt_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool * ARG_UNUSED (no_add_attrs)) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, + "%qE attribute is only applicable on functions", name); + *no_add_attrs = true; + return NULL_TREE; + } + return NULL_TREE; +} + /* Handle an "alias" or "ifunc" attribute; arguments as in struct attribute_spec.handler, except that IS_ALIAS tells us whether this is an alias as opposed to ifunc attribute. */ @@ -10454,7 +10478,8 @@ get_atomic_generic_size (location_t loc, tree function, function); return 0; } - size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); + tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); + size = type_size ? 
tree_to_uhwi (type_size) : 0; if (size != size_0) { error_at (loc, "size mismatch in argument %d of %qE", x + 1, diff --git a/gcc-4.9/gcc/c-family/c-cppbuiltin.c b/gcc-4.9/gcc/c-family/c-cppbuiltin.c index 6a697f666..930ee1a0f 100644 --- a/gcc-4.9/gcc/c-family/c-cppbuiltin.c +++ b/gcc-4.9/gcc/c-family/c-cppbuiltin.c @@ -794,18 +794,66 @@ c_cpp_builtins (cpp_reader *pfile) /* For stddef.h. They require macros defined in c-common.c. */ c_stddef_cpp_builtins (); + /* Set include test macros for all C/C++ (not for just C++11 etc.) + the builtins __has_include__ and __has_include_next__ are defined + in libcpp. */ + cpp_define (pfile, "__has_include(STR)=__has_include__(STR)"); + cpp_define (pfile, "__has_include_next(STR)=__has_include_next__(STR)"); + if (c_dialect_cxx ()) { if (flag_weak && SUPPORTS_ONE_ONLY) cpp_define (pfile, "__GXX_WEAK__=1"); else cpp_define (pfile, "__GXX_WEAK__=0"); + if (warn_deprecated) cpp_define (pfile, "__DEPRECATED"); + if (flag_rtti) cpp_define (pfile, "__GXX_RTTI"); + if (cxx_dialect >= cxx11) cpp_define (pfile, "__GXX_EXPERIMENTAL_CXX0X__"); + + /* Binary literals have been allowed in g++ before C++11 + and were standardized for C++14. */ + if (!pedantic || cxx_dialect > cxx11) + cpp_define (pfile, "__cpp_binary_literals=201304"); + if (cxx_dialect >= cxx11) + { + /* Set feature test macros for C++11 */ + cpp_define (pfile, "__cpp_unicode_characters=200704"); + cpp_define (pfile, "__cpp_raw_strings=200710"); + cpp_define (pfile, "__cpp_unicode_literals=200710"); + cpp_define (pfile, "__cpp_user_defined_literals=200809"); + cpp_define (pfile, "__cpp_lambdas=200907"); + cpp_define (pfile, "__cpp_constexpr=200704"); + cpp_define (pfile, "__cpp_static_assert=200410"); + cpp_define (pfile, "__cpp_decltype=200707"); + cpp_define (pfile, "__cpp_attributes=200809"); + cpp_define (pfile, "__cpp_rvalue_reference=200610"); + cpp_define (pfile, "__cpp_variadic_templates=200704"); + cpp_define (pfile, "__cpp_alias_templates=200704"); + } + if (cxx_dialect > cxx11) + { + /* Set feature test macros for C++14 */ + cpp_define (pfile, "__cpp_return_type_deduction=201304"); + cpp_define (pfile, "__cpp_init_captures=201304"); + cpp_define (pfile, "__cpp_generic_lambdas=201304"); + //cpp_undef (pfile, "__cpp_constexpr"); + //cpp_define (pfile, "__cpp_constexpr=201304"); + cpp_define (pfile, "__cpp_decltype_auto=201304"); + //cpp_define (pfile, "__cpp_aggregate_nsdmi=201304"); + //cpp_define (pfile, "__cpp_variable_templates=201304"); + cpp_define (pfile, "__cpp_digit_separators=201309"); + cpp_define (pfile, "__cpp_attribute_deprecated=201309"); + //cpp_define (pfile, "__cpp_sized_deallocation=201309"); + /* We'll have to see where runtime arrays wind up. + Let's put it in C++14 for now. */ + cpp_define (pfile, "__cpp_runtime_arrays=201304"); + } } /* Note that we define this for C as well, so that we know if __attribute__((cleanup)) will interface with EH. 
*/ diff --git a/gcc-4.9/gcc/c-family/c-ubsan.c b/gcc-4.9/gcc/c-family/c-ubsan.c index e89ebc187..2c0d009a2 100644 --- a/gcc-4.9/gcc/c-family/c-ubsan.c +++ b/gcc-4.9/gcc/c-family/c-ubsan.c @@ -98,19 +98,19 @@ ubsan_instrument_shift (location_t loc, enum tree_code code, tree op1_utype = unsigned_type_for (type1); HOST_WIDE_INT op0_prec = TYPE_PRECISION (type0); tree uprecm1 = build_int_cst (op1_utype, op0_prec - 1); - tree precm1 = build_int_cst (type1, op0_prec - 1); t = fold_convert_loc (loc, op1_utype, op1); t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1); /* For signed x << y, in C99/C11, the following: - (unsigned) x >> (precm1 - y) + (unsigned) x >> (uprecm1 - y) if non-zero, is undefined. */ if (code == LSHIFT_EXPR && !TYPE_UNSIGNED (type0) && flag_isoc99) { - tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tree x = fold_build2 (MINUS_EXPR, unsigned_type_node, uprecm1, + fold_convert (op1_utype, op1)); tt = fold_convert_loc (loc, unsigned_type_for (type0), op0); tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); tt = fold_build2 (NE_EXPR, boolean_type_node, tt, @@ -118,13 +118,14 @@ ubsan_instrument_shift (location_t loc, enum tree_code code, } /* For signed x << y, in C++11/C++14, the following: - x < 0 || ((unsigned) x >> (precm1 - y)) + x < 0 || ((unsigned) x >> (uprecm1 - y)) if > 1, is undefined. */ if (code == LSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op0)) && (cxx_dialect == cxx11 || cxx_dialect == cxx1y)) { - tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tree x = fold_build2 (MINUS_EXPR, unsigned_type_node, uprecm1, + fold_convert (op1_utype, op1)); tt = fold_convert_loc (loc, unsigned_type_for (type0), op0); tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); tt = fold_build2 (GT_EXPR, boolean_type_node, tt, diff --git a/gcc-4.9/gcc/c/ChangeLog b/gcc-4.9/gcc/c/ChangeLog index 6fb49c62c..deb70eb72 100644 --- a/gcc-4.9/gcc/c/ChangeLog +++ b/gcc-4.9/gcc/c/ChangeLog @@ -1,3 +1,22 @@ +2014-10-30 Release Manager + + * GCC 4.9.2 released. + +2014-09-25 Thomas Schwinge <thomas@codesourcery.com> + + PR c++/63249 + * c-parser.c (c_parser_omp_variable_list): Call mark_exp_read + on low_bound and length. + +2014-09-03 Marek Polacek <polacek@redhat.com> + + PR c/62294 + * c-typeck.c (convert_arguments): Get location of a parameter. Change + error and warning calls to error_at and warning_at. Pass location of + a parameter to it. + (convert_for_assignment): Add parameter to WARN_FOR_ASSIGNMENT and + WARN_FOR_QUALIFIERS. Pass expr_loc to those. + 2014-08-22 Igor Zamyatin <igor.zamyatin@intel.com> PR other/62008 diff --git a/gcc-4.9/gcc/c/c-parser.c b/gcc-4.9/gcc/c/c-parser.c index 6ce277c9b..264c17026 100644 --- a/gcc-4.9/gcc/c/c-parser.c +++ b/gcc-4.9/gcc/c/c-parser.c @@ -9764,7 +9764,10 @@ c_parser_omp_variable_list (c_parser *parser, c_parser_consume_token (parser); if (!c_parser_next_token_is (parser, CPP_COLON)) - low_bound = c_parser_expression (parser).value; + { + low_bound = c_parser_expression (parser).value; + mark_exp_read (low_bound); + } if (c_parser_next_token_is (parser, CPP_CLOSE_SQUARE)) length = integer_one_node; else @@ -9777,7 +9780,10 @@ c_parser_omp_variable_list (c_parser *parser, break; } if (!c_parser_next_token_is (parser, CPP_CLOSE_SQUARE)) - length = c_parser_expression (parser).value; + { + length = c_parser_expression (parser).value; + mark_exp_read (length); + } } /* Look for the closing `]'. 
*/ if (!c_parser_require (parser, CPP_CLOSE_SQUARE, diff --git a/gcc-4.9/gcc/c/c-typeck.c b/gcc-4.9/gcc/c/c-typeck.c index 5838d6a72..d096ad405 100644 --- a/gcc-4.9/gcc/c/c-typeck.c +++ b/gcc-4.9/gcc/c/c-typeck.c @@ -3071,6 +3071,12 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, bool excess_precision = false; bool npc; tree parmval; + /* Some __atomic_* builtins have additional hidden argument at + position 0. */ + location_t ploc + = !arg_loc.is_empty () && values->length () == arg_loc.length () + ? expansion_point_location_if_in_system_header (arg_loc[parmnum]) + : input_location; if (type == void_type_node) { @@ -3113,7 +3119,8 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, if (type == error_mark_node || !COMPLETE_TYPE_P (type)) { - error ("type of formal parameter %d is incomplete", parmnum + 1); + error_at (ploc, "type of formal parameter %d is incomplete", + parmnum + 1); parmval = val; } else @@ -3128,34 +3135,34 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, if (INTEGRAL_TYPE_P (type) && TREE_CODE (valtype) == REAL_TYPE) - warning (0, "passing argument %d of %qE as integer " - "rather than floating due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "integer rather than floating due to " + "prototype", argnum, rname); if (INTEGRAL_TYPE_P (type) && TREE_CODE (valtype) == COMPLEX_TYPE) - warning (0, "passing argument %d of %qE as integer " - "rather than complex due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "integer rather than complex due to " + "prototype", argnum, rname); else if (TREE_CODE (type) == COMPLEX_TYPE && TREE_CODE (valtype) == REAL_TYPE) - warning (0, "passing argument %d of %qE as complex " - "rather than floating due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "complex rather than floating due to " + "prototype", argnum, rname); else if (TREE_CODE (type) == REAL_TYPE && INTEGRAL_TYPE_P (valtype)) - warning (0, "passing argument %d of %qE as floating " - "rather than integer due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "floating rather than integer due to " + "prototype", argnum, rname); else if (TREE_CODE (type) == COMPLEX_TYPE && INTEGRAL_TYPE_P (valtype)) - warning (0, "passing argument %d of %qE as complex " - "rather than integer due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "complex rather than integer due to " + "prototype", argnum, rname); else if (TREE_CODE (type) == REAL_TYPE && TREE_CODE (valtype) == COMPLEX_TYPE) - warning (0, "passing argument %d of %qE as floating " - "rather than complex due to prototype", - argnum, rname); + warning_at (ploc, 0, "passing argument %d of %qE as " + "floating rather than complex due to " + "prototype", argnum, rname); /* ??? At some point, messages should be written about conversions between complex types, but that's too messy to do now. */ @@ -3166,9 +3173,10 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, since without a prototype it would be `double'. 
*/ if (formal_prec == TYPE_PRECISION (float_type_node) && type != dfloat32_type_node) - warning (0, "passing argument %d of %qE as %<float%> " - "rather than %<double%> due to prototype", - argnum, rname); + warning_at (ploc, 0, + "passing argument %d of %qE as %<float%> " + "rather than %<double%> due to prototype", + argnum, rname); /* Warn if mismatch between argument and prototype for decimal float types. Warn of conversions with @@ -3191,9 +3199,10 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, || (type == dfloat64_type_node && (valtype != dfloat32_type_node)))) - warning (0, "passing argument %d of %qE as %qT " - "rather than %qT due to prototype", - argnum, rname, type, valtype); + warning_at (ploc, 0, + "passing argument %d of %qE as %qT " + "rather than %qT due to prototype", + argnum, rname, type, valtype); } /* Detect integer changing in width or signedness. @@ -3212,10 +3221,10 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, and the actual arg is that enum type. */ ; else if (formal_prec != TYPE_PRECISION (type1)) - warning (OPT_Wtraditional_conversion, - "passing argument %d of %qE " - "with different width due to prototype", - argnum, rname); + warning_at (ploc, OPT_Wtraditional_conversion, + "passing argument %d of %qE " + "with different width due to prototype", + argnum, rname); else if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (type1)) ; /* Don't complain if the formal parameter type @@ -3236,14 +3245,15 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, && TYPE_UNSIGNED (valtype)) ; else if (TYPE_UNSIGNED (type)) - warning (OPT_Wtraditional_conversion, - "passing argument %d of %qE " - "as unsigned due to prototype", - argnum, rname); + warning_at (ploc, OPT_Wtraditional_conversion, + "passing argument %d of %qE " + "as unsigned due to prototype", + argnum, rname); else - warning (OPT_Wtraditional_conversion, - "passing argument %d of %qE " - "as signed due to prototype", argnum, rname); + warning_at (ploc, OPT_Wtraditional_conversion, + "passing argument %d of %qE " + "as signed due to prototype", + argnum, rname); } } @@ -3252,13 +3262,7 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, if (excess_precision) val = build1 (EXCESS_PRECISION_EXPR, valtype, val); origtype = (!origtypes) ? NULL_TREE : (*origtypes)[parmnum]; - bool arg_loc_ok = !arg_loc.is_empty () - /* Some __atomic_* builtins have additional - hidden argument at position 0. */ - && values->length () == arg_loc.length (); - parmval = convert_for_assignment (loc, - arg_loc_ok ? arg_loc[parmnum] - : UNKNOWN_LOCATION, type, + parmval = convert_for_assignment (loc, ploc, type, val, origtype, ic_argpass, npc, fundecl, function, parmnum + 1); @@ -3282,10 +3286,10 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist, { /* Convert `float' to `double'. 
*/ if (warn_double_promotion && !c_inhibit_evaluation_warnings) - warning_at (arg_loc[parmnum], OPT_Wdouble_promotion, - "implicit conversion from %qT to %qT when passing " - "argument to function", - valtype, double_type_node); + warning_at (ploc, OPT_Wdouble_promotion, + "implicit conversion from %qT to %qT when passing " + "argument to function", + valtype, double_type_node); parmval = convert (double_type_node, val); } } @@ -5591,14 +5595,14 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, /* This macro is used to emit diagnostics to ensure that all format strings are complete sentences, visible to gettext and checked at compile time. */ -#define WARN_FOR_ASSIGNMENT(LOCATION, OPT, AR, AS, IN, RE) \ +#define WARN_FOR_ASSIGNMENT(LOCATION, PLOC, OPT, AR, AS, IN, RE) \ do { \ switch (errtype) \ { \ case ic_argpass: \ - if (pedwarn (LOCATION, OPT, AR, parmnum, rname)) \ + if (pedwarn (PLOC, OPT, AR, parmnum, rname)) \ inform ((fundecl && !DECL_IS_BUILTIN (fundecl)) \ - ? DECL_SOURCE_LOCATION (fundecl) : LOCATION, \ + ? DECL_SOURCE_LOCATION (fundecl) : PLOC, \ "expected %qT but argument is of type %qT", \ type, rhstype); \ break; \ @@ -5621,22 +5625,22 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, compile time. It is the same as WARN_FOR_ASSIGNMENT but with an extra parameter to enumerate qualifiers. */ -#define WARN_FOR_QUALIFIERS(LOCATION, OPT, AR, AS, IN, RE, QUALS) \ +#define WARN_FOR_QUALIFIERS(LOCATION, PLOC, OPT, AR, AS, IN, RE, QUALS) \ do { \ switch (errtype) \ { \ case ic_argpass: \ - if (pedwarn (LOCATION, OPT, AR, parmnum, rname, QUALS)) \ + if (pedwarn (PLOC, OPT, AR, parmnum, rname, QUALS)) \ inform ((fundecl && !DECL_IS_BUILTIN (fundecl)) \ - ? DECL_SOURCE_LOCATION (fundecl) : LOCATION, \ + ? DECL_SOURCE_LOCATION (fundecl) : PLOC, \ "expected %qT but argument is of type %qT", \ type, rhstype); \ break; \ case ic_assign: \ - pedwarn (LOCATION, OPT, AS, QUALS); \ + pedwarn (LOCATION, OPT, AS, QUALS); \ break; \ case ic_init: \ - pedwarn (LOCATION, OPT, IN, QUALS); \ + pedwarn (LOCATION, OPT, IN, QUALS); \ break; \ case ic_return: \ pedwarn (LOCATION, OPT, RE, QUALS); \ @@ -5688,7 +5692,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, && TREE_CODE (type) == ENUMERAL_TYPE && TYPE_MAIN_VARIANT (checktype) != TYPE_MAIN_VARIANT (type)) { - WARN_FOR_ASSIGNMENT (input_location, OPT_Wc___compat, + WARN_FOR_ASSIGNMENT (input_location, expr_loc, OPT_Wc___compat, G_("enum conversion when passing argument " "%d of %qE is invalid in C++"), G_("enum conversion in assignment is " @@ -5851,7 +5855,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, vice-versa. 
*/ if (TYPE_QUALS_NO_ADDR_SPACE (ttl) & ~TYPE_QUALS_NO_ADDR_SPACE (ttr)) - WARN_FOR_QUALIFIERS (location, 0, + WARN_FOR_QUALIFIERS (location, expr_loc, 0, G_("passing argument %d of %qE " "makes %q#v qualified function " "pointer from unqualified"), @@ -5867,7 +5871,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, } else if (TYPE_QUALS_NO_ADDR_SPACE (ttr) & ~TYPE_QUALS_NO_ADDR_SPACE (ttl)) - WARN_FOR_QUALIFIERS (location, 0, + WARN_FOR_QUALIFIERS (location, expr_loc, 0, G_("passing argument %d of %qE discards " "%qv qualifier from pointer target type"), G_("assignment discards %qv qualifier " @@ -6029,7 +6033,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, (VOID_TYPE_P (ttr) && !null_pointer_constant && TREE_CODE (ttl) == FUNCTION_TYPE))) - WARN_FOR_ASSIGNMENT (location, OPT_Wpedantic, + WARN_FOR_ASSIGNMENT (location, expr_loc, OPT_Wpedantic, G_("ISO C forbids passing argument %d of " "%qE between function pointer " "and %<void *%>"), @@ -6048,7 +6052,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, if (TYPE_QUALS_NO_ADDR_SPACE_NO_ATOMIC (ttr) & ~TYPE_QUALS_NO_ADDR_SPACE_NO_ATOMIC (ttl)) { - WARN_FOR_QUALIFIERS (location, 0, + WARN_FOR_QUALIFIERS (location, expr_loc, 0, G_("passing argument %d of %qE discards " "%qv qualifier from pointer target type"), G_("assignment discards %qv qualifier " @@ -6066,7 +6070,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, ; /* If there is a mismatch, do warn. */ else if (warn_pointer_sign) - WARN_FOR_ASSIGNMENT (location, OPT_Wpointer_sign, + WARN_FOR_ASSIGNMENT (location, expr_loc, OPT_Wpointer_sign, G_("pointer targets in passing argument " "%d of %qE differ in signedness"), G_("pointer targets in assignment " @@ -6085,7 +6089,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, where an ordinary one is wanted, but not vice-versa. */ if (TYPE_QUALS_NO_ADDR_SPACE (ttl) & ~TYPE_QUALS_NO_ADDR_SPACE (ttr)) - WARN_FOR_QUALIFIERS (location, 0, + WARN_FOR_QUALIFIERS (location, expr_loc, 0, G_("passing argument %d of %qE makes " "%q#v qualified function pointer " "from unqualified"), @@ -6101,7 +6105,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, else /* Avoid warning about the volatile ObjC EH puts on decls. */ if (!objc_ok) - WARN_FOR_ASSIGNMENT (location, 0, + WARN_FOR_ASSIGNMENT (location, expr_loc, 0, G_("passing argument %d of %qE from " "incompatible pointer type"), G_("assignment from incompatible pointer type"), @@ -6124,7 +6128,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, or one that results from arithmetic, even including a cast to integer type. 
*/ if (!null_pointer_constant) - WARN_FOR_ASSIGNMENT (location, 0, + WARN_FOR_ASSIGNMENT (location, expr_loc, 0, G_("passing argument %d of %qE makes " "pointer from integer without a cast"), G_("assignment makes pointer from integer " @@ -6138,7 +6142,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, } else if (codel == INTEGER_TYPE && coder == POINTER_TYPE) { - WARN_FOR_ASSIGNMENT (location, 0, + WARN_FOR_ASSIGNMENT (location, expr_loc, 0, G_("passing argument %d of %qE makes integer " "from pointer without a cast"), G_("assignment makes integer from pointer " diff --git a/gcc-4.9/gcc/calls.c b/gcc-4.9/gcc/calls.c index f0c92ddc0..5d7df8b63 100644 --- a/gcc-4.9/gcc/calls.c +++ b/gcc-4.9/gcc/calls.c @@ -184,6 +184,18 @@ prepare_call_address (tree fndecl, rtx funexp, rtx static_chain_value, && targetm.small_register_classes_for_mode_p (FUNCTION_MODE)) ? force_not_mem (memory_address (FUNCTION_MODE, funexp)) : memory_address (FUNCTION_MODE, funexp)); + else if (flag_pic + && fndecl + && TREE_CODE (fndecl) == FUNCTION_DECL + && (!flag_plt + || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl))) + && !targetm.binds_local_p (fndecl)) + { + /* This is done only for PIC code. There is no easy interface to force the + function address into GOT for non-PIC case. non-PIC case needs to be + handled specially by the backend. */ + funexp = force_reg (Pmode, funexp); + } else if (! sibcallp) { #ifndef NO_FUNCTION_CSE diff --git a/gcc-4.9/gcc/cfg-flags.def b/gcc-4.9/gcc/cfg-flags.def index a28ba34b5..afd239463 100644 --- a/gcc-4.9/gcc/cfg-flags.def +++ b/gcc-4.9/gcc/cfg-flags.def @@ -93,9 +93,8 @@ DEF_BASIC_BLOCK_FLAG(VISITED, 13) demand, and is available after calling compute_transaction_bits(). */ DEF_BASIC_BLOCK_FLAG(IN_TRANSACTION, 14) -/* Set on blocks that has been annotated during AutoFDO profile - attribution. */ -DEF_BASIC_BLOCK_FLAG(ANNOTATED, 15) +/* Set on blocks that fp could be used freely. */ +DEF_BASIC_BLOCK_FLAG(FP_IS_FREE, 15) #endif #ifdef DEF_EDGE_FLAG @@ -180,9 +179,6 @@ DEF_EDGE_FLAG(TM_UNINSTRUMENTED, 15) /* Abort (over) edge out of a GIMPLE_TRANSACTION statement. */ DEF_EDGE_FLAG(TM_ABORT, 16) -/* Annotated during AutoFDO profile attribution. */ -DEF_EDGE_FLAG(ANNOTATED, 17) - /* Edge probability predicted by __builtin_expect. 
*/ DEF_EDGE_FLAG(PREDICTED_BY_EXPECT, 18) diff --git a/gcc-4.9/gcc/cfgcleanup.c b/gcc-4.9/gcc/cfgcleanup.c index de307da54..53ea30028 100644 --- a/gcc-4.9/gcc/cfgcleanup.c +++ b/gcc-4.9/gcc/cfgcleanup.c @@ -1174,7 +1174,7 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx i1, rtx i2) && DECL_FUNCTION_CODE (SYMBOL_REF_DECL (symbol)) >= BUILT_IN_ASAN_REPORT_LOAD1 && DECL_FUNCTION_CODE (SYMBOL_REF_DECL (symbol)) - <= BUILT_IN_ASAN_REPORT_STORE16) + <= BUILT_IN_ASAN_STOREN) return dir_none; } } diff --git a/gcc-4.9/gcc/cfgloop.c b/gcc-4.9/gcc/cfgloop.c index 14693f1c9..e4b60f5d8 100644 --- a/gcc-4.9/gcc/cfgloop.c +++ b/gcc-4.9/gcc/cfgloop.c @@ -1875,7 +1875,8 @@ record_niter_bound (struct loop *loop, double_int i_bound, bool realistic, } if (realistic && (!loop->any_estimate - || i_bound.ult (loop->nb_iterations_estimate))) + || (!flag_auto_profile && + i_bound.ult (loop->nb_iterations_estimate)))) { loop->any_estimate = true; loop->nb_iterations_estimate = i_bound; diff --git a/gcc-4.9/gcc/cfgrtl.c b/gcc-4.9/gcc/cfgrtl.c index 2c5ca2ac1..ed1c37cd4 100644 --- a/gcc-4.9/gcc/cfgrtl.c +++ b/gcc-4.9/gcc/cfgrtl.c @@ -1453,7 +1453,24 @@ emit_barrier_after_bb (basic_block bb) gcc_assert (current_ir_type () == IR_RTL_CFGRTL || current_ir_type () == IR_RTL_CFGLAYOUT); if (current_ir_type () == IR_RTL_CFGLAYOUT) - BB_FOOTER (bb) = unlink_insn_chain (barrier, barrier); + { + rtx insn = unlink_insn_chain (barrier, barrier); + + if (BB_FOOTER (bb)) + { + rtx footer_tail = BB_FOOTER (bb); + + while (NEXT_INSN (footer_tail)) + footer_tail = NEXT_INSN (footer_tail); + if (!BARRIER_P (footer_tail)) + { + NEXT_INSN (footer_tail) = insn; + PREV_INSN (insn) = footer_tail; + } + } + else + BB_FOOTER (bb) = insn; + } } /* Like force_nonfallthru below, but additionally performs redirection @@ -1761,6 +1778,22 @@ rtl_tidy_fallthru_edge (edge e) && (any_uncondjump_p (q) || single_succ_p (b))) { + rtx label, table; + + if (tablejump_p (q, &label, &table)) + { + /* The label is likely mentioned in some instruction before + the tablejump and might not be DCEd, so turn it into + a note instead and move before the tablejump that is going to + be deleted. */ + const char *name = LABEL_NAME (label); + PUT_CODE (label, NOTE); + NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; + NOTE_DELETED_LABEL_NAME (label) = name; + reorder_insns (label, label, PREV_INSN (q)); + delete_insn (table); + } + #ifdef HAVE_cc0 /* If this was a conditional jump, we need to also delete the insn that set cc0. */ @@ -2481,7 +2514,6 @@ rtl_verify_edges (void) | EDGE_LOOP_EXIT | EDGE_CROSSING | EDGE_PRESERVE - | EDGE_ANNOTATED | EDGE_PREDICTED_BY_EXPECT)) == 0) n_branch++; diff --git a/gcc-4.9/gcc/cgraph.c b/gcc-4.9/gcc/cgraph.c index 63b7b4d60..532d09b28 100644 --- a/gcc-4.9/gcc/cgraph.c +++ b/gcc-4.9/gcc/cgraph.c @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see #include "gimple-pretty-print.h" #include "expr.h" #include "tree-dfa.h" +#include "opts.h" /* FIXME: Only for PROP_loops, but cgraph shouldn't have to know about this. */ #include "tree-pass.h" @@ -546,6 +547,7 @@ cgraph_create_node (tree decl) node->next_nested = node->origin->nested; node->origin->nested = node; } + pattern_match_function_attributes (decl); return node; } @@ -2462,6 +2464,11 @@ cgraph_can_remove_if_no_direct_calls_and_refs_p (struct cgraph_node *node) /* Extern inlines can always go, we will use the external definition. 
*/ if (DECL_EXTERNAL (node->decl)) return true; + /* Aux functions are safe to remove, but only once static promotion is + complete since they may affect promoted names if they are the context + for any static variables. */ + if (cgraph_pre_profiling_inlining_done && cgraph_is_aux_decl_external (node)) + return true; /* When function is needed, we can not remove it. */ if (node->force_output || node->used_from_other_partition) return false; diff --git a/gcc-4.9/gcc/cgraph.h b/gcc-4.9/gcc/cgraph.h index ee32bcb6b..414c2c9d0 100644 --- a/gcc-4.9/gcc/cgraph.h +++ b/gcc-4.9/gcc/cgraph.h @@ -816,6 +816,7 @@ void cgraph_unnest_node (struct cgraph_node *); enum availability cgraph_function_body_availability (struct cgraph_node *); void cgraph_add_new_function (tree, bool); +void cgraph_analyze_function (struct cgraph_node *); const char* cgraph_inline_failed_string (cgraph_inline_failed_t); cgraph_inline_failed_type_t cgraph_inline_failed_type (cgraph_inline_failed_t); diff --git a/gcc-4.9/gcc/cgraphbuild.c b/gcc-4.9/gcc/cgraphbuild.c index c6b04be5a..879a77a44 100644 --- a/gcc-4.9/gcc/cgraphbuild.c +++ b/gcc-4.9/gcc/cgraphbuild.c @@ -665,6 +665,45 @@ record_references_in_initializer (tree decl, bool only_vars) pointer_set_destroy (visited_nodes); } +typedef struct _fixup_decl_info { + tree orig_decl; + tree new_decl; +} fixup_decl_info; + +/* Check the tree at TP to see if it contains the original decl stored in + DATA and if so replace it with the new decl. If original decl is + found set WALK_SUBTREES to 0 so the subtree under TP is not traversed. + Returns the updated parent tree T or NULL if no update performed. */ + +static tree +fixup_all_refs_1 (tree *tp, int *walk_subtrees, void *data) +{ + tree t = *tp; + fixup_decl_info *info = (fixup_decl_info *) data; + + /* The original function decl is always the first tree operand. */ + if (TREE_OPERAND (t,0) == info->orig_decl) + { + TREE_OPERAND (t,0) = info->new_decl; + *walk_subtrees = 0; + return t; + } + return NULL_TREE; +} + +/* Walk the whole tree rooted at TP and invoke fixup_all_refs_1 to + replace any references to the original decl with the new decl + stored in INFO. */ + +static inline void +fixup_all_refs (tree *tp, fixup_decl_info *info) +{ + tree t = walk_tree (tp, fixup_all_refs_1, info, NULL); + /* This is invoked when we found the original decl, so we expect + to have replaced a reference. */ + gcc_assert (t != NULL_TREE); +} + /* Update any function decl references in base ADDR of operand OP to refer to the resolved node. */ @@ -674,13 +713,16 @@ fixup_ref (gimple, tree addr, tree op) addr = get_base_address (addr); if (addr && TREE_CODE (addr) == FUNCTION_DECL) { - gcc_assert (TREE_CODE (op) == ADDR_EXPR); - gcc_assert (TREE_OPERAND (op,0) == addr); struct cgraph_node *real_callee; real_callee = cgraph_lipo_get_resolved_node (addr); if (addr == real_callee->decl) return false; - TREE_OPERAND (op,0) = real_callee->decl; + /* We need to locate and update the tree operand within OP + that contains ADDR and update it to the real callee's decl. 
*/
+ fixup_decl_info info;
+ info.orig_decl = addr;
+ info.new_decl = real_callee->decl;
+ fixup_all_refs (&op, &info);
 }
 return false;
 }
diff --git a/gcc-4.9/gcc/cgraphclones.c b/gcc-4.9/gcc/cgraphclones.c
index 9fec2a04d..c23509c28 100644
--- a/gcc-4.9/gcc/cgraphclones.c
+++ b/gcc-4.9/gcc/cgraphclones.c
@@ -315,6 +315,11 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node)
 if (thunk_of->thunk.thunk_p)
 node = duplicate_thunk_for_node (thunk_of, node);
+ /* We need to copy arguments; at LTO these may not be read from the
+ function section. */
+ if (!DECL_ARGUMENTS (thunk->decl))
+ cgraph_get_body (thunk);
+
 struct cgraph_edge *cs;
 for (cs = node->callers; cs; cs = cs->next_caller)
 if (cs->caller->thunk.thunk_p
@@ -339,6 +344,22 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node)
 node->clone.args_to_skip, false);
 }
+
+ tree *link = &DECL_ARGUMENTS (new_decl);
+ int i = 0;
+ for (tree pd = DECL_ARGUMENTS (thunk->decl); pd; pd = DECL_CHAIN (pd), i++)
+ {
+ if (!node->clone.args_to_skip
+ || !bitmap_bit_p (node->clone.args_to_skip, i))
+ {
+ tree nd = copy_node (pd);
+ DECL_CONTEXT (nd) = new_decl;
+ *link = nd;
+ link = &DECL_CHAIN (nd);
+ }
+ }
+ *link = NULL_TREE;
+
 gcc_checking_assert (!DECL_STRUCT_FUNCTION (new_decl));
 gcc_checking_assert (!DECL_INITIAL (new_decl));
 gcc_checking_assert (!DECL_RESULT (new_decl));
@@ -363,6 +384,11 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node)
 cgraph_call_edge_duplication_hooks (thunk->callees, e);
 if (!expand_thunk (new_thunk, false))
 new_thunk->analyzed = true;
+ else
+ {
+ new_thunk->thunk.thunk_p = false;
+ cgraph_analyze_function (new_thunk);
+ }
 cgraph_call_node_duplication_hooks (thunk, new_thunk);
 return new_thunk;
 }
diff --git a/gcc-4.9/gcc/cgraphunit.c b/gcc-4.9/gcc/cgraphunit.c
index bff2833a1..6ddc7fb10 100644
--- a/gcc-4.9/gcc/cgraphunit.c
+++ b/gcc-4.9/gcc/cgraphunit.c
@@ -221,7 +221,6 @@ cgraph_node_set cgraph_new_nodes;
 static void expand_all_functions (void);
 static void mark_functions_to_output (void);
 static void expand_function (struct cgraph_node *);
-static void analyze_function (struct cgraph_node *);
 static void handle_alias_pairs (void);
 FILE *cgraph_dump_file;
@@ -340,7 +339,7 @@ cgraph_process_new_functions (void)
 gimple_register_cfg_hooks ();
 if (!node->analyzed)
- analyze_function (node);
+ cgraph_analyze_function (node);
 push_cfun (DECL_STRUCT_FUNCTION (fndecl));
 if (cgraph_state == CGRAPH_STATE_IPA_SSA
 && !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl)))
@@ -551,7 +550,7 @@ cgraph_add_new_function (tree fndecl, bool lowered)
 if (lowered)
 node->lowered = true;
 node->definition = true;
- analyze_function (node);
+ cgraph_analyze_function (node);
 push_cfun (DECL_STRUCT_FUNCTION (fndecl));
 gimple_register_cfg_hooks ();
 bitmap_obstack_initialize (NULL);
@@ -607,8 +606,8 @@ output_asm_statements (void)
 }
 /* Analyze the function scheduled to be output. 
*/ -static void -analyze_function (struct cgraph_node *node) +void +cgraph_analyze_function (struct cgraph_node *node) { tree decl = node->decl; location_t saved_loc = input_location; @@ -1024,7 +1023,7 @@ analyze_functions (void) } if (!cnode->analyzed) - analyze_function (cnode); + cgraph_analyze_function (cnode); for (edge = cnode->callees; edge; edge = edge->next_callee) if (edge->callee->definition) @@ -1049,7 +1048,7 @@ analyze_functions (void) if (DECL_ABSTRACT_ORIGIN (decl)) { struct cgraph_node *origin_node - = cgraph_get_node (DECL_ABSTRACT_ORIGIN (decl)); + = cgraph_get_create_node (DECL_ABSTRACT_ORIGIN (decl)); origin_node->used_as_abstract_origin = true; enqueue_node (origin_node); } @@ -1181,7 +1180,7 @@ handle_alias_pairs (void) /* We use local aliases for C++ thunks to force the tailcall to bind locally. This is a hack - to keep it working do the following (which is not strictly correct). */ - && (! TREE_CODE (target_node->decl) == FUNCTION_DECL + && (TREE_CODE (target_node->decl) != FUNCTION_DECL || ! DECL_VIRTUAL_P (target_node->decl)) && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (p->decl))) { diff --git a/gcc-4.9/gcc/collect2.c b/gcc-4.9/gcc/collect2.c index 5a2ec0b5d..ddc190487 100644 --- a/gcc-4.9/gcc/collect2.c +++ b/gcc-4.9/gcc/collect2.c @@ -1345,6 +1345,12 @@ main (int argc, char **argv) ld1--; ld2--; } + else if (strncmp (arg, "-fno-lto", 8) == 0) + { + /* Do not pass -fno-lto to the linker. */ + ld1--; + ld2--; + } #ifdef TARGET_AIX_VERSION else { diff --git a/gcc-4.9/gcc/combine.c b/gcc-4.9/gcc/combine.c index 7c0045205..adea2c161 100644 --- a/gcc-4.9/gcc/combine.c +++ b/gcc-4.9/gcc/combine.c @@ -1529,8 +1529,8 @@ setup_incoming_promotions (rtx first) uns3 = TYPE_UNSIGNED (DECL_ARG_TYPE (arg)); /* The mode and signedness of the argument as it is actually passed, - after any TARGET_PROMOTE_FUNCTION_ARGS-driven ABI promotions. */ - mode3 = promote_function_mode (DECL_ARG_TYPE (arg), mode2, &uns3, + see assign_parm_setup_reg in function.c. */ + mode3 = promote_function_mode (TREE_TYPE (arg), mode1, &uns3, TREE_TYPE (cfun->decl), 0); /* The mode of the register in which the argument is being passed. */ diff --git a/gcc-4.9/gcc/common.opt b/gcc-4.9/gcc/common.opt index 40607738a..d4e989b40 100644 --- a/gcc-4.9/gcc/common.opt +++ b/gcc-4.9/gcc/common.opt @@ -322,6 +322,9 @@ Common Alias(Wpedantic) -pedantic-errors Common Alias(pedantic-errors) +-no-pie +Driver Alias(no-pie) + -pie Driver Alias(pie) @@ -1210,10 +1213,23 @@ fdwarf2-cfi-asm Common Report Var(flag_dwarf2_cfi_asm) Init(HAVE_GAS_CFI_DIRECTIVE) Enable CFI tables via GAS assembler directives. +ftwo-level-all-subprogs +Common Report Var(flag_two_level_all_subprogs) Init(0) +When generating two-level line tables in DWARF (experimental), +add linkage names for all functions (not just inlined functions). + +ftwo-level-line-tables +Common Report Var(flag_two_level_line_tables) Init(0) +Use two-level line tables in DWARF (experimental). + fripa Common Report Var(flag_dyn_ipa) Perform Dynamic Inter-Procedural Analysis. +fripa-allow-debug +Common Report Var(flag_dyn_ipa_allow_debug) Init(0) +Allow -g enablement for -fripa -fprofile-generate compiles. 
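The common.opt stanzas in this hunk follow GCC's .opt record format: the first line names the switch, the second lists its properties, and the remaining lines are the --help text. Roughly, and only as a sketch (the generated code actually lives in the build directory's options.h/options.c, and in GCC 4.9 each flag is really a field of the global_options struct behind a #define), a record such as the fripa-allow-debug entry above expands to:

    /* From "Common Report Var(flag_dyn_ipa_allow_debug) Init(0)":
       Var(...) names the flag variable, Init(0) gives its default.  */
    extern int flag_dyn_ipa_allow_debug;
    int flag_dyn_ipa_allow_debug = 0;

Compiler code then simply tests flag_dyn_ipa_allow_debug wherever the option matters.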
+ fripa-disallow-asm-modules Common Report Var(flag_ripa_disallow_asm_modules) Don't import an auxiliary module if it contains asm statements @@ -1324,6 +1340,10 @@ Enum(fp_contract_mode) String(on) Value(FP_CONTRACT_OFF) EnumValue Enum(fp_contract_mode) String(fast) Value(FP_CONTRACT_FAST) +ffunction-attribute-list= +Common Joined RejectNegative Var(common_deferred_options) Defer +-ffunction-attribute-list=attribute:name,... Add attribute to named functions + ; Nonzero means don't put addresses of constant functions in registers. ; Used for compiling the Unix kernel, where strange substitutions are ; done on the assembly output. @@ -1715,6 +1735,10 @@ fomit-frame-pointer Common Report Var(flag_omit_frame_pointer) Optimization When possible do not generate stack frames +fshrink-wrap-frame-pointer +Common Report Var(flag_shrink_wrap_frame_pointer) Optimization +Framepointer shrinkwrapping optimization. + fopt-info Common Report Var(flag_opt_info) Optimization Enable all optimization info dumps on stderr @@ -1787,6 +1811,10 @@ fpie Common Report Var(flag_pie,1) Negative(fPIC) Generate position-independent code for executables if possible (small mode) +fplt +Common Report Var(flag_plt) Init(1) Optimization +Use PLT for PIC calls (-fno-plt: load the address from GOT at call site) + fplugin= Common Joined RejectNegative Var(common_deferred_options) Defer Specify a plugin to load @@ -1845,6 +1873,10 @@ fprofile-generate-sampling Common Var(flag_profile_generate_sampling) Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE +fprofile-generate-buildinfo= +Common RejectNegative Joined Var(flag_profile_generate_buildinfo) +-fprofile-generate-buildinfo=filename Read build info to include in gcda file from filename + femit-function-names Common Var(flag_emit_function_names) Print to stderr the mapping from module name and function id to assembler @@ -1909,6 +1941,11 @@ fregmove Common Ignore Does nothing. Preserved for backward compatibility. +flifetime-dse +Common Report Var(flag_lifetime_dse) Init(1) Optimization +Tell DSE that the storage for a C++ object is dead when the constructor +starts and when the destructor finishes. + flive-range-shrinkage Common Report Var(flag_live_range_shrinkage) Init(0) Optimization Relief of register pressure through live range shrinkage @@ -2638,6 +2675,10 @@ ggdb Common JoinedOrMissing Generate debug information in default extended format +gline-tables-only +Common RejectNegative Var(debug_line_tables_only) Init(0) +Generate DWARF line number tables and no other debug sections + gno-pubnames Common Negative(gpubnames) Var(debug_generate_pub_sections, 0) Init(-1) Don't generate DWARF pubnames and pubtypes sections. 
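The fplt record above is the option half of this change; the other half is the "noplt" attribute registered in the c-common.c hunk earlier and the calls.c hunk that forces the callee address into a register for PIC calls so it is loaded from the GOT. A usage sketch (an illustration, not from the patch):

    /* Built with -fPIC: the call to ext_fn goes through an address loaded
       from the GOT rather than through a PLT stub.  Building with -fno-plt
       requests the same treatment for every call to a non-local function.  */
    extern void ext_fn (void) __attribute__ ((noplt));

    void
    caller (void)
    {
      ext_fn ();
    }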
@@ -2845,7 +2886,7 @@ x Driver Joined Separate shared -Driver RejectNegative Negative(pie) +Driver RejectNegative Negative(no-pie) Create a shared library shared-libgcc @@ -2889,6 +2930,10 @@ Driver symbolic Driver +no-pie +Driver RejectNegative Negative(pie) +Create a position dependent executable + pie Driver RejectNegative Negative(shared) Create a position independent executable diff --git a/gcc-4.9/gcc/config.gcc b/gcc-4.9/gcc/config.gcc index 9f68a8efe..925658a82 100644 --- a/gcc-4.9/gcc/config.gcc +++ b/gcc-4.9/gcc/config.gcc @@ -791,7 +791,13 @@ case ${target} in ;; *-*-rtems*) case ${enable_threads} in - yes) thread_file='rtems' ;; + "" | yes | rtems) thread_file='rtems' ;; + posix) thread_file='posix' ;; + no) ;; + *) + echo 'Unknown thread configuration for RTEMS' + exit 1 + ;; esac tmake_file="${tmake_file} t-rtems" extra_options="${extra_options} rtems.opt" @@ -1038,12 +1044,6 @@ arm*-*-linux-*) # ARM GNU/Linux with ELF tmake_file="$tmake_file arm/t-linux-androideabi" ;; esac - # Pull in spec changes for GRTE configurations. - case ${target} in - *-grte*) - tm_file="${tm_file} linux-grte.h arm/linux-grte.h" - ;; - esac # The BPABI long long divmod functions return a 128-bit value in # registers r0-r3. Correctly modeling that requires the use of # TImode. @@ -1463,12 +1463,6 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i else tm_file="${tm_file} i386/gnu-user-common.h i386/gnu-user.h i386/linux-common.h i386/linux.h" fi - # Pull in spec changes for GRTE configurations. - case ${target} in - *-grte*) - tm_file="${tm_file} linux-grte.h" - ;; - esac ;; i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} i386/gnu-user-common.h i386/gnu-user.h knetbsd-gnu.h i386/knetbsd-gnu.h" @@ -1493,12 +1487,6 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu) extra_options="${extra_options} linux-android.opt" # Assume modern glibc default_gnu_indirect_function=yes - # Pull in spec changes for GRTE configurations. - case ${target} in - *-grte*) - tm_file="${tm_file} linux-grte.h" - ;; - esac ;; x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu64.h" @@ -2417,12 +2405,6 @@ powerpc*-*-linux*) if test x${enable_secureplt} = xyes; then tm_file="rs6000/secureplt.h ${tm_file}" fi - # Pull in spec changes for GRTE configurations. 
- case ${target} in - *-grte*) - tm_file="${tm_file} rs6000/linux-grte.h" - ;; - esac ;; powerpc-wrs-vxworks|powerpc-wrs-vxworksae) tm_file="${tm_file} elfos.h freebsd-spec.h rs6000/sysv4.h" @@ -2751,7 +2733,7 @@ sparc-*-elf*) ;; sparc-*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h sparc/sp-elf.h sparc/rtemself.h rtems.h newlib-stdint.h" - tmake_file="${tmake_file} sparc/t-sparc sparc/t-elf sparc/t-rtems" + tmake_file="${tmake_file} sparc/t-sparc sparc/t-rtems" ;; sparc-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h sparc/sysv4.h gnu-user.h linux.h glibc-stdint.h sparc/tso.h" @@ -3294,6 +3276,9 @@ if test x$with_cpu = x ; then *-leon[3-9]*) with_cpu=leon3 ;; + *-leon[3-9]v7*) + with_cpu=leon3v7 + ;; *) with_cpu="`echo ${target} | sed 's/-.*$//'`" ;; @@ -3629,20 +3614,17 @@ case "${target}" in ;; esac - case "$with_fpu" in - "" \ - | vfp | vfp3 | vfpv3 \ - | vfpv3-fp16 | vfpv3-d16 | vfpv3-d16-fp16 | vfpv3xd \ - | vfpv3xd-fp16 | neon | neon-fp16 | vfpv4 | vfpv4-d16 \ - | fpv4-sp-d16 | neon-vfpv4 | fp-arm-v8 | neon-fp-armv8 \ - | crypto-neon-fp-armv8) - # OK - ;; - *) - echo "Unknown fpu used in --with-fpu=$with_fpu" 2>&1 - exit 1 - ;; - esac + # see if it matches any of the entries in arm-fpus.def + if [ x"$with_fpu" = x ] \ + || grep "^ARM_FPU(\"$with_fpu\"," \ + ${srcdir}/config/arm/arm-fpus.def \ + > /dev/null; then + # OK + true + else + echo "Unknown fpu used in --with-fpu=$with_fpu" 1>&2 + exit 1 + fi case "$with_abi" in "" \ @@ -4111,7 +4093,7 @@ case "${target}" in case ${val} in "" | sparc | sparcv9 | sparc64 \ | v7 | cypress \ - | v8 | supersparc | hypersparc | leon | leon3 \ + | v8 | supersparc | hypersparc | leon | leon3 | leon3v7 \ | sparclite | f930 | f934 | sparclite86x \ | sparclet | tsc701 \ | v9 | ultrasparc | ultrasparc3 | niagara | niagara2 \ diff --git a/gcc-4.9/gcc/config.in b/gcc-4.9/gcc/config.in index 8527ea7d7..7883eb362 100644 --- a/gcc-4.9/gcc/config.in +++ b/gcc-4.9/gcc/config.in @@ -1223,6 +1223,12 @@ #endif +/* Define if isl_schedule_constraints_compute_schedule exists. */ +#ifndef USED_FOR_TARGET +#undef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE +#endif + + /* Define to 1 if you have the `kill' function. */ #ifndef USED_FOR_TARGET #undef HAVE_KILL @@ -1327,6 +1333,12 @@ #endif +/* Define 0/1 if your linker supports -pie option with copy reloc. */ +#ifndef USED_FOR_TARGET +#undef HAVE_LD_PIE_COPYRELOC +#endif + + /* Define if your linker links a mix of read-only and read-write sections into a read-write section. 
*/ #ifndef USED_FOR_TARGET diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c index 55cfe0ab2..a5af874bf 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -371,6 +371,12 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { enum aarch64_builtins { AARCH64_BUILTIN_MIN, + + AARCH64_BUILTIN_GET_FPCR, + AARCH64_BUILTIN_SET_FPCR, + AARCH64_BUILTIN_GET_FPSR, + AARCH64_BUILTIN_SET_FPSR, + AARCH64_SIMD_BUILTIN_BASE, #include "aarch64-simd-builtins.def" AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE @@ -752,6 +758,24 @@ aarch64_init_simd_builtins (void) void aarch64_init_builtins (void) { + tree ftype_set_fpr + = build_function_type_list (void_type_node, unsigned_type_node, NULL); + tree ftype_get_fpr + = build_function_type_list (unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] + = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] + = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] + = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] + = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + if (TARGET_SIMD) aarch64_init_simd_builtins (); } @@ -964,6 +988,36 @@ aarch64_expand_builtin (tree exp, { tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + rtx pat, op0; + tree arg0; + + switch (fcode) + { + case AARCH64_BUILTIN_GET_FPCR: + case AARCH64_BUILTIN_SET_FPCR: + case AARCH64_BUILTIN_GET_FPSR: + case AARCH64_BUILTIN_SET_FPSR: + if ((fcode == AARCH64_BUILTIN_GET_FPCR) + || (fcode == AARCH64_BUILTIN_GET_FPSR)) + { + icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? + CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; + target = gen_reg_rtx (SImode); + pat = GEN_FCN (icode) (target); + } + else + { + target = NULL_RTX; + icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? 
+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ pat = GEN_FCN (icode) (op0);
+ }
+ emit_insn (pat);
+ return target;
+ }
 if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
 return aarch64_simd_expand_builtin (fcode, exp, target);
@@ -1196,6 +1250,106 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 return changed;
 }
+void
+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+ const unsigned AARCH64_FE_INVALID = 1;
+ const unsigned AARCH64_FE_DIVBYZERO = 2;
+ const unsigned AARCH64_FE_OVERFLOW = 4;
+ const unsigned AARCH64_FE_UNDERFLOW = 8;
+ const unsigned AARCH64_FE_INEXACT = 16;
+ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
+ | AARCH64_FE_DIVBYZERO
+ | AARCH64_FE_OVERFLOW
+ | AARCH64_FE_UNDERFLOW
+ | AARCH64_FE_INEXACT);
+ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
+ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
+ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
+ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
+ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
+
+ /* Generate the equivalent of:
+ unsigned int fenv_cr;
+ fenv_cr = __builtin_aarch64_get_fpcr ();
+
+ unsigned int fenv_sr;
+ fenv_sr = __builtin_aarch64_get_fpsr ();
+
+ Now set all exceptions to non-stop:
+ unsigned int mask_cr
+ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
+ unsigned int masked_cr;
+ masked_cr = fenv_cr & mask_cr;
+
+ And clear all exception flags:
+ unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
+ unsigned int masked_sr;
+ masked_sr = fenv_sr & mask_sr;
+
+ __builtin_aarch64_set_fpcr (masked_cr);
+ __builtin_aarch64_set_fpsr (masked_sr); */
+
+ fenv_cr = create_tmp_var (unsigned_type_node, NULL);
+ fenv_sr = create_tmp_var (unsigned_type_node, NULL);
+
+ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
+ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
+ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
+ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
+
+ mask_cr = build_int_cst (unsigned_type_node,
+ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
+ mask_sr = build_int_cst (unsigned_type_node,
+ ~(AARCH64_FE_ALL_EXCEPT));
+
+ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
+ fenv_cr, build_call_expr (get_fpcr, 0));
+ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
+ fenv_sr, build_call_expr (get_fpsr, 0));
+
+ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
+ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
+
+ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
+ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+ hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
+ hold_fnclex_sr);
+ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
+ masked_fenv_sr);
+ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
+
+ *hold = build2 (COMPOUND_EXPR, void_type_node,
+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
+ hold_fnclex);
+
+ /* Store the value of masked_fenv to clear the exceptions:
+ __builtin_aarch64_set_fpsr (masked_fenv_sr); */
+
+ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
+
+ /* Generate the equivalent of:
+ unsigned int new_fenv_var;
+ new_fenv_var = __builtin_aarch64_get_fpsr ();
+
+ __builtin_aarch64_set_fpsr (fenv_sr);
+
+ 
__atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var (unsigned_type_node, NULL); + reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, + new_fenv_var, build_call_expr (get_fpsr, 0)); + restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, + reload_fenv, restore_fnenv), update_call); +} + + #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT #undef BUILTIN_DX diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h index eafdd551d..bb5c88d53 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -33,6 +33,14 @@ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" #endif +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + #ifndef LINK_SPEC #define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h index b77becd23..651abe3ce 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -50,7 +50,16 @@ #define LINUX_TARGET_LINK_SPEC LINUX_TARGET_LINK_SPEC0 CA53_ERR_835769_SPEC -#define LINK_SPEC LINUX_TARGET_LINK_SPEC +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT +#define CA53_ERR_835769_SPEC \ + " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +#else +#define CA53_ERR_835769_SPEC \ + " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +#endif + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ + CA53_ERR_835769_SPEC #define TARGET_OS_CPP_BUILTINS() \ do \ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h index bef58bf71..8b0a70538 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -291,4 +291,5 @@ extern bool aarch64_madd_needs_nop (rtx); extern void aarch64_final_prescan_insn (rtx); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 851e77a02..7626ed31f 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,6 +934,41 @@ [(set_attr "type" "neon_minmax<q>")] ) +(define_expand "<su><maxmin>v2di3" + [(parallel [ + (set (match_operand:V2DI 0 "register_operand" "") + (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") + (match_operand:V2DI 2 "register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_SIMD" +{ + enum rtx_code cmp_operator; + rtx cmp_fmt; + + switch (<CODE>) + { + case UMIN: + cmp_operator = LTU; + break; + case SMIN: + cmp_operator = LT; + break; + case UMAX: + cmp_operator = GTU; + break; + case SMAX: + cmp_operator = GT; + break; + default: + gcc_unreachable (); + } + + cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); + emit_insn 
(gen_aarch64_vcond_internalv2div2di (operands[0], operands[1], + operands[2], cmp_fmt, operands[1], operands[2])); + DONE; +}) + ;; vec_concat gives a new vector with the low elements from operand 1, and ;; the high elements from operand 2. That is to say, given op1 = { a, b } ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. @@ -4565,8 +4600,8 @@ }) (define_insn "*aarch64_simd_ld1r<mode>" - [(set (match_operand:VALLDI 0 "register_operand" "=w") - (vec_duplicate:VALLDI + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] "TARGET_SIMD" "ld1r\\t{%0.<Vtype>}, %1" diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 2ff6c7cb8..029c54ca3 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -3874,7 +3874,7 @@ aarch64_print_operand_address (FILE *f, rtx x) switch (GET_CODE (x)) { case PRE_INC: - asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case POST_INC: @@ -5152,7 +5152,6 @@ aarch64_parse_cpu (void) if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) { selected_cpu = cpu; - selected_tune = cpu; aarch64_isa_flags = selected_cpu->flags; if (ext != NULL) @@ -5248,9 +5247,8 @@ aarch64_override_options (void) gcc_assert (selected_cpu); - /* The selected cpu may be an architecture, so lookup tuning by core ID. */ if (!selected_tune) - selected_tune = &all_cores[selected_cpu->core]; + selected_tune = selected_cpu; aarch64_tune_flags = selected_tune->flags; aarch64_tune = selected_tune->core; @@ -7194,7 +7192,7 @@ aarch64_expand_vector_init (rtx target, rtx vals) x = XVECEXP (vals, 0, 0); if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) n_var = 1, one_var = 0; - + for (i = 1; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); @@ -8642,6 +8640,10 @@ aarch64_cannot_change_mode_class (enum machine_mode from, #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ aarch64_autovectorize_vector_sizes +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ + aarch64_atomic_assign_expand_fenv + /* Section anchor support. */ #undef TARGET_MIN_ANCHOR_OFFSET diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index 319f80591..05f5e1b35 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -107,6 +107,10 @@ (define_c_enum "unspecv" [ UNSPECV_EH_RETURN ; Represent EH_RETURN + UNSPECV_GET_FPCR ; Represent fetch of FPCR content. + UNSPECV_SET_FPCR ; Represent assign of FPCR content. + UNSPECV_GET_FPSR ; Represent fetch of FPSR content. + UNSPECV_SET_FPSR ; Represent assign of FPSR content. 
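+    ; These four unspec_volatiles back the get_fpcr/set_fpcr and
+    ; get_fpsr/set_fpsr insns below, and through them the FPCR/FPSR
+    ; builtins expanded in aarch64-builtins.c.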
] ) @@ -1102,7 +1106,7 @@ add\\t%x0, %x1, %x2 sub\\t%x0, %x1, #%n2 add\\t%d0, %d1, %d2" - [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") + [(set_attr "type" "alu_imm,alu_reg,alu_imm,neon_add") (set_attr "simd" "*,*,*,yes")] ) @@ -2782,7 +2786,7 @@ ;; Logical right shift using SISD or Integer instruction (define_insn "*aarch64_lshr_sisd_or_int_<mode>3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "w,w,r") (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))] @@ -2801,11 +2805,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_USHL))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) (define_split @@ -2814,11 +2820,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_USHL_2S))] + { + operands[3] = gen_lowpart (QImode, operands[0]); + } ) ;; Arithmetic right shift using SISD or Integer instruction @@ -3642,6 +3650,37 @@ DONE; }) +;; Write Floating-point Control Register. +(define_insn "set_fpcr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] + "" + "msr\\tfpcr, %0\;isb" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Control Register. +(define_insn "get_fpcr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] + "" + "mrs\\t%0, fpcr" + [(set_attr "type" "mrs")]) + +;; Write Floating-point Status Register. +(define_insn "set_fpsr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] + "" + "msr\\tfpsr, %0" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Status Register. 
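+;; The FPSR accumulates the exception flag bits (IOC, DZC, OFC, UFC, IXC),
+;; while the FPCR holds control state such as the rounding mode and the
+;; trap-enable bits.  Both registers transfer through a general register
+;; via mrs/msr, hence the SImode operands on these patterns.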
+(define_insn "get_fpsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] + "" + "mrs\\t%0, fpsr" + [(set_attr "type" "mrs")]) + + ;; AdvSIMD Stuff (include "aarch64-simd.md") diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index c01669b2c..ae0ae9c1b 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -39,9 +39,6 @@ typedef __builtin_aarch64_simd_hi int16x4_t typedef __builtin_aarch64_simd_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef int64_t int64x1_t; -typedef int32_t int32x1_t; -typedef int16_t int16x1_t; -typedef int8_t int8x1_t; typedef double float64x1_t; typedef __builtin_aarch64_simd_sf float32x2_t __attribute__ ((__vector_size__ (8))); @@ -56,9 +53,6 @@ typedef __builtin_aarch64_simd_uhi uint16x4_t typedef __builtin_aarch64_simd_usi uint32x2_t __attribute__ ((__vector_size__ (8))); typedef uint64_t uint64x1_t; -typedef uint32_t uint32x1_t; -typedef uint16_t uint16x1_t; -typedef uint8_t uint8x1_t; typedef __builtin_aarch64_simd_qi int8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_hi int16x8_t @@ -8400,7 +8394,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s16(a, b, c) \ __extension__ \ ({ \ - int16x8_t b_ = (b); \ + int16x4_t b_ = (b); \ int16x8_t a_ = (a); \ int32x4_t result; \ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8413,7 +8407,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_s32(a, b, c) \ __extension__ \ ({ \ - int32x4_t b_ = (b); \ + int32x2_t b_ = (b); \ int32x4_t a_ = (a); \ int64x2_t result; \ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -8426,7 +8420,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u16(a, b, c) \ __extension__ \ ({ \ - uint16x8_t b_ = (b); \ + uint16x4_t b_ = (b); \ uint16x8_t a_ = (a); \ uint32x4_t result; \ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ @@ -8439,7 +8433,7 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) #define vmull_high_lane_u32(a, b, c) \ __extension__ \ ({ \ - uint32x4_t b_ = (b); \ + uint32x2_t b_ = (b); \ uint32x4_t a_ = (a); \ uint64x2_t result; \ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ @@ -20925,42 +20919,42 @@ vqabsq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqabsb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqabsb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqabsqi (__a); + return (int8_t) __builtin_aarch64_sqabsqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqabsh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqabsh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqabshi (__a); + return (int16_t) __builtin_aarch64_sqabshi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqabss_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqabss_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqabssi (__a); + return (int32_t) __builtin_aarch64_sqabssi (__a); } /* vqadd */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqaddb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqaddb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) 
__builtin_aarch64_sqaddqi (__a, __b); + return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqaddh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqaddh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b); + return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqadds_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqadds_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b); + return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -20969,22 +20963,22 @@ vqaddd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqaddb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqaddh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqadds_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqadds_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21095,26 +21089,26 @@ vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlalhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlals_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlalsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ 
-21221,26 +21215,26 @@ vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) { return __builtin_aarch64_sqdmlslhi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +vqdmlsls_s32 (int64x1_t __a, int32_t __b, int32_t __c) { return __builtin_aarch64_sqdmlslsi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21271,26 +21265,26 @@ vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21393,26 +21387,26 @@ vqdmull_n_s32 (int32x2_t __a, int32_t __b) return __builtin_aarch64_sqdmull_nv2si (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_s16 (int16_t __a, int16_t __b) { - return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b); + return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, 
const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_s32 (int32x1_t __a, int32x1_t __b) +vqdmulls_s32 (int32_t __a, int32_t __b) { return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21455,40 +21449,40 @@ vqmovn_u64 (uint64x2_t __a) return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovnh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovnh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovnhi (__a); + return (int8_t) __builtin_aarch64_sqmovnhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovnsi (__a); + return (int16_t) __builtin_aarch64_sqmovnsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovnd_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovndi (__a); + return (int32_t) __builtin_aarch64_sqmovndi (__a); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqmovnh_u16 (uint16x1_t __a) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqmovnh_u16 (uint16_t __a) { - return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a); + return (uint8_t) __builtin_aarch64_uqmovnhi (__a); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqmovns_u32 (uint32x1_t __a) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqmovns_u32 (uint32_t __a) { - return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a); + return (uint16_t) __builtin_aarch64_uqmovnsi (__a); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqmovnd_u64 (uint64x1_t __a) { - return (uint32x1_t) __builtin_aarch64_uqmovndi (__a); + return (uint32_t) __builtin_aarch64_uqmovndi (__a); } /* vqmovun */ @@ -21511,22 +21505,22 @@ vqmovun_s64 (int64x2_t __a) return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqmovunh_s16 (int16x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqmovunh_s16 (int16_t __a) { - return (int8x1_t) __builtin_aarch64_sqmovunhi (__a); + return (int8_t) __builtin_aarch64_sqmovunhi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqmovuns_s32 (int32x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqmovuns_s32 (int32_t __a) { - return (int16x1_t) __builtin_aarch64_sqmovunsi (__a); + return (int16_t) __builtin_aarch64_sqmovunsi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) 
+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqmovund_s64 (int64x1_t __a) { - return (int32x1_t) __builtin_aarch64_sqmovundi (__a); + return (int32_t) __builtin_aarch64_sqmovundi (__a); } /* vqneg */ @@ -21537,22 +21531,22 @@ vqnegq_s64 (int64x2_t __a) return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqnegb_s8 (int8x1_t __a) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqnegb_s8 (int8_t __a) { - return (int8x1_t) __builtin_aarch64_sqnegqi (__a); + return (int8_t) __builtin_aarch64_sqnegqi (__a); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqnegh_s16 (int16x1_t __a) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqnegh_s16 (int16_t __a) { - return (int16x1_t) __builtin_aarch64_sqneghi (__a); + return (int16_t) __builtin_aarch64_sqneghi (__a); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqnegs_s32 (int32x1_t __a) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqnegs_s32 (int32_t __a) { - return (int32x1_t) __builtin_aarch64_sqnegsi (__a); + return (int32_t) __builtin_aarch64_sqnegsi (__a); } /* vqrdmulh */ @@ -21581,26 +21575,26 @@ vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b); + return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b); + return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } @@ -21703,20 +21697,20 @@ vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqrshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshlh_s16 (int16_t __a, int16_t __b) { return 
__builtin_aarch64_sqrshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqrshlsi (__a, __b); } @@ -21727,22 +21721,22 @@ vqrshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqrshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqrshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -21789,40 +21783,40 @@ vqrshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqrshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqrshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqrshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqrshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); + return (uint16_t) 
__builtin_aarch64_uqrshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqrshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); } /* vqrshrun */ @@ -21845,22 +21839,22 @@ vqrshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqrshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqrshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqrshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); } /* vqshl */ @@ -21961,20 +21955,20 @@ vqshlq_u64 (uint64x2_t __a, int64x2_t __b) return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_s8 (int8_t __a, int8_t __b) { return __builtin_aarch64_sqshlqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_s16 (int16_t __a, int16_t __b) { return __builtin_aarch64_sqshlhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_s32 (int32_t __a, int32_t __b) { return __builtin_aarch64_sqshlsi (__a, __b); } @@ -21985,22 +21979,22 @@ vqshld_s64 (int64x1_t __a, int64x1_t __b) return __builtin_aarch64_sqshldi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_u32 (uint32x1_t __a, uint32x1_t __b) 
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22105,22 +22099,22 @@ vqshlq_n_u64 (uint64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlb_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshlh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshls_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshls_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22129,22 +22123,22 @@ vqshld_n_s64 (int64x1_t __a, const int __b) return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshlb_n_u8 (uint8x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshlb_n_u8 (uint8_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshlh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshlh_n_u16 (uint16_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqshls_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqshls_n_u32 (uint32_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -22203,22 +22197,22 @@ vqshluq_n_s64 (int64x2_t __a, const int __b) return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshlub_n_s8 (int8x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshlub_n_s8 (int8_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); + return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshluh_n_s16 (int16x1_t __a, const int __b) 
+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshluh_n_s16 (int16_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); + return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqshlus_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqshlus_n_s32 (int32_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); + return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22265,40 +22259,40 @@ vqshrn_n_u64 (uint64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrnh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshrns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrnd_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqshrnh_n_u16 (uint16x1_t __a, const int __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqshrnh_n_u16 (uint16_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); + return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqshrns_n_u32 (uint32x1_t __a, const int __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqshrns_n_u32 (uint32_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); + return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vqshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); + return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b); } /* vqshrun */ @@ -22321,42 +22315,42 @@ vqshrun_n_s64 (int64x2_t __a, const int __b) return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqshrunh_n_s16 (int16x1_t __a, const int __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqshrunh_n_s16 (int16_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b); + return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ 
((__always_inline__)) -vqshruns_n_s32 (int32x1_t __a, const int __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqshruns_n_s32 (int32_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b); + return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) vqshrund_n_s64 (int64x1_t __a, const int __b) { - return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b); + return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); } /* vqsub */ -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vqsubb_s8 (int8x1_t __a, int8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vqsubb_s8 (int8_t __a, int8_t __b) { - return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b); + return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqsubh_s16 (int16x1_t __a, int16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqsubh_s16 (int16_t __a, int16_t __b) { - return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b); + return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqsubs_s32 (int32x1_t __a, int32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqsubs_s32 (int32_t __a, int32_t __b) { - return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b); + return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22365,22 +22359,22 @@ vqsubd_s64 (int64x1_t __a, int64x1_t __b) return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vqsubb_u8 (uint8_t __a, uint8_t __b) { - return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); + return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b); } -__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) -vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vqsubh_u16 (uint16_t __a, uint16_t __b) { - return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); + return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vqsubs_u32 (uint32_t __a, uint32_t __b) { - return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); + return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -23596,22 +23590,22 @@ vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) (int64x2_t) __b); } -__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) -vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vsqaddb_u8 (uint8_t __a, int8_t __b) { - return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); + return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); } -__extension__ static 
__inline uint16x1_t __attribute__ ((__always_inline__)) -vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vsqaddh_u16 (uint16_t __a, int16_t __b) { - return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); + return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); } -__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) -vsqadds_u32 (uint32x1_t __a, int32x1_t __b) +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsqadds_u32 (uint32_t __a, int32_t __b) { - return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); + return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) @@ -25251,22 +25245,22 @@ vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); } -__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8_t __a, uint8_t __b) { - return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); + return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); } -__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16_t __a, uint16_t __b) { - return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); + return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); } -__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32_t __a, uint32_t __b) { - return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); + return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index d5c7908be..19ae3665a 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -9918,12 +9918,6 @@ alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start -/* The Alpha architecture does not require sequential consistency. See - http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html - for an example of how it can be violated in practice. 
*/ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE alpha_option_override diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md index 795b4df3f..1179d572d 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.md +++ b/gcc-4.9/gcc/config/alpha/alpha.md @@ -5984,16 +5984,38 @@ [(set_attr "type" "jsr") (set_attr "length" "*,*,8")]) -(define_insn_and_split "call_value_osf_tlsgd" +(define_int_iterator TLS_CALL + [UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL]) + +(define_int_attr tls + [(UNSPEC_TLSGD_CALL "tlsgd") + (UNSPEC_TLSLDM_CALL "tlsldm")]) + +(define_insn "call_value_osf_<tls>" [(set (match_operand 0) (call (mem:DI (match_operand:DI 1 "symbolic_operand")) (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) (use (reg:DI 29)) (clobber (reg:DI 26))] "HAVE_AS_TLS" - "#" - "&& reload_completed" + "ldq $27,%1($29)\t\t!literal!%2\;jsr $26,($27),%1\t\t!lituse_<tls>!%2\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6001,10 +6023,9 @@ (parallel [(set (match_dup 0) (call (mem:DI (match_dup 3)) (const_int 0))) - (set (match_dup 5) - (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 5)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6012,19 +6033,18 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) -(define_insn_and_split "call_value_osf_tlsldm" - [(set (match_operand 0) - (call (mem:DI (match_operand:DI 1 "symbolic_operand")) - (const_int 0))) - (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL) - (use (reg:DI 29)) - (clobber (reg:DI 26))] - "HAVE_AS_TLS" - "#" - "&& reload_completed" +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && !peep2_regno_dead_p (1, 29)" [(set (match_dup 3) (unspec:DI [(match_dup 5) (match_dup 1) @@ -6035,7 +6055,7 @@ (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) (use (match_dup 1)) - (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (use (unspec [(match_dup 2)] TLS_CALL)) (clobber (reg:DI 26))]) (set (match_dup 5) (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] @@ -6043,8 +6063,7 @@ operands[3] = gen_rtx_REG (Pmode, 27); operands[4] = GEN_INT (alpha_next_sequence_number++); operands[5] = pic_offset_table_rtx; -} - [(set_attr "type" "multi")]) +}) (define_insn "*call_value_osf_1" [(set (match_operand 0) diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h 
b/gcc-4.9/gcc/config/arm/arm-protos.h index 13874ee6e..2ac3b3009 100644 --- a/gcc-4.9/gcc/config/arm/arm-protos.h +++ b/gcc-4.9/gcc/config/arm/arm-protos.h @@ -56,6 +56,7 @@ extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, extern int legitimate_pic_operand_p (rtx); extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx legitimize_tls_address (rtx, rtx); +extern bool arm_legitimate_address_p (enum machine_mode, rtx, bool); extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, @@ -294,4 +295,6 @@ extern void arm_emit_eabi_attribute (const char *, int, int); /* Defined in gcc/common/config/arm-common.c. */ extern const char *arm_rewrite_selected_cpu (const char *name); +extern bool arm_is_constant_pool_ref (rtx); + #endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 3c237cb6d..b79bb48b1 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -89,7 +89,6 @@ static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); inline static int thumb1_index_register_rtx_p (rtx, int); -static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); static int thumb_far_jump_used_p (void); static bool thumb_force_lr_save (void); static unsigned arm_size_return_regs (void); @@ -13952,9 +13951,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, HOST_WIDE_INT srcoffset, dstoffset; HOST_WIDE_INT src_autoinc, dst_autoinc; rtx mem, addr; - + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); - + /* Use hard registers if we have aligned source or destination so we can use load/store multiple with contiguous registers. */ if (dst_aligned || src_aligned) @@ -13968,7 +13967,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, src = copy_addr_to_reg (XEXP (srcbase, 0)); srcoffset = dstoffset = 0; - + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. For copying the last bytes we want to subtract this offset again. */ src_autoinc = dst_autoinc = 0; @@ -14022,14 +14021,14 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= block_size_bytes; } - + /* Copy any whole words left (note these aren't interleaved with any subsequent halfword/byte load/stores in the interests of simplicity). */ - + words = remaining / UNITS_PER_WORD; gcc_assert (words < interleave_factor); - + if (src_aligned && words > 1) { emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, @@ -14069,11 +14068,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, } remaining -= words * UNITS_PER_WORD; - + gcc_assert (remaining < 4); - + /* Copy a halfword if necessary. */ - + if (remaining >= 2) { halfword_tmp = gen_reg_rtx (SImode); @@ -14097,11 +14096,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining -= 2; srcoffset += 2; } - + gcc_assert (remaining < 2); - + /* Copy last byte. */ - + if ((remaining & 1) != 0) { byte_tmp = gen_reg_rtx (SImode); @@ -14122,9 +14121,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, remaining--; srcoffset++; } - + /* Store last halfword if we haven't done so already. 
*/ - + if (halfword_tmp) { addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); @@ -14143,7 +14142,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); dstoffset++; } - + gcc_assert (remaining == 0 && srcoffset == dstoffset); } @@ -14162,7 +14161,7 @@ arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, rtx *loop_mem) { *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); - + /* Although the new mem does not refer to a known location, it does keep up to LENGTH bytes of alignment. */ *loop_mem = change_address (mem, BLKmode, *loop_reg); @@ -14182,14 +14181,14 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, { rtx label, src_reg, dest_reg, final_src, test; HOST_WIDE_INT leftover; - + leftover = length % bytes_per_iter; length -= leftover; - + /* Create registers and memory references for use within the loop. */ arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); - + /* Calculate the value that SRC_REG should have after the last iteration of the loop. */ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), @@ -14198,7 +14197,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Emit the start of the loop. */ label = gen_label_rtx (); emit_label (label); - + /* Emit the loop body. */ arm_block_move_unaligned_straight (dest, src, bytes_per_iter, interleave_factor); @@ -14206,11 +14205,11 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Move on to the next block. */ emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); - + /* Emit the loop condition. */ test = gen_rtx_NE (VOIDmode, src_reg, final_src); emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); - + /* Mop up any left-over bytes. */ if (leftover) arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); @@ -14224,7 +14223,7 @@ static int arm_movmemqi_unaligned (rtx *operands) { HOST_WIDE_INT length = INTVAL (operands[2]); - + if (optimize_size) { bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; @@ -14235,7 +14234,7 @@ arm_movmemqi_unaligned (rtx *operands) resulting code can be smaller. */ unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; - + if (length > 12) arm_block_move_unaligned_loop (operands[0], operands[1], length, interleave_factor, bytes_per_iter); @@ -14253,7 +14252,7 @@ arm_movmemqi_unaligned (rtx *operands) else arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); } - + return 1; } @@ -28520,7 +28519,11 @@ arm_set_return_address (rtx source, rtx scratch) addr = plus_constant (Pmode, addr, delta); } - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. 
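+     Nothing in this function reads the slot back, so without the
+     marking the store of the return address would appear dead.  The
+     REG_CFA_RESTORE note presumably keeps the unwind info consistent
+     once the insn is frame related.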
*/ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } } @@ -28572,7 +28575,11 @@ thumb_set_return_address (rtx source, rtx scratch) else addr = plus_constant (Pmode, addr, delta); - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. */ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } else emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); @@ -29828,10 +29835,10 @@ int vfp3_const_double_for_fract_bits (rtx operand) { REAL_VALUE_TYPE r0; - + if (!CONST_DOUBLE_P (operand)) return 0; - + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); if (exact_real_inverse (DFmode, &r0)) { @@ -30825,7 +30832,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) else return false; } - + return true; case ARM_POST_DEC: @@ -30842,10 +30849,10 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) return false; return true; - + default: return false; - + } return false; @@ -30856,7 +30863,7 @@ arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) Additionally, the default expansion code is not available or suitable for post-reload insn splits (this can occur when the register allocator chooses not to do a shift in NEON). - + This function is used in both initial expand and post-reload splits, and handles all kinds of 64-bit shifts. @@ -31109,7 +31116,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); int code_int; - enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? GET_MODE (*op2) : GET_MODE (*op1); gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); @@ -31163,7 +31170,7 @@ arm_asan_shadow_offset (void) /* This is a temporary fix for PR60655. Ideally we need to handle most of these cases in the generic part but - currently we reject minus (..) (sym_ref). We try to + currently we reject minus (..) (sym_ref). We try to ameliorate the case with minus (sym_ref1) (sym_ref2) where they are in the same section. */ @@ -31393,4 +31400,13 @@ arm_load_global_address (rtx symbol, rtx offset_reg, df_insn_rescan (load_insn); } +/* return TRUE if x is a reference to a value in a constant pool */ +extern bool +arm_is_constant_pool_ref (rtx x) +{ + return (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); +} + #include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index ab5167a8b..433a3dd77 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -74,8 +74,8 @@ extern char arm_arch_name[]; builtin_define_with_int_value ( \ "__ARM_SIZEOF_MINIMAL_ENUM", \ flag_short_enums ? 1 : 4); \ - builtin_define_with_int_value ( \ - "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + builtin_define_type_sizeof ("__ARM_SIZEOF_WCHAR_T", \ + wchar_type_node); \ if (TARGET_ARM_ARCH_PROFILE) \ builtin_define_with_int_value ( \ "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ @@ -2139,9 +2139,10 @@ extern int making_const_table; ? reverse_condition_maybe_unordered (code) \ : reverse_condition (code)) -/* The arm5 clz instruction returns 32. 
*/ -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) #define CC_STATUS_INIT \ do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 467f9ce4e..1153a1e34 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -127,9 +127,10 @@ ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for ; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6" ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without -; arm_arch6. This attribute is used to compute attribute "enabled", -; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" +; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is +; used to compute attribute "enabled", use type "any" to enable an +; alternative in all cases. +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -164,6 +165,10 @@ (match_test "TARGET_32BIT && !arm_arch6")) (const_string "yes") + (and (eq_attr "arch" "v6t2") + (match_test "TARGET_32BIT && arm_arch6 && arm_arch_thumb2")) + (const_string "yes") + (and (eq_attr "arch" "avoid_neon_for_64bits") (match_test "TARGET_NEON") (not (match_test "TARGET_PREFER_NEON_64BITS"))) @@ -3631,7 +3636,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && optimize_function_for_size_p (cfun)" + "TARGET_32BIT && optimize_function_for_size_p (cfun) && !arm_restrict_it" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -4374,7 +4379,7 @@ (define_insn "unaligned_loadhis" [(set (match_operand:SI 0 "s_register_operand" "=l,r") (sign_extend:SI - (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,Uh")] UNSPEC_UNALIGNED_LOAD)))] "unaligned_access && TARGET_32BIT" "ldr%(sh%)\t%0, %1\t@ unaligned" @@ -5287,7 +5292,7 @@ (define_insn "*arm_zero_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxth%?\\t%0, %1 @@ -5381,7 +5386,7 @@ (define_insn "*arm_zero_extendqisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch6" "@ uxtb%(%)\\t%0, %1 @@ -5615,31 +5620,27 @@ (define_insn "*arm_extendhisi2" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_ARM && arm_arch4 && !arm_arch6" "@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) ;; ??? 
Check Thumb-2 pool range (define_insn "*arm_extendhisi2_v6" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,Uh")))] "TARGET_32BIT && arm_arch6" "@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable_short_it" "no")] ) (define_insn "*arm_extendhisi2addsi" @@ -5682,9 +5683,7 @@ "TARGET_ARM && arm_arch4" "ldr%(sb%)\\t%0, %1" [(set_attr "type" "load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "256") - (set_attr "neg_pool_range" "244")] + (set_attr "predicable" "yes")] ) (define_expand "extendqisi2" @@ -5724,9 +5723,7 @@ ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") (set_attr "type" "alu_shift_reg,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi_v6" @@ -5738,9 +5735,7 @@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" [(set_attr "type" "extend,load_byte") - (set_attr "predicable" "yes") - (set_attr "pool_range" "*,256") - (set_attr "neg_pool_range" "*,244")] + (set_attr "predicable" "yes")] ) (define_insn "*arm_extendqisi2addsi" @@ -6973,8 +6968,8 @@ ;; Pattern to recognize insn generated default case above (define_insn "*movhi_insn_arch4" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") - (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,n,r,mi"))] "TARGET_ARM && arm_arch4 && (register_operand (operands[0], HImode) @@ -6982,16 +6977,19 @@ "@ mov%?\\t%0, %1\\t%@ movhi mvn%?\\t%0, #%B1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" [(set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,256") - (set_attr "neg_pool_range" "*,*,*,244") + (set_attr "pool_range" "*,*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,*,244") + (set_attr "arch" "*,*,v6t2,*,*") (set_attr_alternative "type" [(if_then_else (match_operand 1 "const_int_operand" "") (const_string "mov_imm" ) (const_string "mov_reg")) (const_string "mvn_imm") + (const_string "mov_imm") (const_string "store1") (const_string "load1")])] ) @@ -10944,10 +10942,16 @@ enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), operands[3], operands[4]); enum rtx_code rc = GET_CODE (operands[5]); - operands[6] = gen_rtx_REG (mode, CC_REGNUM); gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); - rc = reverse_condition (rc); + if (REGNO (operands[2]) != REGNO (operands[0])) + rc = reverse_condition (rc); + else + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); } diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md index 85dd116ce..f848664d5 100644 --- a/gcc-4.9/gcc/config/arm/constraints.md +++ b/gcc-4.9/gcc/config/arm/constraints.md @@ -36,7 +36,7 @@ ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py ;; The following memory constraints have been used: -;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM/Thumb-2 state: Q, Uh, Ut, Uv, Uy, Un, Um, Us ;; in ARM state: Uq ;; in Thumb state: Uu, Uw @@ -348,6 +348,12 @@ An address valid for 
loading/storing register exclusive" (match_operand 0 "mem_noofs_operand")) +(define_memory_constraint "Uh" + "@internal + An address suitable for byte and half-word loads which does not point inside a constant pool" + (and (match_code "mem") + (match_test "arm_legitimate_address_p (GET_MODE (op), XEXP (op, 0), false) && !arm_is_constant_pool_ref (op)"))) + (define_memory_constraint "Ut" "@internal In ARM/Thumb-2 state an address valid for loading/storing opaque structure @@ -394,7 +400,8 @@ (and (match_code "mem") (match_test "TARGET_ARM && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), - SIGN_EXTEND, 0)"))) + SIGN_EXTEND, 0) + && !arm_is_constant_pool_ref (op)"))) (define_memory_constraint "Q" "@internal diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h index 7ee5806b7..e69de29bb 100644 --- a/gcc-4.9/gcc/config/arm/linux-grte.h +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -1,27 +0,0 @@ -/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2011 Free Software Foundation, Inc. - Contributed by Chris Demetriou. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
*/ - -#undef SUBSUBTARGET_EXTRA_SPECS -#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile index ff9e2e1b3..86741e6b0 100644 --- a/gcc-4.9/gcc/config/arm/t-aprofile +++ b/gcc-4.9/gcc/config/arm/t-aprofile @@ -88,6 +88,9 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 +# Arch Matches +MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc + # FPU matches MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md index 639810518..56cd30458 100644 --- a/gcc-4.9/gcc/config/avr/avr-dimode.md +++ b/gcc-4.9/gcc/config/avr/avr-dimode.md @@ -68,6 +68,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (DImode == <MODE>mode @@ -145,6 +146,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -201,6 +203,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -249,6 +252,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (const_operand (operands[2], GET_MODE (operands[2]))) @@ -338,6 +342,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); if (s8_operand (operands[2], VOIDmode)) @@ -424,6 +429,7 @@ { rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, ACC_A)); emit_move_insn (acc_a, operands[1]); emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]); emit_insn (gen_<code_stdname><mode>3_insn ()); @@ -457,6 +463,7 @@ (clobber (any_extend:SI (match_dup 1)))])] "avr_have_dimode" { + avr_fix_inputs (operands, 1 << 2, regmask (SImode, 22)); emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); emit_insn (gen_<extend_u>mulsidi3_insn()); diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md index 9c8489edd..6763f596e 100644 --- a/gcc-4.9/gcc/config/avr/avr-fixed.md +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -231,7 +231,11 @@ (clobber (reg:HI 24))]) (set (match_operand:QQ 0 "register_operand" "") (reg:QQ 23))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (QQmode, 24)); + }) + (define_expand "muluqq3_nomul" [(set (reg:UQQ 22) @@ -246,7 +250,10 @@ (clobber (reg:HI 22))]) (set (match_operand:UQQ 0 "register_operand" "") (reg:UQQ 25))] - "!AVR_HAVE_MUL") + "!AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (UQQmode, 22)); + }) (define_insn "*mulqq3.call" [(set (reg:QQ 23) @@ -274,7 +281,10 @@ (clobber (reg:HI 22))]) (set (match_operand:ALL2QA 0 "register_operand" "") (reg:ALL2QA 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 18)); + }) ;; "*mulhq3.call" "*muluhq3.call" ;; "*mulha3.call" "*muluha3.call" @@ -302,7 +312,10 
@@ (reg:ALL4A 20))) (set (match_operand:ALL4A 0 "register_operand" "") (reg:ALL4A 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 16)); + }) ;; "*mulsa3.call" "*mulusa3.call" (define_insn "*mul<mode>3.call" @@ -330,7 +343,12 @@ (reg:ALL1Q 22))) (clobber (reg:QI 25))]) (set (match_operand:ALL1Q 0 "register_operand" "") - (reg:ALL1Q 24))]) + (reg:ALL1Q 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 25)); + }) + ;; "*divqq3.call" "*udivuqq3.call" (define_insn "*<code><mode>3.call" @@ -356,7 +374,11 @@ (clobber (reg:HI 26)) (clobber (reg:QI 21))]) (set (match_operand:ALL2QA 0 "register_operand" "") - (reg:ALL2QA 24))]) + (reg:ALL2QA 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 26)); + }) ;; "*divhq3.call" "*udivuhq3.call" ;; "*divha3.call" "*udivuha3.call" @@ -385,7 +407,11 @@ (clobber (reg:HI 26)) (clobber (reg:HI 30))]) (set (match_operand:ALL4A 0 "register_operand" "") - (reg:ALL4A 22))]) + (reg:ALL4A 22))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, 24)); + }) ;; "*divsa3.call" "*udivusa3.call" (define_insn "*<code><mode>3.call" @@ -435,6 +461,7 @@ operands[3] = gen_rtx_REG (<MODE>mode, regno_out[(size_t) GET_MODE_SIZE (<MODE>mode)]); operands[4] = gen_rtx_REG (<MODE>mode, regno_in[(size_t) GET_MODE_SIZE (<MODE>mode)]); + avr_fix_inputs (operands, 1 << 2, regmask (<MODE>mode, REGNO (operands[4]))); operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0); // $2 is no more needed, but is referenced for expand. operands[2] = const0_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h index c5ce78429..4a899a27c 100644 --- a/gcc-4.9/gcc/config/avr/avr-protos.h +++ b/gcc-4.9/gcc/config/avr/avr-protos.h @@ -124,6 +124,15 @@ extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); extern bool avr_xload_libgcc_p (enum machine_mode); +static inline unsigned +regmask (enum machine_mode mode, unsigned regno) +{ + return ((1u << GET_MODE_SIZE (mode)) - 1) << regno; +} + +extern void avr_fix_inputs (rtx*, unsigned, unsigned); +extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned); + extern rtx lpm_reg_rtx; extern rtx lpm_addr_reg_rtx; extern rtx tmp_reg_rtx; diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c index fa979df46..4c65f5efa 100644 --- a/gcc-4.9/gcc/config/avr/avr.c +++ b/gcc-4.9/gcc/config/avr/avr.c @@ -11118,6 +11118,115 @@ avr_convert_to_type (tree type, tree expr) } +/* PR63633: The middle-end might come up with hard regs as input operands. + + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + For each element of OPMASK which is a hard register overlapping RMASK, + replace OP[n] with a newly created pseudo register + + HREG == 0: Also emit a move insn that copies the contents of that + hard register into the new pseudo. + + HREG != 0: Also set HREG[n] to the hard register. */ + +static void +avr_fix_operands (rtx *op, rtx *hreg, unsigned opmask, unsigned rmask) +{ + for (; opmask; opmask >>= 1, op++) + { + rtx reg = *op; + + if (hreg) + *hreg = NULL_RTX; + + if ((opmask & 1) + && REG_P (reg) + && REGNO (reg) < FIRST_PSEUDO_REGISTER + // This hard-reg overlaps other prohibited hard regs? 
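      // Illustration only, not part of the patch: regmask (mode, regno)
      // from avr-protos.h expands to ((1u << GET_MODE_SIZE (mode)) - 1)
      // << regno, one bit per hard register the value occupies, e.g.
      //   regmask (HImode, 24) == 0x03000000   /* R24..R25 */
      //   regmask (SImode, 18) == 0x003c0000   /* R18..R21 */
      // so the test below asks whether *op overlaps a prohibited reg.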
+ && (rmask & regmask (GET_MODE (reg), REGNO (reg)))) + { + *op = gen_reg_rtx (GET_MODE (reg)); + if (hreg == NULL) + emit_move_insn (*op, reg); + else + *hreg = reg; + } + + if (hreg) + hreg++; + } +} + + +void +avr_fix_inputs (rtx *op, unsigned opmask, unsigned rmask) +{ + avr_fix_operands (op, NULL, opmask, rmask); +} + + +/* Helper for the function below: If bit n of MASK is set and + HREG[n] != NULL, then emit a move insn to copy OP[n] to HREG[n]. + Otherwise do nothing for that n. Return TRUE. */ + +static bool +avr_move_fixed_operands (rtx *op, rtx *hreg, unsigned mask) +{ + for (; mask; mask >>= 1, op++, hreg++) + if ((mask & 1) + && *hreg) + emit_move_insn (*hreg, *op); + + return true; +} + + +/* PR63633: The middle-end might come up with hard regs as output operands. + + GEN is a sequence generating function like gen_mulsi3 with 3 operands OP[]. + RMASK is a bit mask representing a subset of hard registers R0...R31: + Rn is an element of that set iff bit n of RMASK is set. + OPMASK describes a subset of OP[]: If bit n of OPMASK is 1 then + OP[n] has to be fixed; otherwise OP[n] is left alone. + + Emit the insn sequence as generated by GEN() with all elements of OPMASK + which are hard registers overlapping RMASK replaced by newly created + pseudo registers. After the sequence has been emitted, emit insns that + move the contents of respective pseudos to their hard regs. */ + +bool +avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, + unsigned opmask, unsigned rmask) +{ + const int n = 3; + rtx hreg[n]; + + /* It is legitimate for GEN to call this function, and in order not to + get self-recursive we use the following static kludge. This is the + only way not to duplicate all expanders and to avoid ugly and + hard-to-maintain C-code instead of the much more appreciated RTL + representation as supplied by define_expand. */ + static bool lock = false; + + gcc_assert (opmask < (1u << n)); + + if (lock) + return false; + + avr_fix_operands (op, hreg, opmask, rmask); + + lock = true; + emit_insn (gen (op[0], op[1], op[2])); + lock = false; + + return avr_move_fixed_operands (op, hreg, opmask); +} + + /* Worker function for movmemhi expander. 
XOP[0] Destination as MEM:BLK XOP[1] Source " " diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md index 3bb2a914a..3f4181dab 100644 --- a/gcc-4.9/gcc/config/avr/avr.md +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -1482,7 +1482,11 @@ (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) (clobber (reg:QI 22))]) - (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))] + "" + { + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); + }) (define_insn "*mulqi3_call" [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) @@ -2210,7 +2214,13 @@ (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) (clobber (reg:HI 22)) (clobber (reg:QI 21))]) - (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + { + avr_fix_inputs (operands, (1 << 2), regmask (HImode, 24)); + }) + (define_insn "*mulhi3_call" [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) @@ -2248,6 +2258,10 @@ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn_and_split "*mulsi3" @@ -2287,7 +2301,23 @@ ;; "muluqisi3" ;; "muluhisi3" -(define_insn_and_split "mulu<mode>si3" +(define_expand "mulu<mode>si3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulu<mode>si3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*muluqisi3" +;; "*muluhisi3" +(define_insn_and_split "*mulu<mode>si3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2323,7 +2353,23 @@ ;; "mulsqisi3" ;; "mulshisi3" -(define_insn_and_split "muls<mode>si3" +(define_expand "muls<mode>si3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_muls<mode>si3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +;; "*mulsqisi3" +;; "*mulshisi3" +(define_insn_and_split "*muls<mode>si3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -2366,7 +2412,22 @@ ;; One-extend operand 1 -(define_insn_and_split "mulohisi3" +(define_expand "mulohisi3" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 
<< 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulohisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulohisi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (not:SI (zero_extend:SI (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) @@ -2394,7 +2455,12 @@ (any_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_<extend_u>mulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) (define_expand "usmulhisi3" [(parallel [(set (match_operand:SI 0 "register_operand" "") @@ -2402,7 +2468,12 @@ (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + if (avr_emit3_fix_outputs (gen_usmulhisi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) ;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" ;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" @@ -2474,7 +2545,10 @@ (clobber (reg:HI 22))]) (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))] - "AVR_HAVE_MUL") + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, 1 << 2, regmask (HImode, 18)); + }) (define_insn "*mulsi3_call" @@ -2697,6 +2771,10 @@ emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); DONE; } + + if (avr_emit3_fix_outputs (gen_mulpsi3, operands, 1u << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; }) (define_insn "*umulqihipsi3" @@ -2729,7 +2807,21 @@ [(set_attr "length" "7") (set_attr "cc" "clobber")]) -(define_insn_and_split "mulsqipsi3" +(define_expand "mulsqipsi3" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand"""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + avr_fix_inputs (operands, (1 << 1) | (1 << 2), -1u); + if (avr_emit3_fix_outputs (gen_mulsqipsi3, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26))) + DONE; + }) + +(define_insn_and_split "*mulsqipsi3" [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) @@ -4931,8 +5023,9 @@ (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")] UNSPEC_INDEX_JMP)) (use (label_ref (match_operand 1 "" ""))) - (clobber (match_dup 0))] - "" + (clobber (match_dup 0)) + (clobber (const_int 0))] + "!AVR_HAVE_EIJMP_EICALL" "@ ijmp push %A0\;push %B0\;ret @@ -4941,6 +5034,19 @@ (set_attr "isa" "rjmp,rjmp,jmp") (set_attr "cc" "none,none,clobber")]) +(define_insn "*tablejump.3byte-pc" + [(set (pc) + (unspec:HI [(reg:HI REG_Z)] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 0 "" ""))) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI 24))] + "AVR_HAVE_EIJMP_EICALL" + "clr r24\;subi r30,pm_lo8(-(%0))\;sbci r31,pm_hi8(-(%0))\;sbci r24,pm_hh8(-(%0))\;jmp __tablejump2__" + [(set_attr "length" "6") + (set_attr "isa" "eijmp") + (set_attr "cc" "clobber")]) + (define_expand "casesi" [(parallel [(set (match_dup 6) @@ -4958,15 +5064,31 @@ (label_ref (match_operand 4 "" "")) (pc))) - (set (match_dup 6) - (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + (set (match_dup 10) + (match_dup 7)) - (parallel 
[(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP)) + (parallel [(set (pc) + (unspec:HI [(match_dup 10)] UNSPEC_INDEX_JMP)) (use (label_ref (match_dup 3))) - (clobber (match_dup 6))])] + (clobber (match_dup 10)) + (clobber (match_dup 8))])] "" { operands[6] = gen_reg_rtx (HImode); + + if (AVR_HAVE_EIJMP_EICALL) + { + operands[7] = operands[6]; + operands[8] = all_regs_rtx[24]; + operands[10] = gen_rtx_REG (HImode, REG_Z); + } + else + { + operands[7] = gen_rtx_PLUS (HImode, operands[6], + gen_rtx_LABEL_REF (VOIDmode, operands[3])); + operands[8] = const0_rtx; + operands[10] = operands[6]; + } }) @@ -6034,6 +6156,7 @@ emit_insn (gen_fmul_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmul_insn" @@ -6077,6 +6200,7 @@ emit_insn (gen_fmuls_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmuls_insn" @@ -6120,6 +6244,7 @@ emit_insn (gen_fmulsu_insn (operand0, operand1, operand2)); DONE; } + avr_fix_inputs (operands, 1 << 2, regmask (QImode, 24)); }) (define_insn "fmulsu_insn" diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c index 892ba3547..7fe4b1f2e 100644 --- a/gcc-4.9/gcc/config/darwin-c.c +++ b/gcc-4.9/gcc/config/darwin-c.c @@ -571,21 +571,34 @@ find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) } /* Return the value of darwin_macosx_version_min suitable for the - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, - so '10.4.2' becomes 1040. The lowest digit is always zero. + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, so '10.4.2' + becomes 1040 and '10.10.0' becomes 101000. The lowest digit is + always zero, as is the second lowest for '10.10.x' and above. Print a warning if the version number can't be understood. */ static const char * version_as_macro (void) { - static char result[] = "1000"; + static char result[7] = "1000"; + int minorDigitIdx; if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) goto fail; if (! ISDIGIT (darwin_macosx_version_min[3])) goto fail; - result[2] = darwin_macosx_version_min[3]; - if (darwin_macosx_version_min[4] != '\0' - && darwin_macosx_version_min[4] != '.') + + minorDigitIdx = 3; + result[2] = darwin_macosx_version_min[minorDigitIdx++]; + if (ISDIGIT (darwin_macosx_version_min[minorDigitIdx])) + { + /* Starting with OS X 10.10, the macro ends '00' rather than '0', + i.e. 10.10.x becomes 101000 rather than 10100. */ + result[3] = darwin_macosx_version_min[minorDigitIdx++]; + result[4] = '0'; + result[5] = '0'; + result[6] = '\0'; + } + if (darwin_macosx_version_min[minorDigitIdx] != '\0' + && darwin_macosx_version_min[minorDigitIdx] != '.') goto fail; return result; diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c index 8b6ae9391..541e10bc0 100644 --- a/gcc-4.9/gcc/config/darwin-driver.c +++ b/gcc-4.9/gcc/config/darwin-driver.c @@ -29,8 +29,8 @@ along with GCC; see the file COPYING3. If not see #include <sys/sysctl.h> #include "xregex.h" -static bool -darwin_find_version_from_kernel (char *new_flag) +static char * +darwin_find_version_from_kernel (void) { char osversion[32]; size_t osversion_len = sizeof (osversion) - 1; @@ -39,6 +39,7 @@ darwin_find_version_from_kernel (char *new_flag) char minor_vers[6]; char * version_p; char * version_pend; + char * new_flag; /* Determine the version of the running OS. If we can't, warn user, and do nothing. 
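   A worked example of the mapping implemented below (illustrative
   values): kern.osversion "14.3.0" parses as major_vers = 14 and
   minor_vers = "3"; since Darwin release N corresponds to OS X
   10.(N - 4), the computed flag is "10.10.3".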
*/ @@ -46,7 +47,7 @@ darwin_find_version_from_kernel (char *new_flag) &osversion_len, NULL, 0) == -1) { warning (0, "sysctl for kern.osversion failed: %m"); - return false; + return NULL; } /* Try to parse the first two parts of the OS version number. Warn @@ -57,8 +58,6 @@ darwin_find_version_from_kernel (char *new_flag) version_p = osversion + 1; if (ISDIGIT (*version_p)) major_vers = major_vers * 10 + (*version_p++ - '0'); - if (major_vers > 4 + 9) - goto parse_failed; if (*version_p++ != '.') goto parse_failed; version_pend = strchr(version_p, '.'); @@ -74,17 +73,16 @@ darwin_find_version_from_kernel (char *new_flag) if (major_vers - 4 <= 4) /* On 10.4 and earlier, the old linker is used which does not support three-component system versions. */ - sprintf (new_flag, "10.%d", major_vers - 4); + asprintf (&new_flag, "10.%d", major_vers - 4); else - sprintf (new_flag, "10.%d.%s", major_vers - 4, - minor_vers); + asprintf (&new_flag, "10.%d.%s", major_vers - 4, minor_vers); - return true; + return new_flag; parse_failed: warning (0, "couldn%'t understand kern.osversion %q.*s", (int) osversion_len, osversion); - return false; + return NULL; } #endif @@ -105,7 +103,7 @@ darwin_default_min_version (unsigned int *decoded_options_count, const unsigned int argc = *decoded_options_count; struct cl_decoded_option *const argv = *decoded_options; unsigned int i; - static char new_flag[sizeof ("10.0.0") + 6]; + const char *new_flag; /* If the command-line is empty, just return. */ if (argc <= 1) @@ -142,16 +140,16 @@ darwin_default_min_version (unsigned int *decoded_options_count, #ifndef CROSS_DIRECTORY_STRUCTURE - /* Try to find the version from the kernel, if we fail - we print a message - and give up. */ - if (!darwin_find_version_from_kernel (new_flag)) - return; + /* Try to find the version from the kernel, if we fail - we print a message + and give up. */ + new_flag = darwin_find_version_from_kernel (); + if (!new_flag) + return; #else - /* For cross-compilers, default to the target OS version. */ - - strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag)); + /* For cross-compilers, default to the target OS version. */ + new_flag = DEF_MIN_OSX_VERSION; #endif /* CROSS_DIRECTORY_STRUCTURE */ @@ -165,7 +163,6 @@ darwin_default_min_version (unsigned int *decoded_options_count, memcpy (*decoded_options + 2, argv + 1, (argc - 1) * sizeof (struct cl_decoded_option)); return; - } /* Translate -filelist and -framework options in *DECODED_OPTIONS diff --git a/gcc-4.9/gcc/config/gnu-user.h b/gcc-4.9/gcc/config/gnu-user.h index 2af0a54ed..1a9a487a8 100644 --- a/gcc-4.9/gcc/config/gnu-user.h +++ b/gcc-4.9/gcc/config/gnu-user.h @@ -118,7 +118,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Link -lasan early on the command line. For -static-libasan, don't link it for -shared link, the executable should be compiled with -static-libasan in that case, and for executable link link with --{,no-}whole-archive around - it to force everything into the executable. And similarly for -ltsan. */ + it to force everything into the executable. And similarly for -ltsan + and -llsan. */ #if defined(HAVE_LD_STATIC_DYNAMIC) #undef LIBASAN_EARLY_SPEC #define LIBASAN_EARLY_SPEC "%{!shared:libasan_preinit%O%s} " \ @@ -129,4 +130,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define LIBTSAN_EARLY_SPEC "%{static-libtsan:%{!shared:" \ LD_STATIC_OPTION " --whole-archive -ltsan --no-whole-archive " \ LD_DYNAMIC_OPTION "}}%{!static-libtsan:-ltsan}" +#undef LIBLSAN_EARLY_SPEC +#define LIBLSAN_EARLY_SPEC "%{static-liblsan:%{!shared:" \ + LD_STATIC_OPTION " --whole-archive -llsan --no-whole-archive " \ + LD_DYNAMIC_OPTION "}}%{!static-liblsan:-llsan}" #endif diff --git a/gcc-4.9/gcc/config/i386/i386.c b/gcc-4.9/gcc/config/i386/i386.c index a598b8eef..54942d520 100644 --- a/gcc-4.9/gcc/config/i386/i386.c +++ b/gcc-4.9/gcc/config/i386/i386.c @@ -2465,7 +2465,7 @@ struct ptt const int align_func; }; -/* This table must be in sync with enum processor_type in i386.h. */ +/* This table must be in sync with enum processor_type in i386.h. */ static const struct ptt processor_target_table[PROCESSOR_max] = { {"generic", &generic_cost, 16, 10, 16, 10, 16}, @@ -3257,14 +3257,14 @@ ix86_option_override_internal (bool main_args_p, | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C - | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE + | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE}, {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 - | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 - | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 + | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, @@ -3334,8 +3334,9 @@ ix86_option_override_internal (bool main_args_p, /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is on and OPTION_MASK_ABI_64 is off. We turn off OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by - -m64. */ - if (TARGET_LP64_P (opts->x_ix86_isa_flags)) + -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ + if (TARGET_LP64_P (opts->x_ix86_isa_flags) + || TARGET_16BIT_P (opts->x_ix86_isa_flags)) opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; #endif } @@ -3846,11 +3847,30 @@ ix86_option_override_internal (bool main_args_p, opts->x_target_flags |= MASK_NO_RED_ZONE; } + if (!global_options_set.x_flag_shrink_wrap_frame_pointer) + flag_shrink_wrap_frame_pointer = 1; + + /* -fshrink-wrap-frame-pointer is an optimization based on + -fno-omit-frame-pointer mode, so it is only effective when + flag_omit_frame_pointer is false. + Frame pointer shrinkwrap may increase code size, so disable + it when optimize_size is true. */ + if (flag_omit_frame_pointer + || optimize == 0 + || optimize_size) + flag_shrink_wrap_frame_pointer = 0; + + /* If only no -mno-omit-leaf-frame-pointer is explicitly specified, + -fshrink_wrap_frame_pointer will enable omitting leaf frame + pointer by default. */ + if (flag_shrink_wrap_frame_pointer + && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags) + && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))) + opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER; + /* Keep nonleaf frame pointers. 
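      A compact summary of the defaulting logic above (editorial,
      hedged):
        -fomit-frame-pointer, -O0, or optimizing for size
            -> -fshrink-wrap-frame-pointer is forced off;
        otherwise it defaults to on, and unless the user explicitly
        passed -mno-omit-leaf-frame-pointer it also turns on
        -momit-leaf-frame-pointer.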
*/ if (opts->x_flag_omit_frame_pointer) opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; - else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) - opts->x_flag_omit_frame_pointer = 1; /* If we're doing fast math, we don't care about comparison order wrt NaNs. This lets us use a shorter comparison sequence. */ @@ -3969,7 +3989,7 @@ ix86_option_override_internal (bool main_args_p, /* For all chips supporting SSE2, -mfpmath=sse performs better than fpmath=387. The second is however default at many targets since the extra 80bit precision of temporaries is considered to be part of ABI. - Overwrite the default at least for -ffast-math. + Overwrite the default at least for -ffast-math. TODO: -mfpmath=both seems to produce same performing code with bit smaller binaries. It is however not clear if register allocation is ready for this setting. @@ -4291,7 +4311,7 @@ ix86_conditional_register_usage (void) c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3) : TARGET_64BIT ? (1 << 2) : (1 << 1)); - + CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) @@ -4840,9 +4860,9 @@ ix86_valid_target_attribute_p (tree fndecl, tree old_optimize = build_optimization_node (&global_options); - /* Get the optimization options of the current function. */ + /* Get the optimization options of the current function. */ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); - + if (!func_optimize) func_optimize = old_optimize; @@ -4850,7 +4870,7 @@ ix86_valid_target_attribute_p (tree fndecl, memset (&func_options, 0, sizeof (func_options)); init_options_struct (&func_options, NULL); lang_hooks.init_options_struct (&func_options); - + cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize)); @@ -5007,6 +5027,10 @@ ix86_in_large_data_p (tree exp) if (TREE_CODE (exp) == FUNCTION_DECL) return false; + /* Automatic variables are never large data. */ + if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp)) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); @@ -5040,8 +5064,7 @@ ATTRIBUTE_UNUSED static section * x86_64_elf_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *sname = NULL; unsigned int flags = SECTION_WRITE; @@ -5127,8 +5150,7 @@ x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) static void ATTRIBUTE_UNUSED x86_64_elf_unique_section (tree decl, int reloc) { - if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) - && ix86_in_large_data_p (decl)) + if (ix86_in_large_data_p (decl)) { const char *prefix = NULL; /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
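      (Background, hedged: with -mcmodel=medium, objects larger than the
      -mlarge-data-threshold limit are placed in the far .ldata/.lbss/
      .lrodata sections rather than .data/.bss/.rodata; automatic
      variables never live in a data section, which is why they are
      filtered out of ix86_in_large_data_p above.)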
*/ @@ -5197,7 +5219,7 @@ x86_elf_aligned_common (FILE *file, { if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) && size > (unsigned int)ix86_section_threshold) - fputs (".largecomm\t", file); + fputs ("\t.largecomm\t", file); else fputs (COMMON_ASM_OP, file); assemble_name (file, name); @@ -5976,7 +5998,18 @@ ix86_function_type_abi (const_tree fntype) if (abi == SYSV_ABI) { if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - abi = MS_ABI; + { + if (TARGET_X32) + { + static bool warned = false; + if (!warned) + { + error ("X32 does not support ms_abi attribute"); + warned = true; + } + } + abi = MS_ABI; + } } else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) abi = SYSV_ABI; @@ -6212,7 +6245,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ The midde-end can't deal with the vector types > 16 bytes. In this case, we return the original mode and warn ABI change if CUM isn't - NULL. + NULL. If INT_RETURN is true, warn ABI change if the vector mode isn't available for function return value. */ @@ -9083,20 +9116,22 @@ ix86_frame_pointer_required (void) if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) return true; - /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER - turns off the frame pointer by default. Turn it back on now if - we've not got a leaf function. */ - if (TARGET_OMIT_LEAF_FRAME_POINTER - && (!crtl->is_leaf - || ix86_current_function_calls_tls_descriptor)) - return true; - if (crtl->profile && !flag_fentry) return true; return false; } +/* Return true if the frame pointer of the function could be omitted. */ + +static bool +ix86_can_omit_leaf_frame_pointer (void) +{ + return TARGET_OMIT_LEAF_FRAME_POINTER + && (crtl->is_leaf + && !ix86_current_function_calls_tls_descriptor); +} + /* Record that the current function accesses previous call frames. */ void @@ -9569,7 +9604,7 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += UNITS_PER_WORD; /* Skip saved base pointer. */ - if (frame_pointer_needed) + if (frame_pointer_needed || frame_pointer_partially_needed) offset += UNITS_PER_WORD; frame->hfp_save_offset = offset; @@ -10890,6 +10925,26 @@ ix86_expand_prologue (void) m->fs.fp_valid = true; } } + else if (frame_pointer_partially_needed) + { + insn = emit_insn (gen_push (hard_frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + if (fpset_needed_in_prologue) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* Using sp as cfa_reg will involve more .cfi_def_cfa_offset for + pushes in prologue, so use fp as cfa_reg to reduce .eh_frame + size when possible. 
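         (Hedged illustration of the size argument: while the CFA is
         defined relative to the stack pointer, each prologue push or
         stack adjustment needs its own .cfi_def_cfa_offset record in
         .eh_frame; once the CFA is re-based on the frame pointer, the
         register/offset pair stays fixed for the rest of the function
         and no further records are needed for pushes.)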
*/ + if (!any_fp_def) + { + RTX_FRAME_RELATED_P (insn) = 1; + if (m->fs.cfa_reg == stack_pointer_rtx) + m->fs.cfa_reg = hard_frame_pointer_rtx; + m->fs.fp_offset = m->fs.sp_offset; + m->fs.fp_valid = true; + } + } + } if (!int_registers_saved) { @@ -11067,6 +11122,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11080,6 +11139,10 @@ ix86_expand_prologue (void) if (sp_is_cfa_reg) m->fs.cfa_offset += UNITS_PER_WORD; RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -UNITS_PER_WORD))); } } @@ -11231,6 +11294,34 @@ ix86_expand_prologue (void) emit_insn (gen_prologue_use (stack_pointer_rtx)); } +/* Get frame pointer setting insn based on frame state. */ +static rtx +ix86_set_fp_insn () +{ + rtx r, seq; + struct ix86_frame frame; + HOST_WIDE_INT offset; + + ix86_compute_frame_layout (&frame); + gcc_assert (frame_pointer_partially_needed); + offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset; + + if (TARGET_64BIT && (offset > 0x7fffffff)) + { + r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset)); + emit_insn (r); + r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + else + { + r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); + r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r); + } + emit_insn (r); + return r; +} + /* Emit code to restore REG using a POP insn. */ static void @@ -11415,7 +11506,11 @@ ix86_expand_epilogue (int style) || m->fs.sp_offset == frame.stack_pointer_offset); /* The FP must be valid if the frame pointer is present. */ - gcc_assert (frame_pointer_needed == m->fs.fp_valid); + if (!frame_pointer_partially_needed) + gcc_assert (frame_pointer_needed == m->fs.fp_valid); + else + gcc_assert (!(any_fp_def && m->fs.fp_valid)); + gcc_assert (!m->fs.fp_valid || m->fs.fp_offset == frame.hard_frame_pointer_offset); @@ -11619,7 +11714,7 @@ ix86_expand_epilogue (int style) /* If we used a stack pointer and haven't already got rid of it, then do so now. */ - if (m->fs.fp_valid) + if (m->fs.fp_valid || frame_pointer_partially_needed) { /* If the stack pointer is valid and pointing at the frame pointer store address, then we only need a pop. */ @@ -11627,15 +11722,20 @@ ix86_expand_epilogue (int style) ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); /* Leave results in shorter dependency chains on CPUs that are able to grok it fast. */ - else if (TARGET_USE_LEAVE - || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) - || !cfun->machine->use_fast_prologue_epilogue) + else if (m->fs.fp_valid + && (TARGET_USE_LEAVE + || optimize_function_for_size_p (cfun) + || !cfun->machine->use_fast_prologue_epilogue)) ix86_emit_leave (); else { + rtx dest, offset; + dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx; + offset = (m->fs.fp_valid) ? 
const0_rtx : + GEN_INT (m->fs.sp_offset - frame.hfp_save_offset); pro_epilogue_adjust_stack (stack_pointer_rtx, - hard_frame_pointer_rtx, - const0_rtx, style, !using_drap); + dest, + offset, style, !using_drap); ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); } } @@ -11947,7 +12047,7 @@ ix86_output_function_nops_prologue_epilogue (FILE *file, fprintf (file, "\n"); /* Switching back to text section. */ - switch_to_section (function_section (current_function_decl)); + switch_to_section (current_function_section ()); return true; } @@ -12379,7 +12479,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) addr = XEXP (addr, 0); if (CONST_INT_P (addr)) return 0; - } + } else if (GET_CODE (addr) == AND && const_32bit_mask (XEXP (addr, 1), DImode)) { @@ -12905,8 +13005,16 @@ legitimate_pic_address_disp_p (rtx disp) return true; } else if (!SYMBOL_REF_FAR_ADDR_P (op0) - && (SYMBOL_REF_LOCAL_P (op0) - || (TARGET_64BIT && ix86_pie_copyrelocs && flag_pie + && (SYMBOL_REF_LOCAL_P (op0) + || (HAVE_LD_PIE_COPYRELOC + && flag_pie + && !(SYMBOL_REF_WEAK (op0) + /* TODO:Temporary fix for weak defined symbols. Weak defined + symbols in an executable cannot be overridden even with + a non-weak symbol in a shared library. + Revert after fix is checked in here: + http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html*/ + && SYMBOL_REF_EXTERNAL_P (op0)) && !SYMBOL_REF_FUNCTION_P (op0))) && ix86_cmodel != CM_LARGE_PIC) return true; @@ -13010,7 +13118,7 @@ ix86_legitimize_reload_address (rtx x, (reg:DI 2 cx)) This RTX is rejected from ix86_legitimate_address_p due to - non-strictness of base register 97. Following this rejection, + non-strictness of base register 97. Following this rejection, reload pushes all three components into separate registers, creating invalid memory address RTX. @@ -13025,7 +13133,7 @@ ix86_legitimize_reload_address (rtx x, rtx base, index; bool something_reloaded = false; - base = XEXP (XEXP (x, 0), 1); + base = XEXP (XEXP (x, 0), 1); if (!REG_OK_FOR_BASE_STRICT_P (base)) { push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL, @@ -13929,7 +14037,7 @@ get_dllimport_decl (tree decl, bool beimport) #ifdef SUB_TARGET_RECORD_STUB SUB_TARGET_RECORD_STUB (name); #endif - } + } rtl = gen_const_mem (Pmode, rtl); set_mem_alias_set (rtl, ix86_GOT_alias_set ()); @@ -13976,7 +14084,7 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) return x; } -/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG +/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG is true if we require the result be a register. */ static rtx @@ -14749,7 +14857,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, if (mode == CCmode) suffix = "b"; else if (mode == CCCmode) - suffix = "c"; + suffix = fp ? "b" : "c"; else gcc_unreachable (); break; @@ -14772,9 +14880,9 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse, break; case GEU: if (mode == CCmode) - suffix = fp ? "nb" : "ae"; + suffix = "nb"; else if (mode == CCCmode) - suffix = "nc"; + suffix = fp ? "nb" : "nc"; else gcc_unreachable (); break; @@ -15109,7 +15217,7 @@ ix86_print_operand (FILE *file, rtx x, int code) case 2: putc ('w', file); break; - + case 4: putc ('l', file); break; @@ -16408,7 +16516,7 @@ ix86_mode_needed (int entity, rtx insn) } /* Check if a 256bit AVX register is referenced in stores. 
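   (Background, hedged: 256-bit AVX code that leaves the upper halves of
   the ymm registers dirty incurs a transition penalty when legacy SSE
   code runs next on several microarchitectures; the mode-switching
   machinery therefore tracks whether a 256-bit register may be
   referenced, using the callback below for stores, so vzeroupper is
   inserted only where needed.)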
*/ - + static void ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) { @@ -16417,7 +16525,7 @@ ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) bool *used = (bool *) data; *used = true; } - } + } /* Calculate mode of upper 128bit AVX registers after the insn. */ @@ -17463,7 +17571,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) t = gen_reg_rtx (V4SFmode); else t = op0; - + if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) emit_move_insn (t, CONST0_RTX (V4SFmode)); else @@ -18527,7 +18635,7 @@ ix86_emit_binop (enum rtx_code code, enum machine_mode mode, op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); } @@ -21852,7 +21960,7 @@ ix86_expand_vec_perm (rtx operands[]) if (TARGET_XOP) { - /* The XOP VPPERM insn supports three inputs. By ignoring the + /* The XOP VPPERM insn supports three inputs. By ignoring the one_operand_shuffle special case, we avoid creating another set of constant vectors in memory. */ one_operand_shuffle = false; @@ -23708,7 +23816,7 @@ expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, DONE_LABEL is a label after the whole copying sequence. The label is created on demand if *DONE_LABEL is NULL. MIN_SIZE is minimal size of block copied. This value gets adjusted for new - bounds after the initial copies. + bounds after the initial copies. DESTMEM/SRCMEM are memory expressions pointing to the copies block, DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether @@ -24013,7 +24121,7 @@ expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, return dst; } -/* Return true if ALG can be used in current context. +/* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool alg_usable_p (enum stringop_alg alg, bool memset) @@ -24136,7 +24244,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *noalign = alg_noalign; return alg; } - break; + else if (!any_alg_usable_p) + break; } else if (alg_usable_p (candidate, memset)) { @@ -24174,9 +24283,10 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, alg = decide_alg (count, max / 2, min_size, max_size, memset, zero_memset, dynamic_check, noalign); gcc_assert (*dynamic_check == -1); - gcc_assert (alg != libcall); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; + else + gcc_assert (alg != libcall); return alg; } return (alg_usable_p (algs->unknown_size, memset) @@ -24336,7 +24446,7 @@ promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, with specified algorithm. 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). + handled by main body (or up to size guarded by prologue guard). Misaligned move sequence @@ -24531,7 +24641,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, /* Do the cheap promotion to allow better CSE across the main loop and epilogue (ie one load of the big constant in the - front of all code. + front of all code. For now the misaligned move sequences do not have fast path without broadcasting. 
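   A worked example of the promotion (illustrative): for a word-wide
   memset of the byte 0x5a, broadcasting amounts to

       0x5a * 0x0101010101010101 == 0x5a5a5a5a5a5a5a5a

   letting the main loop issue full-word stores of one promoted value.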
*/ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) @@ -25103,13 +25213,19 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, } else { - /* Static functions and indirect calls don't need the pic register. */ + /* Static functions and indirect calls don't need the pic register. Also, + check if PLT was explicitly avoided via no-plt or "noplt" attribute, making + it an indirect call. */ if (flag_pic && (!TARGET_64BIT || (ix86_cmodel == CM_LARGE_PIC && DEFAULT_ABI != MS_ABI)) && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) + && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)) + && flag_plt + && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE + || !lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0)))))) use_reg (&use, pic_offset_table_rtx); } @@ -25173,6 +25289,31 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } +/* Return true if the function being called was marked with attribute "noplt" + or using -fno-plt and we are compiling for non-PIC and x86_64. We need to + handle the non-PIC case in the backend because there is no easy interface + for the front-end to force non-PLT calls to use the GOT. This is currently + used only with 64-bit ELF targets to call the function marked "noplt" + indirectly. */ + +static bool +ix86_nopic_noplt_attribute_p (rtx call_op) +{ + if (flag_pic || ix86_cmodel == CM_LARGE + || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF + || SYMBOL_REF_LOCAL_P (call_op)) + return false; + + tree symbol_decl = SYMBOL_REF_DECL (call_op); + + if (!flag_plt + || (symbol_decl != NULL_TREE + && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) + return true; + + return false; +} + /* Output the assembly for a call instruction. */ const char * @@ -25184,7 +25325,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) if (SIBLING_CALL_P (insn)) { - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "jmp\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "jmp\t%P0"; /* SEH epilogue detection requires the indirect branch case to include REX.W. */ @@ -25236,7 +25379,9 @@ ix86_output_call_insn (rtx insn, rtx call_op) seh_nop_p = true; } - if (direct_p) + if (direct_p && ix86_nopic_noplt_attribute_p (call_op)) + xasm = "call\t*%p0@GOTPCREL(%%rip)"; + else if (direct_p) xasm = "call\t%P0"; else xasm = "call\t%A0"; @@ -26506,7 +26651,7 @@ ix86_dependencies_evaluation_hook (rtx head, rtx tail) using topological ordering in the region. */ if (rgn == CONTAINING_RGN (e->src->index) && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) - add_dependee_for_func_arg (first_arg, e->src); + add_dependee_for_func_arg (first_arg, e->src); } } insn = first_arg; @@ -26974,7 +27119,7 @@ ix86_local_alignment (tree exp, enum machine_mode mode, other unit can not rely on the alignment. Exclude va_list type. It is the common case of local array where - we can not benefit from the alignment. + we can not benefit from the alignment. TODO: Probably one should optimize for size only when var is not escaping. 
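   (Illustrative, hedged: under this heuristic a local array such as
   "char buf[256];" in a function optimized for speed typically gets
   16-byte alignment so vectorized loops can use aligned accesses,
   while va_list objects and small scalars keep natural alignment.)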
*/ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) @@ -31443,7 +31588,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, convert_expr = build1 (CONVERT_EXPR, ptr_type_node, build_fold_addr_expr (version_decl)); result_var = create_tmp_var (ptr_type_node, NULL); - convert_stmt = gimple_build_assign (result_var, convert_expr); + convert_stmt = gimple_build_assign (result_var, convert_expr); return_stmt = gimple_build_return (result_var); if (predicate_chain == NULL_TREE) @@ -31470,7 +31615,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_seq_add_stmt (&gseq, call_cond_stmt); predicate_chain = TREE_CHAIN (predicate_chain); - + if (and_expr_var == NULL) and_expr_var = cond_var; else @@ -31511,7 +31656,7 @@ add_condition_to_bb (tree function_decl, tree version_decl, gimple_set_bb (return_stmt, bb2); bb3 = e23->dest; - make_edge (bb1, bb3, EDGE_FALSE_VALUE); + make_edge (bb1, bb3, EDGE_FALSE_VALUE); remove_edge (e23); make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); @@ -31563,7 +31708,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) P_FMA4, P_XOP, P_PROC_XOP, - P_FMA, + P_FMA, P_PROC_FMA, P_AVX2, P_PROC_AVX2 @@ -31628,11 +31773,11 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) cl_target_option_save (&cur_target, &global_options); target_node = ix86_valid_target_attribute_tree (attrs, &global_options, &global_options_set); - + gcc_assert (target_node); new_target = TREE_TARGET_OPTION (target_node); gcc_assert (new_target); - + if (new_target->arch_specified && new_target->arch > 0) { switch (new_target->arch) @@ -31701,18 +31846,18 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "bdver4"; priority = P_PROC_AVX2; break; - } - } - + } + } + cl_target_option_restore (&global_options, &cur_target); - + if (predicate_list && arg_str == NULL) { error_at (DECL_SOURCE_LOCATION (decl), "No dispatcher found for the versioning attributes"); return 0; } - + if (predicate_list) { predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; @@ -31779,7 +31924,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) *predicate_list = predicate_chain; } - return priority; + return priority; } /* This compares the priority of target features in function DECL1 @@ -31798,7 +31943,7 @@ ix86_compare_version_priority (tree decl1, tree decl2) /* V1 and V2 point to function versions with different priorities based on the target ISA. This function compares their priorities. */ - + static int feature_compare (const void *v1, const void *v2) { @@ -32111,12 +32256,12 @@ ix86_function_versions (tree fn1, tree fn2) result = true; XDELETEVEC (target1); - XDELETEVEC (target2); - + XDELETEVEC (target2); + return result; } -static tree +static tree ix86_mangle_decl_assembler_name (tree decl, tree id) { /* For function version, add the target suffix to the assembler name. */ @@ -32186,7 +32331,7 @@ make_dispatcher_decl (const tree decl) fn_type = TREE_TYPE (decl); func_type = build_function_type (TREE_TYPE (fn_type), TYPE_ARG_TYPES (fn_type)); - + func_decl = build_fn_decl (func_name, func_type); XDELETEVEC (func_name); TREE_USED (func_decl) = 1; @@ -32199,7 +32344,7 @@ make_dispatcher_decl (const tree decl) /* This will be of type IFUNCs have to be externally visible. 
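   (Background sketch with hypothetical names: a GNU ifunc dispatcher is
   conceptually

       static void *foo_resolver (void)   /* runs once, at load time */
       {
         return cpu_has_avx2 () ? (void *) foo_avx2 : (void *) foo_def;
       }
       void foo (void) __attribute__ ((ifunc ("foo_resolver")));

   where foo, foo_avx2, foo_def and cpu_has_avx2 are placeholders; the
   resolver generated below dispatches on the priorities computed from
   the target attributes instead.)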
*/ TREE_PUBLIC (func_decl) = 1; - return func_decl; + return func_decl; } #endif @@ -32236,7 +32381,7 @@ ix86_get_function_versions_dispatcher (void *decl) tree dispatch_decl = NULL; struct cgraph_function_version_info *default_version_info = NULL; - + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); node = cgraph_get_node (fn); @@ -32244,7 +32389,7 @@ ix86_get_function_versions_dispatcher (void *decl) node_v = get_cgraph_node_version (node); gcc_assert (node_v != NULL); - + if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; @@ -32409,7 +32554,7 @@ make_resolver_func (const tree default_decl, gcc_assert (dispatch_decl != NULL); /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */ - DECL_ATTRIBUTES (dispatch_decl) + DECL_ATTRIBUTES (dispatch_decl) = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); /* Create the alias for dispatch to resolver here. */ @@ -32424,7 +32569,7 @@ make_resolver_func (const tree default_decl, provide the code to dispatch the right function at run-time. NODE points to the dispatcher decl whose body will be created. */ -static tree +static tree ix86_generate_version_dispatcher_body (void *node_p) { tree resolver_decl; @@ -32476,7 +32621,7 @@ ix86_generate_version_dispatcher_body (void *node_p) } dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); - rebuild_cgraph_edges (); + rebuild_cgraph_edges (); pop_cfun (); return resolver_decl; } @@ -32587,7 +32732,7 @@ fold_builtin_cpu (tree fndecl, tree *args) M_AMDFAM15H, M_INTEL_SILVERMONT, M_AMD_BTVER1, - M_AMD_BTVER2, + M_AMD_BTVER2, M_CPU_SUBTYPE_START, M_INTEL_COREI7_NEHALEM, M_INTEL_COREI7_WESTMERE, @@ -32627,13 +32772,13 @@ fold_builtin_cpu (tree fndecl, tree *args) {"barcelona", M_AMDFAM10H_BARCELONA}, {"shanghai", M_AMDFAM10H_SHANGHAI}, {"istanbul", M_AMDFAM10H_ISTANBUL}, - {"btver1", M_AMD_BTVER1}, + {"btver1", M_AMD_BTVER1}, {"amdfam15h", M_AMDFAM15H}, {"bdver1", M_AMDFAM15H_BDVER1}, {"bdver2", M_AMDFAM15H_BDVER2}, {"bdver3", M_AMDFAM15H_BDVER3}, {"bdver4", M_AMDFAM15H_BDVER4}, - {"btver2", M_AMD_BTVER2}, + {"btver2", M_AMD_BTVER2}, }; static struct _isa_names_table @@ -35238,9 +35383,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, { /* Make it call __cpu_indicator_init in libgcc. */ tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); + type = build_function_type_list (integer_type_node, NULL_TREE); fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); + call_expr = build_call_expr (fndecl, 0); return expand_expr (call_expr, target, mode, EXPAND_NORMAL); } case IX86_BUILTIN_CPU_IS: @@ -41332,8 +41477,8 @@ ix86_encode_section_info (tree decl, rtx rtl, int first) { default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) == VAR_DECL - && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) + if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) + || TREE_CODE(decl) == STRING_CST) && ix86_in_large_data_p (decl)) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; } @@ -42957,8 +43102,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) op0 = gen_lowpart (V4DImode, d->op0); op1 = gen_lowpart (V4DImode, d->op1); rperm[0] - = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0) - || ((d->perm[nelt / 2] & (nelt / 2)) ? 
2 : 0)); + = GEN_INT ((d->perm[0] / (nelt / 2)) + | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); @@ -47277,6 +47422,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_PROFILE_BEFORE_PROLOGUE #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue +#undef TARGET_SET_FP_INSN +#define TARGET_SET_FP_INSN ix86_set_fp_insn + #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name @@ -47562,6 +47710,9 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required +#undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER +#define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer + #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE ix86_can_eliminate @@ -47601,6 +47752,8 @@ adjacent_mem_locations (rtx mem1, rtx mem2) #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \ ix86_float_exceptions_rounding_supported_p +#undef TARGET_STRICT_ALIGN +#define TARGET_STRICT_ALIGN true struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md index 39d395875..2369e4b40 100644 --- a/gcc-4.9/gcc/config/i386/i386.md +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -12159,18 +12159,52 @@ (set_attr "mode" "<MODE>")]) ;; BMI2 instructions. -(define_insn "bmi2_bzhi_<mode>3" +(define_expand "bmi2_bzhi_<mode>3" + [(parallel + [(set (match_operand:SWI48 0 "register_operand") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand") + (const_int 255)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_BMI2" + "operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);") + +(define_insn "*bmi2_bzhi_<mode>3" [(set (match_operand:SWI48 0 "register_operand" "=r") - (and:SWI48 (lshiftrt:SWI48 (const_int -1) - (match_operand:SWI48 2 "register_operand" "r")) - (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (and:SWI48 (match_operand:SWI48 2 "register_operand" "r") + (const_int 255)) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) (clobber (reg:CC FLAGS_REG))] - "TARGET_BMI2" + "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT" "bzhi\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "bitmanip") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) +(define_mode_attr k [(SI "k") (DI "q")]) +(define_insn "*bmi2_bzhi_<mode>3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (umin:SWI48 + (zero_extend:SWI48 (match_operand:QI 2 "register_operand" "r")) + (match_operand:SWI48 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT" + "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "<MODE>")]) + (define_insn "bmi2_pdep_<mode>3" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r") @@ -13518,7 +13552,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - 
"TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13527,7 +13562,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13550,7 +13586,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); @@ -13589,7 +13626,8 @@ (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" "fprem1" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) @@ -13598,7 +13636,8 @@ [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx label = gen_label_rtx (); @@ -13621,7 +13660,8 @@ [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] - "TARGET_USE_FANCY_MATH_387" + "TARGET_USE_FANCY_MATH_387 + && flag_finite_math_only" { rtx (*gen_truncxf) (rtx, rtx); diff --git a/gcc-4.9/gcc/config/i386/i386.opt b/gcc-4.9/gcc/config/i386/i386.opt index 1e00b660e..f64a9e1eb 100644 --- a/gcc-4.9/gcc/config/i386/i386.opt +++ b/gcc-4.9/gcc/config/i386/i386.opt @@ -108,10 +108,6 @@ int x_ix86_dump_tunes TargetSave int x_ix86_force_align_arg_pointer -;; -mcopyrelocs= -TargetSave -int x_ix86_copyrelocs - ;; -mforce-drap= TargetSave int x_ix86_force_drap @@ -295,10 +291,6 @@ mfancy-math-387 Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save Generate sin, cos, sqrt for FPU -mcopyrelocs -Target Report Var(ix86_pie_copyrelocs) Init(0) -Assume copy relocations support for pie builds. - mforce-drap Target Report Var(ix86_force_drap) Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack diff --git a/gcc-4.9/gcc/config/i386/linux.h b/gcc-4.9/gcc/config/i386/linux.h index 27d68b5db..bfc7746bc 100644 --- a/gcc-4.9/gcc/config/i386/linux.h +++ b/gcc-4.9/gcc/config/i386/linux.h @@ -24,18 +24,3 @@ along with GCC; see the file COPYING3. If not see #define RUNTIME_ROOT_PREFIX "" #endif #define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" - -/* These may be provided by config/linux-grtev*.h. */ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#ifndef SUBTARGET_EXTRA_SPECS_STR -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS -#else -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS \ - SUBTARGET_EXTRA_SPECS_STR -#endif diff --git a/gcc-4.9/gcc/config/i386/linux64.h b/gcc-4.9/gcc/config/i386/linux64.h index 5124a341b..b71616fea 100644 --- a/gcc-4.9/gcc/config/i386/linux64.h +++ b/gcc-4.9/gcc/config/i386/linux64.h @@ -34,12 +34,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2" #define GLIBC_DYNAMIC_LINKERX32 RUNTIME_ROOT_PREFIX "/libx32/ld-linux-x32.so.2" -/* These may be provided by config/linux-grtev*.h. 
*/ -#ifndef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS -#endif - -#undef SUBTARGET_EXTRA_SPECS -#define SUBTARGET_EXTRA_SPECS \ - LINUX_GRTE_EXTRA_SPECS - diff --git a/gcc-4.9/gcc/config/i386/mmx.md b/gcc-4.9/gcc/config/i386/mmx.md index 214acde23..a7d2a7eec 100644 --- a/gcc-4.9/gcc/config/i386/mmx.md +++ b/gcc-4.9/gcc/config/i386/mmx.md @@ -600,20 +600,25 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2sf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r") (vec_select:SF - (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o") + (match_operand:V2SF 1 "nonimmediate_operand" " 0,x,x,o,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - unpckhps\t%0, %0 + %vmovshdup\t{%1, %0|%0, %1} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # # #" - [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov") - (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")]) + [(set_attr "isa" "*,sse3,noavx,*,*,*,*") + (set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov") + (set_attr "length_immediate" "*,*,1,*,*,*,*") + (set_attr "prefix_rep" "*,1,*,*,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig") + (set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")]) (define_split [(set (match_operand:SF 0 "register_operand") @@ -1288,26 +1293,23 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "*vec_extractv2si_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,x,y,x,r") + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,x,x,y,x,r") (vec_select:SI - (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,x,0,o,o,o") + (match_operand:V2SI 1 "nonimmediate_operand" " 0,x,x,o,o,o") (parallel [(const_int 1)])))] "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ punpckhdq\t%0, %0 - punpckhdq\t%0, %0 - pshufd\t{$85, %1, %0|%0, %1, 85} - unpckhps\t%0, %0 + %vpshufd\t{$0xe5, %1, %0|%0, %1, 0xe5} + shufps\t{$0xe5, %1, %0|%0, %1, 0xe5} # # #" - [(set (attr "isa") - (if_then_else (eq_attr "alternative" "1,2") - (const_string "sse2") - (const_string "*"))) - (set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov") - (set_attr "length_immediate" "*,*,1,*,*,*,*") - (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")]) + [(set_attr "isa" "*,sse2,noavx,*,*,*") + (set_attr "type" "mmxcvt,sseshuf1,sseshuf1,mmxmov,ssemov,imov") + (set_attr "length_immediate" "*,1,1,*,*,*") + (set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig") + (set_attr "mode" "DI,TI,V4SF,SI,SI,SI")]) (define_split [(set (match_operand:SI 0 "register_operand") diff --git a/gcc-4.9/gcc/config/i386/x86-tune.def b/gcc-4.9/gcc/config/i386/x86-tune.def index ddf1d21c9..215c71f4d 100644 --- a/gcc-4.9/gcc/config/i386/x86-tune.def +++ b/gcc-4.9/gcc/config/i386/x86-tune.def @@ -97,25 +97,25 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", conditional jump instruction for 32 bit TARGET. FIXME: revisit for generic. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", - m_CORE_ALL | m_BDVER) + m_GENERIC | m_CORE_ALL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent conditional jump instruction for TARGET_64BIT. FIXME: revisit for generic. 
*/ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a subsequent conditional jump instruction when the condition jump check sign flag (SF) or overflow flag (OF). */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) + m_GENERIC | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER) /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by the conditional jump instruction. */ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", - m_SANDYBRIDGE | m_HASWELL) + m_GENERIC | m_SANDYBRIDGE | m_HASWELL) /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations during reassociation of integer computation. */ diff --git a/gcc-4.9/gcc/config/ia64/ia64.c b/gcc-4.9/gcc/config/ia64/ia64.c index 41adc4adc..4ec3e3abe 100644 --- a/gcc-4.9/gcc/config/ia64/ia64.c +++ b/gcc-4.9/gcc/config/ia64/ia64.c @@ -602,11 +602,6 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p -/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur - in an order different from the specified program order. */ -#undef TARGET_RELAXED_ORDERING -#define TARGET_RELAXED_ORDERING true - #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p #undef TARGET_LEGITIMATE_ADDRESS_P diff --git a/gcc-4.9/gcc/config/linux-grte.h b/gcc-4.9/gcc/config/linux-grte.h index 31e8a94ce..e69de29bb 100644 --- a/gcc-4.9/gcc/config/linux-grte.h +++ b/gcc-4.9/gcc/config/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -/* Overrides LIB_SPEC from gnu-user.h. */ -#undef LIB_SPEC -#define LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. 
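On the x86-tune.def hunks above: adding m_GENERIC to the FUSE_* masks lets -mtune=generic keep a flags-setting instruction adjacent to the conditional branch consuming it, so decoders that support macro-fusion can issue the pair as one macro-op. An illustrative (not compiler-generated) sequence:

/* cmpl  %esi, %edi        flags producer
   jl    .L4               kept adjacent, candidate for cmp/jcc fusion */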
*/ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/linux.c b/gcc-4.9/gcc/config/linux.c index 7c3c5a461..cdb2b5bdd 100644 --- a/gcc-4.9/gcc/config/linux.c +++ b/gcc-4.9/gcc/config/linux.c @@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "linux-protos.h" -/* Android does not support GNU indirect functions. */ - bool linux_has_ifunc_p (void) { diff --git a/gcc-4.9/gcc/config/msp430/msp430.md b/gcc-4.9/gcc/config/msp430/msp430.md index 5e890eced..3f29d6d62 100644 --- a/gcc-4.9/gcc/config/msp430/msp430.md +++ b/gcc-4.9/gcc/config/msp430/msp430.md @@ -559,7 +559,7 @@ [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (subreg:PSI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) 0))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #4, %0" + "RLAM.A #4, %0 { RRAM.A #4, %0" ) ;; Look for cases where integer/pointer conversions are suboptimal due @@ -587,7 +587,7 @@ (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 1)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #3, %0" + "RLAM.A #4, %0 { RRAM.A #3, %0" ) (define_insn "extend_and_shift2_hipsi2" [(set (match_operand:PSI 0 "nonimmediate_operand" "=r") (ashift:SI (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0")) (const_int 2)))] "TARGET_LARGE" - "RLAM #4, %0 { RRAM #2, %0" + "RLAM.A #4, %0 { RRAM.A #2, %0" ) ; Nasty - we are sign-extending a 20-bit PSI value in one register into diff --git a/gcc-4.9/gcc/config/nios2/nios2.c b/gcc-4.9/gcc/config/nios2/nios2.c index cdd2e6bc9..047b615ba 100644 --- a/gcc-4.9/gcc/config/nios2/nios2.c +++ b/gcc-4.9/gcc/config/nios2/nios2.c @@ -2135,6 +2135,18 @@ nios2_output_dwarf_dtprel (FILE *file, int size, rtx x) fprintf (file, ")"); } +/* Implement TARGET_ASM_FILE_END. */ + +static void +nios2_asm_file_end (void) +{ + /* The Nios II Linux stack is mapped non-executable by default, so add a + .note.GNU-stack section for switching to executable stacks only when + trampolines are generated. */ + if (TARGET_LINUX_ABI && trampolines_created) + file_end_indicate_exec_stack (); +} + /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */ static void nios2_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) @@ -3313,6 +3325,9 @@ nios2_merge_decl_attributes (tree olddecl, tree newdecl) #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA nios2_output_addr_const_extra +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END nios2_asm_file_end + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nios2_option_override diff --git a/gcc-4.9/gcc/config/pa/pa.c b/gcc-4.9/gcc/config/pa/pa.c index 5a7598ca7..801982068 100644 --- a/gcc-4.9/gcc/config/pa/pa.c +++ b/gcc-4.9/gcc/config/pa/pa.c @@ -3235,7 +3235,12 @@ pa_assemble_integer (rtx x, unsigned int size, int aligned_p) && aligned_p && function_label_operand (x, VOIDmode)) { - fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file); + fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file); + + /* We don't want an OPD when generating fast indirect calls. */ + if (!TARGET_FAST_INDIRECT_CALLS) + fputs ("P%", asm_out_file); + output_addr_const (asm_out_file, x); fputc ('\n', asm_out_file); return true; @@ -4203,9 +4208,12 @@ pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { last_address = extra_nop ? 
4 : 0; insn = get_last_nonnote_insn (); - last_address += INSN_ADDRESSES (INSN_UID (insn)); - if (INSN_P (insn)) - last_address += insn_default_length (insn); + if (insn) + { + last_address += INSN_ADDRESSES (INSN_UID (insn)); + if (INSN_P (insn)) + last_address += insn_default_length (insn); + } last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1) & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)); } @@ -9308,6 +9316,12 @@ pa_function_value (const_tree valtype, || TREE_CODE (valtype) == COMPLEX_TYPE || TREE_CODE (valtype) == VECTOR_TYPE) { + HOST_WIDE_INT valsize = int_size_in_bytes (valtype); + + /* Handle aggregates that fit exactly in a word or double word. */ + if ((valsize & (UNITS_PER_WORD - 1)) == 0) + return gen_rtx_REG (TYPE_MODE (valtype), 28); + if (TARGET_64BIT) { /* Aggregates with a size less than or equal to 128 bits are @@ -9316,7 +9330,7 @@ pa_function_value (const_tree valtype, memory. */ rtx loc[2]; int i, offset = 0; - int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2; + int ub = valsize <= UNITS_PER_WORD ? 1 : 2; for (i = 0; i < ub; i++) { @@ -9328,7 +9342,7 @@ pa_function_value (const_tree valtype, return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc)); } - else if (int_size_in_bytes (valtype) > UNITS_PER_WORD) + else if (valsize > UNITS_PER_WORD) { /* Aggregates 5 to 8 bytes in size are returned in general registers r28-r29 in the same manner as other non diff --git a/gcc-4.9/gcc/config/pa/pa.md b/gcc-4.9/gcc/config/pa/pa.md index a9421ac2e..43b909e35 100644 --- a/gcc-4.9/gcc/config/pa/pa.md +++ b/gcc-4.9/gcc/config/pa/pa.md @@ -123,7 +123,7 @@ ;; type "binary" insns have two input operands (1,2) and one output (0) (define_attr "type" - "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload" + "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,sh_func_adrs,parallel_branch,fpstore_load,store_fpload,trap" (const_string "binary")) (define_attr "pa_combine_type" @@ -166,7 +166,7 @@ ;; For conditional branches. Frame related instructions are not allowed ;; because they confuse the unwind support. (define_attr "in_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -175,7 +175,7 @@ ;; Disallow instructions which use the FPU since they will tie up the FPU ;; even if the instruction is nullified. 
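On the pa_function_value hunk above: aggregates whose size is an exact multiple of the word size are now returned directly in %r28 in the aggregate's own mode, bypassing the padded aggregate-return paths. A sketch for 32-bit PA (the type is illustrative):

struct pair { short lo, hi; };  /* 4 bytes: exactly one 32-bit word */

struct pair
make_pair (short lo, short hi)
{
  /* With the hunk above the value comes back in %r28 (SImode) rather
     than via the general aggregate-return path.  */
  struct pair p = { lo, hi };
  return p;
}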
(define_attr "in_nullified_branch_delay" "false,true" - (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch") + (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -184,7 +184,7 @@ ;; For calls and millicode calls. Allow unconditional branches in the ;; delay slot. (define_attr "in_call_delay" "false,true" - (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch") + (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,sibcall,dyncall,multi,milli,sh_func_adrs,parallel_branch,trap") (eq_attr "length" "4") (not (match_test "RTX_FRAME_RELATED_P (insn)"))) (const_string "true") @@ -5331,6 +5331,15 @@ [(set_attr "type" "binary,binary") (set_attr "length" "4,4")]) +;; Trap instructions. + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "{addit|addi,tc},<> 1,%%r0,%%r0" + [(set_attr "type" "trap") + (set_attr "length" "4")]) + ;; Clobbering a "register_operand" instead of a match_scratch ;; in operand3 of millicode calls avoids spilling %r1 and ;; produces better code. @@ -8926,14 +8935,14 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" ;; strength reduction is used. It is actually created when the instruction ;; combination phase combines the special loop test. Since this insn ;; is both a jump insn and has an output, it must deal with its own -;; reloads, hence the `m' constraints. The `!' constraints direct reload +;; reloads, hence the `Q' constraints. The `!' constraints direct reload ;; to not choose the register alternatives in the event a reload is needed. (define_insn "decrement_and_branch_until_zero" [(set (pc) (if_then_else (match_operator 2 "comparison_operator" [(plus:SI - (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*m") + (match_operand:SI 0 "reg_before_reload_operand" "+!r,!*f,*Q") (match_operand:SI 1 "int5_operand" "L,L,L")) (const_int 0)]) (label_ref (match_operand 3 "" "")) @@ -9022,7 +9031,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (label_ref (match_operand 3 "" "")) (pc))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 0); " @@ -9094,7 +9103,7 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" [(match_operand:SI 1 "register_operand" "r,r,r,r") (const_int 0)]) (pc) (label_ref (match_operand 3 "" "")))) - (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*m,!*q") + (set (match_operand:SI 0 "reg_before_reload_operand" "=!r,!*f,*Q,!*q") (match_dup 1))] "" "* return pa_output_movb (operands, insn, which_alternative, 1); " diff --git a/gcc-4.9/gcc/config/pa/predicates.md b/gcc-4.9/gcc/config/pa/predicates.md index 8dcfce0e9..405cf7f63 100644 --- a/gcc-4.9/gcc/config/pa/predicates.md +++ b/gcc-4.9/gcc/config/pa/predicates.md @@ -528,20 +528,29 @@ ;; This predicate is used for branch patterns that internally handle ;; register reloading. 
We need to accept non-symbolic memory operands ;; after reload to ensure that the pattern is still valid if reload -;; didn't find a hard register for the operand. +;; didn't find a hard register for the operand. We also reject index +;; and lo_sum DLT address as these are invalid for move destinations. (define_predicate "reg_before_reload_operand" (match_code "reg,mem") { + rtx op0; + if (register_operand (op, mode)) return true; - if (reload_completed - && memory_operand (op, mode) - && !symbolic_memory_operand (op, mode)) - return true; + if (!reload_in_progress && !reload_completed) + return false; - return false; + if (! MEM_P (op)) + return false; + + op0 = XEXP (op, 0); + + return (memory_address_p (mode, op0) + && !IS_INDEX_ADDR_P (op0) + && !IS_LO_SUM_DLT_ADDR_P (op0) + && !symbolic_memory_operand (op, mode)); }) ;; True iff OP is a register or const_0 operand for MODE. diff --git a/gcc-4.9/gcc/config/rs6000/altivec.h b/gcc-4.9/gcc/config/rs6000/altivec.h index 129cf6fa1..9ee0ae5ec 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.h +++ b/gcc-4.9/gcc/config/rs6000/altivec.h @@ -124,6 +124,7 @@ #define vec_vcfux __builtin_vec_vcfux #define vec_cts __builtin_vec_cts #define vec_ctu __builtin_vec_ctu +#define vec_cpsgn __builtin_vec_copysign #define vec_expte __builtin_vec_expte #define vec_floor __builtin_vec_floor #define vec_loge __builtin_vec_loge @@ -214,8 +215,10 @@ #define vec_lvsl __builtin_vec_lvsl #define vec_lvsr __builtin_vec_lvsr #define vec_max __builtin_vec_max +#define vec_mergee __builtin_vec_vmrgew #define vec_mergeh __builtin_vec_mergeh #define vec_mergel __builtin_vec_mergel +#define vec_mergeo __builtin_vec_vmrgow #define vec_min __builtin_vec_min #define vec_mladd __builtin_vec_mladd #define vec_msum __builtin_vec_msum @@ -319,6 +322,8 @@ #define vec_sqrt __builtin_vec_sqrt #define vec_vsx_ld __builtin_vec_vsx_ld #define vec_vsx_st __builtin_vec_vsx_st +#define vec_xl __builtin_vec_vsx_ld +#define vec_xst __builtin_vec_vsx_st /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions instead of __builtin_vec_<xxx> */ @@ -336,6 +341,7 @@ #define vec_vadduqm __builtin_vec_vadduqm #define vec_vbpermq __builtin_vec_vbpermq #define vec_vclz __builtin_vec_vclz +#define vec_cntlz __builtin_vec_vclz #define vec_vclzb __builtin_vec_vclzb #define vec_vclzd __builtin_vec_vclzd #define vec_vclzh __builtin_vec_vclzh diff --git a/gcc-4.9/gcc/config/rs6000/altivec.md b/gcc-4.9/gcc/config/rs6000/altivec.md index a8cfcb739..02ea14237 100644 --- a/gcc-4.9/gcc/config/rs6000/altivec.md +++ b/gcc-4.9/gcc/config/rs6000/altivec.md @@ -67,7 +67,7 @@ UNSPEC_VCTSXS UNSPEC_VLOGEFP UNSPEC_VEXPTEFP - UNSPEC_VLSDOI + UNSPEC_VSLDOI UNSPEC_VUNPACK_HI_SIGN UNSPEC_VUNPACK_LO_SIGN UNSPEC_VUNPACK_HI_SIGN_DIRECT @@ -2077,7 +2077,7 @@ (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:QI 3 "immediate_operand" "i")] - UNSPEC_VLSDOI))] + UNSPEC_VSLDOI))] "TARGET_ALTIVEC" "vsldoi %0,%1,%2,%3" [(set_attr "type" "vecperm")]) @@ -2297,7 +2297,31 @@ "dststt %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_lvsl" +(define_expand "altivec_lvsl" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsl_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsl_direct (mask, operands[1])); + for 
(i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsl_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSL))] @@ -2305,7 +2329,31 @@ "lvsl %0,%y1" [(set_attr "type" "vecload")]) -(define_insn "altivec_lvsr" +(define_expand "altivec_lvsr" + [(use (match_operand:V16QI 0 "register_operand" "")) + (use (match_operand:V16QI 1 "memory_operand" ""))] + "TARGET_ALTIVEC" +{ + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_altivec_lvsr_direct (operands[0], operands[1])); + else + { + int i; + rtx mask, perm[16], constv, vperm; + mask = gen_reg_rtx (V16QImode); + emit_insn (gen_altivec_lvsr_direct (mask, operands[1])); + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (i); + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + vperm = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, mask, mask, constv), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], vperm)); + } + DONE; +}) + +(define_insn "altivec_lvsr_direct" [(set (match_operand:V16QI 0 "register_operand" "=v") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "Z")] UNSPEC_LVSR))] diff --git a/gcc-4.9/gcc/config/rs6000/darwin.h b/gcc-4.9/gcc/config/rs6000/darwin.h index 0329f3f62..dfd181e43 100644 --- a/gcc-4.9/gcc/config/rs6000/darwin.h +++ b/gcc-4.9/gcc/config/rs6000/darwin.h @@ -206,7 +206,11 @@ extern int darwin_emit_branch_islands; "vrsave", "vscr", \ "spe_acc", "spefscr", \ "sfp", \ - "tfhar", "tfiar", "texasr" \ + "tfhar", "tfiar", "texasr", \ + "rh0", "rh1", "rh2", "rh3", "rh4", "rh5", "rh6", "rh7", \ + "rh8", "rh9", "rh10", "rh11", "rh12", "rh13", "rh14", "rh15", \ + "rh16", "rh17", "rh18", "rh19", "rh20", "rh21", "rh22", "rh23", \ + "rh24", "rh25", "rh26", "rh27", "rh28", "rh29", "rh30", "rh31" \ } /* This outputs NAME to FILE. */ diff --git a/gcc-4.9/gcc/config/rs6000/linux-grte.h b/gcc-4.9/gcc/config/rs6000/linux-grte.h index 53997f027..e69de29bb 100644 --- a/gcc-4.9/gcc/config/rs6000/linux-grte.h +++ b/gcc-4.9/gcc/config/rs6000/linux-grte.h @@ -1,41 +0,0 @@ -/* Definitions for Linux-based GRTE (Google RunTime Environment). - Copyright (C) 2009,2010,2011,2012 Free Software Foundation, Inc. - Contributed by Chris Demetriou and Ollie Wild. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -/* Overrides LIB_LINUX_SPEC from sysv4.h. 
*/ -#undef LIB_LINUX_SPEC -#define LIB_LINUX_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:%(libc_p)}%{!profile:%(libc)}}" - -/* When GRTE links statically, it needs its NSS and resolver libraries - linked in as well. Note that when linking statically, these are - enclosed in a group by LINK_GCC_C_SEQUENCE_SPEC. */ -#undef LINUX_GRTE_EXTRA_SPECS -#define LINUX_GRTE_EXTRA_SPECS \ - { "libc", "%{static:%(libc_static);:-lc}" }, \ - { "libc_p", "%{static:%(libc_p_static);:-lc_p}" }, \ - { "libc_static", "-lc -lresolv" }, \ - { "libc_p_static", "-lc_p -lresolv_p" }, diff --git a/gcc-4.9/gcc/config/rs6000/predicates.md b/gcc-4.9/gcc/config/rs6000/predicates.md index 8c384b380..2f4046215 100644 --- a/gcc-4.9/gcc/config/rs6000/predicates.md +++ b/gcc-4.9/gcc/config/rs6000/predicates.md @@ -1783,7 +1783,7 @@ (define_predicate "fusion_gpr_mem_load" (match_code "mem,sign_extend,zero_extend") { - rtx addr; + rtx addr, base, offset; /* Handle sign/zero extend. */ if (GET_CODE (op) == ZERO_EXTEND @@ -1813,24 +1813,79 @@ } addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); - return (base_reg_operand (base, GET_MODE (base)) - && satisfies_constraint_I (offset)); + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); } - else if (GET_CODE (addr) == LO_SUM) + return 0; +}) + +;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the +;; memory field with both the addis and the memory offset. Sign extension +;; is not handled here, since lha and lwa are not fused. +(define_predicate "fusion_gpr_mem_combo" + (match_code "mem,zero_extend") +{ + rtx addr, base, offset; + + /* Handle zero extend. 
*/ + if (GET_CODE (op) == ZERO_EXTEND) { - rtx base = XEXP (addr, 0); - rtx offset = XEXP (addr, 1); + op = XEXP (op, 0); + mode = GET_MODE (op); + } + + if (!MEM_P (op)) + return 0; - if (!base_reg_operand (base, GET_MODE (base))) + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) return 0; + break; - else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return 0; + + base = XEXP (addr, 0); + if (!fusion_gpr_addis (base, GET_MODE (base))) + return 0; + + offset = XEXP (addr, 1); + if (GET_CODE (addr) == PLUS) + return satisfies_constraint_I (offset); + + else if (GET_CODE (addr) == LO_SUM) + { + if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) return small_toc_ref (offset, GET_MODE (offset)); else if (TARGET_ELF && !TARGET_POWERPC64) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def index 220d1e970..9bb870394 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc-4.9/gcc/config/rs6000/rs6000-builtin.def @@ -1258,6 +1258,16 @@ BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df) BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di) BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vsx_mergeh_v2df) BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di) +BU_VSX_2 (XXSPLTD_V2DF, "xxspltd_2df", CONST, vsx_xxspltd_v2df) +BU_VSX_2 (XXSPLTD_V2DI, "xxspltd_2di", CONST, vsx_xxspltd_v2di) +BU_VSX_2 (DIV_V2DI, "div_2di", CONST, vsx_div_v2di) +BU_VSX_2 (UDIV_V2DI, "udiv_2di", CONST, vsx_udiv_v2di) +BU_VSX_2 (MUL_V2DI, "mul_2di", CONST, vsx_mul_v2di) + +BU_VSX_2 (XVCVSXDDP_SCALE, "xvcvsxddp_scale", CONST, vsx_xvcvsxddp_scale) +BU_VSX_2 (XVCVUXDDP_SCALE, "xvcvuxddp_scale", CONST, vsx_xvcvuxddp_scale) +BU_VSX_2 (XVCVDPSXDS_SCALE, "xvcvdpsxds_scale", CONST, vsx_xvcvdpsxds_scale) +BU_VSX_2 (XVCVDPUXDS_SCALE, "xvcvdpuxds_scale", CONST, vsx_xvcvdpuxds_scale) /* VSX abs builtin functions. 
*/ BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-c.c b/gcc-4.9/gcc/config/rs6000/rs6000-c.c index 46c4a9d8c..8dedeec26 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-c.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000-c.c @@ -597,6 +597,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_ROUND, VSX_BUILTIN_XVRDPI, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF, @@ -877,6 +879,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_AND, ALTIVEC_BUILTIN_VAND, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, @@ -931,6 +945,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_ANDC, ALTIVEC_BUILTIN_VANDC, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1118,18 +1144,30 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTF, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVSXDDP_SCALE, + RS6000_BTI_V2DF, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0}, + { ALTIVEC_BUILTIN_VEC_CTF, VSX_BUILTIN_XVCVUXDDP_SCALE, + RS6000_BTI_V2DF, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0}, { ALTIVEC_BUILTIN_VEC_VCFSX, ALTIVEC_BUILTIN_VCFSX, RS6000_BTI_V4SF, RS6000_BTI_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VCFUX, ALTIVEC_BUILTIN_VCFUX, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTS, ALTIVEC_BUILTIN_VCTSXS, RS6000_BTI_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTS, VSX_BUILTIN_XVCVDPSXDS_SCALE, + RS6000_BTI_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_CTU, ALTIVEC_BUILTIN_VCTUXS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_CTU, VSX_BUILTIN_XVCVDPUXDS_SCALE, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVSP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_DIV_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX_V2DI, @@ -1595,6 +1633,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEH, VSX_BUILTIN_VEC_MERGEH_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGHW, ALTIVEC_BUILTIN_VMRGHW, @@ -1643,6 +1691,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_MERGEL, VSX_BUILTIN_VEC_MERGEL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VMRGLW, ALTIVEC_BUILTIN_VMRGLW, 
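The overload-table entries being added here widen several vec_* interfaces to 64-bit element types. Under -mvsx, and assuming a compiler carrying these patches, the following sketch now type-checks:

#include <altivec.h>

vector double
fixed_to_double (vector signed long long v)
{
  return vec_ctf (v, 5);  /* XVCVSXDDP_SCALE: convert, then scale by 2**-5 */
}

vector unsigned long long
div64 (vector unsigned long long a, vector unsigned long long b)
{
  return vec_div (a, b);  /* resolves to VSX_BUILTIN_UDIV_V2DI */
}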
@@ -1771,6 +1829,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_XVMULDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_MUL, VSX_BUILTIN_MUL_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, @@ -1812,6 +1874,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, @@ -1842,6 +1916,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, @@ -1945,6 +2031,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { 
ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS, @@ -2127,6 +2215,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_SPLAT, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, 0 }, + { ALTIVEC_BUILTIN_VEC_SPLAT, VSX_BUILTIN_XXSPLTD_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_INTSI, 0 }, { ALTIVEC_BUILTIN_VEC_VSPLTW, ALTIVEC_BUILTIN_VSPLTW, @@ -2519,6 +2615,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 }, @@ -2778,6 +2886,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, @@ -2818,6 +2928,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, @@ -3267,6 +3383,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, @@ -3321,6 +3439,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, @@ -3431,6 +3551,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTSW_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTUD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_VCMPGT_P, P8V_BUILTIN_VCMPGTSD_P, + RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTFP_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_VCMPGT_P, VSX_BUILTIN_XVCMPGTDP_P, @@ -3889,12 +4021,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, + RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, @@ -4128,7 +4264,8 @@ altivec_build_resolved_builtin (tree *args, int n, argument) is reversed. Patch the arguments here before building the resolved CALL_EXPR. 
*/ if (desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P - && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P) + && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P + && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P) { tree t; t = args[2], args[2] = args[1], args[1] = t; @@ -4186,6 +4323,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n", (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + + /* vec_lvsl and vec_lvsr are deprecated for use with LE element order. */ + if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsl is deprecated for little endian; use \ +assignment for unaligned loads and stores"); + else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !VECTOR_ELT_ORDER_BIG) + warning (OPT_Wdeprecated, "vec_lvsr is deprecated for little endian; use \ +assignment for unaligned loads and stores"); /* For now treat vec_splats and vec_promote as the same. */ if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS diff --git a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h index 067a74aa6..aa8e76249 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000-protos.h +++ b/gcc-4.9/gcc/config/rs6000/rs6000-protos.h @@ -65,6 +65,7 @@ extern void altivec_expand_stvx_be (rtx, rtx, enum machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, enum machine_mode, unsigned); extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); +extern void rs6000_scale_v2df (rtx, rtx, int); extern void build_mask64_2_operands (rtx, rtx *); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); @@ -79,9 +80,9 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); -extern bool fusion_gpr_load_p (rtx *, bool); +extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx); extern void expand_fusion_gpr_load (rtx *); -extern const char *emit_fusion_gpr_load (rtx *); +extern const char *emit_fusion_gpr_load (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc-4.9/gcc/config/rs6000/rs6000.c b/gcc-4.9/gcc/config/rs6000/rs6000.c index 28ccf86df..730e6c8a6 100644 --- a/gcc-4.9/gcc/config/rs6000/rs6000.c +++ b/gcc-4.9/gcc/config/rs6000/rs6000.c @@ -79,6 +79,9 @@ #include "dumpfile.h" #include "cgraph.h" #include "target-globals.h" +#include "real.h" +#include "context.h" +#include "tree-pass.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif @@ -1171,6 +1174,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type, enum machine_mode, secondary_reload_info *, bool); +rtl_opt_pass *make_pass_analyze_swaps (gcc::context*); /* Hash table stuff for keeping track of TOC entries. */ @@ -1541,17 +1545,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail #endif -/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors - The PowerPC architecture requires only weak consistency among - processors--that is, memory accesses between processors need not be - sequentially consistent and memory accesses among processors can occur - in any order. 
-   opportunities for more efficient use of the system bus.  Unless a
-   dependency exists, the 604e allows read operations to precede store
-   operations.  */
-#undef TARGET_RELAXED_ORDERING
-#define TARGET_RELAXED_ORDERING true
-
 #ifdef HAVE_AS_TLS
 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
@@ -4084,6 +4077,15 @@ static void
 rs6000_option_override (void)
 {
   (void) rs6000_option_override_internal (true);
+
+  /* Register machine-specific passes.  This needs to be done at start-up.
+     It's convenient to do it here (like i386 does).  */
+  opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
+
+  static struct register_pass_info analyze_swaps_info
+    = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
+
+  register_pass (&analyze_swaps_info);
 }
 
@@ -6896,24 +6898,6 @@ rs6000_delegitimize_address (rtx orig_x)
 
   if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
     {
-#ifdef ENABLE_CHECKING
-      if (REG_P (XVECEXP (y, 0, 1))
-	  && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
-	{
-	  /* All good.  */
-	}
-      else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
-	{
-	  /* Weirdness alert.  df_note_compute can replace r2 with a
-	     debug_expr when this unspec is in a debug_insn.
-	     Seen in gcc.dg/pr51957-1.c  */
-	}
-      else
-	{
-	  debug_rtx (orig_x);
-	  abort ();
-	}
-#endif
       y = XVECEXP (y, 0, 0);
 
 #ifdef HAVE_AS_TLS
@@ -13842,8 +13826,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
     case ALTIVEC_BUILTIN_MASK_FOR_STORE:
       {
-	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
-		     : (int) CODE_FOR_altivec_lvsl);
+	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+		     : (int) CODE_FOR_altivec_lvsl_direct);
	enum machine_mode tmode = insn_data[icode].operand[0].mode;
	enum machine_mode mode = insn_data[icode].operand[1].mode;
	tree arg;
@@ -13871,7 +13855,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	  target = gen_reg_rtx (tmode);
 
-	/*pat = gen_altivec_lvsr (target, op);*/
	pat = GEN_FCN (icode) (target, op);
	if (!pat)
	  return 0;
@@ -16654,10 +16637,13 @@ rs6000_secondary_reload (bool in_p,
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
+	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
-		sri->icode = CODE_FOR_reload_di_load;
+		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
+			      : CODE_FOR_reload_di_load);
	      else
-		sri->icode = CODE_FOR_reload_di_store;
+		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
+			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	    }
@@ -30923,6 +30909,23 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
   rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
 }
 
+/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
+void
+rs6000_scale_v2df (rtx tgt, rtx src, int scale)
+{
+  HOST_WIDE_INT hwi_scale (scale);
+  REAL_VALUE_TYPE r_pow;
+  rtvec v = rtvec_alloc (2);
+  rtx elt;
+  rtx scale_vec = gen_reg_rtx (V2DFmode);
+  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
+  elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
+  RTVEC_ELT (v, 0) = elt;
+  RTVEC_ELT (v, 1) = elt;
+  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
+  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
+}
+
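The new rs6000_scale_v2df helper builds a constant V2DF vector holding 2**scale (via real_powi) and multiplies SRC by it. A scalar model of the semantics may help (our own sketch, not part of the patch):

    #include <math.h>

    /* Scalar model of rs6000_scale_v2df (tgt, src, scale): both
       doubleword elements are multiplied by 2**scale, the value that
       real_powi places in the constant vector.  */
    static void
    scale_v2df_model (double tgt[2], const double src[2], int scale)
    {
      double factor = ldexp (1.0, scale);   /* 2**scale */
      tgt[0] = src[0] * factor;
      tgt[1] = src[1] * factor;
    }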
 /* Return an RTX representing where to find the function value of a
    function returning MODE.  */
 static rtx
@@ -32551,6 +32554,14 @@ rs6000_split_logical_inner (rtx dest,
   if (complement_op2_p)
     op2 = gen_rtx_NOT (mode, op2);
 
+  /* For canonical RTL, if only one arm is inverted it is the first.  */
+  if (!complement_op1_p && complement_op2_p)
+    {
+      rtx temp = op1;
+      op1 = op2;
+      op2 = temp;
+    }
+
   bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));
@@ -32755,25 +32766,14 @@
 /* Return true if the peephole2 can combine a load involving a combination of an
    addis instruction and a load with an offset that can be fused together on
-   a power8.
-
-   The operands are:
-	operands[0]	register set with addis
-	operands[1]	value set via addis
-	operands[2]	target register being loaded
-	operands[3]	D-form memory reference using operands[0].
-
-   In addition, we are passed a boolean that is true if this is a peephole2,
-   and we can use see if the addis_reg is dead after the insn and can be
-   replaced by the target register.  */
+   a power8.  */
 
 bool
-fusion_gpr_load_p (rtx *operands, bool peep2_p)
+fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
+		   rtx addis_value,	/* addis value.  */
+		   rtx target,		/* target register that is loaded.  */
+		   rtx mem)		/* bottom part of the memory addr.  */
 {
-  rtx addis_reg = operands[0];
-  rtx addis_value = operands[1];
-  rtx target = operands[2];
-  rtx mem = operands[3];
   rtx addr;
   rtx base_reg;
 
@@ -32787,9 +32787,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
   if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
     return false;
 
-  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
-    return false;
-
   /* Allow sign/zero extension.  */
   if (GET_CODE (mem) == ZERO_EXTEND
       || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
@@ -32798,22 +32795,22 @@
   if (!MEM_P (mem))
     return false;
 
+  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
+    return false;
+
   addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
   if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
     return false;
 
   /* Validate that the register used to load the high value is either the
-     register being loaded, or we can safely replace its use in a peephole2.
+     register being loaded, or we can safely replace its use.
 
-     If this is a peephole2, we assume that there are 2 instructions in the
-     peephole (addis and load), so we want to check if the target register was
-     not used in the memory address and the register to hold the addis result
-     is dead after the peephole.  */
+     This function is only called from the peephole2 pass and we assume that
+     there are 2 instructions in the peephole (addis and load), so we want to
+     check if the target register was not used in the memory address and the
+     register to hold the addis result is dead after the peephole.  */
   if (REGNO (addis_reg) != REGNO (target))
     {
-      if (!peep2_p)
-	return false;
-
       if (reg_mentioned_p (target, mem))
	return false;
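For readers unfamiliar with power8 instruction fusion, the shape the peephole looks for arises from ordinary TOC-relative loads. A minimal C example that typically produces such an addis/load pair (illustrative only; the global name is hypothetical, and the exact registers and relocation syntax depend on the ABI):

    /* Compiled for powerpc64 with -mcpu=power8, this commonly expands
       to roughly
           addis r9,r2,counter@toc@ha
           lwz   r9,counter@toc@l(r9)
       i.e. an addis forming the high address bits followed by a D-form
       load into the same register, which the hardware can fuse.  */
    extern int counter;

    int
    read_counter (void)
    {
      return counter;
    }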
@@ -32854,9 +32851,6 @@ expand_fusion_gpr_load (rtx *operands)
   enum machine_mode extend_mode = target_mode;
   enum machine_mode ptr_mode = Pmode;
   enum rtx_code extend = UNKNOWN;
-  rtx addis_reg = ((ptr_mode == target_mode)
-		   ? target
-		   : simplify_subreg (ptr_mode, target, target_mode, 0));
 
   if (GET_CODE (orig_mem) == ZERO_EXTEND
       || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
@@ -32873,13 +32867,14 @@ expand_fusion_gpr_load (rtx *operands)
   gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
 
   offset = XEXP (orig_addr, 1);
-  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
-  new_mem = change_address (orig_mem, target_mode, new_addr);
+  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
+  new_mem = replace_equiv_address_nv (orig_mem, new_addr);
 
   if (extend != UNKNOWN)
     new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
 
-  emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
+  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
+			    UNSPEC_FUSION_GPR);
   emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
 
   if (extend == SIGN_EXTEND)
@@ -32898,55 +32893,40 @@
 }
 
 /* Return a string to fuse an addis instruction with a gpr load to the same
-   register that we loaded up the addis instruction.  The code is complicated,
-   so we call output_asm_insn directly, and just return "".
+   register that we loaded up the addis instruction.  The address that is used
+   is the logical address that was formed during peephole2:
+	(lo_sum (high) (low-part))
 
-   The operands are:
-	operands[0]	register set with addis (must be same reg as target).
-	operands[1]	value set via addis
-	operands[2]	target register being loaded
-	operands[3]	D-form memory reference using operands[0].  */
+   The code is complicated, so we call output_asm_insn directly, and just
+   return "".  */
 
 const char *
-emit_fusion_gpr_load (rtx *operands)
+emit_fusion_gpr_load (rtx target, rtx mem)
 {
-  rtx addis_reg = operands[0];
-  rtx addis_value = operands[1];
-  rtx target = operands[2];
-  rtx mem = operands[3];
+  rtx addis_value;
   rtx fuse_ops[10];
   rtx addr;
   rtx load_offset;
   const char *addis_str = NULL;
   const char *load_str = NULL;
-  const char *extend_insn = NULL;
   const char *mode_name = NULL;
   char insn_template[80];
   enum machine_mode mode;
   const char *comment_str = ASM_COMMENT_START;
-  bool sign_p = false;
 
-  gcc_assert (REG_P (addis_reg) && REG_P (target));
-  gcc_assert (REGNO (addis_reg) == REGNO (target));
-
-  if (*comment_str == ' ')
-    comment_str++;
-
-  /* Allow sign/zero extension.  */
   if (GET_CODE (mem) == ZERO_EXTEND)
     mem = XEXP (mem, 0);
-  else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
-    {
-      sign_p = true;
-      mem = XEXP (mem, 0);
-    }
+  gcc_assert (REG_P (target) && MEM_P (mem));
+
+  if (*comment_str == ' ')
+    comment_str++;
 
-  gcc_assert (MEM_P (mem));
   addr = XEXP (mem, 0);
   if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
     gcc_unreachable ();
 
+  addis_value = XEXP (addr, 0);
   load_offset = XEXP (addr, 1);
 
   /* Now emit the load instruction to the same register.  */
@@ -32956,29 +32936,22 @@
     case QImode:
       mode_name = "char";
       load_str = "lbz";
-      extend_insn = "extsb %0,%0";
       break;
 
     case HImode:
       mode_name = "short";
       load_str = "lhz";
-      extend_insn = "extsh %0,%0";
       break;
 
     case SImode:
       mode_name = "int";
       load_str = "lwz";
-      extend_insn = "extsw %0,%0";
       break;
 
     case DImode:
-      if (TARGET_POWERPC64)
-	{
-	  mode_name = "long";
-	  load_str = "ld";
-	}
-      else
-	gcc_unreachable ();
+      gcc_assert (TARGET_POWERPC64);
+      mode_name = "long";
+      load_str = "ld";
       break;
 
     default:
@@ -33122,17 +33095,1191 @@
   else
     fatal_insn ("Unable to generate load offset for fusion", load_offset);
 
-  /* Handle sign extension.  The peephole2 pass generates this as a separate
-     insn, but we handle it just in case it got reattached.  */
-  if (sign_p)
+  return "";
+}
+
+/* Analyze vector computations and remove unnecessary doubleword
+   swaps (xxswapdi instructions).  This pass is performed only
+   for little-endian VSX code generation.
+
+   For this specific case, loads and stores of 4x32 and 2x64 vectors
+   are inefficient.  These are implemented using the lxvd2x and
+   stxvd2x instructions, which invert the order of doublewords in
+   a vector register.  Thus the code generation inserts an xxswapdi
+   after each such load, and prior to each such store.  (For spill
+   code after register assignment, an additional xxswapdi is inserted
+   following each store in order to return a hard register to its
+   unpermuted value.)
+
+   The extra xxswapdi instructions reduce performance.  This can be
+   particularly bad for vectorized code.  The purpose of this pass
+   is to reduce the number of xxswapdi instructions required for
+   correctness.
+
+   The primary insight is that much code that operates on vectors
+   does not care about the relative order of elements in a register,
+   so long as the correct memory order is preserved.  If we have
+   a computation where all input values are provided by lxvd2x/xxswapdi
+   sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
+   and all intermediate computations are pure SIMD (independent of
+   element order), then all the xxswapdi's associated with the loads
+   and stores may be removed.
+
+   This pass uses some of the infrastructure and logical ideas from
+   the "web" pass in web.c.  We create maximal webs of computations
+   fitting the description above using union-find.  Each such web is
+   then optimized by removing its unnecessary xxswapdi instructions.
+
+   The pass is placed prior to global optimization so that we can
+   perform the optimization in the safest and simplest way possible;
+   that is, by replacing each xxswapdi insn with a register copy insn.
+   Subsequent forward propagation will remove copies where possible.
+
+   There are some operations sensitive to element order for which we
+   can still allow the operation, provided we modify those operations.
+   These include CONST_VECTORs, for which we must swap the first and
+   second halves of the constant vector; and SUBREGs, for which we
+   must adjust the byte offset to account for the swapped doublewords.
+   A remaining opportunity would be non-immediate-form splats, for
+   which we should adjust the selected lane of the input.  We should
+   also make code generation adjustments for sum-across operations,
+   since this is a common vectorizer reduction.
+
+   Because we run prior to the first split, we can see loads and stores
+   here that match *vsx_le_perm_{load,store}_<mode>.  These are vanilla
+   vector loads and stores that have not yet been split into a permuting
+   load/store and a swap.  (One way this can happen is with a builtin
+   call to vec_vsx_{ld,st}.)  We can handle these as well, but rather
+   than deleting a swap, we convert the load/store into a permuting
+   load/store (which effectively removes the swap).  */
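A small example of the kind of code this pass improves (our own illustration, assuming -mcpu=power8 -O3 on powerpc64le): each vector load below becomes lxvd2x + xxswapdi and each store xxswapdi + stxvd2x, yet vec_add is indifferent to element order, so the web containing the loop body allows every swap to be replaced by a copy:

    #include <altivec.h>

    /* Pure SIMD loop: inputs come straight from memory, results go
       straight back, and addition does not care about doubleword
       order, so no xxswapdi is actually needed.  */
    void
    vadd (vector double *restrict a, const vector double *restrict b,
          const vector double *restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = vec_add (b[i], c[i]);
    }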
+
+/* Notes on Permutes
+
+   We do not currently handle computations that contain permutes.  There
+   is a general transformation that can be performed correctly, but it
+   may introduce more expensive code than it replaces.  To handle these
+   would require a cost model to determine when to perform the
+   optimization.  This commentary records how this could be done if
+   desired.
+
+   The most general permute is something like this (example for V16QI):
+
+   (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
+                     (parallel [(const_int a0) (const_int a1)
+                                ...
+                                (const_int a14) (const_int a15)]))
+
+   where a0,...,a15 are in [0,31] and select elements from op1 and op2
+   to produce the result.
+
+   Regardless of mode, we can convert the PARALLEL to a mask of 16
+   byte-element selectors.  Let's call this M, with M[i] representing
+   the ith byte-element selector value.  Then if we swap doublewords
+   throughout the computation, we can get correct behavior by replacing
+   M with M' as follows:
+
+            { M[i+8]+8 : i < 8,  M[i+8] in [0,7]  U [16,23]
+    M'[i] = { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
+            { M[i-8]+8 : i >= 8, M[i-8] in [0,7]  U [16,23]
+            { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
+
+   This seems promising at first, since we are just replacing one mask
+   with another.  But certain masks are preferable to others.  If M
+   is a mask that matches a vmrghh pattern, for example, M' certainly
+   will not.  Instead of a single vmrghh, we would generate a load of
+   M' and a vperm.  So we would need to know how many xxswapd's we can
+   remove as a result of this transformation to determine if it's
+   profitable; and preferably the logic would need to be aware of all
+   the special preferable masks.
+
+   Another form of permute is an UNSPEC_VPERM, in which the mask is
+   already in a register.  In some cases, this mask may be a constant
+   that we can discover with ud-chains, in which case the above
+   transformation is ok.  However, the common usage here is for the
+   mask to be produced by an UNSPEC_LVSL, in which case the mask
+   cannot be known at compile time.  In such a case we would have to
+   generate several instructions to compute M' as above at run time,
+   and a cost model is needed again.  */
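To make the M -> M' rule above concrete, here is a worked instance (our own example, not part of the original commentary). The big-endian vmrghh selector is M = {0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23}; every entry lies in [0,7] U [16,23], so each applicable case adds 8, and the rule yields M' = {12 13 28 29 14 15 30 31 8 9 24 25 10 11 26 27}. As the commentary warns, M' no longer matches any single merge instruction, so the lone vmrghh would have to become a constant-mask load plus a vperm.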
+
+/* This is based on the union-find logic in web.c.  web_entry_base is
+   defined in df.h.  */
+class swap_web_entry : public web_entry_base
+{
+ public:
+  /* Pointer to the insn.  */
+  rtx insn;
+  /* Set if insn contains a mention of a vector register.  All other
+     fields are undefined if this field is unset.  */
+  unsigned int is_relevant : 1;
+  /* Set if insn is a load.  */
+  unsigned int is_load : 1;
+  /* Set if insn is a store.  */
+  unsigned int is_store : 1;
+  /* Set if insn is a doubleword swap.  This can either be a register swap
+     or a permuting load or store (test is_load and is_store for this).  */
+  unsigned int is_swap : 1;
+  /* Set if the insn has a live-in use of a parameter register.  */
+  unsigned int is_live_in : 1;
+  /* Set if the insn has a live-out def of a return register.  */
+  unsigned int is_live_out : 1;
+  /* Set if the insn contains a subreg reference of a vector register.  */
+  unsigned int contains_subreg : 1;
+  /* Set if the insn contains a 128-bit integer operand.  */
+  unsigned int is_128_int : 1;
+  /* Set if this is a call-insn.  */
+  unsigned int is_call : 1;
+  /* Set if this insn does not perform a vector operation for which
+     element order matters, or if we know how to fix it up if it does.
+     Undefined if is_swap is set.  */
+  unsigned int is_swappable : 1;
+  /* A nonzero value indicates what kind of special handling for this
+     insn is required if doublewords are swapped.  Undefined if
+     is_swappable is not set.  */
+  unsigned int special_handling : 3;
+  /* Set if the web represented by this entry cannot be optimized.  */
+  unsigned int web_not_optimizable : 1;
+  /* Set if this insn should be deleted.  */
+  unsigned int will_delete : 1;
+};
+
+enum special_handling_values {
+  SH_NONE = 0,
+  SH_CONST_VECTOR,
+  SH_SUBREG,
+  SH_NOSWAP_LD,
+  SH_NOSWAP_ST,
+  SH_EXTRACT,
+  SH_SPLAT
+};
+
+/* Union INSN with all insns containing definitions that reach USE.
+   Detect whether USE is live-in to the current function.  */
+static void
+union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
+{
+  struct df_link *link = DF_REF_CHAIN (use);
+
+  if (!link)
+    insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+  while (link)
     {
-      gcc_assert (extend_insn != NULL);
-      output_asm_insn (extend_insn, fuse_ops);
+      if (DF_REF_IS_ARTIFICIAL (link->ref))
+	insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+      if (DF_REF_INSN_INFO (link->ref))
+	{
+	  rtx def_insn = DF_REF_INSN (link->ref);
+	  (void)unionfind_union (insn_entry + INSN_UID (insn),
+				 insn_entry + INSN_UID (def_insn));
+	}
+
+      link = link->next;
     }
+}
 
-  return "";
+/* Union INSN with all insns containing uses reached from DEF.
+   Detect whether DEF is live-out from the current function.  */
+static void
+union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
+{
+  struct df_link *link = DF_REF_CHAIN (def);
+
+  if (!link)
+    insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+  while (link)
+    {
+      /* This could be an eh use or some other artificial use;
+	 we treat these all the same (killing the optimization).  */
+      if (DF_REF_IS_ARTIFICIAL (link->ref))
+	insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+      if (DF_REF_INSN_INFO (link->ref))
+	{
+	  rtx use_insn = DF_REF_INSN (link->ref);
+	  (void)unionfind_union (insn_entry + INSN_UID (insn),
+				 insn_entry + INSN_UID (use_insn));
+	}
+
+      link = link->next;
+    }
+}
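union_defs and union_uses build the maximal webs by repeatedly calling unionfind_union on the web entries of def and use insns. A stand-alone sketch of that union-find idea (simplified; the real web_entry_base in df.h works the same way modulo details such as path compression):

    /* Minimal union-find over web entries.  Each entry points at a
       predecessor; the root of the chain represents the whole web.  */
    struct entry { struct entry *pred; };

    static struct entry *
    root (struct entry *e)
    {
      while (e->pred)           /* walk up to the representative */
        e = e->pred;
      return e;
    }

    /* Merge the webs of A and B.  Returns nonzero when they were
       already one web, so the caller can tell nothing changed.  */
    static int
    merge (struct entry *a, struct entry *b)
    {
      a = root (a);
      b = root (b);
      if (a == b)
        return 1;
      b->pred = a;
      return 0;
    }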
+
+/* Return 1 iff INSN is a load insn, including permuting loads that
+   represent an lxvd2x instruction; else return 0.  */
+static unsigned int
+insn_is_load_p (rtx insn)
+{
+  rtx body = PATTERN (insn);
+
+  if (GET_CODE (body) == SET)
+    {
+      if (GET_CODE (SET_SRC (body)) == MEM)
+	return 1;
+
+      if (GET_CODE (SET_SRC (body)) == VEC_SELECT
+	  && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
+	return 1;
+
+      return 0;
+    }
+
+  if (GET_CODE (body) != PARALLEL)
+    return 0;
+
+  rtx set = XVECEXP (body, 0, 0);
+
+  if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
+    return 1;
+
+  return 0;
+}
+
+/* Return 1 iff INSN is a store insn, including permuting stores that
+   represent an stxvd2x instruction; else return 0.  */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+  rtx body = PATTERN (insn);
+  if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
+    return 1;
+  if (GET_CODE (body) != PARALLEL)
+    return 0;
+  rtx set = XVECEXP (body, 0, 0);
+  if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
+    return 1;
+  return 0;
 }
+
+/* Return 1 iff INSN swaps doublewords.  This may be a reg-reg swap,
+   a permuting load, or a permuting store.  */
+static unsigned int
+insn_is_swap_p (rtx insn)
+{
+  rtx body = PATTERN (insn);
+  if (GET_CODE (body) != SET)
+    return 0;
+  rtx rhs = SET_SRC (body);
+  if (GET_CODE (rhs) != VEC_SELECT)
+    return 0;
+  rtx parallel = XEXP (rhs, 1);
+  if (GET_CODE (parallel) != PARALLEL)
+    return 0;
+  unsigned int len = XVECLEN (parallel, 0);
+  if (len != 2 && len != 4 && len != 8 && len != 16)
+    return 0;
+  for (unsigned int i = 0; i < len / 2; ++i)
+    {
+      rtx op = XVECEXP (parallel, 0, i);
+      if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
+	return 0;
+    }
+  for (unsigned int i = len / 2; i < len; ++i)
+    {
+      rtx op = XVECEXP (parallel, 0, i);
+      if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
+	return 0;
+    }
+  return 1;
+}
+
+/* Return 1 iff OP is an operand that will not be affected by having
+   vector doublewords swapped in memory.  */
+static unsigned int
+rtx_is_swappable_p (rtx op, unsigned int *special)
+{
+  enum rtx_code code = GET_CODE (op);
+  int i, j;
+  rtx parallel;
+
+  switch (code)
+    {
+    case LABEL_REF:
+    case SYMBOL_REF:
+    case CLOBBER:
+    case REG:
+      return 1;
+
+    case VEC_CONCAT:
+    case ASM_INPUT:
+    case ASM_OPERANDS:
+      return 0;
+
+    case CONST_VECTOR:
+      {
+	*special = SH_CONST_VECTOR;
+	return 1;
+      }