diff options
Diffstat (limited to 'gcc-4.9/gcc')
814 files changed, 44529 insertions, 204924 deletions
diff --git a/gcc-4.9/gcc/BASE-VER b/gcc-4.9/gcc/BASE-VER index 86a9588ad..d81d21ceb 100644 --- a/gcc-4.9/gcc/BASE-VER +++ b/gcc-4.9/gcc/BASE-VER @@ -1 +1 @@ -4.9 +4.9.x-google diff --git a/gcc-4.9/gcc/ChangeLog b/gcc-4.9/gcc/ChangeLog index 1ccda1cd6..dd6e9ad47 100644 --- a/gcc-4.9/gcc/ChangeLog +++ b/gcc-4.9/gcc/ChangeLog @@ -1,3 +1,1072 @@ +2014-07-11 Rong Xu <xur@google.com> + + Backport r212462 from trunk. + + * gcov-tool.c (gcov_output_files): Fix build error introduced in + commit r212448. + +2014-07-10 Rong Xu <xur@google.com> + + Backport r212448 from trunk. + + Add gcov-tool: an offline gcda profile processing tool + Support. + * gcov-io.c (gcov_position): Make available to gcov-tool. + (gcov_is_error): Ditto. + (gcov_read_string): Ditto. + (gcov_read_sync): Ditto. + * gcov-io.h: Move counter defines to gcov-counter.def. + * gcov-dump.c (tag_counters): Use gcov-counter.def. + * coverage.c: Ditto. + * gcov-tool.c: Offline gcda profile processing tool. + (unlink_gcda_file): Remove one gcda file. + (unlink_profile_dir): Remove gcda files from the profile path. + (gcov_output_files): Output gcda files to an output dir. + (profile_merge): Merge two profiles in directory. + (print_merge_usage_message): Print merge usage. + (merge_usage): Print merge usage and exit. + (do_merge): Driver for profile merge sub-command. + (profile_rewrite): Rewrite profile. + (print_rewrite_usage_message): Print rewrite usage. + (rewrite_usage): Print rewrite usage and exit. + (do_rewrite): Driver for profile rewrite sub-command. + (print_usage): Print gcov-info usage and exit. + (print_version): Print gcov-info version. + (process_args): Process arguments. + (main): Main routine for gcov-tool. + * Makefile.in: Build and install gcov-tool. + * gcov-counter.def: New file split from gcov-io.h. + * doc/gcc.texi: Include gcov-tool.texi. + * doc/gcov-tool.texi: Document for gcov-tool. + +2014-07-10 Cary Coutant <ccoutant@google.com> + + Backport from trunk at r212211. 
+ + * dwarf2out.c (remove_addr_table_entry): Remove unnecessary hash table + lookup. + (resolve_addr_in_expr): When replacing the rtx in a location list + entry, get a new address table entry. + (dwarf2out_finish): Call index_location_lists even if there are no + addr_index_table entries yet. + +2014-07-10 Tom G. Christensen <tgc@jupiterrise.com> + + * doc/install.texi: Remove links to defunct package providers for + Solaris. + +2014-07-10 Eric Botcazou <ebotcazou@adacore.com> + + PR middle-end/53590 + * function.c (allocate_struct_function): Revert r188667 change. + + * gimple-low.c (lower_builtin_setjmp): Use properly-typed constant. + +2014-07-09 Alan Lawrence <alan.lawrence@arm.com> + + Backport r211369 from trunk. + 2014-06-09 Alan Lawrence <alan.lawrence@arm.com> + + PR target/61062 + * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, + vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, + vtrnq_f32, vtrnq_u8, vtrnq_u16, vtrnq_u32, vtrnq_p8, vtrnq_p16, vzip_s8, + vzip_s16, vzip_u8, vzip_u16, vzip_p8, vzip_p16, vzip_s32, vzip_f32, + vzip_u32, vzipq_s8, vzipq_s16, vzipq_s32, vzipq_f32, vzipq_u8, + vzipq_u16, vzipq_u32, vzipq_p8, vzipq_p16, vuzp_s8, vuzp_s16, vuzp_s32, + vuzp_f32, vuzp_u8, vuzp_u16, vuzp_u32, vuzp_p8, vuzp_p16, vuzpq_s8, + vuzpq_s16, vuzpq_s32, vuzpq_f32, vuzpq_u8, vuzpq_u16, vuzpq_u32, + vuzpq_p8, vuzpq_p16): Correct mask for bigendian. + + +2014-07-09 Alan Lawrence <alan.lawrence@arm.com> + + Backport r210219 from trunk. + 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> + + * config/arm/arm_neon.h: Update comment. + * config/arm/neon-docgen.ml: Delete. + * config/arm/neon-gen.ml: Delete. + * doc/arm-neon-intrinsics.texi: Update comment. + +2014-07-09 Zhenqiang Chen <zhenqiang.chen@linaro.org> + + Backport r211775 from trunk. + 2014-06-18 Terry Guo <terry.guo@arm.com> + + PR target/61544 + * config/arm/arm.c (thumb1_reorg): Move to next basic block if we + reach the head. 
+ +2014-07-08 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/61673 + * combine.c (simplify_comparison): Test just mode's sign bit + in tmode rather than the sign bit and any bits above it. + +2014-07-08 James Greenhalgh <james.greenhalgh@arm.com> + + Backport r212298 from trunk. + 2014-07-04 James Greenhalgh <james.greenhalgh@arm.com> + + * config/aarch64/aarch64-simd.md (move_lo_quad_internal_<mode>): New. + (move_lo_quad_internal_be_<mode>): Likewise. + (move_lo_quad_<mode>): Convert to define_expand. + (aarch64_simd_move_hi_quad_<mode>): Gate on BYTES_BIG_ENDIAN. + (aarch64_simd_move_hi_quad_be_<mode>): New. + (move_hi_quad_<mode>): Use appropriate insn for BYTES_BIG_ENDIAN. + (aarch64_combinez<mode>): Gate on BYTES_BIG_ENDIAN. + (aarch64_combinez_be<mode>): New. + (aarch64_combine<mode>): Convert to define_expand. + (aarch64_combine_internal<mode>): New. + (aarch64_simd_combine<mode>): Remove bogus RTL description. + +2014-07-08 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61680 + * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): + Handle properly all read-write dependences with group accesses. + + PR tree-optimization/61681 + * tree-ssa-structalias.c (find_what_var_points_to): Expand + NONLOCAL inside ESCAPED. + +2014-07-08 Alan Lawrence <alan.lawrence@arm.com> + + Backport r211502 from mainline. + 2014-06-10 Alan Lawrence <alan.lawrence@arm.com> + + PR target/59843 + * config/aarch64/aarch64-modes.def: Add V1DFmode. + * config/aarch64/aarch64.c (aarch64_vector_mode_supported_p): + Support V1DFmode. + +2014-07-08 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/61725 + * tree-vrp.c (extract_range_basic): Don't assume vr0 is unsigned + range, use range_includes_zerop_p instead of integer_zerop on + vr0->min, only use log2 of max if min is not negative. + +2014-07-06 Gerald Pfeifer <gerald@pfeifer.com> + + * doc/install.texi (Specific, aarch64*-*-*): Fix markup. Reword a bit. 
+ +2014-07-04 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/61654 + * cgraphunit.c (expand_thunk): Call free_dominance_info. + + PR tree-optimization/61684 + * tree-ssa-ifcombine.c (recognize_single_bit_test): Make sure + rhs1 of conversion is a SSA_NAME before using SSA_NAME_DEF_STMT on it. + +2014-06-30 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-06-27 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/57233 + PR tree-optimization/61299 + * tree-vect-generic.c (get_compute_type, count_type_subparts): New + functions. + (expand_vector_operations_1): Use them. If {L,R}ROTATE_EXPR + would be lowered to scalar shifts, check if corresponding + shifts and vector BIT_IOR_EXPR are supported and don't lower + or lower just to narrower vector type in that case. + * expmed.c (expand_shift_1): Fix up handling of vector + shifts and rotates. + + 2014-06-25 Jakub Jelinek <jakub@redhat.com> + + * langhooks-def.h (LANG_HOOKS_OMP_CLAUSE_LINEAR_CTOR): Define. + (LANG_HOOKS_DECLS): Add it. + * gimplify.c (gimplify_omp_for): Make sure OMP_CLAUSE_LINEAR_STEP + has correct type. + * tree.h (OMP_CLAUSE_LINEAR_ARRAY): Define. + * langhooks.h (struct lang_hooks_for_decls): Add + omp_clause_linear_ctor hook. + * omp-low.c (lower_rec_input_clauses): Set max_vf even if + OMP_CLAUSE_LINEAR_ARRAY is set. Don't fold_convert + OMP_CLAUSE_LINEAR_STEP. For OMP_CLAUSE_LINEAR_ARRAY in + combined simd loop use omp_clause_linear_ctor hook. + + 2014-06-24 Jakub Jelinek <jakub@redhat.com> + + * gimplify.c (gimplify_scan_omp_clauses) <case OMP_CLAUSE_MAP, + OMP_CLAUSE_TO, OMP_CLAUSE_FROM): Make sure OMP_CLAUSE_SIZE is + non-NULL. + <case OMP_CLAUSE_ALIGNED>: Gimplify OMP_CLAUSE_ALIGNED_ALIGNMENT. + (gimplify_adjust_omp_clauses_1): Make sure OMP_CLAUSE_SIZE is + non-NULL. + (gimplify_adjust_omp_clauses): Likewise. + * omp-low.c (lower_rec_simd_input_clauses, + lower_rec_input_clauses, expand_omp_simd): Handle non-constant + safelen the same as safelen(1). 
+ * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Handle OMP_CLAUSE_ALIGNED. For + OMP_CLAUSE_{MAP,TO,FROM} if not decl use walk_tree. + (convert_nonlocal_reference_stmt, convert_local_reference_stmt): + Fixup handling of GIMPLE_OMP_TARGET. + (convert_tramp_reference_stmt, convert_gimple_call): Handle + GIMPLE_OMP_TARGET. + + 2014-06-18 Jakub Jelinek <jakub@redhat.com> + + * gimplify.c (omp_notice_variable): If n is non-NULL + and no flags change in ORT_TARGET region, don't jump to + do_outer. + (struct gimplify_adjust_omp_clauses_data): New type. + (gimplify_adjust_omp_clauses_1): Adjust for data being + a struct gimplify_adjust_omp_clauses_data pointer instead + of tree *. Pass pre_p as a new argument to + lang_hooks.decls.omp_finish_clause hook. + (gimplify_adjust_omp_clauses): Add pre_p argument, adjust + splay_tree_foreach to pass both list_p and pre_p. + (gimplify_omp_parallel, gimplify_omp_task, gimplify_omp_for, + gimplify_omp_workshare, gimplify_omp_target_update): Adjust + gimplify_adjust_omp_clauses callers. + * langhooks.c (lhd_omp_finish_clause): New function. + * langhooks-def.h (lhd_omp_finish_clause): New prototype. + (LANG_HOOKS_OMP_FINISH_CLAUSE): Define to lhd_omp_finish_clause. + * langhooks.h (struct lang_hooks_for_decls): Add a new + gimple_seq * argument to omp_finish_clause hook. + * omp-low.c (scan_sharing_clauses): Call scan_omp_op on + non-DECL_P OMP_CLAUSE_DECL if ctx->outer. + (scan_omp_parallel, lower_omp_for): When adding + _LOOPTEMP_ clause var, add it to outer ctx's decl_map + as identity. + * tree-core.h (OMP_CLAUSE_MAP_TO_PSET): New map kind. + * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Handle various OpenMP 4.0 clauses. + * tree-pretty-print.c (dump_omp_clause): Handle + OMP_CLAUSE_MAP_TO_PSET. 
+ + 2014-06-10 Jakub Jelinek <jakub@redhat.com> + + PR fortran/60928 + * omp-low.c (lower_rec_input_clauses) <case OMP_CLAUSE_LASTPRIVATE>: + Set lastprivate_firstprivate even if omp_private_outer_ref + langhook returns true. + <case OMP_CLAUSE_REDUCTION>: When calling omp_clause_default_ctor + langhook, call unshare_expr on new_var and call + build_outer_var_ref to get the last argument. + + 2014-05-11 Jakub Jelinek <jakub@redhat.com> + + * tree.h (OMP_CLAUSE_LINEAR_STMT): Define. + * tree.c (omp_clause_num_ops): Increase OMP_CLAUSE_LINEAR + number of operands to 3. + (walk_tree_1): Walk all operands of OMP_CLAUSE_LINEAR. + * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Handle OMP_CLAUSE_DEPEND. + * gimplify.c (gimplify_scan_omp_clauses): Handle + OMP_CLAUSE_LINEAR_STMT. + * omp-low.c (lower_rec_input_clauses): Fix typo. + (maybe_add_implicit_barrier_cancel, lower_omp_1): Add + cast between Fortran boolean_type_node and C _Bool if + needed. + +2014-06-30 Jason Merrill <jason@redhat.com> + + PR c++/51253 + PR c++/61382 + * gimplify.c (gimplify_arg): Non-static. + * gimplify.h: Declare it. + +2014-06-30 Marcus Shawcroft <marcus.shawcroft@arm.com> + + Backport from Mainline + 2014-06-30 Marcus Shawcroft <marcus.shawcroft@arm.com> + + PR target/61633 + * config/aarch64/aarch64.md (*aarch64_ashr_sisd_or_int_<mode>3): + Add alternative; make early clobber. Adjust both split patterns + to use operand 0 as the working register. + +2014-06-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/aarch64/iterators.md (VCOND): Handle SI and HI modes. + Update comments. + (VCONQ): Make comment more helpful. + (VCON): Delete. + * config/aarch64/aarch64-simd.md + (aarch64_sqdmulh_lane<mode>): + Use VCOND for operands 2. Update lane checking and flipping logic. + (aarch64_sqrdmulh_lane<mode>): Likewise. + (aarch64_sq<r>dmulh_lane<mode>_internal): Likewise. + (aarch64_sqdmull2<mode>): Remove VCON, use VQ_HSI mode iterator. 
+ (aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal, VD_HSI): Change mode + attribute of operand 3 to VCOND. + (aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal, SD_HSI): Likewise. + (aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Likewise. + (aarch64_sqdmull_lane<mode>_internal, VD_HSI): Likewise. + (aarch64_sqdmull_lane<mode>_internal, SD_HSI): Likewise. + (aarch64_sqdmull2_lane<mode>_internal): Likewise. + (aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal, VD_HSI): New + define_insn. + (aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal, SD_HSI): Likewise. + (aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Likewise. + (aarch64_sqdmull_laneq<mode>_internal, VD_HSI): Likewise. + (aarch64_sqdmull_laneq<mode>_internal, SD_HSI): Likewise. + (aarch64_sqdmull2_laneq<mode>_internal): Likewise. + (aarch64_sqdmlal_lane<mode>): Change mode attribute of penultimate + operand to VCOND. Update lane flipping and bounds checking logic. + (aarch64_sqdmlal2_lane<mode>): Likewise. + (aarch64_sqdmlsl_lane<mode>): Likewise. + (aarch64_sqdmull_lane<mode>): Likewise. + (aarch64_sqdmull2_lane<mode>): Likewise. + (aarch64_sqdmlal_laneq<mode>): + Replace VCON usage with VCONQ. + Emit aarch64_sqdmlal_laneq<mode>_internal insn. + (aarch64_sqdmlal2_laneq<mode>): Emit + aarch64_sqdmlal2_laneq<mode>_internal insn. + Replace VCON with VCONQ. + (aarch64_sqdmlsl2_lane<mode>): Replace VCON with VCONQ. + (aarch64_sqdmlsl2_laneq<mode>): Likewise. + (aarch64_sqdmull_laneq<mode>): Emit + aarch64_sqdmull_laneq<mode>_internal insn. + Replace VCON with VCONQ. + (aarch64_sqdmull2_laneq<mode>): Emit + aarch64_sqdmull2_laneq<mode>_internal insn. + (aarch64_sqdmlsl_laneq<mode>): Replace VCON usage with VCONQ. + * config/aarch64/arm_neon.h (vqdmlal_high_lane_s16): Change type + of 3rd argument to int16x4_t. + (vqdmlalh_lane_s16): Likewise. + (vqdmlslh_lane_s16): Likewise. + (vqdmull_high_lane_s16): Likewise. + (vqdmullh_lane_s16): Change type of 2nd argument to int16x4_t. 
+ (vqdmlal_lane_s16): Don't create temporary int16x8_t value. + (vqdmlsl_lane_s16): Likewise. + (vqdmull_lane_s16): Don't create temporary int16x8_t value. + (vqdmlal_high_lane_s32): Change type 3rd argument to int32x2_t. + (vqdmlals_lane_s32): Likewise. + (vqdmlsls_lane_s32): Likewise. + (vqdmull_high_lane_s32): Change type 2nd argument to int32x2_t. + (vqdmulls_lane_s32): Likewise. + (vqdmlal_lane_s32): Don't create temporary int32x4_t value. + (vqdmlsl_lane_s32): Likewise. + (vqdmull_lane_s32): Don't create temporary int32x4_t value. + (vqdmulhh_lane_s16): Change type of second argument to int16x4_t. + (vqrdmulhh_lane_s16): Likewise. + (vqdmlsl_high_lane_s16): Likewise. + (vqdmulhs_lane_s32): Change type of second argument to int32x2_t. + (vqdmlsl_high_lane_s32): Likewise. + (vqrdmulhs_lane_s32): Likewise. + +2014-06-30 Thomas Preud'homme <thomas.preudhomme@arm.com> + + Backport from Mainline + 2014-06-20 Jakub Jelinek <jakub@redhat.com> + 2014-06-11 Thomas Preud'homme <thomas.preudhomme@arm.com> + + PR tree-optimization/61306 + * tree-ssa-math-opts.c (struct symbolic_number): Store type of + expression instead of its size. + (do_shift_rotate): Adapt to change in struct symbolic_number. Return + false to prevent optimization when the result is unpredictable due to + arithmetic right shift of signed type with highest byte is set. + (verify_symbolic_number_p): Adapt to change in struct symbolic_number. + (find_bswap_1): Likewise. Return NULL to prevent optimization when the + result is unpredictable due to sign extension. + (find_bswap): Adapt to change in struct symbolic_number. + +2014-06-27 Martin Jambor <mjambor@suse.cz> + + PR ipa/61160 + * cgraphclones.c (duplicate_thunk_for_node): Removed parameter + args_to_skip, use those from node instead. Copy args_to_skip and + combined_args_to_skip from node to the new thunk. + (redirect_edge_duplicating_thunks): Removed parameter args_to_skip. 
+ (cgraph_create_virtual_clone): Moved computation of + combined_args_to_skip... + (cgraph_clone_node): ...here, simplify it to bitmap_ior. + +2014-06-27 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-06-26 Uros Bizjak <ubizjak@gmail.com> + + PR target/61586 + * config/alpha/alpha.c (alpha_handle_trap_shadows): Handle BARRIER RTX. + +2014-06-26 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + PR target/61542 + * config/rs6000/vsx.md (vsx_extract_v4sf): Fix bug with element + extraction other than index 3. + +2014-06-26 Marc Glisse <marc.glisse@inria.fr> + + PR target/61503 + * config/i386/i386.md (x86_64_shrd, x86_shrd, + ix86_rotr<dwi>3_doubleword): Replace ashiftrt with lshiftrt. + +2014-06-26 Martin Jambor <mjambor@suse.cz> + + Backport from mainline + * ipa-prop.c (impossible_devirt_target): No longer static, + renamed to ipa_impossible_devirt_target. Fix typo. + * ipa-prop.h (ipa_impossible_devirt_target): Declare. + * ipa-cp.c (ipa_get_indirect_edge_target_1): Use + ipa_impossible_devirt_target. + +2014-06-25 Cong Hou <congh@google.com> + + * tree-vectorizer.h: Fixing incorrect number of patterns. + +2014-06-24 Cong Hou <congh@google.com> + + * tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD + pattern recognition. + (type_conversion_p): PROMOTION is true if it's a type promotion + conversion, and false otherwise. Return true if the given expression + is a type conversion one. + * tree-vectorizer.h: Adjust the number of patterns. + * tree.def: Add SAD_EXPR. + * optabs.def: Add sad_optab. + * cfgexpand.c (expand_debug_expr): Add SAD_EXPR case. + * expr.c (expand_expr_real_2): Likewise. + * gimple-pretty-print.c (dump_ternary_rhs): Likewise. + * gimple.c (get_gimple_rhs_num_ops): Likewise. + * optabs.c (optab_for_tree_code): Likewise. + * tree-cfg.c (estimate_operator_cost): Likewise. + * tree-ssa-operands.c (get_expr_operands): Likewise. + * tree-vect-loop.c (get_initial_def_for_reduction): Likewise. 
+ * config/i386/sse.md: Add SSE2 and AVX2 expand for SAD. + * doc/generic.texi: Add document for SAD_EXPR. + * doc/md.texi: Add document for ssad and usad. + +2014-06-24 Jakub Jelinek <jakub@redhat.com> + + PR target/61570 + * config/i386/driver-i386.c (host_detect_local_cpu): For unknown + model family 6 CPU with has_longmode never use a CPU without + 64-bit support. + + * gimplify.c (gimplify_omp_for): For #pragma omp for simd iterator + not mentioned in clauses use private clause if the iterator is + declared in #pragma omp for simd, and when adding lastprivate + instead, add it to the outer #pragma omp for too. Diagnose + if the variable is private in outer context. For simd collapse > 1 + loops, replace all iterators with temporaries. + * omp-low.c (lower_rec_input_clauses): Handle LINEAR clause the + same even in collapse > 1 loops. + +2014-06-23 Alan Modra <amodra@gmail.com> + + PR bootstrap/61583 + * tree-vrp.c (remove_range_assertions): Do not set is_unreachable + to zero on debug statements. + +2014-06-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/aarch64/aarch64-simd.md (aarch64_sqdmulh_lane<mode>): + New expander. + (aarch64_sqrdmulh_lane<mode>): Likewise. + (aarch64_sq<r>dmulh_lane<mode>): Rename to... + (aarch64_sq<r>dmulh_lane<mode>_internal): ...this. + (aarch64_sqdmulh_laneq<mode>): New expander. + (aarch64_sqrdmulh_laneq<mode>): Likewise. + (aarch64_sq<r>dmulh_laneq<mode>): Rename to... + (aarch64_sq<r>dmulh_laneq<mode>_internal): ...this. + (aarch64_sqdmulh_lane<mode>): New expander. + (aarch64_sqrdmulh_lane<mode>): Likewise. + (aarch64_sq<r>dmulh_lane<mode>): Rename to... + (aarch64_sq<r>dmulh_lane<mode>_internal): ...this. + (aarch64_sqdmlal_lane<mode>): Add lane flip for big-endian. + (aarch64_sqdmlal_laneq<mode>): Likewise. + (aarch64_sqdmlsl_lane<mode>): Likewise. + (aarch64_sqdmlsl_laneq<mode>): Likewise. + (aarch64_sqdmlal2_lane<mode>): Likewise. + (aarch64_sqdmlal2_laneq<mode>): Likewise. 
+ (aarch64_sqdmlsl2_lane<mode>): Likewise. + (aarch64_sqdmlsl2_laneq<mode>): Likewise. + (aarch64_sqdmull_lane<mode>): Likewise. + (aarch64_sqdmull_laneq<mode>): Likewise. + (aarch64_sqdmull2_lane<mode>): Likewise. + (aarch64_sqdmull2_laneq<mode>): Likewise. + +2014-06-20 Martin Jambor <mjambor@suse.cz> + + PR ipa/61540 + * ipa-prop.c (impossible_devirt_target): New function. + (try_make_edge_direct_virtual_call): Use it, also instead of + asserting. + +2014-06-20 Martin Jambor <mjambor@suse.cz> + + PR ipa/61211 + * cgraph.c (clone_of_p): Allow skipped_branch to deal with + expanded clones. + +2014-06-20 Chung-Lin Tang <cltang@codesourcery.com> + + Backport from mainline + + 2014-06-20 Julian Brown <julian@codesourcery.com> + Chung-Lin Tang <cltang@codesourcery.com> + + * config/arm/arm.c (arm_output_mi_thunk): Fix offset for + TARGET_THUMB1_ONLY. Add comments. + +2014-06-18 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-06-16 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.c (decide_alg): Correctly handle + maximum size of stringop algorithm. + +2014-06-18 Richard Henderson <rth@redhat.com> + + PR target/61545 + * config/aarch64/aarch64.md (tlsdesc_small): Clobber CC_REGNUM. + +2014-06-17 Yufeng Zhang <yufeng.zhang@arm.com> + + PR target/61483 + * config/aarch64/aarch64.c (aarch64_layout_arg): Add new local + variable 'size'; calculate 'size' right in the front; use + 'size' to compute 'nregs' (when 'allocate_ncrn != 0') and + pcum->aapcs_stack_words. + +2014-06-17 Nick Clifton <nickc@redhat.com> + + * config/msp430/msp430.md (mulhisi3): Add a NOP after the DINT. + (umulhi3, mulsidi3, umulsidi3): Likewise. + +2014-06-17 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-06-06 Uros Bizjak <ubizjak@gmail.com> + + PR target/61423 + * config/i386/i386.md (*floatunssi<mode>2_i387_with_xmm): New + define_insn_and_split pattern, merged from *floatunssi<mode>2_1 + and corresponding splitters. 
Zero extend general register + or memory input operand to XMM temporary. Enable for + TARGET_SSE2 and TARGET_INTER_UNIT_MOVES_TO_VEC only. + (floatunssi<mode>2): Update expander predicate. + +2014-06-16 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/61325 + * lra-constraints.c (valid_address_p): Add forward declaration. + (simplify_operand_subreg): Check address validity before and after + alter_reg of memory subreg. + +2014-06-18 Jakub Jelinek <jakub@redhat.com> + + PR plugins/45078 + * config.gcc (arm*-*-linux-*): Include vxworks-dummy.h in tm_file. + +2014-06-13 Peter Bergner <bergner@vnet.ibm.com> + + Backport from mainline + + 2014-06-13 Peter Bergner <bergner@vnet.ibm.com> + PR target/61415 + * config/rs6000/rs6000-builtin.def (BU_MISC_1): Delete. + (BU_MISC_2): Rename to ... + (BU_LDBL128_2): ... this. + * config/rs6000/rs6000.h (RS6000_BTM_LDBL128): New define. + (RS6000_BTM_COMMON): Add RS6000_BTM_LDBL128. + * config/rs6000/rs6000.c (rs6000_builtin_mask_calculate): Handle + RS6000_BTM_LDBL128. + (rs6000_invalid_builtin): Add long double 128-bit builtin support. + (rs6000_builtin_mask_names): Add RS6000_BTM_LDBL128. + * config/rs6000/rs6000.md (unpacktf_0): Remove define_expand. + (unpacktf_1): Likewise. + * doc/extend.texi (__builtin_longdouble_dw0): Remove documentation. + (__builtin_longdouble_dw1): Likewise. + * doc/sourcebuild.texi (longdouble128): Document. + +2014-06-13 Jeff Law <law@redhat.com> + + Backports from mainline: + 2014-06-13 Jeff Law <law@redhat.com> + + PR rtl-optimization/61094 + PR rtl-optimization/61446 + * ree.c (combine_reaching_defs): Get the mode for the copy from + the extension insn rather than the defining insn. + + 2014-06-02 Jeff Law <law@redhat.com> + + PR rtl-optimization/61094 + * ree.c (combine_reaching_defs): Do not reextend an insn if it + was marked as do_no_reextend. If a copy is needed to eliminate + an extension, then mark it as do_not_reextend. 
+ +2014-06-13 Martin Jambor <mjambor@suse.cz> + + PR ipa/61186 + * ipa-devirt.c (possible_polymorphic_call_targets): Store NULL to + cache_token if returning early. + +2014-06-12 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/61486 + * gimplify.c (struct gimplify_omp_ctx): Add distribute field. + (gimplify_adjust_omp_clauses): Don't or in GOVD_LASTPRIVATE + if outer combined construct is distribute. + (gimplify_omp_for): For OMP_DISTRIBUTE set + gimplify_omp_ctxp->distribute. + * omp-low.c (scan_sharing_clauses) <case OMP_CLAUSE_SHARED>: For + GIMPLE_OMP_TEAMS, if decl isn't global in outer context, record + mapping into decl map. + +2014-06-12 Jeff Law <law@redhat.com> + + Backports from mainline: + + 2014-06-12 Jeff Law <law@redhat.com> + + PR tree-optimization/61009 + * tree-ssa-threadedge.c (thread_through_normal_block): Correct return + value when we stop processing a block due to problematic PHIs. + + 2014-06-05 Jeff Law <law@redhat.com> + + PR tree-optimization/61289 + * tree-ssa-threadedge.c (invalidate_equivalences): Remove SRC_MAP and + DST_MAP parameters. Invalidate by walking all the SSA_NAME_VALUES + looking for those which match LHS. All callers changed. + (record_temporary_equivalences_from_phis): Remove SRC_MAP and DST_MAP + parameters and code which manipulated them. All callers changed. + (record_temporary_equivalences_from_stmts_at_dest): Remove SRC_MAP + and DST_MAP parameters. Simplify invalidation code by just calling + invalidate_equivalences. All callers changed. + (thread_across_edge): Simplify now that we don't need to maintain + the map of equivalences to invalidate. + +2014-06-12 Eric Botcazou <ebotcazou@adacore.com> + + * tree-core.h (DECL_NONALIASED): Use proper spelling in comment. + +2014-06-12 Georg-Johann Lay <avr@gjlay.de> + + Backport from 2014-05-14 trunk r210418 + * config/avr/avr.h (REG_CLASS_CONTENTS): Use unsigned suffix for + shifted values to avoid build warning. 
+ +2014-06-12 Georg-Johann Lay <avr@gjlay.de> + + Backport from 2014-05-09 trunk r210272 + + * config/avr/avr-fixed.md (round<mode>3): Use -1U instead of -1 in + unsigned int initializers for regno_in, regno_out. + + Backport from 2014-06-12 trunk r211491 + + PR target/61443 + * config/avr/avr.md (push<mode>1): Avoid (subreg(mem)) when + loading from address spaces. + +2014-06-12 Alan Modra <amodra@gmail.com> + + PR target/61300 + * doc/tm.texi.in (INCOMING_REG_PARM_STACK_SPACE): Document. + * doc/tm.texi: Regenerate. + * function.c (INCOMING_REG_PARM_STACK_SPACE): Provide default. + Use throughout in place of REG_PARM_STACK_SPACE. + * config/rs6000/rs6000.c (rs6000_reg_parm_stack_space): Add + "incoming" param. Pass to rs6000_function_parms_need_stack. + (rs6000_function_parms_need_stack): Add "incoming" param, ignore + prototype_p when incoming. Use function decl when incoming + to handle K&R style functions. + * config/rs6000/rs6000.h (REG_PARM_STACK_SPACE): Adjust. + (INCOMING_REG_PARM_STACK_SPACE): Define. + +2014-06-11 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61452 + * tree-ssa-sccvn.c (visit_phi): Remove pointless setting of + expr and has_constants in case we found a leader. + (simplify_binary_expression): Always valueize operands first. + (simplify_unary_expression): Likewise. + +2014-06-11 Richard Biener <rguenther@suse.de> + + PR middle-end/61456 + * tree-ssa-alias.c (nonoverlapping_component_refs_of_decl_p): + Do not use the main variant for the type comparison. + +2014-06-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * doc/arm-acle-intrinsics.texi: Specify when CRC32 intrinsics are + available. + Simplify description of __crc32d and __crc32cd intrinsics. + * doc/extend.texi (ARM ACLE Intrinsics): Remove comment about CRC32 + availability. + +2014-06-07 Eric Botcazou <ebotcazou@adacore.com> + + * tree-ssa-tail-merge.c (same_succ_hash): Hash the static chain of a + call statement, if any. 
+ (gimple_equal_p) <GIMPLE_CALL>: Compare the static chain of the call + statements, if any. Tidy up. + +2014-06-06 Michael Meissner <meissner@linux.vnet.ibm.com> + + Back port from trunk + 2014-06-06 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/61431 + * config/rs6000/vsx.md (VSX_LE): Split VSX_D into 2 separate + iterators, VSX_D that handles 64-bit types, and VSX_LE that + handles swapping the two 64-bit double words on little endian + systems. Include V1TImode and optionally TImode in VSX_LE so that + these types are properly swapped. Change all of the insns and + splits that do the 64-bit swaps to use VSX_LE. + (vsx_le_perm_load_<mode>): Likewise. + (vsx_le_perm_store_<mode>): Likewise. + (splitters for little endian memory operations): Likewise. + (vsx_xxpermdi2_le_<mode>): Likewise. + (vsx_lxvd2x2_le_<mode>): Likewise. + (vsx_stxvd2x2_le_<mode>): Likewise. + +2014-06-06 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/61325 + * lra-constraints.c (process_address_1): Check scale equal to one + to prevent transformation: base + scale * index => base + new_reg. + +2014-06-05 Martin Jambor <mjambor@suse.cz> + + PR ipa/61393 + * ipa-cp.c (determine_versionability): Pretend that tm_clones are + not versionable. + +2014-06-04 Richard Biener <rguenther@suse.de> + + PR tree-optimization/61383 + * tree-ssa-ifcombine.c (bb_no_side_effects_p): Make sure + stmts can't trap. + +2014-06-02 Jason Merrill <jason@redhat.com> + + PR c++/61020 + * varpool.c (ctor_for_folding): Handle uninitialized vtables. + +2014-06-03 Martin Jambor <mjambor@suse.cz> + + PR ipa/61160 + * ipa-cp.c (cgraph_edge_brings_value_p): Handle edges leading to + thunks. + +2014-06-03 Andrey Belevantsev <abel@ispras.ru> + + Backport from mainline + 2014-05-14 Andrey Belevantsev <abel@ispras.ru> + + PR rtl-optimization/60866 + * sel-sched-ir (sel_init_new_insn): New parameter old_seqno. + Default it to -1. Pass it down to init_simplejump_data. 
+ (init_simplejump_data): New parameter old_seqno. Pass it down + to get_seqno_for_a_jump. + (get_seqno_for_a_jump): New parameter old_seqno. Use it for + initializing new jump seqno as a last resort. Add comment. + (sel_redirect_edge_and_branch): Save old seqno of the conditional + jump and pass it down to sel_init_new_insn. + (sel_redirect_edge_and_branch_force): Likewise. + +2014-06-03 Andrey Belevantsev <abel@ispras.ru> + + Backport from mainline + 2014-05-14 Andrey Belevantsev <abel@ispras.ru> + + PR rtl-optimization/60901 + * config/i386/i386.c (ix86_dependencies_evaluation_hook): Check that + bb predecessor belongs to the same scheduling region. Adjust comment. + +2014-06-03 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-06-02 Uros Bizjak <ubizjak@gmail.com> + + PR target/61239 + * config/i386/i386.c (ix86_expand_vec_perm) [case V32QImode]: Use + GEN_INT (-128) instead of GEN_INT (128) to set MSB of QImode constant. + +2014-05-29 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/61325 + * lra-constraints.c (process_address): Rename to + process_address_1. + (process_address): New function. + +2014-05-29 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-05-26 Uros Bizjak <ubizjak@gmail.com> + + PR target/61271 + * config/i386/i386.c (ix86_rtx_costs) + <case CONST_INT, case CONST, case LABEL_REF, case SYMBOL_REF>: + Fix condition. + +2014-05-28 Eric Botcazou <ebotcazou@adacore.com> + + Backport from mainline + 2014-05-27 Eric Botcazou <ebotcazou@adacore.com> + + * double-int.c (div_and_round_double) <ROUND_DIV_EXPR>: Use the proper + predicate to detect a negative quotient. + +2014-05-28 Richard Biener <rguenther@suse.de> + + Backport from mainline + 2014-05-28 Richard Biener <rguenther@suse.de> + + PR middle-end/61045 + * fold-const.c (fold_comparison): When folding + X +- C1 CMP Y +- C2 to X CMP Y +- C2 +- C1 also ensure + the sign of the remaining constant operand stays the same. 
+ + 2014-05-05 Richard Biener <rguenther@suse.de> + + PR middle-end/61010 + * fold-const.c (fold_binary_loc): Consistently avoid + canonicalizing X & CST away from a CST that is the mask + of a mode. + + 2014-04-28 Richard Biener <rguenther@suse.de> + + PR tree-optimization/60979 + * graphite-scop-detection.c (scopdet_basic_block_info): Reject + SCOPs that end in a block with a successor with abnormal + predecessors. + +2014-05-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + * configure.ac ($gcc_cv_ld_clearcap): New test. + * configure: Regenerate. + * config.in: Regenerate. + * config/sol2.opt (mclear-hwcap): New option. + * config/sol2.h (LINK_CLEARCAP_SPEC): Define. + * config/sol2-clearcap.map: Moved here from + testsuite/gcc.target/i386/clearcap.map. + * config/sol2-clearcapv2.map: Move here from + gcc.target/i386/clearcapv2.map. + * config/t-sol2 (install): Depend on install-clearcap-map. + (install-clearcap-map): New target. + * doc/invoke.texi (Option Summary, Solaris 2 Options): Document + -mclear-hwcap. + +2014-05-28 Georg-Johann Lay <avr@gjlay.de> + + PR libgcc/61152 + * config/dbx.h (License): Add Runtime Library Exception. + * config/newlib-stdint.h (License): Same. + * config/rtems.h (License): Same + * config/initfini-array.h (License): Same + * config/v850/v850.h (License): Same. + * config/v850/v850-opts.h (License): Same + * config/v850/rtems.h (License): Same. + +2014-05-28 Georg-Johann Lay <avr@gjlay.de> + + PR target/61044 + * doc/extend.texi (Local Labels): Note that label differences are + not supported for AVR. + +2014-05-27 Georg-Johann Lay <avr@gjlay.de> + + Backport from 2014-05-12 mainline r210322. + Backport from 2014-05-27 mainline r210959, r210969. + + PR libgcc/61152 + * config/arm/arm.h (License): Add GCC Runtime Library Exception. + * config/arm/arm-cores.def (License): Same. + * config/arm/arm-opts.h (License): Same. + * config/arm/aout.h (License): Same. + * config/arm/bpabi.h (License): Same. 
+ * config/arm/elf.h (License): Same. + * config/arm/linux-elf.h (License): Same. + * config/arm/linux-gas.h (License): Same. + * config/arm/netbsd-elf.h (License): Same. + * config/arm/uclinux-eabi.h (License): Same. + * config/arm/uclinux-elf.h (License): Same. + * config/arm/vxworks.h (License): Same. + +2014-05-26 Michael Tautschnig <mt@debian.org> + + PR target/61249 + * doc/extend.texi (X86 Built-in Functions): Fix parameter lists of + __builtin_ia32_vfrczs[sd] and __builtin_ia32_mpsadbw256. + +2014-05-22 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/61215 + * lra-eliminations.c (lra_eliminate_regs_1): Don't use + simplify_gen_subreg until final substitution. + +2014-05-23 Alan Modra <amodra@gmail.com> + + PR target/61231 + * config/rs6000/rs6000.c (mem_operand_gpr): Handle SImode. + * config/rs6000/rs6000.md (extendsidi2_lfiwax, extendsidi2_nocell): + Use "Y" constraint rather than "m". + +2014-05-22 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/60969 + * ira-costs.c (record_reg_classes): Process NO_REGS for matching + constraints. Set up mem cost for NO_REGS case. + +2014-05-22 Peter Bergner <bergner@vnet.ibm.com> + + * config/rs6000/htm.md (ttest): Use correct shift value to get CR0. + +2014-05-22 Richard Earnshaw <rearnsha@arm.com> + + PR target/61208 + * arm.md (arm_cmpdi_unsigned): Fix length calculation for Thumb2. + +2014-05-22 Nick Clifton <nickc@redhat.com> + + * config/msp430/msp430.h (ASM_SPEC): Add spaces after inserted options. + +2014-05-22 Jakub Jelinek <jakub@redhat.com> + + * tree-streamer-in.c (unpack_ts_real_cst_value_fields): Make sure + all padding bits in REAL_VALUE_TYPE are cleared. + +2014-05-21 Guozhi Wei <carrot@google.com> + + PR target/61202 + * config/aarch64/arm_neon.h (vqdmulh_n_s16): Change the last operand's + constraint. + (vqdmulhq_n_s16): Likewise. 
+ +2014-05-21 Martin Jambor <mjambor@suse.cz> + + * doc/invoke.texi (Optimize Options): Document parameters + ipa-cp-eval-threshold, ipa-max-agg-items, ipa-cp-loop-hint-bonus and + ipa-cp-array-index-hint-bonus. + +2014-05-21 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/61252 + * omp-low.c (handle_simd_reference): New function. + (lower_rec_input_clauses): Use it. Defer adding reference + initialization even for reduction without placeholder if in simd, + handle it properly later on. + +2014-05-20 Jan Hubicka <hubicka@ucw.cz> + + PR bootstrap/60984 + * ipa-inline-transform.c (inline_call): Add CALLEE_REMOVED + parameter. + * ipa-inline.c (inline_to_all_callers): If callee was removed, return. + (ipa_inline): Loop inline_to_all_callers until no more aliases + are removed. + +2014-05-20 Jan Hubicka <hubicka@ucw.cz> + + PR lto/60820 + * varpool.c (varpool_remove_node): Do not alter decls when streaming. + +2014-05-20 DJ Delorie <dj@redhat.com> + + * config/msp430/msp430.md (split): Don't allow subregs when + splitting SImode adds. + (andneghi): Fix subtraction logic. + * config/msp430/predicates.md (msp430_nonsubreg_or_imm_operand): New. + +2014-05-20 Nick Clifton <nickc@redhat.com> + + * config/msp430/msp430.c (TARGET_GIMPLIFY_VA_ARG_EXPR): Define. + (msp430_gimplify_va_arg_expr): New function. + (msp430_print_operand): Handle (CONST (ZERO_EXTRACT)). + + * config/msp430/msp430.md (zero_extendpsisi2): Use + constraint on + operand 0 in order to prevent confusion about the number of + registers involved. + +2014-05-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + + * config/arm/arm.md (arith_shiftsi): Do not predicate for + arm_restrict_it. + +2014-05-19 Richard Earnshaw <rearnsha@arm.com> + + * arm.c (thumb1_reorg): When scanning backwards skip anything + that's not a proper insn. + +2014-05-17 Jan Hubicka <hubicka@ucw.cz> + + * ipa.c (symtab_remove_unreachable_nodes): Remove + symbol from comdat group if its body was eliminated. 
+ (comdat_can_be_unshared_p_1): Static symbols can always be privatized. + * symtab.c (symtab_remove_from_same_comdat_group): Break out from ... + (symtab_unregister_node): ... this one. + (verify_symtab_base): More strict checking of comdats. + * cgraph.h (symtab_remove_from_same_comdat_group): Declare. + +2014-05-17 Jan Hubicka <hubicka@ucw.cz> + + * opts.c (common_handle_option): Disable -fipa-reference correctly + with -fuse-profile. + +2014-05-17 Jan Hubicka <hubicka@ucw.cz> + + PR ipa/60854 + * ipa.c (symtab_remove_unreachable_nodes): Mark targets of + external aliases alive, too. + +2014-05-17 Uros Bizjak <ubizjak@gmail.com> + + * doc/invoke.texi (free): Mention Alpha. Also enabled at -Os. + +2014-05-17 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2014-04-25 H.J. Lu <hongjiu.lu@intel.com> + + PR target/60969 + * config/i386/i386.md (*movsf_internal): Set MODE to SI for + alternative 12. + +2014-05-16 Vladimir Makarov <vmakarov@redhat.com> + + PR rtl-optimization/60969 + * ira-costs.c (record_reg_classes): Allow only memory for pseudo. + Calculate costs for this case. + +2014-05-15 Peter Bergner <bergner@vnet.ibm.com> + + PR target/61193 + * config/rs6000/htmxlintrin.h (_HTM_TBEGIN_STARTED): New define. + (__TM_simple_begin): Use it. + (__TM_begin): Likewise. + +2014-05-15 Martin Jambor <mjambor@suse.cz> + + PR ipa/61085 + * ipa-prop.c (update_indirect_edges_after_inlining): Check + type_preserved flag when the indirect edge is polymorphic. + +2014-05-15 Martin Jambor <mjambor@suse.cz> + + PR ipa/60897 + * ipa-prop.c (ipa_modify_formal_parameters): Reset DECL_LANG_SPECIFIC. + +2014-05-15 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/61158 + * fold-const.c (fold_binary_loc): If X is zero-extended and + shiftc >= prec, make sure zerobits is all ones instead of + invoking undefined behavior. + +2014-05-14 Cary Coutant <ccoutant@google.com> + + PR debug/61013 + * opts.c (common_handle_option): Don't special-case "-g". 
+ (set_debug_level): Default to at least level 2 with "-g". + 2014-05-14 Eric Botcazou <ebotcazou@adacore.com> * config/sparc/sparc-protos.h (sparc_absnegfloat_split_legitimate): @@ -1652,10 +2721,10 @@ PR tree-optimization/60577 * tree-core.h (struct tree_base): Document nothrow_flag use - in VAR_DECL_NONALIASED. - * tree.h (VAR_DECL_NONALIASED): New. + in DECL_NONALIASED. + * tree.h (DECL_NONALIASED): New. (may_be_aliased): Adjust. - * coverage.c (build_var): Set VAR_DECL_NONALIASED. + * coverage.c (build_var): Set DECL_NONALIASED. 2014-03-20 Eric Botcazou <ebotcazou@adacore.com> diff --git a/gcc-4.9/gcc/DATESTAMP b/gcc-4.9/gcc/DATESTAMP index 7dfdaab6e..e48f0e7c4 100644 --- a/gcc-4.9/gcc/DATESTAMP +++ b/gcc-4.9/gcc/DATESTAMP @@ -1 +1 @@ -20140514 +20140711 diff --git a/gcc-4.9/gcc/Makefile.in b/gcc-4.9/gcc/Makefile.in index 735011125..f94b35a1e 100644 --- a/gcc-4.9/gcc/Makefile.in +++ b/gcc-4.9/gcc/Makefile.in @@ -123,7 +123,8 @@ SUBDIRS =@subdirs@ build # Selection of languages to be made. CONFIG_LANGUAGES = @all_selected_languages@ -LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) $(CONFIG_LANGUAGES) +LANGUAGES = c gcov$(exeext) gcov-dump$(exeext) gcov-tool$(exeext) \ + $(CONFIG_LANGUAGES) # Default values for variables overridden in Makefile fragments. # CFLAGS is for the user to override to, e.g., do a cross build with -O2. @@ -196,6 +197,9 @@ GCC_WARN_CXXFLAGS = $(LOOSE_WARN) $($(@D)-warn) $(NOCOMMON_FLAG) $($@-warn) # flex output may yield harmless "no previous prototype" warnings build/gengtype-lex.o-warn = -Wno-error gengtype-lex.o-warn = -Wno-error +libgcov-util.o-warn = -Wno-error +libgcov-driver-tool.o-warn = -Wno-error +libgcov-merge-tool.o-warn = -Wno-error # All warnings have to be shut off in stage1 if the compiler used then # isn't gcc; configure determines that. 
WARN_CFLAGS will be either @@ -769,6 +773,7 @@ GCC_INSTALL_NAME := $(shell echo gcc|sed '$(program_transform_name)') GCC_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcc|sed '$(program_transform_name)') CPP_INSTALL_NAME := $(shell echo cpp|sed '$(program_transform_name)') GCOV_INSTALL_NAME := $(shell echo gcov|sed '$(program_transform_name)') +PROFILE_TOOL_INSTALL_NAME := $(shell echo profile_tool|sed '$(program_transform_name)') # Setup the testing framework, if you have one EXPECT = `if [ -f $${rootme}/../expect/expect ] ; then \ @@ -890,7 +895,7 @@ BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) \ GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \ $(GGC_H) $(BASIC_BLOCK_H) $(TREE_H) tree-ssa-operands.h \ tree-ssa-alias.h $(INTERNAL_FN_H) $(HASH_TABLE_H) is-a.h -GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h +GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h gcov-counter.def RECOG_H = recog.h EMIT_RTL_H = emit-rtl.h FLAGS_H = flags.h flag-types.h $(OPTIONS_H) @@ -905,7 +910,8 @@ CFGLOOP_H = cfgloop.h $(BASIC_BLOCK_H) double-int.h \ IPA_UTILS_H = ipa-utils.h $(TREE_H) $(CGRAPH_H) IPA_REFERENCE_H = ipa-reference.h $(BITMAP_H) $(TREE_H) CGRAPH_H = cgraph.h $(VEC_H) $(TREE_H) $(BASIC_BLOCK_H) $(FUNCTION_H) \ - cif-code.def ipa-ref.h ipa-ref-inline.h $(LINKER_PLUGIN_API_H) is-a.h + cif-code.def ipa-ref.h ipa-ref-inline.h $(LINKER_PLUGIN_API_H) is-a.h \ + l-ipo.h DF_H = df.h $(BITMAP_H) $(REGSET_H) sbitmap.h $(BASIC_BLOCK_H) \ alloc-pool.h $(TIMEVAR_H) RESOURCE_H = resource.h hard-reg-set.h $(DF_H) @@ -1163,6 +1169,7 @@ OBJS = \ alias.o \ alloc-pool.o \ auto-inc-dec.o \ + auto-profile.o \ bb-reorder.o \ bitmap.o \ bt-load.o \ @@ -1286,6 +1293,7 @@ OBJS = \ ira-emit.o \ ira-lives.o \ jump.o \ + l-ipo.o \ langhooks.o \ lcm.o \ lists.o \ @@ -1488,7 +1496,7 @@ ALL_HOST_FRONTEND_OBJS = $(foreach v,$(CONFIG_LANGUAGES),$($(v)_OBJS)) ALL_HOST_BACKEND_OBJS = $(GCC_OBJS) $(OBJS) $(OBJS-libcommon) \ $(OBJS-libcommon-target) @TREEBROWSER@ 
main.o c-family/cppspec.o \ $(COLLECT2_OBJS) $(EXTRA_GCC_OBJS) $(GCOV_OBJS) $(GCOV_DUMP_OBJS) \ - lto-wrapper.o + $(GCOV_TOOL_OBJS) lto-wrapper.o # This lists all host object files, whether they are included in this # compilation or not. @@ -1513,6 +1521,7 @@ MOSTLYCLEANFILES = insn-flags.h insn-config.h insn-codes.h \ $(SPECS) collect2$(exeext) gcc-ar$(exeext) gcc-nm$(exeext) \ gcc-ranlib$(exeext) \ gcov-iov$(build_exeext) gcov$(exeext) gcov-dump$(exeext) \ + gcov-tool$(exeext) \ gengtype$(exeext) *.[0-9][0-9].* *.[si] *-checksum.c libbackend.a \ libcommon-target.a libcommon.a libgcc.mk @@ -1907,9 +1916,10 @@ collect2$(exeext): $(COLLECT2_OBJS) $(LIBDEPS) CFLAGS-collect2.o += -DTARGET_MACHINE=\"$(target_noncanonical)\" \ @TARGET_SYSTEM_ROOT_DEFINE@ -lto-wrapper$(exeext): lto-wrapper.o ggc-none.o libcommon-target.a $(LIBDEPS) +LTO_WRAPPER_OBJS = lto-wrapper.o vec.o ggc-none.o +lto-wrapper$(exeext): $(LTO_WRAPPER_OBJS) libcommon-target.a $(LIBDEPS) +$(LINKER) $(ALL_COMPILERFLAGS) $(LDFLAGS) -o T$@ \ - lto-wrapper.o ggc-none.o libcommon-target.a $(LIBS) + $(LTO_WRAPPER_OBJS) libcommon-target.a $(LIBS) mv -f T$@ $@ # Files used by all variants of C or by the stand-alone pre-processor. 
@@ -1938,6 +1948,7 @@ DRIVER_DEFINES = \ -DDEFAULT_TARGET_MACHINE=\"$(target_noncanonical)\" \ -DSTANDARD_BINDIR_PREFIX=\"$(bindir)/\" \ -DTOOLDIR_BASE_PREFIX=\"$(libsubdir_to_prefix)$(prefix_to_exec_prefix)\" \ + @RUNTIME_ROOT_PREFIX_DEFINE@ \ @TARGET_SYSTEM_ROOT_DEFINE@ \ $(VALGRIND_DRIVER_DEFINES) \ $(if $(SHLIB),$(if $(filter yes,@enable_shared@),-DENABLE_SHARED_LIBGCC)) \ @@ -2247,7 +2258,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/dwarf2cfi.c \ $(srcdir)/dwarf2out.c \ $(srcdir)/tree-vect-generic.c \ - $(srcdir)/dojump.c \ + $(srcdir)/dojump.c $(srcdir)/l-ipo.c \ $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \ $(srcdir)/expr.h \ $(srcdir)/function.c $(srcdir)/except.c \ @@ -2564,13 +2575,37 @@ s-iov: build/gcov-iov$(build_exeext) $(BASEVER) $(DEVPHASE) $(SHELL) $(srcdir)/../move-if-change tmp-gcov-iov.h gcov-iov.h $(STAMP) s-iov -GCOV_OBJS = gcov.o +GCOV_OBJS = gcov.o vec.o ggc-none.o gcov$(exeext): $(GCOV_OBJS) $(LIBDEPS) +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) $(GCOV_OBJS) $(LIBS) -o $@ -GCOV_DUMP_OBJS = gcov-dump.o +GCOV_DUMP_OBJS = gcov-dump.o vec.o ggc-none.o gcov-dump$(exeext): $(GCOV_DUMP_OBJS) $(LIBDEPS) +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) $(GCOV_DUMP_OBJS) \ $(LIBS) -o $@ + +GCOV_TOOL_DEP_FILES = $(srcdir)/../libgcc/libgcov-util.c gcov-io.c $(GCOV_IO_H) \ + $(srcdir)/../libgcc/libgcov-driver.c $(srcdir)/../libgcc/libgcov-driver-system.c \ + $(srcdir)/../libgcc/libgcov-merge.c $(srcdir)/../libgcc/libgcov.h \ + $(SYSTEM_H) coretypes.h $(TM_H) $(CONFIG_H) version.h intl.h $(DIAGNOSTIC_H) +libgcov-util.o: $(srcdir)/../libgcc/libgcov-util.c $(GCOV_TOOL_DEP_FILES) + +$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) -o $@ $< +libgcov-driver-tool.o: $(srcdir)/../libgcc/libgcov-driver.c $(GCOV_TOOL_DEP_FILES) + +$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + -DIN_GCOV_TOOL=1 -o $@ $< +libgcov-merge-tool.o: $(srcdir)/../libgcc/libgcov-merge.c 
$(GCOV_TOOL_DEP_FILES) + +$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + -DIN_GCOV_TOOL=1 -o $@ $< +gcov-tool-dyn-ipa.o: $(srcdir)/../libgcc/dyn-ipa.c $(GCOV_TOOL_DEP_FILES) + +$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + -DIN_GCOV_TOOL=1 -o $@ $< +gcov-tool-params.o: params.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(COMMON_TARGET_H) \ + $(PARAMS_H) $(DIAGNOSTIC_CORE_H) + +$(COMPILER) -DIN_GCOV_TOOL=1 -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ + $(INCLUDES) -o $@ $< +GCOV_TOOL_OBJS = gcov-tool.o libgcov-util.o libgcov-driver-tool.o \ + libgcov-merge-tool.o gcov-tool-dyn-ipa.o gcov-tool-params.o +gcov-tool$(exeext): $(GCOV_TOOL_OBJS) $(LIBDEPS) + +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) $(GCOV_TOOL_OBJS) $(LIBS) -o $@ # # Build the include directories. The stamp files are stmp-* rather than # s-* so that mostlyclean does not force the include directory to @@ -2799,7 +2834,7 @@ TEXI_GCC_FILES = gcc.texi gcc-common.texi gcc-vers.texi frontends.texi \ contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ - arm-acle-intrinsics.texi + arm-acle-intrinsics.texi gcov-tool.texi # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with # the generated tm.texi; the latter might have a more recent timestamp, @@ -2920,7 +2955,8 @@ $(build_htmldir)/gccinstall/index.html: $(TEXI_GCCINSTALL_FILES) DESTDIR=$(@D) \ $(SHELL) $(srcdir)/doc/install.texi2html -MANFILES = doc/gcov.1 doc/cpp.1 doc/gcc.1 doc/gfdl.7 doc/gpl.7 doc/fsf-funding.7 +MANFILES = doc/gcov.1 doc/cpp.1 doc/gcc.1 doc/gfdl.7 doc/gpl.7 \ + doc/fsf-funding.7 doc/gcov-tool.1 generated-manpages: man @@ -3207,6 +3243,20 @@ install-common: native lang.install-common installdirs rm -f $(DESTDIR)$(bindir)/$(GCOV_INSTALL_NAME)$(exeext); \ $(INSTALL_PROGRAM) gcov$(exeext) $(DESTDIR)$(bindir)/$(GCOV_INSTALL_NAME)$(exeext); \ fi +# Install 
profile_tool if it is available. + -if [ -f $(srcdir)/../contrib/profile_tool ]; \ + then \ + rm -f $(DESTDIR)$(bindir)/$(PROFILE_TOOL_INSTALL_NAME)$(exeext); \ + $(INSTALL_PROGRAM) $(srcdir)/../contrib/profile_tool \ + $(DESTDIR)$(bindir)/$(PROFILE_TOOL_INSTALL_NAME)$(exeext); \ + fi +# Install gcov-tool if it was compiled. + -if [ -f gcov-tool$(exeext) ]; \ + then \ + rm -f $(DESTDIR)$(bindir)/$(GCOV_TOOL_INSTALL_NAME)$(exeext); \ + $(INSTALL_PROGRAM) \ + gcov-tool$(exeext) $(DESTDIR)$(bindir)/$(GCOV_TOOL_INSTALL_NAME)$(exeext); \ + fi # Install the driver program as $(target_noncanonical)-gcc, # $(target_noncanonical)-gcc-$(version), and also as gcc if native. @@ -3312,6 +3362,11 @@ $(DESTDIR)$(man1dir)/$(GCOV_INSTALL_NAME)$(man1ext): doc/gcov.1 installdirs -$(INSTALL_DATA) $< $@ -chmod a-x $@ +$(DESTDIR)$(man1dir)/$(GCOV_TOOL_INSTALL_NAME)$(man1ext): doc/gcov-tool.1 installdirs + -rm -f $@ + -$(INSTALL_DATA) $< $@ + -chmod a-x $@ + # Install all the header files built in the include subdirectory. install-headers: $(INSTALL_HEADERS_DIR) # Fix symlinks to absolute paths in the installed include directory to diff --git a/gcc-4.9/gcc/ada/ChangeLog b/gcc-4.9/gcc/ada/ChangeLog index b4d3e7769..f979c34d4 100644 --- a/gcc-4.9/gcc/ada/ChangeLog +++ b/gcc-4.9/gcc/ada/ChangeLog @@ -1,3 +1,59 @@ +2014-05-19 Eric Botcazou <ebotcazou@adacore.com> + + Backport from mainline + 2014-04-22 Richard Henderson <rth@redhat.com> + + * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. + (__gnat_alternate_stack): Enable for all linux except ia64. + +2014-05-18 Eric Botcazou <ebotcazou@adacore.com> + + * utils.c (gnat_write_global_declarations): Adjust the flags put on + dummy_global. + +2014-05-18 Eric Botcazou <ebotcazou@adacore.com> + + * gcc-interface/decl.c (gnat_to_gnu_entity) <E_Array_Subtype>: Do not + consider that regular packed arrays can never be superflat. 
+ +2014-05-17 Eric Botcazou <ebotcazou@adacore.com> + + Backport from mainline + 2014-04-28 Richard Henderson <rth@redhat.com> + + * gcc-interface/Makefile.in: Support aarch64-linux. + + 2014-04-28 Eric Botcazou <ebotcazou@adacore.com> + + * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, + add 'Suffix' parameter and adjust comment. + (Get_External_Name_With_Suffix): Delete. + * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... + (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add + 'Suffix' parameter. + (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. + Call Get_External_Name instead of Get_External_Name_With_Suffix. + (Get_Secondary_DT_External_Name): Likewise. + * exp_cg.adb (Write_Call_Info): Likewise. + * exp_disp.adb (Export_DT): Likewise. + (Import_DT): Likewise. + * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC + parameter with False default. + * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. + * types.h (Fat_Pointer): Rename into... + (String_Pointer): ...this. Add comment on interfacing rules. + * fe.h (Compiler_Abort): Adjust for above renaming. + (Error_Msg_N): Likewise. + (Error_Msg_NE): Likewise. + (Get_External_Name): Likewise. Add third parameter. + (Get_External_Name_With_Suffix): Delete. + * gcc-interface/decl.c (STDCALL_PREFIX): Define. + (create_concat_name): Adjust call to Get_External_Name, remove call to + Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. + * gcc-interface/trans.c (post_error): Likewise. + (post_error_ne): Likewise. + * gcc-interface/misc.c (internal_error_function): Likewise. + 2014-04-22 Release Manager * GCC 4.9.0 released. diff --git a/gcc-4.9/gcc/ada/comperr.adb b/gcc-4.9/gcc/ada/comperr.adb index 13646a5c1..7a9d7070c 100644 --- a/gcc-4.9/gcc/ada/comperr.adb +++ b/gcc-4.9/gcc/ada/comperr.adb @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 1992-2013, Free Software Foundation, Inc. 
-- +-- Copyright (C) 1992-2014, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -74,8 +74,8 @@ package body Comperr is procedure Compiler_Abort (X : String; - Code : Integer := 0; - Fallback_Loc : String := "") + Fallback_Loc : String := ""; + From_GCC : Boolean := False) is -- The procedures below output a "bug box" with information about -- the cause of the compiler abort and about the preferred method @@ -206,7 +206,7 @@ package body Comperr is Write_Str (") "); if X'Length + Column > 76 then - if Code < 0 then + if From_GCC then Write_Str ("GCC error:"); end if; @@ -235,11 +235,7 @@ package body Comperr is Write_Str (X); end if; - if Code > 0 then - Write_Str (", Code="); - Write_Int (Int (Code)); - - elsif Code = 0 then + if not From_GCC then -- For exception case, get exception message from the TSD. Note -- that it would be neater and cleaner to pass the exception diff --git a/gcc-4.9/gcc/ada/comperr.ads b/gcc-4.9/gcc/ada/comperr.ads index ba3cb6b8f..dccd8ef34 100644 --- a/gcc-4.9/gcc/ada/comperr.ads +++ b/gcc-4.9/gcc/ada/comperr.ads @@ -6,7 +6,7 @@ -- -- -- S p e c -- -- -- --- Copyright (C) 1992-2013, Free Software Foundation, Inc. -- +-- Copyright (C) 1992-2014, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -31,8 +31,8 @@ package Comperr is procedure Compiler_Abort (X : String; - Code : Integer := 0; - Fallback_Loc : String := ""); + Fallback_Loc : String := ""; + From_GCC : Boolean := False); pragma No_Return (Compiler_Abort); -- Signals an internal compiler error. Never returns control. Depending on -- processing may end up raising Unrecoverable_Error, or exiting directly. 
@@ -46,10 +46,9 @@ package Comperr is -- Note that this is only used at the outer level (to handle constraint -- errors or assert errors etc.) In the normal logic of the compiler we -- always use pragma Assert to check for errors, and if necessary an - -- explicit abort is achieved by pragma Assert (False). Code is positive - -- for a gigi abort (giving the gigi abort code), zero for a front - -- end exception (with possible message stored in TSD.Current_Excep, - -- and negative (an unused value) for a GCC abort. + -- explicit abort is achieved by pragma Assert (False). From_GCC is true + -- for a GCC abort and false for a front end exception (with a possible + -- message stored in TSD.Current_Excep). procedure Delete_SCIL_Files; -- Delete SCIL files associated with the main unit diff --git a/gcc-4.9/gcc/ada/exp_cg.adb b/gcc-4.9/gcc/ada/exp_cg.adb index d8a7022e5..483f174ef 100644 --- a/gcc-4.9/gcc/ada/exp_cg.adb +++ b/gcc-4.9/gcc/ada/exp_cg.adb @@ -6,7 +6,7 @@ -- -- -- B o d y -- -- -- --- Copyright (C) 2010-2013, Free Software Foundation, Inc. -- +-- Copyright (C) 2010-2014, Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- @@ -437,10 +437,10 @@ package body Exp_CG is if Nkind (P) = N_Subprogram_Body and then not Acts_As_Spec (P) then - Get_External_Name (Corresponding_Spec (P), Has_Suffix => False); + Get_External_Name (Corresponding_Spec (P)); else - Get_External_Name (Defining_Entity (P), Has_Suffix => False); + Get_External_Name (Defining_Entity (P)); end if; Write_Str (Name_Buffer (1 .. 
Name_Len)); diff --git a/gcc-4.9/gcc/ada/exp_dbug.adb b/gcc-4.9/gcc/ada/exp_dbug.adb index 7dc4264cc..136202908 100644 --- a/gcc-4.9/gcc/ada/exp_dbug.adb +++ b/gcc-4.9/gcc/ada/exp_dbug.adb @@ -507,8 +507,8 @@ package body Exp_Dbug is begin -- If not generating code, there is no need to create encoded names, and -- problems when the back-end is called to annotate types without full - -- code generation. See comments in Get_External_Name_With_Suffix for - -- additional details. + -- code generation. See comments in Get_External_Name for additional + -- details. -- However we do create encoded names if the back end is active, even -- if Operating_Mode got reset. Otherwise any serious error reported @@ -556,7 +556,7 @@ package body Exp_Dbug is -- Fixed-point case if Is_Fixed_Point_Type (E) then - Get_External_Name_With_Suffix (E, "XF_"); + Get_External_Name (E, True, "XF_"); Add_Real_To_Buffer (Delta_Value (E)); if Small_Value (E) /= Delta_Value (E) then @@ -568,14 +568,14 @@ package body Exp_Dbug is elsif Vax_Float (E) then if Digits_Value (Base_Type (E)) = 6 then - Get_External_Name_With_Suffix (E, "XFF"); + Get_External_Name (E, True, "XFF"); elsif Digits_Value (Base_Type (E)) = 9 then - Get_External_Name_With_Suffix (E, "XFF"); + Get_External_Name (E, True, "XFF"); else pragma Assert (Digits_Value (Base_Type (E)) = 15); - Get_External_Name_With_Suffix (E, "XFG"); + Get_External_Name (E, True, "XFG"); end if; -- Discrete case where bounds do not match size @@ -607,9 +607,9 @@ package body Exp_Dbug is begin if Biased then - Get_External_Name_With_Suffix (E, "XB"); + Get_External_Name (E, True, "XB"); else - Get_External_Name_With_Suffix (E, "XD"); + Get_External_Name (E, True, "XD"); end if; if Lo_Encode or Hi_Encode then @@ -649,7 +649,7 @@ package body Exp_Dbug is else Has_Suffix := False; - Get_External_Name (E, Has_Suffix); + Get_External_Name (E); end if; if Debug_Flag_B and then Has_Suffix then @@ -667,7 +667,11 @@ package body Exp_Dbug is -- Get_External_Name 
-- ----------------------- - procedure Get_External_Name (Entity : Entity_Id; Has_Suffix : Boolean) is + procedure Get_External_Name + (Entity : Entity_Id; + Has_Suffix : Boolean := False; + Suffix : String := "") + is E : Entity_Id := Entity; Kind : Entity_Kind; @@ -704,6 +708,20 @@ package body Exp_Dbug is -- Start of processing for Get_External_Name begin + -- If we are not in code generation mode, this procedure may still be + -- called from Back_End (more specifically - from gigi for doing type + -- representation annotation or some representation-specific checks). + -- But in this mode there is no need to mess with external names. + + -- Furthermore, the call causes difficulties in this case because the + -- string representing the homonym number is not correctly reset as a + -- part of the call to Output_Homonym_Numbers_Suffix (which is not + -- called in gigi). + + if Operating_Mode /= Generate_Code then + return; + end if; + Reset_Buffers; -- If this is a child unit, we want the child @@ -762,42 +780,13 @@ package body Exp_Dbug is Get_Qualified_Name_And_Append (E); end if; - Name_Buffer (Name_Len + 1) := ASCII.NUL; - end Get_External_Name; - - ----------------------------------- - -- Get_External_Name_With_Suffix -- - ----------------------------------- - - procedure Get_External_Name_With_Suffix - (Entity : Entity_Id; - Suffix : String) - is - Has_Suffix : constant Boolean := (Suffix /= ""); - - begin - -- If we are not in code generation mode, this procedure may still be - -- called from Back_End (more specifically - from gigi for doing type - -- representation annotation or some representation-specific checks). - -- But in this mode there is no need to mess with external names. - - -- Furthermore, the call causes difficulties in this case because the - -- string representing the homonym number is not correctly reset as a - -- part of the call to Output_Homonym_Numbers_Suffix (which is not - -- called in gigi). 
- - if Operating_Mode /= Generate_Code then - return; - end if; - - Get_External_Name (Entity, Has_Suffix); - if Has_Suffix then Add_Str_To_Name_Buffer ("___"); Add_Str_To_Name_Buffer (Suffix); - Name_Buffer (Name_Len + 1) := ASCII.NUL; end if; - end Get_External_Name_With_Suffix; + + Name_Buffer (Name_Len + 1) := ASCII.NUL; + end Get_External_Name; -------------------------- -- Get_Variant_Encoding -- @@ -944,7 +933,7 @@ package body Exp_Dbug is Suffix_Index : Int) is begin - Get_External_Name (Typ, Has_Suffix => False); + Get_External_Name (Typ); if Ancestor_Typ /= Typ then declare @@ -952,7 +941,7 @@ package body Exp_Dbug is Save_Str : constant String (1 .. Name_Len) := Name_Buffer (1 .. Name_Len); begin - Get_External_Name (Ancestor_Typ, Has_Suffix => False); + Get_External_Name (Ancestor_Typ); -- Append the extended name of the ancestor to the -- extended name of Typ diff --git a/gcc-4.9/gcc/ada/exp_dbug.ads b/gcc-4.9/gcc/ada/exp_dbug.ads index 86099f66f..6f27bfe0e 100644 --- a/gcc-4.9/gcc/ada/exp_dbug.ads +++ b/gcc-4.9/gcc/ada/exp_dbug.ads @@ -413,10 +413,11 @@ package Exp_Dbug is procedure Get_External_Name (Entity : Entity_Id; - Has_Suffix : Boolean); - -- Set Name_Buffer and Name_Len to the external name of entity E. The + Has_Suffix : Boolean := False; + Suffix : String := ""); + -- Set Name_Buffer and Name_Len to the external name of the entity. The -- external name is the Interface_Name, if specified, unless the entity - -- has an address clause or a suffix. + -- has an address clause or Has_Suffix is true. -- -- If the Interface is not present, or not used, the external name is the -- concatenation of: @@ -428,26 +429,11 @@ package Exp_Dbug is -- - the string "$" (or "__" if target does not allow "$"), followed -- by homonym suffix, if the entity is an overloaded subprogram -- or is defined within an overloaded subprogram. 
- - procedure Get_External_Name_With_Suffix - (Entity : Entity_Id; - Suffix : String); - -- Set Name_Buffer and Name_Len to the external name of entity E. If - -- Suffix is the empty string the external name is as above, otherwise - -- the external name is the concatenation of: - -- - -- - the string "_ada_", if the entity is a library subprogram, - -- - the names of any enclosing scopes, each followed by "__", - -- or "X_" if the next entity is a subunit) - -- - the name of the entity - -- - the string "$" (or "__" if target does not allow "$"), followed - -- by homonym suffix, if the entity is an overloaded subprogram - -- or is defined within an overloaded subprogram. - -- - the string "___" followed by Suffix + -- - the string "___" followed by Suffix if Has_Suffix is true. -- -- Note that a call to this procedure has no effect if we are not -- generating code, since the necessary information for computing the - -- proper encoded name is not available in this case. + -- proper external name is not available in this case. 
-------------------------------------------- -- Subprograms for Handling Qualification -- diff --git a/gcc-4.9/gcc/ada/exp_disp.adb b/gcc-4.9/gcc/ada/exp_disp.adb index 8ed3b3956..da2b55d3d 100644 --- a/gcc-4.9/gcc/ada/exp_disp.adb +++ b/gcc-4.9/gcc/ada/exp_disp.adb @@ -3913,10 +3913,7 @@ package body Exp_Disp is pragma Assert (Related_Type (Node (Elmt)) = Typ); - Get_External_Name - (Entity => Node (Elmt), - Has_Suffix => True); - + Get_External_Name (Node (Elmt)); Set_Interface_Name (DT, Make_String_Literal (Loc, Strval => String_From_Name_Buffer)); @@ -7088,7 +7085,7 @@ package body Exp_Disp is Set_Scope (DT, Current_Scope); - Get_External_Name (DT, True); + Get_External_Name (DT); Set_Interface_Name (DT, Make_String_Literal (Loc, Strval => String_From_Name_Buffer)); diff --git a/gcc-4.9/gcc/ada/fe.h b/gcc-4.9/gcc/ada/fe.h index 9b38903f9..e02067c0e 100644 --- a/gcc-4.9/gcc/ada/fe.h +++ b/gcc-4.9/gcc/ada/fe.h @@ -29,17 +29,20 @@ * * ****************************************************************************/ -/* This file contains definitions to access front-end functions and - variables used by gigi. */ +/* This file contains declarations to access front-end functions and variables + used by gigi. + + WARNING: functions taking String_Pointer parameters must abide by the rule + documented alongside the definition of String_Pointer in types.h. */ #ifdef __cplusplus extern "C" { #endif -/* comperr: */ +/* comperr: */ #define Compiler_Abort comperr__compiler_abort -extern int Compiler_Abort (Fat_Pointer, int, Fat_Pointer) ATTRIBUTE_NORETURN; +extern int Compiler_Abort (String_Pointer, String_Pointer, Boolean) ATTRIBUTE_NORETURN; /* csets: */ @@ -72,8 +75,6 @@ extern void Set_Mechanism (Entity_Id, Mechanism_Type); extern void Set_RM_Size (Entity_Id, Uint); extern void Set_Present_Expr (Node_Id, Uint); -/* Test if the node N is the name of an entity (i.e. is an identifier, - expanded name, or an attribute reference that returns an entity). 
*/ #define Is_Entity_Name einfo__is_entity_name extern Boolean Is_Entity_Name (Node_Id); @@ -90,8 +91,8 @@ extern Node_Id Get_Attribute_Definition_Clause (Entity_Id, char); #define Error_Msg_NE errout__error_msg_ne #define Set_Identifier_Casing errout__set_identifier_casing -extern void Error_Msg_N (Fat_Pointer, Node_Id); -extern void Error_Msg_NE (Fat_Pointer, Node_Id, Entity_Id); +extern void Error_Msg_N (String_Pointer, Node_Id); +extern void Error_Msg_NE (String_Pointer, Node_Id, Entity_Id); extern void Set_Identifier_Casing (Char *, const Char *); /* err_vars: */ @@ -147,11 +148,9 @@ extern void Setup_Asm_Outputs (Node_Id); #define Get_Encoded_Name exp_dbug__get_encoded_name #define Get_External_Name exp_dbug__get_external_name -#define Get_External_Name_With_Suffix exp_dbug__get_external_name_with_suffix -extern void Get_Encoded_Name (Entity_Id); -extern void Get_External_Name (Entity_Id, Boolean); -extern void Get_External_Name_With_Suffix (Entity_Id, Fat_Pointer); +extern void Get_Encoded_Name (Entity_Id); +extern void Get_External_Name (Entity_Id, Boolean, String_Pointer); /* exp_util: */ diff --git a/gcc-4.9/gcc/ada/gcc-interface/Makefile.in b/gcc-4.9/gcc/ada/gcc-interface/Makefile.in index 9af1967ce..5c36962ef 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/Makefile.in +++ b/gcc-4.9/gcc/ada/gcc-interface/Makefile.in @@ -1988,6 +1988,44 @@ ifeq ($(strip $(filter-out arm% linux-gnueabi%,$(target_cpu) $(target_os))),) LIBRARY_VERSION := $(LIB_VERSION) endif +# AArch64 Linux +ifeq ($(strip $(filter-out aarch64% linux%,$(target_cpu) $(target_os))),) + LIBGNAT_TARGET_PAIRS = \ + a-exetim.adb<a-exetim-posix.adb \ + a-exetim.ads<a-exetim-default.ads \ + a-intnam.ads<a-intnam-linux.ads \ + a-synbar.adb<a-synbar-posix.adb \ + a-synbar.ads<a-synbar-posix.ads \ + s-inmaop.adb<s-inmaop-posix.adb \ + s-intman.adb<s-intman-posix.adb \ + s-linux.ads<s-linux.ads \ + s-mudido.adb<s-mudido-affinity.adb \ + s-osinte.ads<s-osinte-linux.ads \ + s-osinte.adb<s-osinte-posix.adb \ + 
s-osprim.adb<s-osprim-posix.adb \ + s-taprop.adb<s-taprop-linux.adb \ + s-tasinf.ads<s-tasinf-linux.ads \ + s-tasinf.adb<s-tasinf-linux.adb \ + s-tpopsp.adb<s-tpopsp-tls.adb \ + s-taspri.ads<s-taspri-posix.ads \ + g-sercom.adb<g-sercom-linux.adb \ + $(ATOMICS_TARGET_PAIRS) \ + $(ATOMICS_BUILTINS_TARGET_PAIRS) \ + system.ads<system-linux-x86_64.ads + ## ^^ Note the above is a pretty-close placeholder. + + TOOLS_TARGET_PAIRS = \ + mlib-tgt-specific.adb<mlib-tgt-specific-linux.adb \ + indepsw.adb<indepsw-gnu.adb + + EXTRA_GNATRTL_TASKING_OBJS=s-linux.o a-exetim.o + EH_MECHANISM=-gcc + THREADSLIB=-lpthread -lrt + GNATLIB_SHARED=gnatlib-shared-dual + GMEM_LIB = gmemlib + LIBRARY_VERSION := $(LIB_VERSION) +endif + # Sparc Linux ifeq ($(strip $(filter-out sparc% linux%,$(target_cpu) $(target_os))),) LIBGNAT_TARGET_PAIRS_COMMON = \ diff --git a/gcc-4.9/gcc/ada/gcc-interface/decl.c b/gcc-4.9/gcc/ada/gcc-interface/decl.c index 4180e59f6..52452ce79 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/decl.c +++ b/gcc-4.9/gcc/ada/gcc-interface/decl.c @@ -72,6 +72,8 @@ #define Has_Thiscall_Convention(E) 0 #endif +#define STDCALL_PREFIX "_imp__" + /* Stack realignment is necessary for functions with foreign conventions when the ABI doesn't mandate as much as what the compiler assumes - that is, up to PREFERRED_STACK_BOUNDARY. @@ -2433,8 +2435,10 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, int definition) we can just use the high bound of the index type. */ else if ((Nkind (gnat_index) == N_Range && cannot_be_superflat_p (gnat_index)) - /* Packed Array Types are never superflat. */ - || Is_Packed_Array_Type (gnat_entity)) + /* Bit-Packed Array Types are never superflat. 
*/ + || (Is_Packed_Array_Type (gnat_entity) + && Is_Bit_Packed_Array + (Original_Array_Type (gnat_entity)))) gnu_high = gnu_max; /* Otherwise, if the high bound is constant but the low bound is @@ -8879,16 +8883,12 @@ get_entity_name (Entity_Id gnat_entity) tree create_concat_name (Entity_Id gnat_entity, const char *suffix) { - Entity_Kind kind = Ekind (gnat_entity); + const Entity_Kind kind = Ekind (gnat_entity); + const bool has_suffix = (suffix != NULL); + String_Template temp = {1, has_suffix ? strlen (suffix) : 0}; + String_Pointer sp = {suffix, &temp}; - if (suffix) - { - String_Template temp = {1, (int) strlen (suffix)}; - Fat_Pointer fp = {suffix, &temp}; - Get_External_Name_With_Suffix (gnat_entity, fp); - } - else - Get_External_Name (gnat_entity, 0); + Get_External_Name (gnat_entity, has_suffix, sp); /* A variable using the Stdcall convention lives in a DLL. We adjust its name to use the jump table, the _imp__NAME contains the address @@ -8896,9 +8896,9 @@ create_concat_name (Entity_Id gnat_entity, const char *suffix) if ((kind == E_Variable || kind == E_Constant) && Has_Stdcall_Convention (gnat_entity)) { - const int len = 6 + Name_Len; + const int len = strlen (STDCALL_PREFIX) + Name_Len; char *new_name = (char *) alloca (len + 1); - strcpy (new_name, "_imp__"); + strcpy (new_name, STDCALL_PREFIX); strcat (new_name, Name_Buffer); return get_identifier_with_length (new_name, len); } diff --git a/gcc-4.9/gcc/ada/gcc-interface/misc.c b/gcc-4.9/gcc/ada/gcc-interface/misc.c index a5f2881d6..fe44c6d5b 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/misc.c +++ b/gcc-4.9/gcc/ada/gcc-interface/misc.c @@ -283,8 +283,8 @@ internal_error_function (diagnostic_context *context, text_info tinfo; char *buffer, *p, *loc; String_Template temp, temp_loc; - Fat_Pointer fp, fp_loc; - expanded_location s; + String_Pointer sp, sp_loc; + expanded_location xloc; /* Warn if plugins present. 
*/ warn_if_plugins (); @@ -311,21 +311,21 @@ internal_error_function (diagnostic_context *context, temp.Low_Bound = 1; temp.High_Bound = p - buffer; - fp.Bounds = &temp; - fp.Array = buffer; + sp.Bounds = &temp; + sp.Array = buffer; - s = expand_location (input_location); - if (context->show_column && s.column != 0) - asprintf (&loc, "%s:%d:%d", s.file, s.line, s.column); + xloc = expand_location (input_location); + if (context->show_column && xloc.column != 0) + asprintf (&loc, "%s:%d:%d", xloc.file, xloc.line, xloc.column); else - asprintf (&loc, "%s:%d", s.file, s.line); + asprintf (&loc, "%s:%d", xloc.file, xloc.line); temp_loc.Low_Bound = 1; temp_loc.High_Bound = strlen (loc); - fp_loc.Bounds = &temp_loc; - fp_loc.Array = loc; + sp_loc.Bounds = &temp_loc; + sp_loc.Array = loc; Current_Error_Node = error_gnat_node; - Compiler_Abort (fp, -1, fp_loc); + Compiler_Abort (sp, sp_loc, true); } /* Perform all the initialization steps that are language-specific. */ diff --git a/gcc-4.9/gcc/ada/gcc-interface/trans.c b/gcc-4.9/gcc/ada/gcc-interface/trans.c index 4a4d0faa9..03bf098b0 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/trans.c +++ b/gcc-4.9/gcc/ada/gcc-interface/trans.c @@ -9262,16 +9262,16 @@ void post_error (const char *msg, Node_Id node) { String_Template temp; - Fat_Pointer fp; + String_Pointer sp; if (No (node)) return; temp.Low_Bound = 1; temp.High_Bound = strlen (msg); - fp.Bounds = &temp; - fp.Array = msg; - Error_Msg_N (fp, node); + sp.Bounds = &temp; + sp.Array = msg; + Error_Msg_N (sp, node); } /* Similar to post_error, but NODE is the node at which to post the error and @@ -9281,16 +9281,16 @@ void post_error_ne (const char *msg, Node_Id node, Entity_Id ent) { String_Template temp; - Fat_Pointer fp; + String_Pointer sp; if (No (node)) return; temp.Low_Bound = 1; temp.High_Bound = strlen (msg); - fp.Bounds = &temp; - fp.Array = msg; - Error_Msg_NE (fp, node, ent); + sp.Bounds = &temp; + sp.Array = msg; + Error_Msg_NE (sp, node, ent); } /* Similar to 
post_error_ne, but NUM is the number to use for the '^'. */ diff --git a/gcc-4.9/gcc/ada/gcc-interface/utils.c b/gcc-4.9/gcc/ada/gcc-interface/utils.c index 014fe361b..15b72366b 100644 --- a/gcc-4.9/gcc/ada/gcc-interface/utils.c +++ b/gcc-4.9/gcc/ada/gcc-interface/utils.c @@ -5662,9 +5662,10 @@ gnat_write_global_declarations (void) dummy_global = build_decl (BUILTINS_LOCATION, VAR_DECL, get_identifier (label), void_type_node); + DECL_HARD_REGISTER (dummy_global) = 1; TREE_STATIC (dummy_global) = 1; - TREE_ASM_WRITTEN (dummy_global) = 1; node = varpool_node_for_decl (dummy_global); + node->definition = 1; node->force_output = 1; while (!types_used_by_cur_var_decl->is_empty ()) diff --git a/gcc-4.9/gcc/ada/init.c b/gcc-4.9/gcc/ada/init.c index c3824ab7e..48319d62f 100644 --- a/gcc-4.9/gcc/ada/init.c +++ b/gcc-4.9/gcc/ada/init.c @@ -556,9 +556,14 @@ __gnat_error_handler (int sig, siginfo_t *si ATTRIBUTE_UNUSED, void *ucontext) Raise_From_Signal_Handler (exception, msg); } -#if defined (i386) || defined (__x86_64__) || defined (__powerpc__) -/* This must be in keeping with System.OS_Interface.Alternate_Stack_Size. */ -char __gnat_alternate_stack[16 * 1024]; /* 2 * SIGSTKSZ */ +#ifndef __ia64__ +#define HAVE_GNAT_ALTERNATE_STACK 1 +/* This must be in keeping with System.OS_Interface.Alternate_Stack_Size. + It must be larger than MINSIGSTKSZ and hopefully near 2 * SIGSTKSZ. */ +# if 16 * 1024 < MINSIGSTKSZ +# error "__gnat_alternate_stack too small" +# endif +char __gnat_alternate_stack[16 * 1024]; #endif #ifdef __XENO__ @@ -612,7 +617,7 @@ __gnat_install_handler (void) sigaction (SIGBUS, &act, NULL); if (__gnat_get_interrupt_state (SIGSEGV) != 's') { -#if defined (i386) || defined (__x86_64__) || defined (__powerpc__) +#ifdef HAVE_GNAT_ALTERNATE_STACK /* Setup an alternate stack region for the handler execution so that stack overflows can be handled properly, avoiding a SEGV generation from stack usage by the handler itself. 
*/ diff --git a/gcc-4.9/gcc/ada/types.h b/gcc-4.9/gcc/ada/types.h index 7d1e69624..5e19e8fe6 100644 --- a/gcc-4.9/gcc/ada/types.h +++ b/gcc-4.9/gcc/ada/types.h @@ -76,11 +76,19 @@ typedef Char *Str; /* Pointer to string of Chars */ typedef Char *Str_Ptr; -/* Types for the fat pointer used for strings and the template it - points to. */ -typedef struct {int Low_Bound, High_Bound; } String_Template; -typedef struct {const char *Array; String_Template *Bounds; } - __attribute ((aligned (sizeof (char *) * 2))) Fat_Pointer; +/* Types for the fat pointer used for strings and the template it points to. + The fat pointer is conceptually a couple of pointers, but it is wrapped + up in a special record type. On the Ada side, the record is naturally + aligned (i.e. given pointer alignment) on regular platforms, but it is + given twice this alignment on strict-alignment platforms for performance + reasons. On the C side, for the sake of portability and simplicity, we + overalign it on all platforms (so the machine mode is always the same as + on the Ada side) but arrange to pass it in an even scalar position as a + parameter to functions (so the scalar parameter alignment is always the + same as on the Ada side). 
*/ +typedef struct { int Low_Bound, High_Bound; } String_Template; +typedef struct { const char *Array; String_Template *Bounds; } + __attribute ((aligned (sizeof (char *) * 2))) String_Pointer; /* Types for Node/Entity Kinds: */ diff --git a/gcc-4.9/gcc/alias.c b/gcc-4.9/gcc/alias.c index 434ae7ad3..e5406123c 100644 --- a/gcc-4.9/gcc/alias.c +++ b/gcc-4.9/gcc/alias.c @@ -157,7 +157,6 @@ static rtx find_base_value (rtx); static int mems_in_disjoint_alias_sets_p (const_rtx, const_rtx); static int insert_subset_children (splay_tree_node, void*); static alias_set_entry get_alias_set_entry (alias_set_type); -static bool nonoverlapping_component_refs_p (const_rtx, const_rtx); static tree decl_for_component_ref (tree); static int write_dependence_p (const_rtx, const_rtx, enum machine_mode, rtx, @@ -2259,68 +2258,6 @@ read_dependence (const_rtx mem, const_rtx x) return false; } -/* Return true if we can determine that the fields referenced cannot - overlap for any pair of objects. */ - -static bool -nonoverlapping_component_refs_p (const_rtx rtlx, const_rtx rtly) -{ - const_tree x = MEM_EXPR (rtlx), y = MEM_EXPR (rtly); - const_tree fieldx, fieldy, typex, typey, orig_y; - - if (!flag_strict_aliasing - || !x || !y - || TREE_CODE (x) != COMPONENT_REF - || TREE_CODE (y) != COMPONENT_REF) - return false; - - do - { - /* The comparison has to be done at a common type, since we don't - know how the inheritance hierarchy works. */ - orig_y = y; - do - { - fieldx = TREE_OPERAND (x, 1); - typex = TYPE_MAIN_VARIANT (DECL_FIELD_CONTEXT (fieldx)); - - y = orig_y; - do - { - fieldy = TREE_OPERAND (y, 1); - typey = TYPE_MAIN_VARIANT (DECL_FIELD_CONTEXT (fieldy)); - - if (typex == typey) - goto found; - - y = TREE_OPERAND (y, 0); - } - while (y && TREE_CODE (y) == COMPONENT_REF); - - x = TREE_OPERAND (x, 0); - } - while (x && TREE_CODE (x) == COMPONENT_REF); - /* Never found a common type. 
*/ - return false; - - found: - /* If we're left with accessing different fields of a structure, then no - possible overlap, unless they are both bitfields. */ - if (TREE_CODE (typex) == RECORD_TYPE && fieldx != fieldy) - return !(DECL_BIT_FIELD (fieldx) && DECL_BIT_FIELD (fieldy)); - - /* The comparison on the current field failed. If we're accessing - a very nested structure, look at the next outer level. */ - x = TREE_OPERAND (x, 0); - y = TREE_OPERAND (y, 0); - } - while (x && y - && TREE_CODE (x) == COMPONENT_REF - && TREE_CODE (y) == COMPONENT_REF); - - return false; -} - /* Look at the bottom of the COMPONENT_REF list for a DECL, and return it. */ static tree @@ -2596,9 +2533,6 @@ true_dependence_1 (const_rtx mem, enum machine_mode mem_mode, rtx mem_addr, if (nonoverlapping_memrefs_p (mem, x, false)) return 0; - if (nonoverlapping_component_refs_p (mem, x)) - return 0; - return rtx_refs_may_alias_p (x, mem, true); } diff --git a/gcc-4.9/gcc/attribs.c b/gcc-4.9/gcc/attribs.c index 54373eb52..d3558a6e9 100644 --- a/gcc-4.9/gcc/attribs.c +++ b/gcc-4.9/gcc/attribs.c @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see #include "hash-table.h" #include "plugin.h" + /* Table of the tables of attributes (common, language, format, machine) searched. */ static const struct attribute_spec *attribute_tables[4]; diff --git a/gcc-4.9/gcc/auto-profile.c b/gcc-4.9/gcc/auto-profile.c new file mode 100644 index 000000000..ddc2d5e9f --- /dev/null +++ b/gcc-4.9/gcc/auto-profile.c @@ -0,0 +1,1865 @@ +/* Calculate branch probabilities, and basic block execution counts. + Copyright (C) 2012. Free Software Foundation, Inc. + Contributed by Dehao Chen (dehao@google.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Read and annotate call graph profile from the auto profile data + file. */ + +#include <string.h> +#include <map> +#include <vector> +#include <set> + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "flags.h" +#include "basic-block.h" +#include "diagnostic-core.h" +#include "gcov-io.h" +#include "input.h" +#include "profile.h" +#include "langhooks.h" +#include "opts.h" +#include "tree-pass.h" +#include "cfgloop.h" +#include "tree-ssa-alias.h" +#include "tree-cfg.h" +#include "tree-cfgcleanup.h" +#include "tree-ssa-operands.h" +#include "tree-into-ssa.h" +#include "internal-fn.h" +#include "is-a.h" +#include "gimple-expr.h" +#include "md5.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimple-ssa.h" +#include "cgraph.h" +#include "value-prof.h" +#include "coverage.h" +#include "params.h" +#include "l-ipo.h" +#include "ipa-utils.h" +#include "ipa-inline.h" +#include "output.h" +#include "dwarf2asm.h" +#include "tree-inline.h" +#include "auto-profile.h" + +/* The following routines implements AutoFDO optimization. + + This optimization uses sampling profiles to annotate basic block counts + and uses heuristics to estimate branch probabilities. + + There are three phases in AutoFDO: + + Phase 1: Read profile from the profile data file. + The following info is read from the profile datafile: + * string_table: a map between function name and its index. + * autofdo_source_profile: a map from function_instance name to + function_instance. This is represented as a forest of + function_instances. 
+ * autofdo_module_profile: a map from module name to its + compilation/aux-module info. + * WorkingSet: a histogram of how many instructions are covered for a + given percentage of total cycles. + + Phase 2: Early inline. + Early inline uses autofdo_source_profile to find if a callsite is: + * inlined in the profiled binary. + * callee body is hot in the profiling run. + If both conditions are satisfied, early inline will inline the callsite + regardless of the code growth. + + Phase 3: Annotate control flow graph. + AutoFDO uses a separate pass to: + * Annotate basic block count + * Estimate branch probability + + After the above 3 phases, the whole profile is annotated on the GCC IR. + AutoFDO tries to reuse all FDO infrastructure as much as possible to make + use of the profile. E.g. it uses the existing mechanisms to calculate the basic + block/edge frequency, as well as the cgraph node/edge count. +*/ + +#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" + +namespace autofdo { + +/* Represent a source location: (function_decl, lineno). */ +typedef std::pair<tree, unsigned> decl_lineno; + +/* Represent an inline stack. vector[0] is the leaf node. */ +typedef std::vector<decl_lineno> inline_stack; + +/* String array that stores function names. */ +typedef std::vector<const char *> string_vector; + +/* Map from function name's index in string_table to target's + execution count. */ +typedef std::map<unsigned, gcov_type> icall_target_map; + +/* Set of gimple stmts. Used to track if the stmt has already been promoted + to direct call. */ +typedef std::set<gimple> stmt_set; + +/* Represent count info of an inline stack. */ +struct count_info +{ + /* Sampled count of the inline stack. */ + gcov_type count; + + /* Map from indirect call target to its sample count. */ + icall_target_map targets; + + /* Whether this inline stack is already used in annotation. + + Each inline stack should only be used to annotate IR once. 
+ This will be enforced when instruction-level discriminator + is supported. */ + bool annotated; +}; + +/* operator< for "const char *". */ +struct string_compare +{ + bool operator() (const char *a, const char *b) const + { return strcmp (a, b) < 0; } +}; + +/* Store a string array, indexed by string position in the array. */ +class string_table { +public: + static string_table *create (); + + /* For a given string, returns its index. */ + int get_index (const char *name) const; + + /* For a given decl, returns the index of the decl name. */ + int get_index_by_decl (tree decl) const; + + /* For a given index, returns the string. */ + const char *get_name (int index) const; + +private: + string_table () {} + bool read (); + + typedef std::map<const char *, unsigned, string_compare> string_index_map; + string_vector vector_; + string_index_map map_; +}; + +/* Profile of a function instance: + 1. total_count of the function. + 2. head_count of the function (only valid when function is a top-level + function_instance, i.e. it is the original copy instead of the + inlined copy). + 3. map from source location (decl_lineno) of the inlined callsite to + profile (count_info). + 4. map from callsite to callee function_instance. */ +class function_instance { +public: + typedef std::vector<function_instance *> function_instance_stack; + + /* Read the profile and return a function_instance with head count as + HEAD_COUNT. Recursively read callsites to create nested function_instances + too. STACK is used to track the recursive creation process. */ + static function_instance *read_function_instance ( + function_instance_stack *stack, gcov_type head_count); + + /* Recursively deallocate all callsites (nested function_instances). */ + ~function_instance (); + + /* Accessors. 
*/ + int name () const { return name_; } + gcov_type total_count () const { return total_count_; } + gcov_type head_count () const { return head_count_; } + + /* Recursively traverse STACK starting from LEVEL to find the corresponding + function_instance. */ + function_instance *get_function_instance (const inline_stack &stack, + unsigned level); + + /* Store the profile info for LOC in INFO. Return TRUE if profile info + is found. */ + bool get_count_info (location_t loc, count_info *info) const; + + /* Read the inlined indirect call target profile for STMT and store it in + MAP, return the total count for all inlined indirect calls. */ + gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const; + + /* Sum of counts that is used during annotation. */ + gcov_type total_annotated_count () const; + + /* Mark LOC as annotated. */ + void mark_annotated (location_t loc); + +private: + function_instance (unsigned name, gcov_type head_count) + : name_(name), total_count_(0), head_count_(head_count) {} + + /* Map from callsite decl_lineno (lineno in higher 16 bits, discriminator + in lower 16 bits) to callee function_instance. */ + typedef std::map<unsigned, function_instance *> callsite_map; + /* Map from source location (decl_lineno) to profile (count_info). */ + typedef std::map<unsigned, count_info> position_count_map; + + /* function_instance name index in the string_table. */ + unsigned name_; + + /* Total sample count. */ + gcov_type total_count_; + + /* Entry BB's sample count. */ + gcov_type head_count_; + + /* Map from callsite location to callee function_instance. */ + callsite_map callsites; + + /* Map from source location to count_info. */ + position_count_map pos_counts; +}; + +/* Profile for all functions. 
*/ +class autofdo_source_profile { +public: + static autofdo_source_profile *create () + { + autofdo_source_profile *map = new autofdo_source_profile (); + if (map->read ()) + return map; + delete map; + return NULL; + } + + ~autofdo_source_profile (); + + /* For a given DECL, returns the top-level function_instance. */ + function_instance *get_function_instance_by_decl (tree decl) const; + + /* Find count_info for a given gimple STMT. If found, store the count_info + in INFO and return true; otherwise return false. */ + bool get_count_info (gimple stmt, count_info *info) const; + + /* Find total count of the callee of EDGE. */ + gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; + + /* Update value profile INFO for STMT from the inlined indirect callsite. + Return true if INFO is updated. */ + bool update_inlined_ind_target (gimple stmt, count_info *info); + + /* Mark LOC as annotated. */ + void mark_annotated (location_t loc); + + /* Writes the profile annotation status for each function in an elf + section. */ + void write_annotated_count () const; + +private: + /* Map from function_instance name index (in string_table) to + function_instance. */ + typedef std::map<unsigned, function_instance *> + name_function_instance_map; + + autofdo_source_profile () {} + + /* Read AutoFDO profile and returns TRUE on success. */ + bool read (); + + /* Return the function_instance in the profile that correspond to the + inline STACK. */ + function_instance *get_function_instance_by_inline_stack ( + const inline_stack &stack) const; + + name_function_instance_map map_; +}; + +/* Module profile. */ +class autofdo_module_profile { +public: + static autofdo_module_profile *create () + { + autofdo_module_profile *map = new autofdo_module_profile (); + if (map->read ()) + return map; + delete map; + return NULL; + } + + /* For a given module NAME, returns this module's gcov_module_info. 
*/ + gcov_module_info *get_module(const char *name) const + { + name_target_map::const_iterator iter = map_.find (name); + return iter == map_.end() ? NULL : iter->second.second; + } + + /* For a given module NAME, returns this module's aux-modules. */ + const string_vector *get_aux_modules(const char *name) const + { + name_target_map::const_iterator iter = map_.find (name); + return iter == map_.end() ? NULL : &iter->second.first; + } + +private: + autofdo_module_profile () {} + bool read (); + + typedef std::pair<string_vector, gcov_module_info *> AuxInfo; + typedef std::map<const char *, AuxInfo, string_compare> name_target_map; + /* Map from module name to (aux_modules, gcov_module_info). */ + name_target_map map_; +}; + + +/* Store the strings read from the profile data file. */ +static string_table *afdo_string_table; +/* Store the AutoFDO source profile. */ +static autofdo_source_profile *afdo_source_profile; + +/* Store the AutoFDO module profile. */ +static autofdo_module_profile *afdo_module_profile; + +/* gcov_ctr_summary structure to store the profile_info. */ +static struct gcov_ctr_summary *afdo_profile_info; + +/* Helper functions. */ + +/* Return the original name of NAME: strip the suffix that starts + with the first '.'. The result is a fresh xstrdup'd copy of NAME, + truncated in place; NAME itself is not modified. */ + +static const char *get_original_name (const char *name) +{ + char *ret = xstrdup (name); + char *find = strchr (ret, '.'); + if (find != NULL) + *find = 0; + return ret; +} + +/* Return the combined location, which is a 32bit integer in which the + higher 16 bits store the line offset of LOC from the start lineno + of DECL, and the lower 16 bits store the discriminator. */ + +static unsigned +get_combined_location (location_t loc, tree decl) +{ + return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16) + | get_discriminator_from_locus (loc); +} + +/* Return the function decl of a given lexical BLOCK. 
*/ + +static tree +get_function_decl_from_block (tree block) +{ + tree decl; + + /* Test the locus of the block's source location itself. The comparison + must sit outside LOCATION_LOCUS, otherwise the macro is applied to the + boolean result of the comparison instead of to the location. */ + if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION) + return NULL_TREE; + + for (decl = BLOCK_ABSTRACT_ORIGIN (block); + decl && (TREE_CODE (decl) == BLOCK); + decl = BLOCK_ABSTRACT_ORIGIN (decl)) + if (TREE_CODE (decl) == FUNCTION_DECL) + break; + return decl; +} + +/* Store the inline stack for LOCUS in STACK. Nothing is pushed when the + locus of LOCUS is UNKNOWN_LOCATION; otherwise the last entry pushed is + always the one for current_function_decl. */ + +static void +get_inline_stack (location_t locus, inline_stack *stack) +{ + if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) + return; + + tree block = LOCATION_BLOCK (locus); + if (block && TREE_CODE (block) == BLOCK) + { + int level = 0; + for (block = BLOCK_SUPERCONTEXT (block); + block && (TREE_CODE (block) == BLOCK); + block = BLOCK_SUPERCONTEXT (block)) + { + location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); + if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) + continue; + + tree decl = get_function_decl_from_block (block); + stack->push_back (std::make_pair ( + decl, get_combined_location (locus, decl))); + locus = tmp_locus; + level++; + } + } + stack->push_back (std::make_pair ( + current_function_decl, + get_combined_location (locus, current_function_decl))); +} + +/* Return STMT's combined location, which is a 32bit integer in which the + higher 16 bits store the line offset of LOC from the start lineno + of DECL, and the lower 16 bits store the discriminator. */ + +static unsigned +get_relative_location_for_stmt (gimple stmt) +{ + location_t locus = gimple_location (stmt); + if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) + return UNKNOWN_LOCATION; + + for (tree block = gimple_block (stmt); + block && (TREE_CODE (block) == BLOCK); + block = BLOCK_SUPERCONTEXT (block)) + if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) + return get_combined_location ( + locus, get_function_decl_from_block (block)); + return get_combined_location (locus, current_function_decl); +} + +/* Return true if BB contains indirect call. 
*/ + +static bool +has_indirect_call (basic_block bb) +{ + gimple_stmt_iterator gsi; + + /* NOTE(review): gimple_call_fn normally yields an ADDR_EXPR for a direct + call, so the FUNCTION_DECL test below may also match direct calls; + confirm whether gimple_call_fndecl was intended. */ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_CALL + && (gimple_call_fn (stmt) == NULL + || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) + return true; + } + return false; +} + +/* Member functions for string_table. */ + +/* Allocate a string_table and fill it from the profile; return NULL (and + free the table) when reading fails. */ + +string_table * +string_table::create () +{ + string_table *map = new string_table(); + if (map->read ()) + return map; + delete map; + return NULL; +} + +/* Return the index of NAME, or -1 if NAME is NULL or not in the table. */ + +int +string_table::get_index (const char *name) const +{ + if (name == NULL) + return -1; + string_index_map::const_iterator iter = map_.find (name); + if (iter == map_.end()) + return -1; + else + return iter->second; +} + +/* Return the index of DECL's name, trying in order the assembler name with + its '.' suffix stripped, the dwarf name, and finally DECL's abstract + origin. Return -1 if none of them is in the table. */ + +int +string_table::get_index_by_decl (tree decl) const +{ + const char *name = get_original_name ( + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); + int ret = get_index (name); + /* get_original_name returns a fresh xstrdup'd copy and get_index only + uses it as a lookup key, so release it here instead of leaking one + string per lookup. */ + free (const_cast<char *> (name)); + if (ret != -1) + return ret; + ret = get_index (lang_hooks.dwarf_name (decl, 0)); + if (ret != -1) + return ret; + if (DECL_ABSTRACT_ORIGIN (decl)) + return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); + else + return -1; +} + +/* Return the string stored at INDEX. + NOTE(review): read () assigns index 0 to the first string, which the + assertion below rejects; confirm that index 0 is never looked up. */ + +const char * +string_table::get_name (int index) const +{ + gcc_assert (index > 0 && index < (int) vector_.size()); + return vector_[index]; +} + +/* Read the string table section from the profile. Return false if the + section tag is not GCOV_TAG_AFDO_FILE_NAMES. */ + +bool +string_table::read () +{ + if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) + return false; + /* Skip the length of the section. */ + gcov_read_unsigned (); + /* Read in the file name table. */ + unsigned string_num = gcov_read_unsigned (); + for (unsigned i = 0; i < string_num; i++) + { + vector_.push_back (get_original_name (gcov_read_string ())); + map_[vector_.back()] = i; + } + return true; +} + + +/* Member functions for function_instance. 
*/ + +function_instance::~function_instance () +{ + for (callsite_map::iterator iter = callsites.begin(); + iter != callsites.end(); ++iter) + delete iter->second; +} + +/* Recursively traverse STACK starting from LEVEL to find the corresponding + function_instance. Return this instance when LEVEL is 0, and NULL when + no callsite is recorded for the location at STACK[LEVEL]. */ + +function_instance * +function_instance::get_function_instance ( + const inline_stack &stack, unsigned level) +{ + if (level == 0) + return this; + callsite_map::const_iterator ret = callsites.find (stack[level].second); + if (ret != callsites.end () && ret->second != NULL) + return ret->second->get_function_instance (stack, level - 1); + else + return NULL; +} + +/* Store the profile info for LOC in INFO. Return TRUE if profile info + is found; INFO is left untouched otherwise. */ + +bool +function_instance::get_count_info (location_t loc, count_info *info) const +{ + position_count_map::const_iterator iter = pos_counts.find (loc); + if (iter == pos_counts.end ()) + return false; + *info = iter->second; + return true; +} + +/* Mark LOC as annotated. Do nothing if there is no count for LOC. */ + +void +function_instance::mark_annotated (location_t loc) +{ + position_count_map::iterator iter = pos_counts.find (loc); + if (iter == pos_counts.end ()) + return; + iter->second.annotated = true; +} + +/* Read the inlined indirect call target profile for STMT and store it in + MAP, return the total count for all inlined indirect calls. */ + +gcov_type +function_instance::find_icall_target_map ( + gimple stmt, icall_target_map *map) const +{ + gcov_type ret = 0; + unsigned stmt_offset = get_relative_location_for_stmt (stmt); + + for (callsite_map::const_iterator iter = callsites.begin(); + iter != callsites.end(); ++iter) + { + unsigned callee = iter->second->name(); + /* Check if callsite location match the stmt. 
*/ + if (iter->first != stmt_offset) + continue; + struct cgraph_node *node = find_func_by_global_id ( + (unsigned long long) afdo_string_table->get_name (callee), true); + if (node == NULL) + continue; + if (!check_ic_target (stmt, node)) + continue; + (*map)[callee] = iter->second->total_count (); + ret += iter->second->total_count (); + } + return ret; +} + +/* Read the profile and create a function_instance with head count as + HEAD_COUNT. Recursively read callsites to create nested function_instances + too. STACK is used to track the recursive creation process. */ + +function_instance * +function_instance::read_function_instance ( + function_instance_stack *stack, gcov_type head_count) +{ + unsigned name = gcov_read_unsigned (); + unsigned num_pos_counts = gcov_read_unsigned (); + unsigned num_callsites = gcov_read_unsigned (); + function_instance *s = new function_instance (name, head_count); + stack->push_back(s); + + for (unsigned i = 0; i < num_pos_counts; i++) + { + unsigned offset = gcov_read_unsigned (); + unsigned num_targets = gcov_read_unsigned (); + gcov_type count = gcov_read_counter (); + s->pos_counts[offset].count = count; + for (unsigned j = 0; j < stack->size(); j++) + (*stack)[j]->total_count_ += count; + for (unsigned j = 0; j < num_targets; j++) + { + /* Only indirect call target histogram is supported now. */ + gcov_read_unsigned (); + gcov_type target_idx = gcov_read_counter (); + s->pos_counts[offset].targets[target_idx] = + gcov_read_counter (); + } + } + for (unsigned i = 0; i < num_callsites; i++) { + unsigned offset = gcov_read_unsigned (); + s->callsites[offset] = read_function_instance (stack, 0); + } + stack->pop_back(); + return s; +} + +/* Sum of counts that is used during annotation. 
*/ + +gcov_type +function_instance::total_annotated_count () const +{ + gcov_type ret = 0; + for (callsite_map::const_iterator iter = callsites.begin(); + iter != callsites.end(); ++iter) + ret += iter->second->total_annotated_count (); + for (position_count_map::const_iterator iter = pos_counts.begin(); + iter != pos_counts.end(); ++iter) + if (iter->second.annotated) + ret += iter->second.count; + return ret; +} + +void +autofdo_source_profile::write_annotated_count () const +{ + /* We store the annotation info as a string in the format of: + + function_name:total_count:annotated_count + + Because different modules may output the annotation info for a same + function, we set the section as SECTION_MERGE so that we don't have + replicated info in the final binary. */ + switch_to_section (get_section ( + ".gnu.switches.text.annotation", + SECTION_DEBUG | SECTION_MERGE | SECTION_STRINGS | (SECTION_ENTSIZE & 1), + NULL)); + for (name_function_instance_map::const_iterator iter = map_.begin (); + iter != map_.end (); ++iter) + if (iter->second->total_count () > 0) + { + char buf[1024]; + snprintf (buf, 1024, + "%s:"HOST_WIDEST_INT_PRINT_DEC":"HOST_WIDEST_INT_PRINT_DEC, + afdo_string_table->get_name (iter->first), + iter->second->total_count (), + iter->second->total_annotated_count ()); + dw2_asm_output_nstring (buf, (size_t)-1, NULL); + } +} + + +/* Member functions for autofdo_source_profile. */ + +autofdo_source_profile::~autofdo_source_profile () +{ + for (name_function_instance_map::const_iterator iter = map_.begin (); + iter != map_.end (); ++iter) + delete iter->second; +} + +/* For a given DECL, returns the top-level function_instance. */ + +function_instance * +autofdo_source_profile::get_function_instance_by_decl (tree decl) const +{ + int index = afdo_string_table->get_index_by_decl (decl); + if (index == -1) + return NULL; + name_function_instance_map::const_iterator ret = map_.find (index); + return ret == map_.end() ? 
NULL : ret->second; +} + +/* Find count_info for a given gimple STMT. If found, store the count_info + in INFO and return true; otherwise return false. */ + +bool +autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const +{ + if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) + return false; + + inline_stack stack; + get_inline_stack (gimple_location (stmt), &stack); + if (stack.size () == 0) + return false; + const function_instance *s = get_function_instance_by_inline_stack (stack); + if (s == NULL) + return false; + return s->get_count_info (stack[0].second, info); +} + +void +autofdo_source_profile::mark_annotated (location_t loc) { + inline_stack stack; + get_inline_stack (loc, &stack); + if (stack.size () == 0) + return; + function_instance *s = get_function_instance_by_inline_stack (stack); + if (s == NULL) + return; + s->mark_annotated (stack[0].second); +} + +/* Update value profile INFO for STMT from the inlined indirect callsite. + Return true if INFO is updated. */ + +bool +autofdo_source_profile::update_inlined_ind_target ( + gimple stmt, count_info *info) +{ + if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) + return false; + + count_info old_info; + get_count_info (stmt, &old_info); + gcov_type total = 0; + for (icall_target_map::const_iterator iter = old_info.targets.begin(); + iter != old_info.targets.end(); ++iter) + total += iter->second; + + /* Program behavior changed, original promoted (and inlined) target is not + hot any more. Will avoid promote the original target. + + To check if original promoted target is still hot, we check the total + count of the unpromoted targets (stored in old_info). If it is no less + than half of the callsite count (stored in INFO), the original promoted + target is considered not hot any more. 
*/ + if (total >= info->count * 0.5) + return false; + + inline_stack stack; + get_inline_stack (gimple_location (stmt), &stack); + if (stack.size () == 0) + return false; + const function_instance *s = get_function_instance_by_inline_stack (stack); + if (s == NULL) + return false; + icall_target_map map; + if (s->find_icall_target_map (stmt, &map) == 0) + return false; + for (icall_target_map::const_iterator iter = map.begin(); + iter != map.end(); ++iter) + info->targets[iter->first] = iter->second; + return true; +} + +/* Find total count of the callee of EDGE. */ + +gcov_type +autofdo_source_profile::get_callsite_total_count ( + struct cgraph_edge *edge) const +{ + inline_stack stack; + stack.push_back (std::make_pair(edge->callee->decl, 0)); + get_inline_stack (gimple_location (edge->call_stmt), &stack); + + const function_instance *s = get_function_instance_by_inline_stack (stack); + if (s == NULL) + return 0; + else + return s->total_count (); +} + +/* Read AutoFDO profile and returns TRUE on success. */ + +bool +autofdo_source_profile::read () +{ + if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) + { + inform (0, "Not expected TAG."); + return false; + } + + /* Skip the length of the section. */ + gcov_read_unsigned (); + + /* Read in the function/callsite profile, and store it in local + data structure. */ + unsigned function_num = gcov_read_unsigned (); + for (unsigned i = 0; i < function_num; i++) + { + function_instance::function_instance_stack stack; + function_instance *s = function_instance::read_function_instance ( + &stack, gcov_read_counter ()); + afdo_profile_info->sum_all += s->total_count (); + map_[s->name ()] = s; + } + return true; +} + +/* Return the function_instance in the profile that correspond to the + inline STACK. 
*/ + +function_instance * +autofdo_source_profile::get_function_instance_by_inline_stack ( + const inline_stack &stack) const +{ + name_function_instance_map::const_iterator iter = map_.find ( + afdo_string_table->get_index_by_decl ( + stack[stack.size() - 1].first)); + return iter == map_.end() + ? NULL + : iter->second->get_function_instance (stack, stack.size() - 1); +} + + +/* Member functions for autofdo_module_profile. */ + +bool +autofdo_module_profile::read () +{ + /* Read in the module info. */ + if (gcov_read_unsigned () != GCOV_TAG_AFDO_MODULE_GROUPING) + { + inform (0, "Not expected TAG."); + return false; + } + /* Skip the length of the section. */ + gcov_read_unsigned (); + + /* Read in the file name table. */ + unsigned total_module_num = gcov_read_unsigned (); + for (unsigned i = 0; i < total_module_num; i++) + { + char *name = xstrdup (gcov_read_string ()); + unsigned total_num = 0; + unsigned num_array[7]; + unsigned exported = gcov_read_unsigned (); + unsigned lang = gcov_read_unsigned (); + unsigned ggc_memory = gcov_read_unsigned (); + for (unsigned j = 0; j < 7; j++) + { + num_array[j] = gcov_read_unsigned (); + total_num += num_array[j]; + } + gcov_module_info *module = XCNEWVAR ( + gcov_module_info, + sizeof (gcov_module_info) + sizeof (char *) * total_num); + + std::pair<name_target_map::iterator, bool> ret = map_.insert( + name_target_map::value_type (name, AuxInfo())); + gcc_assert (ret.second); + ret.first->second.second = module; + module->ident = i + 1; + module->lang = lang; + module->ggc_memory = ggc_memory; + module->num_quote_paths = num_array[1]; + module->num_bracket_paths = num_array[2]; + module->num_system_paths = num_array[3]; + module->num_cpp_defines = num_array[4]; + module->num_cpp_includes = num_array[5]; + module->num_cl_args = num_array[6]; + module->source_filename = name; + module->is_primary = strcmp (name, in_fnames[0]) == 0; + module->flags = module->is_primary ? 
exported : 1; + for (unsigned j = 0; j < num_array[0]; j++) + ret.first->second.first.push_back (xstrdup (gcov_read_string ())); + for (unsigned j = 0; j < total_num - num_array[0]; j++) + module->string_array[j] = xstrdup (gcov_read_string ()); + } + return true; +} + +/* Read the profile from the profile file. */ + +static void +read_profile (void) +{ + if (gcov_open (auto_profile_file, 1) == 0) + error ("Cannot open profile file %s.", auto_profile_file); + + if (gcov_read_unsigned () != GCOV_DATA_MAGIC) + error ("AutoFDO profile magic number does not mathch."); + + /* Skip the version number. */ + gcov_read_unsigned (); + + /* Skip the empty integer. */ + gcov_read_unsigned (); + + /* string_table. */ + afdo_string_table = string_table::create (); + if (afdo_string_table == NULL) + error ("Cannot read string table from %s.", auto_profile_file); + + /* autofdo_source_profile. */ + afdo_source_profile = autofdo_source_profile::create (); + if (afdo_source_profile == NULL) + error ("Cannot read function profile from %s.", auto_profile_file); + + /* autofdo_module_profile. */ + afdo_module_profile = autofdo_module_profile::create (); + if (afdo_module_profile == NULL) + error ("Cannot read module profile from %s.", auto_profile_file); + + /* Read in the working set. */ + if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET) + error ("Cannot read working set from %s.", auto_profile_file); + + /* Skip the length of the section. */ + gcov_read_unsigned (); + gcov_working_set_t set[128]; + for (unsigned i = 0; i < 128; i++) + { + set[i].num_counters = gcov_read_unsigned (); + set[i].min_counter = gcov_read_counter (); + } + add_working_set (set); +} + +/* Read in the auxiliary modules for the current primary module. 
*/ + +static void +read_aux_modules (void) +{ + gcov_module_info *module = afdo_module_profile->get_module (in_fnames[0]); + if (module == NULL) + return; + + const string_vector *aux_modules = + afdo_module_profile->get_aux_modules (in_fnames[0]); + unsigned num_aux_modules = aux_modules ? aux_modules->size() : 0; + + module_infos = XCNEWVEC (gcov_module_info *, num_aux_modules + 1); + module_infos[0] = module; + primary_module_id = module->ident; + if (aux_modules == NULL) + return; + unsigned curr_module = 1, max_group = PARAM_VALUE (PARAM_MAX_LIPO_GROUP); + for (string_vector::const_iterator iter = aux_modules->begin(); + iter != aux_modules->end(); ++iter) + { + gcov_module_info *aux_module = afdo_module_profile->get_module (*iter); + if (aux_module == module) + continue; + if (aux_module == NULL) + { + if (flag_opt_info) + inform (0, "aux module %s cannot be found.", *iter); + continue; + } + if ((aux_module->lang & GCOV_MODULE_LANG_MASK) != + (module->lang & GCOV_MODULE_LANG_MASK)) + { + if (flag_opt_info) + inform (0, "Not importing %s: source language" + " different from primary module's source language", *iter); + continue; + } + if ((aux_module->lang & GCOV_MODULE_ASM_STMTS) + && flag_ripa_disallow_asm_modules) + { + if (flag_opt_info) + inform (0, "Not importing %s: contains " + "assembler statements", *iter); + continue; + } + if (max_group != 0 && curr_module >= max_group) + { + if (flag_opt_info) + inform (0, "Not importing %s: maximum group size reached", *iter); + continue; + } + if (incompatible_cl_args (module, aux_module)) + { + if (flag_opt_info) + inform (0, "Not importing %s: command-line" + " arguments not compatible with primary module", *iter); + continue; + } + module_infos[curr_module++] = aux_module; + add_input_filename (*iter); + } +} + +/* From AutoFDO profiles, find values inside STMT for that we want to measure + histograms for indirect-call optimization. 
*/ + +static void +afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map) +{ + gimple stmt = gsi_stmt (*gsi); + tree callee; + + if (map.size() == 0 || gimple_code (stmt) != GIMPLE_CALL + || gimple_call_fndecl (stmt) != NULL_TREE) + return; + + callee = gimple_call_fn (stmt); + + histogram_value hist = gimple_alloc_histogram_value ( + cfun, HIST_TYPE_INDIR_CALL_TOPN, stmt, callee); + hist->n_counters = (GCOV_ICALL_TOPN_VAL << 2) + 1; + hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); + gimple_add_histogram_value (cfun, stmt, hist); + + gcov_type total = 0; + icall_target_map::const_iterator max_iter1 = map.end(); + icall_target_map::const_iterator max_iter2 = map.end(); + + for (icall_target_map::const_iterator iter = map.begin(); + iter != map.end(); ++iter) + { + total += iter->second; + if (max_iter1 == map.end() || max_iter1->second < iter->second) + { + max_iter2 = max_iter1; + max_iter1 = iter; + } + else if (max_iter2 == map.end() || max_iter2->second < iter->second) + max_iter2 = iter; + } + + hist->hvalue.counters[0] = total; + hist->hvalue.counters[1] = (unsigned long long) + afdo_string_table->get_name (max_iter1->first); + hist->hvalue.counters[2] = max_iter1->second; + if (max_iter2 != map.end()) + { + hist->hvalue.counters[3] = (unsigned long long) + afdo_string_table->get_name (max_iter2->first); + hist->hvalue.counters[4] = max_iter2->second; + } + else + { + hist->hvalue.counters[3] = 0; + hist->hvalue.counters[4] = 0; + } +} + +/* From AutoFDO profiles, find values inside STMT for that we want to measure + histograms and adds them to list VALUES. */ + +static void +afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map) +{ + afdo_indirect_call (gsi, map); +} + +/* For a given BB, return its execution count. Add the location of annotated + stmt to ANNOTATED. Attach value profile if a stmt is not in PROMOTED, + because we only want to promote an indirect call once. 
*/ + +static gcov_type +afdo_get_bb_count (basic_block bb, const stmt_set &promoted) +{ + gimple_stmt_iterator gsi; + edge e; + edge_iterator ei; + gcov_type max_count = 0; + bool has_annotated = false; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + count_info info; + gimple stmt = gsi_stmt (gsi); + if (stmt->code == GIMPLE_DEBUG) + continue; + if (afdo_source_profile->get_count_info (stmt, &info)) + { + if (info.annotated) + continue; + if (info.count > max_count) + max_count = info.count; + has_annotated = true; + if (info.targets.size() > 0 && promoted.find (stmt) == promoted.end ()) + afdo_vpt (&gsi, info.targets); + } + } + + if (!has_annotated) + return 0; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + size_t i; + for (i = 0; i < gimple_phi_num_args (phi); i++) + afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); + } + FOR_EACH_EDGE (e, ei, bb->succs) + afdo_source_profile->mark_annotated (e->goto_locus); + + bb->flags |= BB_ANNOTATED; + return max_count; +} + +/* BB1 and BB2 are in an equivalent class iff: + 1. BB1 dominates BB2. + 2. BB2 post-dominates BB1. + 3. BB1 and BB2 are in the same loop nest. + This function finds the equivalent class for each basic block, and + stores a pointer to the first BB in its equivalent class. Meanwhile, + set bb counts for the same equivalent class to be identical. 
*/ + +static void +afdo_find_equiv_class (void) +{ + basic_block bb; + + FOR_ALL_BB_FN (bb, cfun) + bb->aux = NULL; + + FOR_ALL_BB_FN (bb, cfun) + { + vec<basic_block> dom_bbs; + basic_block bb1; + int i; + + if (bb->aux != NULL) + continue; + bb->aux = bb; + dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS, bb); + FOR_EACH_VEC_ELT (dom_bbs, i, bb1) + if (bb1->aux == NULL + && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) + { + bb->count = MAX (bb->count, bb1->count); + bb->flags |= BB_ANNOTATED; + } + } + dom_bbs = get_all_dominated_blocks (CDI_POST_DOMINATORS, bb); + FOR_EACH_VEC_ELT (dom_bbs, i, bb1) + if (bb1->aux == NULL + && dominated_by_p (CDI_DOMINATORS, bb, bb1) + && bb1->loop_father == bb->loop_father) + { + bb1->aux = bb; + if (bb1->count > bb->count && (bb1->flags & BB_ANNOTATED) != 0) + { + bb->count = MAX (bb->count, bb1->count); + bb->flags |= BB_ANNOTATED; + } + } + } +} + +/* If a basic block's count is known, and only one of its in/out edges' count + is unknown, its count can be calculated. + Meanwhile, if all of the in/out edges' counts are known, then the basic + block's unknown count can also be calculated. + IS_SUCC is true if out edges of a basic blocks are examined. + Return TRUE if any basic block/edge count is changed. */ + +static bool +afdo_propagate_edge (bool is_succ) +{ + basic_block bb; + bool changed = false; + + FOR_EACH_BB_FN (bb, cfun) + { + edge e, unknown_edge = NULL; + edge_iterator ei; + int num_unknown_edge = 0; + gcov_type total_known_count = 0; + + FOR_EACH_EDGE (e, ei, is_succ ? 
bb->succs : bb->preds) + if ((e->flags & EDGE_ANNOTATED) == 0) + num_unknown_edge ++, unknown_edge = e; + else + total_known_count += e->count; + + if (num_unknown_edge == 0) + { + if (total_known_count > bb->count) + { + bb->count = total_known_count; + changed = true; + } + if ((bb->flags & BB_ANNOTATED) == 0) + { + bb->flags |= BB_ANNOTATED; + changed = true; + } + } + else if (num_unknown_edge == 1 + && (bb->flags & BB_ANNOTATED) != 0) + { + if (bb->count >= total_known_count) + unknown_edge->count = bb->count - total_known_count; + else + unknown_edge->count = 0; + unknown_edge->flags |= EDGE_ANNOTATED; + changed = true; + } + } + return changed; +} + +/* Special propagation for circuit expressions. Because GCC translates + control flow into data flow for circuit expressions. E.g. + BB1: + if (a && b) + BB2 + else + BB3 + + will be translated into: + + BB1: + if (a) + goto BB.t1 + else + goto BB.t3 + BB.t1: + if (b) + goto BB.t2 + else + goto BB.t3 + BB.t2: + goto BB.t3 + BB.t3: + tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) + if (tmp) + goto BB2 + else + goto BB3 + + In this case, we need to propagate through PHI to determine the edge + count of BB1->BB.t1, BB.t1->BB.t2. 
*/ + +static void +afdo_propagate_circuit (void) +{ + basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + { + gimple phi_stmt; + tree cmp_rhs, cmp_lhs; + gimple cmp_stmt = last_stmt (bb); + edge e; + edge_iterator ei; + + if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) + continue; + cmp_rhs = gimple_cond_rhs (cmp_stmt); + cmp_lhs = gimple_cond_lhs (cmp_stmt); + if (!TREE_CONSTANT (cmp_rhs) + || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) + continue; + if (TREE_CODE (cmp_lhs) != SSA_NAME) + continue; + if ((bb->flags & BB_ANNOTATED) == 0) + continue; + phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs); + while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN + && gimple_assign_single_p (phi_stmt) + && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME) + phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt)); + if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI) + continue; + FOR_EACH_EDGE (e, ei, bb->succs) + { + unsigned i, total = 0; + edge only_one; + bool check_value_one = (((integer_onep (cmp_rhs)) + ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) + ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); + if ((e->flags & EDGE_ANNOTATED) == 0) + continue; + for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) + { + tree val = gimple_phi_arg_def (phi_stmt, i); + edge ep = gimple_phi_arg_edge (phi_stmt, i); + + if (!TREE_CONSTANT (val) || !(integer_zerop (val) + || integer_onep (val))) + continue; + if (check_value_one ^ integer_onep (val)) + continue; + total++; + only_one = ep; + } + if (total == 1 && (only_one->flags & EDGE_ANNOTATED) == 0) + { + only_one->count = e->count; + only_one->flags |= EDGE_ANNOTATED; + } + } + } +} + +/* Propagate the basic block count and edge count on the control flow + graph. We do the propagation iteratively until it stabilizes. 
*/ + +static void +afdo_propagate (void) +{ + basic_block bb; + bool changed = true; + int i = 0; + + FOR_ALL_BB_FN (bb, cfun) + { + bb->count = ((basic_block) bb->aux)->count; + if ((((basic_block) bb->aux)->flags & BB_ANNOTATED) != 0) + bb->flags |= BB_ANNOTATED; + } + + while (changed && i++ < PARAM_VALUE (PARAM_AUTOFDO_MAX_PROPAGATE_ITERATIONS)) + { + changed = false; + + if (afdo_propagate_edge (true)) + changed = true; + if (afdo_propagate_edge (false)) + changed = true; + afdo_propagate_circuit (); + } +} + +/* All information parsed from a location_t that will be stored into the ELF + section. */ + +struct locus_information_t { + /* File name of the source file containing the branch. */ + const char *filename; + /* Line number of the branch location. */ + unsigned lineno; + /* Hash value calculated from function name, function length, branch site + offset and discriminator, used to uniquely identify a branch across + different source versions. */ + char hash[33]; +}; + +/* Return true iff file and lineno are available for the provided locus. + Fill all fields of li with information about locus. */ + +static bool +get_locus_information (location_t locus, locus_information_t* li) { + if (locus == UNKNOWN_LOCATION || !LOCATION_FILE (locus)) + return false; + li->filename = LOCATION_FILE (locus); + li->lineno = LOCATION_LINE (locus); + + inline_stack stack; + + get_inline_stack (locus, &stack); + if (stack.empty ()) + return false; + + tree function_decl = stack[0].first; + + if (!(function_decl && TREE_CODE (function_decl) == FUNCTION_DECL)) + return false; + + /* Get function_length, branch_offset and discriminator to identify branches + across different source versions. */ + unsigned function_lineno = + LOCATION_LINE (DECL_SOURCE_LOCATION (function_decl)); + function *f = DECL_STRUCT_FUNCTION (function_decl); + unsigned function_length = f? 
LOCATION_LINE (f->function_end_locus) - + function_lineno : 0; + unsigned branch_offset = li->lineno - function_lineno; + int discriminator = get_discriminator_from_locus (locus); + + const char *fn_name = fndecl_name (function_decl); + unsigned char md5_result[16]; + + md5_ctx ctx; + + md5_init_ctx (&ctx); + md5_process_bytes (fn_name, strlen (fn_name), &ctx); + md5_process_bytes (&function_length, sizeof (function_length), &ctx); + md5_process_bytes (&branch_offset, sizeof (branch_offset), &ctx); + md5_process_bytes (&discriminator, sizeof (discriminator), &ctx); + md5_finish_ctx (&ctx, md5_result); + + /* Convert MD5 to hexadecimal representation. */ + for (int i = 0; i < 16; ++i) + { + sprintf (li->hash + i*2, "%02x", md5_result[i]); + } + + return true; +} + +/* Record branch prediction comparison for the given edge and actual + probability. */ +static void +record_branch_prediction_results (edge e, int probability) { + basic_block bb = e->src; + + if (bb->succs->length () == 2 && + maybe_hot_count_p (cfun, bb->count) && + bb->count >= check_branch_annotation_threshold) + { + gimple_stmt_iterator gsi; + gimple last = NULL; + + for (gsi = gsi_last_nondebug_bb (bb); + !gsi_end_p (gsi); + gsi_prev_nondebug (&gsi)) + { + last = gsi_stmt (gsi); + + if (gimple_has_location (last)) + break; + } + + struct locus_information_t li; + bool annotated; + + if (e->flags & EDGE_PREDICTED_BY_EXPECT) + annotated = true; + else + annotated = false; + + if (get_locus_information (e->goto_locus, &li)) + ; /* Intentionally do nothing. */ + else if (get_locus_information (gimple_location (last), &li)) + ; /* Intentionally do nothing. */ + else + return; /* Can't get locus information, return. 
*/ + + switch_to_section (get_section ( + ".gnu.switches.text.branch.annotation", + SECTION_DEBUG | SECTION_MERGE | + SECTION_STRINGS | (SECTION_ENTSIZE & 1), + NULL)); + char buf[1024]; + snprintf (buf, 1024, "%s;%u;" + HOST_WIDEST_INT_PRINT_DEC";%d;%d;%d;%s", + li.filename, li.lineno, bb->count, annotated?1:0, + probability, e->probability, li.hash); + dw2_asm_output_nstring (buf, (size_t)-1, NULL); + } +} + +/* Propagate counts on control flow graph and calculate branch + probabilities. */ + +static void +afdo_calculate_branch_prob (void) +{ + basic_block bb; + bool has_sample = false; + + FOR_EACH_BB_FN (bb, cfun) + if (bb->count > 0) + has_sample = true; + + if (!has_sample) + return; + + calculate_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (0); + + afdo_find_equiv_class (); + afdo_propagate (); + + FOR_EACH_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + int num_unknown_succ = 0; + gcov_type total_count = 0; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + if ((e->flags & EDGE_ANNOTATED) == 0) + num_unknown_succ ++; + else + total_count += e->count; + } + if (num_unknown_succ == 0 && total_count > 0) + { + bool first_edge = true; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + double probability = + (double) e->count * REG_BR_PROB_BASE / total_count; + + if (first_edge && flag_check_branch_annotation) + { + record_branch_prediction_results ( + e, static_cast<int> (probability + 0.5)); + first_edge = false; + } + + e->probability = probability; + } + } + } + FOR_ALL_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + { + e->count = + (double) bb->count * e->probability / REG_BR_PROB_BASE; + if (flag_check_branch_annotation) + { + e->flags &= ~EDGE_PREDICTED_BY_EXPECT; + } + } + bb->aux = NULL; + } + + loop_optimizer_finalize (); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); +} + +/* Perform value profile transformation using 
AutoFDO profile. Add the + promoted stmts to PROMOTED_STMTS. Return TRUE if there is any + indirect call promoted. */ + +static bool +afdo_vpt_for_early_inline (stmt_set *promoted_stmts) +{ + basic_block bb; + if (afdo_source_profile->get_function_instance_by_decl ( + current_function_decl) == NULL) + return false; + + bool has_vpt = false; + FOR_EACH_BB_FN (bb, cfun) + { + if (!has_indirect_call (bb)) + continue; + gimple_stmt_iterator gsi; + + gcov_type bb_count = 0; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + count_info info; + gimple stmt = gsi_stmt (gsi); + if (afdo_source_profile->get_count_info (stmt, &info)) + bb_count = MAX (bb_count, info.count); + } + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + /* IC_promotion and early_inline_2 are done in multiple iterations. + No need to promote the stmt if it is in promoted_stmts (meaning + it has already been promoted in a previous iteration). */ + if (gimple_code (stmt) != GIMPLE_CALL + || (gimple_call_fn (stmt) != NULL + && TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL) + || promoted_stmts->find (stmt) != promoted_stmts->end ()) + continue; + + count_info info; + afdo_source_profile->get_count_info (stmt, &info); + info.count = bb_count; + if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) + { + /* Promote the indirect call and update the promoted_stmts. */ + promoted_stmts->insert (stmt); + afdo_vpt (&gsi, info.targets); + has_vpt = true; + } + } + } + if (has_vpt && gimple_value_profile_transformations ()) + { + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + update_ssa (TODO_update_ssa); + rebuild_cgraph_edges (); + return true; + } + else + return false; +} + +/* Annotate auto profile to the control flow graph. Do not annotate value + profile for stmts in PROMOTED_STMTS. 
*/ + +static void +afdo_annotate_cfg (const stmt_set &promoted_stmts) +{ + basic_block bb; + const function_instance *s = + afdo_source_profile->get_function_instance_by_decl ( + current_function_decl); + + if (s == NULL) + return; + cgraph_get_node (current_function_decl)->count = s->head_count (); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count (); + gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + + FOR_EACH_BB_FN (bb, cfun) + { + edge e; + edge_iterator ei; + + bb->count = 0; + bb->flags &= (~BB_ANNOTATED); + FOR_EACH_EDGE (e, ei, bb->succs) + { + e->count = 0; + e->flags &= (~EDGE_ANNOTATED); + } + + bb->count = afdo_get_bb_count (bb, promoted_stmts); + if (bb->count > max_count) + max_count = bb->count; + } + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count > + ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) + { + ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count = + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->flags |= BB_ANNOTATED; + } + if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count > + EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) + { + EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count = + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->flags |= BB_ANNOTATED; + } + afdo_source_profile->mark_annotated ( + DECL_SOURCE_LOCATION (current_function_decl)); + afdo_source_profile->mark_annotated (cfun->function_start_locus); + afdo_source_profile->mark_annotated (cfun->function_end_locus); + if (max_count > 0) + { + profile_status_for_fn (cfun) = PROFILE_READ; + afdo_calculate_branch_prob (); + counts_to_freqs (); + } + if (flag_value_profile_transformations) + gimple_value_profile_transformations (); +} + +/* Wrapper function to invoke early inliner. 
*/ + +static void early_inline () +{ + compute_inline_parameters (cgraph_get_node (current_function_decl), true); + unsigned todo = early_inliner (); + if (todo & TODO_update_ssa_any) + update_ssa (TODO_update_ssa); +} + +/* Use AutoFDO profile to annotate the control flow graph. + Return the todo flag. */ + +static unsigned int +auto_profile (void) +{ + struct cgraph_node *node; + + if (cgraph_state == CGRAPH_STATE_FINISHED) + return 0; + + if (!flag_auto_profile) + return 0; + + profile_info = autofdo::afdo_profile_info; + if (L_IPO_COMP_MODE) + lipo_link_and_fixup (); + init_node_map (true); + + FOR_EACH_FUNCTION (node) + { + if (!gimple_has_body_p (node->decl)) + continue; + + /* Don't profile functions produced for builtin stuff. */ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) + continue; + + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + + /* First do indirect call promotion and early inline to make the + IR match the profiled binary before actual annotation. + + This is needed because an indirect call might have been promoted + and inlined in the profiled binary. If we do not promote and + inline these indirect calls before annotation, the profile for + these promoted functions will be lost. + + e.g. foo() --indirect_call--> bar() + In profiled binary, the callsite is promoted and inlined, making + the profile look like: + + foo: { + loc_foo_1: count_1 + bar@loc_foo_2: { + loc_bar_1: count_2 + loc_bar_2: count_3 + } + } + + Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. + If we perform annotation on it, the profile inside bar@loc_foo2 + will be wasted. + + To avoid this, we promote loc_foo_2 and inline the promoted bar + function before annotation, so the profile inside bar@loc_foo2 + will be useful. 
*/ + autofdo::stmt_set promoted_stmts; + for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) + { + if (!flag_value_profile_transformations + || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) + break; + early_inline (); + } + + early_inline (); + autofdo::afdo_annotate_cfg (promoted_stmts); + compute_function_frequency (); + update_ssa (TODO_update_ssa); + + /* Local pure-const may imply need to fixup the cfg. */ + if (execute_fixup_cfg () & TODO_cleanup_cfg) + cleanup_tree_cfg (); + + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + rebuild_cgraph_edges (); + pop_cfun (); + } + + if (flag_auto_profile_record_coverage_in_elf) + autofdo::afdo_source_profile->write_annotated_count (); + return TODO_rebuild_cgraph_edges; +} +} /* namespace autofdo. */ + +/* Read the profile from the profile data file. */ + +void +init_auto_profile (void) +{ + if (auto_profile_file == NULL) + auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; + + autofdo::afdo_profile_info = (struct gcov_ctr_summary *) + xcalloc (1, sizeof (struct gcov_ctr_summary)); + autofdo::afdo_profile_info->runs = 1; + autofdo::afdo_profile_info->sum_max = 0; + autofdo::afdo_profile_info->sum_all = 0; + + /* Read the profile from the profile file. */ + autofdo::read_profile (); + + if (flag_dyn_ipa) + autofdo::read_aux_modules (); +} + +/* Free the resources. */ + +void +end_auto_profile (void) +{ + delete autofdo::afdo_source_profile; + delete autofdo::afdo_string_table; + delete autofdo::afdo_module_profile; + profile_info = NULL; +} + +/* Returns TRUE if EDGE is hot enough to be inlined early. */ + +bool +afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) +{ + gcov_type count = + autofdo::afdo_source_profile->get_callsite_total_count (edge); + if (count > 0) + { + bool is_hot; + const struct gcov_ctr_summary *saved_profile_info = profile_info; + /* At early inline stage, profile_info is not set yet. 
We need to + temporarily set it to afdo_profile_info to calculate hotness. */ + profile_info = autofdo::afdo_profile_info; + is_hot = maybe_hot_count_p (NULL, count); + profile_info = saved_profile_info; + return is_hot; + } + else + return false; +} + +namespace { + +const pass_data pass_data_ipa_auto_profile = +{ + SIMPLE_IPA_PASS, + "afdo", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_IPA_AUTOFDO, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_auto_profile : public simple_ipa_opt_pass +{ +public: + pass_ipa_auto_profile(gcc::context *ctxt) + : simple_ipa_opt_pass(pass_data_ipa_auto_profile, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return flag_auto_profile; } + unsigned int execute () { return autofdo::auto_profile (); } + +}; // class pass_ipa_auto_profile + +} // anon namespace + +simple_ipa_opt_pass * +make_pass_ipa_auto_profile (gcc::context *ctxt) +{ + return new pass_ipa_auto_profile (ctxt); +} diff --git a/gcc-4.9/gcc/auto-profile.h b/gcc-4.9/gcc/auto-profile.h new file mode 100644 index 000000000..276614ad9 --- /dev/null +++ b/gcc-4.9/gcc/auto-profile.h @@ -0,0 +1,36 @@ +/* auto-profile.h - Defines data exported from auto-profile.c + Copyright (C) 2012. Free Software Foundation, Inc. + Contributed by Dehao Chen (dehao@google.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef AUTO_PROFILE_H +#define AUTO_PROFILE_H + +#include <vector> + +/* Read, process, finalize AutoFDO data structures. */ +extern void init_auto_profile (void); +extern void end_auto_profile (void); + +/* Returns TRUE if EDGE is hot enough to be inlined early. */ +extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *); + +/* Stores all possible call targets for NODE to RET. */ +extern void get_all_possible_call_targets (struct cgraph_node *, + std::vector<const char *> *); +#endif /* AUTO_PROFILE_H */ diff --git a/gcc-4.9/gcc/basic-block.h b/gcc-4.9/gcc/basic-block.h index 82729b4c8..9ebe4efb8 100644 --- a/gcc-4.9/gcc/basic-block.h +++ b/gcc-4.9/gcc/basic-block.h @@ -201,11 +201,6 @@ struct GTY((chain_next ("%h.next_bb"), chain_prev ("%h.prev_bb"))) basic_block_d /* Expected frequency. Normalized to be in range 0 to BB_FREQ_MAX. */ int frequency; - - /* The discriminator for this block. The discriminator distinguishes - among several basic blocks that share a common locus, allowing for - more accurate sample-based profiling. 
*/ - int discriminator; }; /* This ensures that struct gimple_bb_info is smaller than @@ -718,6 +713,7 @@ extern struct edge_list *pre_edge_rev_lcm (int, sbitmap *, extern void compute_available (sbitmap *, sbitmap *, sbitmap *, sbitmap *); /* In predict.c */ +extern bool maybe_hot_count_p (struct function *, gcov_type); extern bool maybe_hot_bb_p (struct function *, const_basic_block); extern bool maybe_hot_edge_p (edge); extern bool probably_never_executed_bb_p (struct function *, const_basic_block); diff --git a/gcc-4.9/gcc/builtin-types.def b/gcc-4.9/gcc/builtin-types.def index fba9c7ddd..91513b36b 100644 --- a/gcc-4.9/gcc/builtin-types.def +++ b/gcc-4.9/gcc/builtin-types.def @@ -569,6 +569,10 @@ DEF_POINTER_TYPE (BT_PTR_FN_VOID_VAR, BT_FN_VOID_VAR) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE, BT_PTR, BT_PTR_FN_VOID_VAR, BT_PTR, BT_SIZE) +DEF_POINTER_TYPE (BT_PTR_FN_INT, BT_FN_INT) +DEF_FUNCTION_TYPE_VAR_3 (BT_FN_INT_PTR_FN_INT_PTR_PTR_VAR, + BT_INT, BT_PTR_FN_INT, BT_PTR, BT_PTR) + DEF_FUNCTION_TYPE_1 (BT_FN_I1_VPTR, BT_I1, BT_VOLATILE_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_I2_VPTR, BT_I2, BT_VOLATILE_PTR) diff --git a/gcc-4.9/gcc/builtins.c b/gcc-4.9/gcc/builtins.c index dd57b1ae4..d6642d0d9 100644 --- a/gcc-4.9/gcc/builtins.c +++ b/gcc-4.9/gcc/builtins.c @@ -59,6 +59,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "ubsan.h" #include "cilk.h" +#include "input.h" static tree do_mpc_arg1 (tree, tree, int (*)(mpc_ptr, mpc_srcptr, mpc_rnd_t)); @@ -12064,13 +12065,16 @@ fold_builtin_next_arg (tree exp, bool va_start_p) tree fntype = TREE_TYPE (current_function_decl); int nargs = call_expr_nargs (exp); tree arg; + location_t loc = LOCATION_LOCUS (input_location); + if (has_discriminator (loc)) + loc = map_discriminator_location (loc); + /* There is good chance the current input_location points inside the definition of the va_start macro (perhaps on the token for builtin) in a system header, so warnings will not be emitted. 
Use the location in real source code. */ source_location current_location = - linemap_unwind_to_first_non_reserved_loc (line_table, input_location, - NULL); + linemap_unwind_to_first_non_reserved_loc (line_table, loc, NULL); if (!stdarg_p (fntype)) { diff --git a/gcc-4.9/gcc/c-family/ChangeLog b/gcc-4.9/gcc/c-family/ChangeLog index 4f277de68..1fa0dd088 100644 --- a/gcc-4.9/gcc/c-family/ChangeLog +++ b/gcc-4.9/gcc/c-family/ChangeLog @@ -1,3 +1,29 @@ +2014-06-30 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-06-10 Jakub Jelinek <jakub@redhat.com> + + PR fortran/60928 + * c-pragma.c (omp_pragmas_simd): Move PRAGMA_OMP_TASK... + (omp_pragmas): ... back here. + +2014-06-12 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/61486 + * c-omp.c (c_omp_split_clauses): Don't crash on firstprivate in + #pragma omp target teams or + #pragma omp {,target }teams distribute simd. + +2014-06-04 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-05-08 Marek Polacek <polacek@redhat.com> + + PR c/61053 + * c-common.c (min_align_of_type): New function factored out from... + (c_sizeof_or_alignof_type): ...here. + * c-common.h (min_align_of_type): Declare. 
+ 2014-05-06 Richard Biener <rguenther@suse.de> * c-opts.c (c_common_post_options): For -freestanding, diff --git a/gcc-4.9/gcc/c-family/c-common.c b/gcc-4.9/gcc/c-family/c-common.c index f7f2bb3e3..65c25bf17 100644 --- a/gcc-4.9/gcc/c-family/c-common.c +++ b/gcc-4.9/gcc/c-family/c-common.c @@ -380,6 +380,13 @@ static tree handle_omp_declare_simd_attribute (tree *, tree, tree, int, static tree handle_omp_declare_target_attribute (tree *, tree, tree, int, bool *); +static tree handle_always_patch_for_instrumentation_attribute (tree *, tree, + tree, int, + bool *); +static tree handle_never_patch_for_instrumentation_attribute (tree *, tree, + tree, int, + bool *); + static void check_function_nonnull (tree, int, tree *); static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT); static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT); @@ -758,6 +765,13 @@ const struct attribute_spec c_common_attribute_table[] = The name contains space to prevent its usage in source code. */ { "fn spec", 1, 1, false, true, true, handle_fnspec_attribute, false }, + { "always_patch_for_instrumentation", 0, 0, true, false, false, + handle_always_patch_for_instrumentation_attribute, + false }, + { "never_patch_for_instrumentation", 0, 0, true, false, false, + handle_never_patch_for_instrumentation_attribute, + false }, + { "warn_unused", 0, 0, false, false, false, handle_warn_unused_attribute, false }, { "returns_nonnull", 0, 0, false, true, true, @@ -4927,6 +4941,26 @@ c_common_get_alias_set (tree t) return -1; } +/* Return the least alignment required for type TYPE. 
*/ + +unsigned int +min_align_of_type (tree type) +{ + unsigned int align = TYPE_ALIGN (type); + align = MIN (align, BIGGEST_ALIGNMENT); +#ifdef BIGGEST_FIELD_ALIGNMENT + align = MIN (align, BIGGEST_FIELD_ALIGNMENT); +#endif + unsigned int field_align = align; +#ifdef ADJUST_FIELD_ALIGN + tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, + type); + field_align = ADJUST_FIELD_ALIGN (field, field_align); +#endif + align = MIN (align, field_align); + return align / BITS_PER_UNIT; +} + /* Compute the value of 'sizeof (TYPE)' or '__alignof__ (TYPE)', where the IS_SIZEOF parameter indicates which operator is being applied. The COMPLAIN flag controls whether we should diagnose possibly @@ -5005,21 +5039,7 @@ c_sizeof_or_alignof_type (location_t loc, size_int (TYPE_PRECISION (char_type_node) / BITS_PER_UNIT)); else if (min_alignof) - { - unsigned int align = TYPE_ALIGN (type); - align = MIN (align, BIGGEST_ALIGNMENT); -#ifdef BIGGEST_FIELD_ALIGNMENT - align = MIN (align, BIGGEST_FIELD_ALIGNMENT); -#endif - unsigned int field_align = align; -#ifdef ADJUST_FIELD_ALIGN - tree field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, NULL_TREE, - type); - field_align = ADJUST_FIELD_ALIGN (field, field_align); -#endif - align = MIN (align, field_align); - value = size_int (align / BITS_PER_UNIT); - } + value = size_int (min_align_of_type (type)); else value = size_int (TYPE_ALIGN_UNIT (type)); } @@ -6867,6 +6887,7 @@ handle_unused_attribute (tree *node, tree name, tree ARG_UNUSED (args), if (TREE_CODE (decl) == PARM_DECL || TREE_CODE (decl) == VAR_DECL + || TREE_CODE (decl) == FIELD_DECL || TREE_CODE (decl) == FUNCTION_DECL || TREE_CODE (decl) == LABEL_DECL || TREE_CODE (decl) == TYPE_DECL) @@ -8680,6 +8701,47 @@ handle_nonnull_attribute (tree *node, tree ARG_UNUSED (name), return NULL_TREE; } +/* Handle a "always_patch_for_instrumentation" attribute; arguments as in + struct attribute_spec.handler. 
*/ + +static tree +handle_always_patch_for_instrumentation_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + if (TREE_CODE (*node) == FUNCTION_DECL) + { + /* Disable inlining if forced instrumentation. */ + DECL_UNINLINABLE (*node) = 1; + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + + +/* Handle a "never_patch_for_instrumentation" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +handle_never_patch_for_instrumentation_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + + /* Check the argument list of a function call for null in argument slots that are marked as requiring a non-null pointer argument. The NARGS arguments are passed in the array ARGARRAY. @@ -11727,6 +11789,31 @@ keyword_is_decl_specifier (enum rid keyword) } } +/* Check for and warn about self-assignment or self-initialization. + LHS and RHS are the tree nodes for the left-hand side and right-hand side + of the assignment or initialization we are checking. + LOCATION is the source location for RHS. */ + +void +check_for_self_assign (location_t location, tree lhs, tree rhs) +{ + if (lhs == NULL_TREE || rhs == NULL_TREE) + return; + + /* Deal with TREE_LIST initializers (may be generated by class + member initialization in C++). */ + if (TREE_CODE (rhs) == TREE_LIST) + rhs = TREE_VALUE (rhs); + + /* Only emit a warning if RHS is not a folded expression so that we don't + warn on something like x = x / 1. 
*/ + if (!EXPR_FOLDED (rhs) + && operand_equal_p (lhs, rhs, + OEP_PURE_SAME | OEP_ALLOW_NULL | OEP_ALLOW_NO_TYPE)) + warning_at (location, OPT_Wself_assign, G_("%qE is assigned to itself"), + lhs); +} + /* Initialize language-specific-bits of tree_contains_struct. */ void diff --git a/gcc-4.9/gcc/c-family/c-common.h b/gcc-4.9/gcc/c-family/c-common.h index 24959d83e..fe798fa6d 100644 --- a/gcc-4.9/gcc/c-family/c-common.h +++ b/gcc-4.9/gcc/c-family/c-common.h @@ -758,6 +758,7 @@ extern tree c_wrap_maybe_const (tree, bool); extern tree c_save_expr (tree); extern tree c_common_truthvalue_conversion (location_t, tree); extern void c_apply_type_quals_to_decl (int, tree); +extern unsigned int min_align_of_type (tree); extern tree c_sizeof_or_alignof_type (location_t, tree, bool, bool, int); extern tree c_alignof_expr (location_t, tree); /* Print an error message for invalid operands to arith operation CODE. @@ -830,6 +831,7 @@ extern bool c_common_post_options (const char **); extern bool c_common_init (void); extern void c_common_finish (void); extern void c_common_parse_file (void); +extern FILE *get_dump_info (int, int *); extern alias_set_type c_common_get_alias_set (tree); extern void c_register_builtin_type (tree, const char*); extern bool c_promoting_integer_type_p (const_tree); @@ -1012,6 +1014,7 @@ extern void release_tree_vector (vec<tree, va_gc> *); extern vec<tree, va_gc> *make_tree_vector_single (tree); extern vec<tree, va_gc> *make_tree_vector_from_list (tree); extern vec<tree, va_gc> *make_tree_vector_copy (const vec<tree, va_gc> *); +extern void check_for_self_assign (location_t, tree, tree); /* In c-gimplify.c */ extern void c_genericize (tree); diff --git a/gcc-4.9/gcc/c-family/c-cppbuiltin.c b/gcc-4.9/gcc/c-family/c-cppbuiltin.c index 2f2e7bae8..6a697f666 100644 --- a/gcc-4.9/gcc/c-family/c-cppbuiltin.c +++ b/gcc-4.9/gcc/c-family/c-cppbuiltin.c @@ -969,6 +969,33 @@ c_cpp_builtins (cpp_reader *pfile) if (c_dialect_cxx () && TYPE_UNSIGNED 
(wchar_type_node)) cpp_define (pfile, "__WCHAR_UNSIGNED__"); + /* Tell source code if the compiler makes sync_compare_and_swap + builtins available. */ +#ifdef HAVE_sync_compare_and_swapqi + if (HAVE_sync_compare_and_swapqi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); +#endif + +#ifdef HAVE_sync_compare_and_swaphi + if (HAVE_sync_compare_and_swaphi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); +#endif + +#ifdef HAVE_sync_compare_and_swapsi + if (HAVE_sync_compare_and_swapsi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); +#endif + +#ifdef HAVE_sync_compare_and_swapdi + if (HAVE_sync_compare_and_swapdi) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); +#endif + +#ifdef HAVE_sync_compare_and_swapti + if (HAVE_sync_compare_and_swapti) + cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); +#endif + cpp_atomic_builtins (pfile); #ifdef DWARF2_UNWIND_INFO @@ -1028,6 +1055,8 @@ c_cpp_builtins (cpp_reader *pfile) format. */ if (ENABLE_DECIMAL_FLOAT && ENABLE_DECIMAL_BID_FORMAT) cpp_define (pfile, "__DECIMAL_BID_FORMAT__"); + if (c_dialect_cxx () && flag_sized_delete) + cpp_define (pfile, "__GXX_DELETE_WITH_SIZE__"); } /* Pass an object-like macro. If it doesn't lie in the user's diff --git a/gcc-4.9/gcc/c-family/c-gimplify.c b/gcc-4.9/gcc/c-family/c-gimplify.c index 737be4d72..d385f3a40 100644 --- a/gcc-4.9/gcc/c-family/c-gimplify.c +++ b/gcc-4.9/gcc/c-family/c-gimplify.c @@ -80,7 +80,7 @@ c_genericize (tree fndecl) struct cgraph_node *cgn; /* Dump the C-specific tree IR. */ - dump_orig = dump_begin (TDI_original, &local_dump_flags); + dump_orig = get_dump_info (TDI_original, &local_dump_flags); if (dump_orig) { fprintf (dump_orig, "\n;; Function %s", @@ -97,8 +97,6 @@ c_genericize (tree fndecl) else print_c_tree (dump_orig, DECL_SAVED_TREE (fndecl)); fprintf (dump_orig, "\n"); - - dump_end (TDI_original, dump_orig); } /* Dump all nested functions now. 
*/ diff --git a/gcc-4.9/gcc/c-family/c-omp.c b/gcc-4.9/gcc/c-family/c-omp.c index dd0a45d96..6a0e41988 100644 --- a/gcc-4.9/gcc/c-family/c-omp.c +++ b/gcc-4.9/gcc/c-family/c-omp.c @@ -789,8 +789,13 @@ c_omp_split_clauses (location_t loc, enum tree_code code, else if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS)) != 0) { - /* This must be #pragma omp {,target }teams distribute. */ - gcc_assert (code == OMP_DISTRIBUTE); + /* This must be one of + #pragma omp {,target }teams distribute + #pragma omp target teams + #pragma omp {,target }teams distribute simd. */ + gcc_assert (code == OMP_DISTRIBUTE + || code == OMP_TEAMS + || code == OMP_SIMD); s = C_OMP_CLAUSE_SPLIT_TEAMS; } else if ((mask & (OMP_CLAUSE_MASK_1 diff --git a/gcc-4.9/gcc/c-family/c-opts.c b/gcc-4.9/gcc/c-family/c-opts.c index 29e9a355b..60d7145b7 100644 --- a/gcc-4.9/gcc/c-family/c-opts.c +++ b/gcc-4.9/gcc/c-family/c-opts.c @@ -43,6 +43,10 @@ along with GCC; see the file COPYING3. If not see TARGET_FLT_EVAL_METHOD_NON_DEFAULT and TARGET_OPTF. */ #include "tm_p.h" /* For C_COMMON_OVERRIDE_OPTIONS. */ +#include "function.h" +#include "params.h" +#include "l-ipo.h" +#include "dumpfile.h" #ifndef DOLLARS_IN_IDENTIFIERS # define DOLLARS_IN_IDENTIFIERS true @@ -102,6 +106,14 @@ static size_t deferred_count; /* Number of deferred options scanned for -include. */ static size_t include_cursor; +static bool parsing_done_p = false; + +/* Dump files/flags to use during parsing. */ +static FILE *original_dump_file = NULL; +static int original_dump_flags; +static FILE *class_dump_file = NULL; +static int class_dump_flags; + /* Whether any standard preincluded header has been preincluded. */ static bool done_preinclude; @@ -199,8 +211,10 @@ c_common_init_options_struct (struct gcc_options *opts) opts->x_warn_write_strings = c_dialect_cxx (); opts->x_flag_warn_unused_result = true; - /* By default, C99-like requirements for complex multiply and divide. 
*/ - opts->x_flag_complex_method = 2; + /* By default, C99-like requirements for complex multiply and divide. + But for C++ this should not be required. */ + if (c_language != clk_cxx) + opts->x_flag_complex_method = 2; } /* Common initialization before calling option handlers. */ @@ -845,6 +859,10 @@ c_common_post_options (const char **pfilename) else if (!flag_gnu89_inline && !flag_isoc99) error ("-fno-gnu89-inline is only supported in GNU99 or C99 mode"); + if (flag_dyn_ipa && cpp_opts->preprocessed) + error ("-fpreprocessed/-save-temps are not supported with -fripa"); + + /* Default to ObjC sjlj exception handling if NeXT runtime. */ if (flag_objc_sjlj_exceptions < 0) flag_objc_sjlj_exceptions = flag_next_runtime; @@ -1051,6 +1069,34 @@ c_common_init (void) return true; } +/* Return TRUE if the lipo maximum memory consumption limit is reached, and + we should not import any further auxiliary modules. Check after parsing + each module, the Ith module being the just parsed module. */ +static bool +lipo_max_mem_reached (unsigned int i) +{ + if (L_IPO_COMP_MODE && PARAM_VALUE (PARAM_MAX_LIPO_MEMORY) + && i < (num_in_fnames - 1) + /* Scale up memory usage by 25% to account for memory consumption + by the optimizer. */ + && ((ggc_total_allocated () >> 10) * 1.25 + > (size_t) PARAM_VALUE (PARAM_MAX_LIPO_MEMORY))) { + if (dump_enabled_p ()) + { + i++; + do { + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, input_location, + "Not importing %s: maximum memory " + "consumption reached", in_fnames[i]); + i++; + } while (i < num_in_fnames); + } + return true; + } + return false; +} + + /* Initialize the integrated preprocessor after debug output has been initialized; loop over each input file. */ void @@ -1062,9 +1108,26 @@ c_common_parse_file (void) for (;;) { c_finish_options (); + /* Open the dump files to use for the original and class dump output + here, to be used during parsing for the current file. 
*/ + original_dump_file = dump_begin (TDI_original, &original_dump_flags); + class_dump_file = dump_begin (TDI_class, &class_dump_flags); pch_init (); + set_lipo_c_parsing_context (parse_in, i, verbose); push_file_scope (); c_parse_file (); + if (i == 0 && flag_record_compilation_info_in_elf) + write_compilation_flags_to_asm (); + + if (i == 0) + ggc_total_memory = (ggc_total_allocated () >> 10); + + /* In lipo mode, processing too many auxiliary files will cause us + to hit memory limits, and cause thrashing -- prevent this by not + processing any further auxiliary modules if we reach a certain + memory limit. */ + if (!include_all_aux && lipo_max_mem_reached (i)) + num_in_fnames = i + 1; pop_file_scope (); /* And end the main input file, if the debug writer wants it */ if (debug_hooks->start_end_main_source_file) @@ -1073,13 +1136,50 @@ c_common_parse_file (void) break; cpp_undef_all (parse_in); cpp_clear_file_cache (parse_in); + deferred_count = 0; this_input_filename = cpp_read_main_file (parse_in, in_fnames[i]); + if (original_dump_file) + { + dump_end (TDI_original, original_dump_file); + original_dump_file = NULL; + } + if (class_dump_file) + { + dump_end (TDI_class, class_dump_file); + class_dump_file = NULL; + } /* If an input file is missing, abandon further compilation. cpplib has issued a diagnostic. */ if (!this_input_filename) break; } + parsing_done_p = true; +} + +/* Returns true if parsing is done */ + +bool +is_parsing_done_p (void) +{ + return parsing_done_p; +} + +/* Returns the appropriate dump file for PHASE to dump with FLAGS. */ +FILE * +get_dump_info (int phase, int *flags) +{ + gcc_assert (phase == TDI_original || phase == TDI_class); + if (phase == TDI_original) + { + *flags = original_dump_flags; + return original_dump_file; + } + else + { + *flags = class_dump_flags; + return class_dump_file; + } } /* Common finish hook for the C, ObjC and C++ front ends. 
*/ @@ -1089,7 +1189,11 @@ c_common_finish (void) FILE *deps_stream = NULL; /* Don't write the deps file if there are errors. */ - if (cpp_opts->deps.style != DEPS_NONE && !seen_error ()) + /* FIXME. We are emitting the deps file even if there were errors. + This is a temporary workaround to avoid confusing Google's build + system. It assumes that deps files are always emitted even + in the presence of errors. */ + if (cpp_opts->deps.style != DEPS_NONE /*&& !seen_error ()*/) { /* If -M or -MM was seen without -MF, default output to the output stream. */ @@ -1313,9 +1417,15 @@ c_finish_options (void) struct deferred_opt *opt = &deferred_opts[i]; if (opt->code == OPT_D) - cpp_define (parse_in, opt->arg); + { + cpp_define (parse_in, opt->arg); + coverage_note_define (opt->arg, true); + } else if (opt->code == OPT_U) - cpp_undef (parse_in, opt->arg); + { + cpp_undef (parse_in, opt->arg); + coverage_note_define (opt->arg, false); + } else if (opt->code == OPT_A) { if (opt->arg[0] == '-') @@ -1338,6 +1448,7 @@ c_finish_options (void) if (opt->code == OPT_imacros && cpp_push_include (parse_in, opt->arg)) { + coverage_note_include (opt->arg); /* Disable push_command_line_include callback for now. 
*/ include_cursor = deferred_count + 1; cpp_scan_nooutput (parse_in); @@ -1382,7 +1493,10 @@ push_command_line_include (void) if (!cpp_opts->preprocessed && opt->code == OPT_include && cpp_push_include (parse_in, opt->arg)) - return; + { + coverage_note_include (opt->arg); + return; + } } if (include_cursor == deferred_count) diff --git a/gcc-4.9/gcc/c-family/c-pragma.c b/gcc-4.9/gcc/c-family/c-pragma.c index 9e2a00eb1..ad115e998 100644 --- a/gcc-4.9/gcc/c-family/c-pragma.c +++ b/gcc-4.9/gcc/c-family/c-pragma.c @@ -1188,6 +1188,7 @@ static const struct omp_pragma_def omp_pragmas[] = { { "section", PRAGMA_OMP_SECTION }, { "sections", PRAGMA_OMP_SECTIONS }, { "single", PRAGMA_OMP_SINGLE }, + { "task", PRAGMA_OMP_TASK }, { "taskgroup", PRAGMA_OMP_TASKGROUP }, { "taskwait", PRAGMA_OMP_TASKWAIT }, { "taskyield", PRAGMA_OMP_TASKYIELD }, @@ -1200,7 +1201,6 @@ static const struct omp_pragma_def omp_pragmas_simd[] = { { "parallel", PRAGMA_OMP_PARALLEL }, { "simd", PRAGMA_OMP_SIMD }, { "target", PRAGMA_OMP_TARGET }, - { "task", PRAGMA_OMP_TASK }, { "teams", PRAGMA_OMP_TEAMS }, }; diff --git a/gcc-4.9/gcc/c-family/c.opt b/gcc-4.9/gcc/c-family/c.opt index 2abf66cb7..9e0a9a410 100644 --- a/gcc-4.9/gcc/c-family/c.opt +++ b/gcc-4.9/gcc/c-family/c.opt @@ -331,6 +331,10 @@ Wconversion-null C++ ObjC++ Var(warn_conversion_null) Init(1) Warning Warn for converting NULL from/to a non-pointer type +Wself-assign-non-pod +C++ ObjC++ Var(warn_self_assign_non_pod) Init(0) Warning +Warn when a variable of a non-POD type is assigned to itself + Wctor-dtor-privacy C++ ObjC++ Var(warn_ctor_dtor_privacy) Warning Warn when all constructors and destructors are private diff --git a/gcc-4.9/gcc/c/ChangeLog b/gcc-4.9/gcc/c/ChangeLog index 5f4a207a7..e31d4a8a3 100644 --- a/gcc-4.9/gcc/c/ChangeLog +++ b/gcc-4.9/gcc/c/ChangeLog @@ -1,3 +1,54 @@ +2014-06-30 Jakub Jelinek <jakub@redhat.com> + + Backported from mainline + 2014-06-25 Jakub Jelinek <jakub@redhat.com> + + * c-typeck.c (c_finish_omp_clauses): 
Make sure + OMP_CLAUSE_LINEAR_STEP has correct type. + +2014-06-30 Sebastian Huber <sebastian.huber@embedded-brains.de> + + * c-parser.c (c_parser_declaration_or_fndef): Discard all type + qualifiers in __auto_type for atomic types. + (c_parser_typeof_specifier): Discard all type qualifiers in + __typeof__ for atomic types. + +2014-06-30 Igor Zamyatin <igor.zamyatin@intel.com> + + PR middle-end/57541 + * c-array-notation.c (fix_builtin_array_notation_fn): + Check for 0 arguments in builtin call. Check that bultin argument is + correct. + * c-parser.c (c_parser_array_notation): Check for incorrect initial + index. + +2014-06-24 Jakub Jelinek <jakub@redhat.com> + + * c-parser.c (c_parser_omp_for_loop): For + #pragma omp parallel for simd move lastprivate clause from parallel + to for rather than simd. + +2014-06-04 Igor Zamyatin <igor.zamyatin@intel.com> + + PR c/58942 + * c-array-notation.c (fix_builtin_array_notation_fn): Handle the case + with a pointer. + +2014-06-04 Marek Polacek <polacek@redhat.com> + + Backport from mainline + 2014-05-08 Marek Polacek <polacek@redhat.com> + + PR c/61053 + * c-decl.c (grokdeclarator): Use min_align_of_type instead of + TYPE_ALIGN_UNIT. + +2014-05-26 Igor Zamyatin <igor.zamyatin@intel.com> + + PR c/61191 + * c-array-notation.c (fix_builtin_array_notation_fn): Check invalid + function parameters. + 2014-04-24 Jakub Jelinek <jakub@redhat.com> * c-parser.c (c_parser_omp_atomic): Allow seq_cst before diff --git a/gcc-4.9/gcc/c/c-array-notation.c b/gcc-4.9/gcc/c/c-array-notation.c index 0ac6ba8e1..2305e1e03 100644 --- a/gcc-4.9/gcc/c/c-array-notation.c +++ b/gcc-4.9/gcc/c/c-array-notation.c @@ -214,6 +214,13 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree *new_var) if (an_type == BUILT_IN_NONE) return NULL_TREE; + /* Builtin call should contain at least one argument. 
*/ + if (call_expr_nargs (an_builtin_fn) == 0) + { + error_at (EXPR_LOCATION (an_builtin_fn), "Invalid builtin arguments"); + return error_mark_node; + } + if (an_type == BUILT_IN_CILKPLUS_SEC_REDUCE || an_type == BUILT_IN_CILKPLUS_SEC_REDUCE_MUTATING) { @@ -229,6 +236,8 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree *new_var) /* Fully fold any EXCESSIVE_PRECISION EXPR that can occur in the function parameter. */ func_parm = c_fully_fold (func_parm, false, NULL); + if (func_parm == error_mark_node) + return error_mark_node; location = EXPR_LOCATION (an_builtin_fn); @@ -236,7 +245,10 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree *new_var) return error_mark_node; if (rank == 0) - return an_builtin_fn; + { + error_at (location, "Invalid builtin arguments"); + return error_mark_node; + } else if (rank > 1 && (an_type == BUILT_IN_CILKPLUS_SEC_REDUCE_MAX_IND || an_type == BUILT_IN_CILKPLUS_SEC_REDUCE_MIN_IND)) @@ -308,7 +320,9 @@ fix_builtin_array_notation_fn (tree an_builtin_fn, tree *new_var) || an_type == BUILT_IN_CILKPLUS_SEC_REDUCE_MIN_IND) array_ind_value = build_decl (location, VAR_DECL, NULL_TREE, TREE_TYPE (func_parm)); - array_op0 = (*array_operand)[0]; + array_op0 = (*array_operand)[0]; + if (TREE_CODE (array_op0) == INDIRECT_REF) + array_op0 = TREE_OPERAND (array_op0, 0); switch (an_type) { case BUILT_IN_CILKPLUS_SEC_REDUCE_ADD: diff --git a/gcc-4.9/gcc/c/c-decl.c b/gcc-4.9/gcc/c/c-decl.c index df84980e3..ac3819823 100644 --- a/gcc-4.9/gcc/c/c-decl.c +++ b/gcc-4.9/gcc/c/c-decl.c @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. 
If not see #include "hash-table.h" #include "langhooks-def.h" #include "pointer-set.h" +#include "l-ipo.h" #include "plugin.h" #include "c-family/c-ada-spec.h" #include "cilk.h" @@ -537,6 +538,27 @@ static tree grokdeclarator (const struct c_declarator *, bool *, enum deprecated_states); static tree grokparms (struct c_arg_info *, bool); static void layout_array_type (tree); +static void pop_ext_scope (void); + +/* LIPO support */ +/* The list of block nodes. A member node is created + when an external scope is popped. */ +static GTY (()) vec<tree, va_gc> *ext_blocks = NULL; +static inline void +apply_for_each_ext_block (void (*func) (tree)) +{ + if (L_IPO_COMP_MODE) + { + size_t i; + tree eb; + + for (i = 0; + ext_blocks->iterate (i, &eb); + ++i) + func (BLOCK_VARS (eb)); + } +} + /* T is a statement. Add it to the statement-tree. This is the C/ObjC version--C++ has a slightly different version of this @@ -686,6 +708,8 @@ bind (tree name, tree decl, struct c_scope *scope, bool invisible, b->shadowed = *here; *here = b; + + add_decl_to_current_module_scope (decl, scope); } /* Clear the binding structure B, stick it on the binding_freelist, @@ -1214,8 +1238,18 @@ pop_scope (void) binding in the home scope. */ if (!b->nested) { - DECL_CHAIN (p) = BLOCK_VARS (block); - BLOCK_VARS (block) = p; + /* In LIPO mode compilation, ext_scope is popped out + at end of each module to block name lookup across + modules. The ext_scope is used to keep the list of + global variables in that module scope. Other decls + are filtered out. */ + if (!L_IPO_COMP_MODE + || scope != external_scope + || TREE_CODE (p) == VAR_DECL) + { + DECL_CHAIN (p) = BLOCK_VARS (block); + BLOCK_VARS (block) = p; + } } else if (VAR_OR_FUNCTION_DECL_P (p) && scope != file_scope) { @@ -1316,6 +1350,11 @@ push_file_scope (void) push_scope (); file_scope = current_scope; + /* LIPO support -- do this before file scope bindings + are created for visible_builtins -- only need to remember + external scope bindings. 
*/ + push_module_scope (); + start_fname_decls (); for (decl = visible_builtins; decl; decl = DECL_CHAIN (decl)) @@ -1350,7 +1389,18 @@ pop_file_scope (void) pop_scope (); file_scope = 0; - maybe_apply_pending_pragma_weaks (); + if (!L_IPO_COMP_MODE) + maybe_apply_pending_pragma_weaks (); + else + { + pop_ext_scope (); + gcc_assert (current_scope == 0 && external_scope == 0); + push_scope (); + external_scope = current_scope; + /* Prepare for parsing for the next module -- including + builtin re-binding. */ + pop_module_scope (); + } } /* Adjust the bindings for the start of a statement expression. */ @@ -2552,7 +2602,9 @@ warn_if_shadowing (tree new_decl) struct c_binding *b; /* Shadow warnings wanted? */ - if (!warn_shadow + if (!(warn_shadow + || warn_shadow_local + || warn_shadow_compatible_local) /* No shadow warnings for internally generated vars. */ || DECL_IS_BUILTIN (new_decl) /* No shadow warnings for vars made for inlining. */ @@ -2569,14 +2621,25 @@ warn_if_shadowing (tree new_decl) tree old_decl = b->decl; if (old_decl == error_mark_node) - { - warning (OPT_Wshadow, "declaration of %q+D shadows previous " - "non-variable", new_decl); - break; - } + warning (OPT_Wshadow, "declaration of %q+D shadows previous " + "non-variable", new_decl); else if (TREE_CODE (old_decl) == PARM_DECL) - warning (OPT_Wshadow, "declaration of %q+D shadows a parameter", - new_decl); + { + enum opt_code warning_code; + + /* If '-Wshadow-compatible-local' is specified without other + -Wshadow flags, we will warn only when the types of the + shadowing variable (i.e. new_decl) and the shadowed variable + (old_decl) are compatible. 
*/ + if (comptypes (TREE_TYPE (old_decl), TREE_TYPE (new_decl))) + warning_code = OPT_Wshadow_compatible_local; + else + warning_code = OPT_Wshadow_local; + warning (warning_code, + "declaration of %q+D shadows a parameter", new_decl); + warning_at (DECL_SOURCE_LOCATION (old_decl), warning_code, + "shadowed declaration is here"); + } else if (DECL_FILE_SCOPE_P (old_decl)) { /* Do not warn if a variable shadows a function, unless @@ -2586,23 +2649,34 @@ warn_if_shadowing (tree new_decl) && !FUNCTION_POINTER_TYPE_P (TREE_TYPE (new_decl))) continue; - warning_at (DECL_SOURCE_LOCATION (new_decl), OPT_Wshadow, - "declaration of %qD shadows a global declaration", - new_decl); + warning (OPT_Wshadow, "declaration of %q+D shadows a global " + "declaration", new_decl); + warning_at (DECL_SOURCE_LOCATION (old_decl), OPT_Wshadow, + "shadowed declaration is here"); } else if (TREE_CODE (old_decl) == FUNCTION_DECL && DECL_BUILT_IN (old_decl)) - { warning (OPT_Wshadow, "declaration of %q+D shadows " "a built-in function", new_decl); - break; - } else - warning (OPT_Wshadow, "declaration of %q+D shadows a previous local", - new_decl); - - warning_at (DECL_SOURCE_LOCATION (old_decl), OPT_Wshadow, - "shadowed declaration is here"); + { + enum opt_code warning_code; + + /* If '-Wshadow-compatible-local' is specified without other + -Wshadow flags, we will warn only when the types of the + shadowing variable (i.e. new_decl) and the shadowed variable + (old_decl) are compatible. */ + if (comptypes (TREE_TYPE (old_decl), TREE_TYPE (new_decl))) + warning_code = OPT_Wshadow_compatible_local; + else + warning_code = OPT_Wshadow_local; + warning (warning_code, + "declaration of %q+D shadows a previous local", + new_decl); + + warning_at (DECL_SOURCE_LOCATION (old_decl), warning_code, + "shadowed declaration is here"); + } break; } @@ -4486,12 +4560,25 @@ finish_decl (tree decl, location_t init_loc, tree init, when a tentative file-scope definition is seen. 
But at end of compilation, do output code for them. */ DECL_DEFER_OUTPUT (decl) = 1; + + /* In LIPO mode, create varpool_node early + enough so that module id of the current source file being + parsed is captured. */ + if (flag_dyn_ipa && TREE_CODE (decl) == VAR_DECL) + varpool_node_for_decl (decl); + if (asmspec && C_DECL_REGISTER (decl)) DECL_HARD_REGISTER (decl) = 1; rest_of_decl_compilation (decl, true, 0); } else { + /* LIPO: capture module id. */ + if (flag_dyn_ipa + && TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl)) + varpool_node_for_decl (decl); + /* In conjunction with an ASMSPEC, the `register' keyword indicates that we should place the variable in a particular register. */ @@ -5911,7 +5998,7 @@ grokdeclarator (const struct c_declarator *declarator, else if (declspecs->align_log != -1) { alignas_align = 1U << declspecs->align_log; - if (alignas_align < TYPE_ALIGN_UNIT (type)) + if (alignas_align < min_align_of_type (type)) { if (name) error_at (loc, "%<_Alignas%> specifiers cannot reduce " @@ -10391,7 +10478,12 @@ c_write_global_declarations (void) through wrapup_global_declarations and check_global_declarations. 
*/ FOR_EACH_VEC_ELT (*all_translation_units, i, t) c_write_global_declarations_1 (BLOCK_VARS (DECL_INITIAL (t))); - c_write_global_declarations_1 (BLOCK_VARS (ext_block)); + if (ext_block) + c_write_global_declarations_1 (BLOCK_VARS (ext_block)); + apply_for_each_ext_block (c_write_global_declarations_1); + + if (L_IPO_COMP_MODE) + maybe_apply_pending_pragma_weaks (); timevar_stop (TV_PHASE_DEFERRED); timevar_start (TV_PHASE_OPT_GEN); @@ -10410,7 +10502,9 @@ c_write_global_declarations (void) timevar_push (TV_SYMOUT); FOR_EACH_VEC_ELT (*all_translation_units, i, t) c_write_global_declarations_2 (BLOCK_VARS (DECL_INITIAL (t))); - c_write_global_declarations_2 (BLOCK_VARS (ext_block)); + if (ext_block) + c_write_global_declarations_2 (BLOCK_VARS (ext_block)); + apply_for_each_ext_block (c_write_global_declarations_2); timevar_pop (TV_SYMOUT); } @@ -10418,6 +10512,236 @@ c_write_global_declarations (void) timevar_stop (TV_PHASE_DBGINFO); } + +/* LIPO support */ + +typedef struct GTY (()) c_sb +{ + tree decl; + tree id; + tree decl_copy_pre; /* copy at the start of file parsing. */ + tree decl_copy_post; /* copy at the end of module_scope. */ + int invisible; +} c_saved_builtin; + +static GTY (()) vec<c_saved_builtin, va_gc> *saved_builtins = NULL; + +/* Return the needed size of lang_decl structure for tree T. */ + +int +c_get_lang_decl_size (tree t) +{ + if (!DECL_LANG_SPECIFIC (t)) + return 0; + return sizeof (struct lang_decl); +} + +/* Return true if S is external or file scope. */ + +bool +c_is_global_scope (tree decl ATTRIBUTE_UNUSED, void *s) +{ + struct c_scope *scope = (struct c_scope *)s; + + if (scope == external_scope || scope == file_scope) + return true; + + return false; +} + +/* Add DECL to the list of builtins. 
*/ + +void +c_add_built_in_decl (tree decl) +{ + c_saved_builtin *sb; + struct c_binding *b = NULL; + + if (!flag_dyn_ipa) + return; + + if (at_eof) + return; + + if (parser_parsing_start) + return; + + sb = vec_safe_push (saved_builtins, c_saved_builtin ()); + sb->decl = decl; + sb->decl_copy_pre = NULL; + sb->decl_copy_post = NULL; + sb->id = get_type_or_decl_name (decl); + + switch (TREE_CODE (decl)) + { + case TYPE_DECL: + case FUNCTION_DECL: + case CONST_DECL: + b = I_SYMBOL_BINDING (sb->id); + break; + case ENUMERAL_TYPE: + case UNION_TYPE: + case RECORD_TYPE: + b = I_TAG_BINDING (sb->id); + break; + default: + gcc_unreachable (); + } + + gcc_assert (b && b->decl == decl + && b->id == sb->id && b->depth == 0); + sb->invisible = b->invisible; +} + +/* Pop the external scope at the end of parsing of a file. */ + +static void +pop_ext_scope (void) +{ + tree ext_b; + if (!L_IPO_COMP_MODE) + return; + ext_b = pop_scope (); + vec_safe_push (ext_blocks, ext_b); + gcc_assert (!current_scope); + external_scope = 0; + + /* Now remove non var_decls from BLOCK_VARS -- + this is needed to avoid tree-chain contamination + from other modules due to builtin (shared) decls. */ + { + tree *p = &BLOCK_VARS (ext_b); + tree decl = BLOCK_VARS (ext_b); + for (; decl; decl = TREE_CHAIN (decl)) + { + if (TREE_CODE (decl) != VAR_DECL) + { + gcc_assert (0); + *p = TREE_CHAIN (decl); + } + else + p = &TREE_CHAIN (decl); + } + } +} + +/* Save a copy of SB->decl before file parsing starts. */ + +static void +c_save_built_in_decl_pre_parsing_1 (c_saved_builtin *sb) +{ + tree decl = sb->decl; + + sb->decl_copy_pre = lipo_save_decl (decl); + sb->decl_copy_post = NULL; + return; +} + +/* Make copies of builtin decls before file parsing. 
*/ + +void +c_save_built_in_decl_pre_parsing (void) +{ + size_t i; + c_saved_builtin *bi; + + for (i = 0; + saved_builtins->iterate (i, &bi); + ++i) + c_save_built_in_decl_pre_parsing_1 (bi); +} + +/* Restore builtins to their values before file parsing ( + the initial default value). */ + +void +c_restore_built_in_decl_pre_parsing (void) +{ + size_t i; + c_saved_builtin *bi; + + /* Now re-bind the builtins in the external scope. */ + gcc_assert (current_scope && current_scope == external_scope); + for (i = 0; + saved_builtins->iterate (i, &bi); + ++i) + { + tree id; + tree decl = bi->decl; + id = bi->id; + + lipo_restore_decl (decl, bi->decl_copy_pre); + if (id) + bind (id, decl, external_scope, + bi->invisible, false /*nested*/, + DECL_SOURCE_LOCATION (decl)); + } +} + +/* Save values of builtins after parsing of a file. */ + +void +c_save_built_in_decl_post_parsing (void) +{ + size_t i; + c_saved_builtin *bi; + + for (i = 0; + saved_builtins->iterate (i, &bi); + ++i) + { + /* Skip builtin decls in the predefined state. + The static flag for defined builtins is not set, so + do not check it. */ + if (DECL_ARTIFICIAL (bi->decl) + || TREE_CODE (bi->decl) != FUNCTION_DECL + || !DECL_STRUCT_FUNCTION (bi->decl)) + continue; + /* Remember the defining module. */ + cgraph_link_node (cgraph_get_create_node (bi->decl)); + if (!bi->decl_copy_post) + bi->decl_copy_post = lipo_save_decl (bi->decl); + } +} + +/* Restore builtins to their values (non-default) + after parsing finishes. */ + +void +c_restore_built_in_decl_post_parsing (void) +{ + c_saved_builtin *bi; + unsigned i; + for (i = 0; + saved_builtins->iterate (i, &bi); + ++i) + { + tree decl = bi->decl; + /* Now restore the decl's state */ + if (bi->decl_copy_post) + lipo_restore_decl (decl, bi->decl_copy_post); + } +} + +/* Return true if type T is compiler generated. 
*/ + +bool +c_is_compiler_generated_type (tree t ATTRIBUTE_UNUSED) +{ + return false; +} + +/* Return 1 if lang specific attribute of T1 and T2 are + equivalent. */ + +int +c_cmp_lang_type (tree t1 ATTRIBUTE_UNUSED, + tree t2 ATTRIBUTE_UNUSED) +{ + return 1; +} + + /* Register reserved keyword WORD as qualifier for address space AS. */ void diff --git a/gcc-4.9/gcc/c/c-lang.c b/gcc-4.9/gcc/c/c-lang.c index 97c044362..2b096cf70 100644 --- a/gcc-4.9/gcc/c/c-lang.c +++ b/gcc-4.9/gcc/c/c-lang.c @@ -45,6 +45,27 @@ enum c_language_kind c_language = clk_c; #undef LANG_HOOKS_INIT_TS #define LANG_HOOKS_INIT_TS c_common_init_ts +/* LIPO support. */ +#undef LANG_HOOKS_ADD_BUILT_IN_DECL +#define LANG_HOOKS_ADD_BUILT_IN_DECL c_add_built_in_decl +#undef LANG_HOOKS_SAVE_BUILT_IN_PRE +#define LANG_HOOKS_SAVE_BUILT_IN_PRE c_save_built_in_decl_pre_parsing +#undef LANG_HOOKS_RESTORE_BUILT_IN_PRE +#define LANG_HOOKS_RESTORE_BUILT_IN_PRE c_restore_built_in_decl_pre_parsing +#undef LANG_HOOKS_SAVE_BUILT_IN_POST +#define LANG_HOOKS_SAVE_BUILT_IN_POST c_save_built_in_decl_post_parsing +#undef LANG_HOOKS_RESTORE_BUILT_IN_POST +#define LANG_HOOKS_RESTORE_BUILT_IN_POST c_restore_built_in_decl_post_parsing +#undef LANG_HOOKS_HAS_GLOBAL_NAME +#define LANG_HOOKS_HAS_GLOBAL_NAME c_is_global_scope +#undef LANG_HOOKS_GET_LANG_DECL_SIZE +#define LANG_HOOKS_GET_LANG_DECL_SIZE c_get_lang_decl_size +#undef LANG_HOOKS_IS_GENERATED_TYPE +#define LANG_HOOKS_IS_GENERATED_TYPE c_is_compiler_generated_type +#undef LANG_HOOKS_CMP_LANG_TYPE +#define LANG_HOOKS_CMP_LANG_TYPE c_cmp_lang_type + + /* Each front end provides its own lang hook initializer. 
*/ struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; diff --git a/gcc-4.9/gcc/c/c-parser.c b/gcc-4.9/gcc/c/c-parser.c index 6eb235c5c..a51af2e30 100644 --- a/gcc-4.9/gcc/c/c-parser.c +++ b/gcc-4.9/gcc/c/c-parser.c @@ -1707,14 +1707,10 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, " initializer"); init = convert_lvalue_to_rvalue (init_loc, init, true, true); tree init_type = TREE_TYPE (init.value); - /* As with typeof, remove _Atomic and const - qualifiers from atomic types. */ + /* As with typeof, remove all qualifiers from atomic types. */ if (init_type != error_mark_node && TYPE_ATOMIC (init_type)) init_type - = c_build_qualified_type (init_type, - (TYPE_QUALS (init_type) - & ~(TYPE_QUAL_ATOMIC - | TYPE_QUAL_CONST))); + = c_build_qualified_type (init_type, TYPE_UNQUALIFIED); bool vm_type = variably_modified_type_p (init_type, NULL_TREE); if (vm_type) @@ -3011,16 +3007,11 @@ c_parser_typeof_specifier (c_parser *parser) if (was_vm) ret.expr = c_fully_fold (expr.value, false, &ret.expr_const_operands); pop_maybe_used (was_vm); - /* For use in macros such as those in <stdatomic.h>, remove - _Atomic and const qualifiers from atomic types. (Possibly - all qualifiers should be removed; const can be an issue for - more macros using typeof than just the <stdatomic.h> - ones.) */ + /* For use in macros such as those in <stdatomic.h>, remove all + qualifiers from atomic types. (const can be an issue for more macros + using typeof than just the <stdatomic.h> ones.) 
*/ if (ret.spec != error_mark_node && TYPE_ATOMIC (ret.spec)) - ret.spec = c_build_qualified_type (ret.spec, - (TYPE_QUALS (ret.spec) - & ~(TYPE_QUAL_ATOMIC - | TYPE_QUAL_CONST))); + ret.spec = c_build_qualified_type (ret.spec, TYPE_UNQUALIFIED); } c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); return ret; @@ -11881,8 +11872,17 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, tree l = build_omp_clause (OMP_CLAUSE_LOCATION (*c), OMP_CLAUSE_LASTPRIVATE); OMP_CLAUSE_DECL (l) = OMP_CLAUSE_DECL (*c); - OMP_CLAUSE_CHAIN (l) = clauses; - clauses = l; + if (code == OMP_SIMD) + { + OMP_CLAUSE_CHAIN (l) + = cclauses[C_OMP_CLAUSE_SPLIT_FOR]; + cclauses[C_OMP_CLAUSE_SPLIT_FOR] = l; + } + else + { + OMP_CLAUSE_CHAIN (l) = clauses; + clauses = l; + } OMP_CLAUSE_SET_CODE (*c, OMP_CLAUSE_SHARED); } } @@ -14064,7 +14064,7 @@ c_parser_array_notation (location_t loc, c_parser *parser, tree initial_index, tree value_tree = NULL_TREE, type = NULL_TREE, array_type = NULL_TREE; tree array_type_domain = NULL_TREE; - if (array_value == error_mark_node) + if (array_value == error_mark_node || initial_index == error_mark_node) { /* No need to continue. If either of these 2 were true, then an error must be emitted already. Thus, no need to emit them twice. */ diff --git a/gcc-4.9/gcc/c/c-tree.h b/gcc-4.9/gcc/c/c-tree.h index 85df8858d..50fa18074 100644 --- a/gcc-4.9/gcc/c/c-tree.h +++ b/gcc-4.9/gcc/c/c-tree.h @@ -677,4 +677,19 @@ extern tree c_check_omp_declare_reduction_r (tree *, int *, void *); extern void pedwarn_c90 (location_t, int opt, const char *, ...) ATTRIBUTE_GCC_DIAG(3,4); extern void pedwarn_c99 (location_t, int opt, const char *, ...) ATTRIBUTE_GCC_DIAG(3,4); +/* LIPO support. 
*/ + +extern int c_get_lang_decl_size (tree t); +extern void c_lipo_dup_lang_type (tree src, tree dest); +extern void c_lipo_copy_lang_type (tree src, tree dest); +extern bool c_is_global_scope (tree decl, void *scope); +extern void c_clear_name_bindings (tree id); +extern void c_add_built_in_decl (tree decl); +extern void c_save_built_in_decl_pre_parsing (void); +extern void c_restore_built_in_decl_pre_parsing (void); +extern void c_save_built_in_decl_post_parsing (void); +extern void c_restore_built_in_decl_post_parsing (void); +extern bool c_is_compiler_generated_type (tree t); +extern int c_cmp_lang_type (tree t1, tree t2); + #endif /* ! GCC_C_TREE_H */ diff --git a/gcc-4.9/gcc/c/c-typeck.c b/gcc-4.9/gcc/c/c-typeck.c index 65aad4565..5838d6a72 100644 --- a/gcc-4.9/gcc/c/c-typeck.c +++ b/gcc-4.9/gcc/c/c-typeck.c @@ -11925,6 +11925,9 @@ c_finish_omp_clauses (tree clauses) s = size_one_node; OMP_CLAUSE_LINEAR_STEP (c) = s; } + else + OMP_CLAUSE_LINEAR_STEP (c) + = fold_convert (TREE_TYPE (t), OMP_CLAUSE_LINEAR_STEP (c)); goto check_dup_generic; check_dup_generic: diff --git a/gcc-4.9/gcc/cfg-flags.def b/gcc-4.9/gcc/cfg-flags.def index 8422c2b1c..a28ba34b5 100644 --- a/gcc-4.9/gcc/cfg-flags.def +++ b/gcc-4.9/gcc/cfg-flags.def @@ -93,6 +93,9 @@ DEF_BASIC_BLOCK_FLAG(VISITED, 13) demand, and is available after calling compute_transaction_bits(). */ DEF_BASIC_BLOCK_FLAG(IN_TRANSACTION, 14) +/* Set on blocks that have been annotated during AutoFDO profile + attribution. */ +DEF_BASIC_BLOCK_FLAG(ANNOTATED, 15) #endif #ifdef DEF_EDGE_FLAG @@ -177,6 +180,12 @@ DEF_EDGE_FLAG(TM_UNINSTRUMENTED, 15) /* Abort (over) edge out of a GIMPLE_TRANSACTION statement. */ DEF_EDGE_FLAG(TM_ABORT, 16) +/* Annotated during AutoFDO profile attribution. */ +DEF_EDGE_FLAG(ANNOTATED, 17) + +/* Edge probability predicted by __builtin_expect. 
*/ +DEF_EDGE_FLAG(PREDICTED_BY_EXPECT, 18) + #endif /* diff --git a/gcc-4.9/gcc/cfg.c b/gcc-4.9/gcc/cfg.c index a281c0fb8..178c37794 100644 --- a/gcc-4.9/gcc/cfg.c +++ b/gcc-4.9/gcc/cfg.c @@ -941,7 +941,7 @@ scale_bbs_frequencies_int (basic_block *bbs, int nbbs, int num, int den) if (num > 1000000) return; - num = RDIV (1000 * num, den); + num = RDIV (1000.0 * num, den); den = 1000; } if (num > 100 * den) @@ -954,9 +954,9 @@ scale_bbs_frequencies_int (basic_block *bbs, int nbbs, int num, int den) /* Make sure the frequencies do not grow over BB_FREQ_MAX. */ if (bbs[i]->frequency > BB_FREQ_MAX) bbs[i]->frequency = BB_FREQ_MAX; - bbs[i]->count = RDIV (bbs[i]->count * num, den); + bbs[i]->count = RDIV ((double)bbs[i]->count * num, den); FOR_EACH_EDGE (e, ei, bbs[i]->succs) - e->count = RDIV (e->count * num, den); + e->count = RDIV ((double)e->count * num, den); } } @@ -973,7 +973,7 @@ scale_bbs_frequencies_gcov_type (basic_block *bbs, int nbbs, gcov_type num, { int i; edge e; - gcov_type fraction = RDIV (num * 65536, den); + gcov_type fraction = RDIV (num * 65536.0, den); gcc_assert (fraction >= 0); @@ -983,14 +983,14 @@ scale_bbs_frequencies_gcov_type (basic_block *bbs, int nbbs, gcov_type num, edge_iterator ei; bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) - bbs[i]->count = RDIV (bbs[i]->count * num, den); + bbs[i]->count = RDIV ((double)bbs[i]->count * num, den); else - bbs[i]->count = RDIV (bbs[i]->count * fraction, 65536); + bbs[i]->count = RDIV ((double)bbs[i]->count * fraction, 65536); FOR_EACH_EDGE (e, ei, bbs[i]->succs) if (bbs[i]->count <= MAX_SAFE_MULTIPLIER) - e->count = RDIV (e->count * num, den); + e->count = RDIV ((double)e->count * num, den); else - e->count = RDIV (e->count * fraction, 65536); + e->count = RDIV ((double)e->count * fraction, 65536); } else for (i = 0; i < nbbs; i++) @@ -1000,9 +1000,9 @@ scale_bbs_frequencies_gcov_type (basic_block *bbs, int nbbs, gcov_type num, bbs[i]->frequency = 
RDIV (bbs[i]->frequency * num, den); else bbs[i]->frequency = RDIV (bbs[i]->frequency * fraction, 65536); - bbs[i]->count = RDIV (bbs[i]->count * fraction, 65536); + bbs[i]->count = RDIV ((double)bbs[i]->count * fraction, 65536); FOR_EACH_EDGE (e, ei, bbs[i]->succs) - e->count = RDIV (e->count * fraction, 65536); + e->count = RDIV ((double)e->count * fraction, 65536); } } diff --git a/gcc-4.9/gcc/cfgexpand.c b/gcc-4.9/gcc/cfgexpand.c index 14511e137..04d39a3d7 100644 --- a/gcc-4.9/gcc/cfgexpand.c +++ b/gcc-4.9/gcc/cfgexpand.c @@ -74,6 +74,9 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "output.h" +/* In coverage.c */ +extern void coverage_has_asm_stmt (void); + /* Some systems use __main in a way incompatible with its use in gcc, in these cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to give the same symbol without quotes for an alternative entry point. You @@ -2874,6 +2877,7 @@ expand_asm_operands (tree string, tree outputs, tree inputs, emit_move_insn (real_output_rtx[i], output_rtx[i]); crtl->has_asm_statement = 1; + coverage_has_asm_stmt (); free_temp_slots (); } @@ -3681,6 +3685,7 @@ expand_debug_expr (tree exp) { case COND_EXPR: case DOT_PROD_EXPR: + case SAD_EXPR: case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: case FMA_EXPR: diff --git a/gcc-4.9/gcc/cfghooks.c b/gcc-4.9/gcc/cfghooks.c index bc1634aac..87da0d02a 100644 --- a/gcc-4.9/gcc/cfghooks.c +++ b/gcc-4.9/gcc/cfghooks.c @@ -500,7 +500,6 @@ split_block (basic_block bb, void *i) new_bb->count = bb->count; new_bb->frequency = bb->frequency; - new_bb->discriminator = bb->discriminator; if (dom_info_available_p (CDI_DOMINATORS)) { @@ -833,6 +832,9 @@ make_forwarder_block (basic_block bb, bool (*redirect_edge_p) (edge), fallthru = split_block_after_labels (bb); dummy = fallthru->src; + dummy->count = 0; + dummy->frequency = 0; + fallthru->count = 0; bb = fallthru->dest; /* Redirect back edges we want to keep. 
*/ @@ -842,20 +844,13 @@ make_forwarder_block (basic_block bb, bool (*redirect_edge_p) (edge), if (redirect_edge_p (e)) { + dummy->frequency += EDGE_FREQUENCY (e); + dummy->count += e->count; + fallthru->count += e->count; ei_next (&ei); continue; } - dummy->frequency -= EDGE_FREQUENCY (e); - dummy->count -= e->count; - if (dummy->frequency < 0) - dummy->frequency = 0; - if (dummy->count < 0) - dummy->count = 0; - fallthru->count -= e->count; - if (fallthru->count < 0) - fallthru->count = 0; - e_src = e->src; jump = redirect_edge_and_branch_force (e, bb); if (jump != NULL) diff --git a/gcc-4.9/gcc/cfgloop.c b/gcc-4.9/gcc/cfgloop.c index 70744d83d..14693f1c9 100644 --- a/gcc-4.9/gcc/cfgloop.c +++ b/gcc-4.9/gcc/cfgloop.c @@ -1168,24 +1168,98 @@ get_loop_exit_edges (const struct loop *loop) return edges; } -/* Counts the number of conditional branches inside LOOP. */ +/* Determine if INSN is a floating point set. */ -unsigned -num_loop_branches (const struct loop *loop) +static bool +insn_has_fp_set(rtx insn) { - unsigned i, n; - basic_block * body; + int i; + rtx pat = PATTERN(insn); + if (GET_CODE (pat) == SET) + return (FLOAT_MODE_P (GET_MODE (SET_DEST (pat)))); + else if (GET_CODE (pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (pat, 0); i++) + { + rtx sub = XVECEXP (pat, 0, i); + if (GET_CODE (sub) == SET) + return (FLOAT_MODE_P (GET_MODE (SET_DEST (sub)))); + } + } + return false; +} - gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)); +/* Analyzes the instructions inside LOOP, updating the DESC. Currently counts + the number of conditional branch instructions, calls and fp instructions, + as well as the average number of branches executed per iteration. 
*/ + +void +analyze_loop_insns (const struct loop *loop, struct niter_desc *desc) +{ + unsigned i, nbranch; + gcov_type weighted_nbranch; + bool has_call, has_fp; + basic_block * body, bb; + rtx insn; + gcov_type header_count = loop->header->count; + + nbranch = weighted_nbranch = 0; + has_call = has_fp = false; body = get_loop_body (loop); - n = 0; for (i = 0; i < loop->num_nodes; i++) - if (EDGE_COUNT (body[i]->succs) >= 2) - n++; + { + bb = body[i]; + + if (EDGE_COUNT (bb->succs) >= 2) + { + nbranch++; + + /* If this block is executed less frequently than the header (loop + entry), then it is weighted based on its execution count, which + will be turned into a ratio compared to the loop header below. */ + if (bb->count < header_count) + weighted_nbranch += bb->count; + + /* When it is executed more frequently than the header (i.e. it is + in a nested inner loop), simply weight the branch the same as the + header execution count, so that it will contribute 1 branch to + the ratio computed below. */ + else + weighted_nbranch += header_count; + } + + /* No need to iterate through the instructions below if + both flags have already been set. */ + if (has_call && has_fp) + continue; + + FOR_BB_INSNS (bb, insn) + { + if (!INSN_P (insn)) + continue; + + if (!has_call) + has_call = CALL_P (insn); + + if (!has_fp) + has_fp = insn_has_fp_set (insn); + } + } free (body); - return n; + desc->num_branches = nbranch; + /* Now divide the weights computed above by the loop header execution count, + to compute the average number of branches through the loop. By adding + header_count/2 to the numerator we round to nearest with integer + division. */ + if (header_count != 0) + desc->av_num_branches + = (weighted_nbranch + header_count/2) / header_count; + else + desc->av_num_branches = 0; + desc->has_call = has_call; + desc->has_fp = has_fp; } /* Adds basic block BB to LOOP. 
*/ diff --git a/gcc-4.9/gcc/cfgloop.h b/gcc-4.9/gcc/cfgloop.h index 4b7c3d398..c7e417bf2 100644 --- a/gcc-4.9/gcc/cfgloop.h +++ b/gcc-4.9/gcc/cfgloop.h @@ -278,7 +278,6 @@ extern basic_block *get_loop_body_in_custom_order (const struct loop *, extern vec<edge> get_loop_exit_edges (const struct loop *); extern edge single_exit (const struct loop *); extern edge single_likely_exit (struct loop *loop); -extern unsigned num_loop_branches (const struct loop *); extern edge loop_preheader_edge (const struct loop *); extern edge loop_latch_edge (const struct loop *); @@ -381,7 +380,8 @@ struct rtx_iv }; /* The description of an exit from the loop and of the number of iterations - till we take the exit. */ + till we take the exit. Also includes other information used primarily + by the loop unroller. */ struct GTY(()) niter_desc { @@ -419,6 +419,18 @@ struct GTY(()) niter_desc /* The number of iterations of the loop. */ rtx niter_expr; + + /* The number of branches in the loop. */ + unsigned num_branches; + + /* The number of executed branches per iteration. */ + unsigned av_num_branches; + + /* Whether the loop contains a call instruction. */ + bool has_call; + + /* Whether the loop contains fp instructions. 
*/ + bool has_fp; }; extern void iv_analysis_loop_init (struct loop *); @@ -432,6 +444,7 @@ extern void iv_analysis_done (void); extern struct niter_desc *get_simple_loop_desc (struct loop *loop); extern void free_simple_loop_desc (struct loop *loop); +void analyze_loop_insns (const struct loop *, struct niter_desc *desc); static inline struct niter_desc * simple_loop_desc (struct loop *loop) diff --git a/gcc-4.9/gcc/cfgrtl.c b/gcc-4.9/gcc/cfgrtl.c index 60b0c069f..2c5ca2ac1 100644 --- a/gcc-4.9/gcc/cfgrtl.c +++ b/gcc-4.9/gcc/cfgrtl.c @@ -2480,7 +2480,9 @@ rtl_verify_edges (void) | EDGE_IRREDUCIBLE_LOOP | EDGE_LOOP_EXIT | EDGE_CROSSING - | EDGE_PRESERVE)) == 0) + | EDGE_PRESERVE + | EDGE_ANNOTATED + | EDGE_PREDICTED_BY_EXPECT)) == 0) n_branch++; if (e->flags & EDGE_ABNORMAL_CALL) diff --git a/gcc-4.9/gcc/cgraph.c b/gcc-4.9/gcc/cgraph.c index be3661af6..180a63656 100644 --- a/gcc-4.9/gcc/cgraph.c +++ b/gcc-4.9/gcc/cgraph.c @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "rtl.h" #include "ipa-utils.h" #include "lto-streamer.h" +#include "l-ipo.h" #include "ipa-inline.h" #include "cfgloop.h" #include "gimple-pretty-print.h" @@ -773,7 +774,9 @@ cgraph_edge (struct cgraph_node *node, gimple call_stmt) { node->call_site_hash = htab_create_ggc (120, edge_hash, edge_eq, NULL); for (e2 = node->callees; e2; e2 = e2->next_callee) - cgraph_add_edge_to_call_site_hash (e2); + /* Skip fake edges. */ + if (e2->call_stmt) + cgraph_add_edge_to_call_site_hash (e2); for (e2 = node->indirect_calls; e2; e2 = e2->next_callee) cgraph_add_edge_to_call_site_hash (e2); } @@ -822,6 +825,8 @@ cgraph_set_call_stmt (struct cgraph_edge *e, gimple new_stmt, /* Constant propagation (and possibly also inlining?) can turn an indirect call into a direct one. 
*/ struct cgraph_node *new_callee = cgraph_get_node (decl); + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + new_callee = cgraph_lipo_get_resolved_node (decl); gcc_checking_assert (new_callee); e = cgraph_make_edge_direct (e, new_callee); @@ -1026,7 +1031,7 @@ cgraph_edge_remove_caller (struct cgraph_edge *e) else e->caller->callees = e->next_callee; } - if (e->caller->call_site_hash) + if (e->caller->call_site_hash && e->call_stmt) htab_remove_elt_with_hash (e->caller->call_site_hash, e->call_stmt, htab_hash_pointer (e->call_stmt)); @@ -1068,6 +1073,26 @@ cgraph_remove_edge (struct cgraph_edge *e) cgraph_free_edge (e); } +/* Remove fake cgraph edges for indirect calls. NODE is the callee + of the edges. */ + +void +cgraph_remove_fake_indirect_call_in_edges (struct cgraph_node *node) +{ + struct cgraph_edge *f, *e; + + if (!L_IPO_COMP_MODE) + return; + + for (e = node->callers; e; e = f) + { + f = e->next_caller; + if (!e->call_stmt) + cgraph_remove_edge (e); + } +} + + /* Set callee of call graph edge E and add it to the corresponding set of callers. */ @@ -1472,6 +1497,12 @@ cgraph_redirect_edge_call_stmt_to_callee (struct cgraph_edge *e) new_stmt = e->call_stmt; gimple_call_set_fndecl (new_stmt, e->callee->decl); update_stmt_fn (DECL_STRUCT_FUNCTION (e->caller->decl), new_stmt); + if (L_IPO_COMP_MODE) + { + int lp_nr = lookup_stmt_eh_lp (e->call_stmt); + if (lp_nr != 0 && !stmt_could_throw_p (e->call_stmt)) + remove_stmt_from_eh_lp (e->call_stmt); + } } /* If the call becomes noreturn, remove the lhs. 
*/ @@ -1679,6 +1710,10 @@ cgraph_node_remove_callers (struct cgraph_node *node) void release_function_body (tree decl) { + if (cgraph_get_node (decl) + && cgraph_is_aux_decl_external (cgraph_get_node (decl))) + DECL_EXTERNAL (decl) = 1; + if (DECL_STRUCT_FUNCTION (decl)) { push_cfun (DECL_STRUCT_FUNCTION (decl)); @@ -1820,7 +1855,9 @@ cgraph_remove_node (struct cgraph_node *node) cgraph_release_function_body (node); } + cgraph_remove_link_node (node); node->decl = NULL; + if (node->call_site_hash) { htab_delete (node->call_site_hash); @@ -1837,6 +1874,7 @@ cgraph_remove_node (struct cgraph_node *node) free_nodes = node; } + /* Likewise indicate that a node is having address taken. */ void @@ -1978,6 +2016,9 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node) if (node->count) fprintf (f, " executed "HOST_WIDEST_INT_PRINT_DEC"x", (HOST_WIDEST_INT)node->count); + if (node->max_bb_count) + fprintf (f, " hottest bb executed "HOST_WIDEST_INT_PRINT_DEC"x", + (HOST_WIDEST_INT)node->max_bb_count); if (node->origin) fprintf (f, " nested in: %s", node->origin->asm_name ()); if (gimple_has_body_p (node->decl)) @@ -2566,11 +2607,16 @@ clone_of_p (struct cgraph_node *node, struct cgraph_node *node2) skipped_thunk = true; } - if (skipped_thunk - && (!node2->clone_of - || !node2->clone.args_to_skip - || !bitmap_bit_p (node2->clone.args_to_skip, 0))) - return false; + if (skipped_thunk) + { + if (!node2->clone.args_to_skip + || !bitmap_bit_p (node2->clone.args_to_skip, 0)) + return false; + if (node2->former_clone_of == node->decl) + return true; + else if (!node2->clone_of) + return false; + } while (node != node2 && node2) node2 = node2->clone_of; @@ -2599,6 +2645,7 @@ verify_edge_count_and_frequency (struct cgraph_edge *e) error_found = true; } if (gimple_has_body_p (e->caller->decl) + && e->call_stmt && !e->caller->global.inlined_to && !e->speculative /* FIXME: Inline-analysis sets frequency to 0 when edge is optimized out. 
@@ -2855,7 +2902,9 @@ verify_cgraph_node (struct cgraph_node *node) error ("Alias has non-alias reference"); error_found = true; } - else if (ref_found) + else if (ref_found + /* in LIPO mode, the alias can refer to the real target also */ + && !L_IPO_COMP_MODE) { error ("Alias has more than one alias reference"); error_found = true; @@ -2969,7 +3018,7 @@ verify_cgraph_node (struct cgraph_node *node) for (e = node->callees; e; e = e->next_callee) { - if (!e->aux) + if (!e->aux && e->call_stmt) { error ("edge %s->%s has no corresponding call_stmt", identifier_to_locale (e->caller->name ()), diff --git a/gcc-4.9/gcc/cgraph.h b/gcc-4.9/gcc/cgraph.h index 15310d888..ee32bcb6b 100644 --- a/gcc-4.9/gcc/cgraph.h +++ b/gcc-4.9/gcc/cgraph.h @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "function.h" #include "ipa-ref.h" +#include "l-ipo.h" /* Symbol table consists of functions and variables. TODO: add labels and CONST_DECLs. */ @@ -222,6 +223,9 @@ struct GTY(()) cgraph_local_info { once compilation is finished. Available only with -funit-at-a-time. */ struct GTY(()) cgraph_global_info { + /* Estimated stack frame consumption by the function. */ + HOST_WIDE_INT estimated_stack_size; + /* For inline clones this points to the function they will be inlined into. */ struct cgraph_node *inlined_to; @@ -398,6 +402,8 @@ public: /* Expected number of executions: calculated in profile.c. */ gcov_type count; + /* Maximum count of any basic block in the function. */ + gcov_type max_bb_count; /* How to scale counts at materialization time; used to merge LTO units with different number of profile runs. */ int count_materialization_scale; @@ -416,6 +422,8 @@ public: /* Set once the function has been instantiated and its callee lists created. */ unsigned process : 1; + /* Is this function cloned during versioning ? */ + unsigned is_versioned_clone : 1; /* How commonly executed the node is. Initialized during branch probabilities pass. 
*/ ENUM_BITFIELD (node_frequency) frequency : 2; @@ -637,6 +645,8 @@ typedef struct cgraph_edge *cgraph_edge_p; class GTY((tag ("SYMTAB_VARIABLE"))) varpool_node : public symtab_node { public: + /* The module in which it is first declared. */ + unsigned module_id; /* Set when variable is scheduled to be assembled. */ unsigned output : 1; @@ -721,8 +731,11 @@ enum symbol_partitioning_class /* In symtab.c */ +hashval_t decl_assembler_name_hash (const_tree); +bool decl_assembler_name_equal (tree decl, const_tree); void symtab_register_node (symtab_node *); void symtab_unregister_node (symtab_node *); +void symtab_remove_from_same_comdat_group (symtab_node *); void symtab_remove_node (symtab_node *); symtab_node *symtab_get_node (const_tree); symtab_node *symtab_node_for_asm (const_tree asmname); @@ -739,6 +752,8 @@ void verify_symtab_node (symtab_node *); bool verify_symtab_base (symtab_node *); bool symtab_used_from_object_file_p (symtab_node *); void symtab_make_decl_local (tree); +void unlink_from_assembler_name_hash (symtab_node *, bool); +void insert_to_assembler_name_hash (symtab_node *, bool); symtab_node *symtab_alias_ultimate_target (symtab_node *, enum availability *avail = NULL); bool symtab_resolve_alias (symtab_node *node, symtab_node *target); @@ -759,6 +774,11 @@ void dump_cgraph_node (FILE *, struct cgraph_node *); void debug_cgraph_node (struct cgraph_node *); void cgraph_remove_edge (struct cgraph_edge *); void cgraph_remove_node (struct cgraph_node *); +void cgraph_remove_fake_indirect_call_in_edges (struct cgraph_node *); +extern bool cgraph_pre_profiling_inlining_done; +extern bool cgraph_is_fake_indirect_call_edge (struct cgraph_edge *e); +void cgraph_add_to_same_comdat_group (struct cgraph_node *, struct cgraph_node *); +void cgraph_remove_node_and_inline_clones (struct cgraph_node *); void cgraph_release_function_body (struct cgraph_node *); void release_function_body (tree); void cgraph_node_remove_callees (struct cgraph_node *node); @@ -822,6 
+842,50 @@ void verify_cgraph (void); void verify_cgraph_node (struct cgraph_node *); void cgraph_mark_address_taken_node (struct cgraph_node *); +/* Module info structure. */ +struct GTY (()) cgraph_mod_info +{ + unsigned module_id; +}; + +/* LIPO linker symbol table entry for function symbols. */ +struct GTY (()) cgraph_sym +{ + tree assembler_name; + struct cgraph_node *rep_node; + tree rep_decl; + htab_t GTY ((param_is (struct cgraph_mod_info))) def_module_hash; + bool is_promoted_static; +}; + +void cgraph_init_gid_map (void); +void cgraph_add_fake_indirect_call_edges (void); +void cgraph_remove_zero_count_fake_edges (void); +void cgraph_do_link (void); +struct cgraph_sym *cgraph_link_node (struct cgraph_node *); +tree cgraph_find_decl (tree asm_name); +void cgraph_remove_link_node (struct cgraph_node *node); +struct cgraph_node *cgraph_lipo_get_resolved_node (tree decl); +struct cgraph_node *cgraph_lipo_get_resolved_node_1 (tree decl, bool); +unsigned cgraph_get_module_id (tree fndecl); +bool cgraph_is_auxiliary (tree fndecl); +void cgraph_process_module_scope_statics (void); +bool cgraph_is_promoted_static_func (tree fndecl); +bool cgraph_is_inline_body_available_in_module (tree fndecl, unsigned module_id); +bool cgraph_is_aux_decl_external (struct cgraph_node *); +void cgraph_unify_type_alias_sets (void); +void varpool_do_link (void); +void varpool_link_node (struct varpool_node *); +void varpool_remove_link_node (struct varpool_node *node); +struct varpool_node *real_varpool_node (tree decl); +bool varpool_is_auxiliary (struct varpool_node *node); +void varpool_get_referenced_asm_ids (vec<tree, va_gc> **); +void varpool_clear_asm_id_reference_bit (void); +void varpool_reset_queue (void); +void varpool_remove_duplicate_weak_decls (void); + +bool cgraph_decide_is_function_needed (struct cgraph_node *, tree); + typedef void (*cgraph_edge_hook)(struct cgraph_edge *, void *); typedef void (*cgraph_node_hook)(struct cgraph_node *, void *); typedef void 
(*varpool_node_hook)(varpool_node *, void *); @@ -881,6 +945,7 @@ void fixup_same_cpp_alias_visibility (symtab_node *, symtab_node *target, tree); IN_SSA is true if the gimple is in SSA. */ basic_block init_lowered_empty_function (tree, bool); void cgraph_reset_node (struct cgraph_node *); +void cgraph_enqueue_node (struct cgraph_node *); bool expand_thunk (struct cgraph_node *, bool); /* In cgraphclones.c */ @@ -1466,10 +1531,16 @@ static inline bool cgraph_edge_recursive_p (struct cgraph_edge *e) { struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); - if (e->caller->global.inlined_to) - return e->caller->global.inlined_to->decl == callee->decl; - else - return e->caller->decl == callee->decl; + struct cgraph_node *caller = e->caller; + if (caller->global.inlined_to) + caller = caller->global.inlined_to; + + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + { + callee = cgraph_lipo_get_resolved_node (callee->decl); + caller = cgraph_lipo_get_resolved_node (caller->decl); + } + return caller->decl == callee->decl; } /* Return true if the TM_CLONE bit is set for a given FNDECL. */ diff --git a/gcc-4.9/gcc/cgraphbuild.c b/gcc-4.9/gcc/cgraphbuild.c index 19961e27d..c6b04be5a 100644 --- a/gcc-4.9/gcc/cgraphbuild.c +++ b/gcc-4.9/gcc/cgraphbuild.c @@ -23,9 +23,12 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" +#include "tree-eh.h" #include "pointer-set.h" #include "basic-block.h" #include "tree-ssa-alias.h" +#include "tree-ssa-operands.h" +#include "tree-into-ssa.h" #include "internal-fn.h" #include "gimple-fold.h" #include "gimple-expr.h" @@ -33,11 +36,16 @@ along with GCC; see the file COPYING3. 
If not see #include "gimple.h" #include "gimple-iterator.h" #include "gimple-walk.h" +#include "gimple-ssa.h" #include "langhooks.h" #include "intl.h" +#include "toplev.h" +#include "gcov-io.h" +#include "coverage.h" #include "tree-pass.h" #include "ipa-utils.h" #include "except.h" +#include "l-ipo.h" #include "ipa-inline.h" /* Context of record_reference. */ @@ -221,6 +229,158 @@ compute_call_stmt_bb_frequency (tree decl, basic_block bb) return freq; } + +bool cgraph_pre_profiling_inlining_done = false; + +/* Return true if E is a fake indirect call edge. */ + +bool +cgraph_is_fake_indirect_call_edge (struct cgraph_edge *e) +{ + return !e->call_stmt; +} + + +/* Add fake cgraph edges from NODE to its indirect call callees + using profile data. */ + +static void +add_fake_indirect_call_edges (struct cgraph_node *node) +{ + unsigned n_counts, i; + gcov_type *ic_counts; + + /* Enable this only for LIPO for now. */ + if (!L_IPO_COMP_MODE) + return; + + ic_counts + = get_coverage_counts_no_warn (DECL_STRUCT_FUNCTION (node->decl), + GCOV_COUNTER_ICALL_TOPNV, &n_counts); + + if (!ic_counts) + return; + + gcc_assert ((n_counts % GCOV_ICALL_TOPN_NCOUNTS) == 0); + +/* After the early_inline_1 before value profile transformation, + functions that are indirect call targets may have their bodies + removed (extern inline functions or functions from aux modules, + functions in comdat etc) if all direct callsites are inlined. This + will lead to missing inline opportunities after profile based + indirect call promotion. The solution is to add fake edges to + indirect call targets. Note that such edges are not associated + with actual indirect call sites because it is not possible to + reliably match pre-early-inline indirect callsites with indirect + call profile counters which are from post-early inline function body. 
*/ + + for (i = 0; i < n_counts; + i += GCOV_ICALL_TOPN_NCOUNTS, ic_counts += GCOV_ICALL_TOPN_NCOUNTS) + { + gcov_type val1, val2, count1, count2; + struct cgraph_node *direct_call1 = 0, *direct_call2 = 0; + + val1 = ic_counts[1]; + count1 = ic_counts[2]; + val2 = ic_counts[3]; + count2 = ic_counts[4]; + + if (val1 == 0 || count1 == 0) + continue; + + direct_call1 = find_func_by_global_id (val1, false); + if (direct_call1) + { + tree decl = direct_call1->decl; + cgraph_create_edge (node, + cgraph_get_create_node (decl), + NULL, + count1, 0); + } + + if (val2 == 0 || count2 == 0) + continue; + direct_call2 = find_func_by_global_id (val2, false); + if (direct_call2) + { + tree decl = direct_call2->decl; + cgraph_create_edge (node, + cgraph_get_create_node (decl), + NULL, + count2, 0); + } + } +} + + +/* This can be implemented as an IPA pass that must be first one + before any unreachable node elimination. */ + +void +cgraph_add_fake_indirect_call_edges (void) +{ + struct cgraph_node *node; + + /* Enable this only for LIPO for now. */ + if (!L_IPO_COMP_MODE) + return; + + FOR_EACH_DEFINED_FUNCTION (node) + { + if (!gimple_has_body_p (node->decl)) + continue; + add_fake_indirect_call_edges (node); + } +} + +/* Remove zero count fake edges added for the purpose of ensuring + the right processing order. This should be called after all + small ipa passes. */ +void +cgraph_remove_zero_count_fake_edges (void) +{ + struct cgraph_node *node; + + /* Enable this only for LIPO for now. 
*/ + if (!L_IPO_COMP_MODE) + return; + + FOR_EACH_DEFINED_FUNCTION (node) + { + if (!gimple_has_body_p (node->decl)) + continue; + + struct cgraph_edge *e, *f; + for (e = node->callees; e; e = f) + { + f = e->next_callee; + if (!e->call_stmt && !e->count && !e->frequency) + cgraph_remove_edge (e); + } + } +} + +static void +record_reference_to_real_target_from_alias (struct cgraph_node *alias) +{ + if (!L_IPO_COMP_MODE || !cgraph_pre_profiling_inlining_done) + return; + + /* Need to add a reference to the resolved node in LIPO + mode to avoid the real node from eliminated */ + if (alias->alias && alias->analyzed) + { + struct cgraph_node *target, *real_target; + + target = dyn_cast<cgraph_node> (symtab_alias_target (alias)); + real_target = cgraph_lipo_get_resolved_node (target->decl); + /* TODO: this make create duplicate entries in the reference list. */ + if (real_target != target) + ipa_record_reference (alias, real_target, + IPA_REF_ALIAS, NULL); + } +} + /* Mark address taken in STMT. */ static bool @@ -229,11 +389,55 @@ mark_address (gimple stmt, tree addr, tree, void *data) addr = get_base_address (addr); if (TREE_CODE (addr) == FUNCTION_DECL) { + /* Before possibly creating a new node in cgraph_get_create_node, + save the current cgraph node for addr. */ + struct cgraph_node *first_clone = cgraph_get_node (addr); struct cgraph_node *node = cgraph_get_create_node (addr); + /* In LIPO mode we use the resolved node. However, there is + a possibility that it may not exist at this point. This + can happen in cases of ipa-cp, where this is a reference + that will eventually go away during inline_transform when we + invoke cgraph_redirect_edge_call_stmt_to_callee to rewrite + the call_stmt and skip some arguments. It is possible + that earlier during inline_call the references to the original + non-cloned resolved node were all eliminated, and it was removed. 
+ However, virtual clones may stick around until inline_transform, + due to references in other virtual clones, at which point they + will all be removed. In between inline_call and inline_transform, + however, we will materialize clones which would rebuild references + and end up here upon still seeing the reference on the call. + Handle this by skipping the resolved node lookup when the first + clone was marked global.inlined_to (i.e. it is a virtual clone, + the original is gone). + + For example, when this is called after ipa inlining for a call stmt + in an ipa cp clone, the call will still look like: + foo.isra.3 (pow, ...); + while the caller node actually has foo.isra.3.constprop in its + callee list. And the original, resolved node for pow would have + been eliminated during ipa inlining/virtual cloning if this was + the only reference leading to a call. + + Later, during inline_transform, this call statement will be rewritted + in cgraph_redirect_edge_call_stmt_to_callee to: + foo.isra.3.constprop (...); // pow argument removed + */ + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done + && first_clone && !first_clone->global.inlined_to) + { + /* We now fix up address references to function decls after the LIPO + link, so any existing node that isn't an inline clone should be + the resolved node. 
*/ + struct cgraph_node *resolved = cgraph_lipo_get_resolved_node (addr); + gcc_assert (resolved == first_clone); + gcc_assert (resolved == node); + } + cgraph_mark_address_taken_node (node); ipa_record_reference ((symtab_node *)data, node, IPA_REF_ADDR, stmt); + record_reference_to_real_target_from_alias (node); } else if (addr && TREE_CODE (addr) == VAR_DECL && (TREE_STATIC (addr) || DECL_EXTERNAL (addr))) @@ -243,6 +447,13 @@ mark_address (gimple stmt, tree addr, tree, void *data) ipa_record_reference ((symtab_node *)data, vnode, IPA_REF_ADDR, stmt); + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + { + struct varpool_node *rvnode = real_varpool_node (addr); + if (rvnode != vnode) + ipa_record_reference ((symtab_node *)data, rvnode, + IPA_REF_ADDR, stmt); + } } return false; @@ -272,6 +483,15 @@ mark_load (gimple stmt, tree t, tree, void *data) ipa_record_reference ((symtab_node *)data, vnode, IPA_REF_LOAD, stmt); + + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + { + struct varpool_node *rvnode = real_varpool_node (t); + if (rvnode != vnode) + ipa_record_reference ((symtab_node *)data, + rvnode, + IPA_REF_ADDR, stmt); + } } return false; } @@ -290,6 +510,14 @@ mark_store (gimple stmt, tree t, tree, void *data) ipa_record_reference ((symtab_node *)data, vnode, IPA_REF_STORE, stmt); + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + { + struct varpool_node *rvnode = real_varpool_node (t); + if (rvnode != vnode) + ipa_record_reference ((symtab_node *)data, + rvnode, + IPA_REF_ADDR, stmt); + } } return false; } @@ -369,6 +597,7 @@ build_cgraph_edges (void) ipa_record_stmt_references (node, gsi_stmt (gsi)); } + /* Look for initializers of constant variables and private statics. 
*/ FOR_EACH_LOCAL_DECL (cfun, ix, decl) if (TREE_CODE (decl) == VAR_DECL @@ -436,6 +665,144 @@ record_references_in_initializer (tree decl, bool only_vars) pointer_set_destroy (visited_nodes); } +/* Update any function decl references in base ADDR of operand OP to refer to + the resolved node. */ + +static bool +fixup_ref (gimple, tree addr, tree op) +{ + addr = get_base_address (addr); + if (addr && TREE_CODE (addr) == FUNCTION_DECL) + { + gcc_assert (TREE_CODE (op) == ADDR_EXPR); + gcc_assert (TREE_OPERAND (op,0) == addr); + struct cgraph_node *real_callee; + real_callee = cgraph_lipo_get_resolved_node (addr); + if (addr == real_callee->decl) + return false; + TREE_OPERAND (op,0) = real_callee->decl; + } + return false; +} + +/* Update any function decl references in base ADDR of operand OP from address + STMT operand OP to refer to the resolved node. */ + +static bool +fixup_address (gimple stmt, tree addr, tree op, void *) +{ + return fixup_ref (stmt, addr, op); +} + +/* Update any function decl references in base ADDR of operand OP from load + STMT operand OP to refer to the resolved node. See comments in mark_load + on when a load may have a function decl reference. */ + +static bool +fixup_load (gimple stmt, tree addr, tree op, void *) +{ + return fixup_ref (stmt, addr, op); +} + +/* After the LIPO link, references to function decls should be updated + to the resolved node, so that the correct references are added to the + cgraph. Update all references in STMT. */ + +void +lipo_fixup_load_addr_ops (gimple stmt) +{ + walk_stmt_load_store_addr_ops (stmt, NULL, fixup_load, NULL, + fixup_address); +} + +/* In LIPO mode, before tree_profiling, the call graph edge + needs to be built with the original target node to make + sure consistent early inline decisions between profile + generate and profile use. After tree-profiling, the target + needs to be set to the resolved node so that ipa-inline + sees the definitions. 
*/ +#include "gimple-pretty-print.h" +static void +lipo_fixup_cgraph_edge_call_target (gimple stmt) +{ + tree decl; + gcc_assert (is_gimple_call (stmt)); + + decl = gimple_call_fndecl (stmt); + if (decl) + { + struct cgraph_node *real_callee; + real_callee = cgraph_lipo_get_resolved_node (decl); + + if (decl != real_callee->decl) + { + int lp_nr; + + gcc_assert (!real_callee->clone.combined_args_to_skip); + gimple_call_set_fndecl (stmt, real_callee->decl); + update_stmt (stmt); + lp_nr = lookup_stmt_eh_lp (stmt); + if (lp_nr != 0 && !stmt_could_throw_p (stmt)) + remove_stmt_from_eh_lp (stmt); + } + } +} + +/* Link the cgraph nodes, varpool nodes and fixup the call target to + the correct decl. Remove dead functions. */ + + +void +lipo_link_and_fixup () +{ + struct cgraph_node *node; + + cgraph_pre_profiling_inlining_done = true; + cgraph_process_module_scope_statics (); + /* Now perform link to allow cross module inlining. */ + cgraph_do_link (); + varpool_do_link (); + cgraph_unify_type_alias_sets (); + cgraph_init_gid_map (); + + FOR_EACH_DEFINED_FUNCTION (node) + { + if (!gimple_has_body_p (node->decl)) + continue; + + /* Don't profile functions produced for builtin stuff. 
*/ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) + continue; + + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + + if (L_IPO_COMP_MODE) + { + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt)) + lipo_fixup_cgraph_edge_call_target (stmt); + lipo_fixup_load_addr_ops (stmt); + } + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + lipo_fixup_load_addr_ops (gsi_stmt (gsi)); + } + update_ssa (TODO_update_ssa); + } + rebuild_cgraph_edges (); + pop_cfun (); + } + + cgraph_add_fake_indirect_call_edges (); + symtab_remove_unreachable_nodes (true, dump_file); +} + + /* Rebuild cgraph edges for current function node. This needs to be run after passes that don't update the cgraph. */ @@ -450,9 +817,12 @@ rebuild_cgraph_edges (void) ipa_remove_all_references (&node->ref_list); node->count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + node->max_bb_count = 0; FOR_EACH_BB_FN (bb, cfun) { + if (bb->count > node->max_bb_count) + node->max_bb_count = bb->count; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple stmt = gsi_stmt (gsi); @@ -464,8 +834,13 @@ rebuild_cgraph_edges (void) bb); decl = gimple_call_fndecl (stmt); if (decl) - cgraph_create_edge (node, cgraph_get_create_node (decl), stmt, - bb->count, freq); + { + struct cgraph_node *callee = cgraph_get_create_node (decl); + if (L_IPO_COMP_MODE) + record_reference_to_real_target_from_alias (callee); + cgraph_create_edge (node, callee, stmt, + bb->count, freq); + } else if (gimple_call_internal_p (stmt)) ; else @@ -478,6 +853,9 @@ rebuild_cgraph_edges (void) for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ipa_record_stmt_references (node, gsi_stmt (gsi)); } + + if (!cgraph_pre_profiling_inlining_done) + add_fake_indirect_call_edges (node); record_eh_tables (node, cfun); gcc_assert 
(!node->global.inlined_to); @@ -547,6 +925,9 @@ public: } // anon namespace +/* Defined in passes.c */ +extern bool cgraph_callee_edges_final_cleanup; + gimple_opt_pass * make_pass_rebuild_cgraph_edges (gcc::context *ctxt) { @@ -557,6 +938,12 @@ make_pass_rebuild_cgraph_edges (gcc::context *ctxt) static unsigned int remove_cgraph_callee_edges (void) { + /* The -freorder-functions=* needs the call-graph preserved till + pass_final. */ + if (cgraph_callee_edges_final_cleanup + && (flag_reorder_functions > 1)) + return 0; + struct cgraph_node *node = cgraph_get_node (current_function_decl); cgraph_node_remove_callees (node); ipa_remove_all_references (&node->ref_list); diff --git a/gcc-4.9/gcc/cgraphclones.c b/gcc-4.9/gcc/cgraphclones.c index 257939cb0..9fec2a04d 100644 --- a/gcc-4.9/gcc/cgraphclones.c +++ b/gcc-4.9/gcc/cgraphclones.c @@ -101,6 +101,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-utils.h" #include "lto-streamer.h" #include "except.h" +#include "l-ipo.h" /* Create clone of E in the node N represented by CALL_EXPR the callgraph. */ struct cgraph_edge * @@ -128,7 +129,11 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n, via cgraph_resolve_speculation and not here. */ && !e->speculative) { - struct cgraph_node *callee = cgraph_get_node (decl); + struct cgraph_node *callee; + if (L_IPO_COMP_MODE && cgraph_pre_profiling_inlining_done) + callee = cgraph_lipo_get_resolved_node (decl); + else + callee = cgraph_get_node (decl); gcc_checking_assert (callee); new_edge = cgraph_create_edge (n, callee, call_stmt, count, freq); } @@ -302,14 +307,13 @@ set_new_clone_decl_and_node_flags (cgraph_node *new_node) thunk is this_adjusting but we are removing this parameter. 
*/ static cgraph_node * -duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node, - bitmap args_to_skip) +duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node) { cgraph_node *new_thunk, *thunk_of; thunk_of = cgraph_function_or_thunk_node (thunk->callees->callee); if (thunk_of->thunk.thunk_p) - node = duplicate_thunk_for_node (thunk_of, node, args_to_skip); + node = duplicate_thunk_for_node (thunk_of, node); struct cgraph_edge *cs; for (cs = node->callers; cs; cs = cs->next_caller) @@ -321,17 +325,18 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node, return cs->caller; tree new_decl; - if (!args_to_skip) + if (!node->clone.args_to_skip) new_decl = copy_node (thunk->decl); else { /* We do not need to duplicate this_adjusting thunks if we have removed this. */ if (thunk->thunk.this_adjusting - && bitmap_bit_p (args_to_skip, 0)) + && bitmap_bit_p (node->clone.args_to_skip, 0)) return node; - new_decl = build_function_decl_skip_args (thunk->decl, args_to_skip, + new_decl = build_function_decl_skip_args (thunk->decl, + node->clone.args_to_skip, false); } gcc_checking_assert (!DECL_STRUCT_FUNCTION (new_decl)); @@ -349,6 +354,8 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node, new_thunk->thunk = thunk->thunk; new_thunk->unique_name = in_lto_p; new_thunk->former_clone_of = thunk->decl; + new_thunk->clone.args_to_skip = node->clone.args_to_skip; + new_thunk->clone.combined_args_to_skip = node->clone.combined_args_to_skip; struct cgraph_edge *e = cgraph_create_edge (new_thunk, node, NULL, 0, CGRAPH_FREQ_BASE); @@ -365,12 +372,11 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node, chain. 
*/ void -redirect_edge_duplicating_thunks (struct cgraph_edge *e, struct cgraph_node *n, - bitmap args_to_skip) +redirect_edge_duplicating_thunks (struct cgraph_edge *e, struct cgraph_node *n) { cgraph_node *orig_to = cgraph_function_or_thunk_node (e->callee); if (orig_to->thunk.thunk_p) - n = duplicate_thunk_for_node (orig_to, n, args_to_skip); + n = duplicate_thunk_for_node (orig_to, n); cgraph_redirect_edge_callee (e, n); } @@ -422,10 +428,26 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, new_node->global.inlined_to = new_inlined_to; new_node->rtl = n->rtl; new_node->count = count; + new_node->max_bb_count = count; + if (n->count) + new_node->max_bb_count = ((n->max_bb_count + n->count / 2) + / n->count) * count; new_node->frequency = n->frequency; - new_node->clone = n->clone; - new_node->clone.tree_map = NULL; new_node->tp_first_run = n->tp_first_run; + + new_node->clone.tree_map = NULL; + new_node->clone.args_to_skip = args_to_skip; + if (!args_to_skip) + new_node->clone.combined_args_to_skip = n->clone.combined_args_to_skip; + else if (n->clone.combined_args_to_skip) + { + new_node->clone.combined_args_to_skip = BITMAP_GGC_ALLOC (); + bitmap_ior (new_node->clone.combined_args_to_skip, + n->clone.combined_args_to_skip, args_to_skip); + } + else + new_node->clone.combined_args_to_skip = args_to_skip; + if (n->count) { if (new_node->count > n->count) @@ -435,11 +457,19 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, } else count_scale = 0; + /* In AutoFDO, if edge count is larger than callee's entry block + count, we will not update the original callee because it may + mistakenly mark some hot function as cold. 
*/ + if (flag_auto_profile && count >= n->count) + update_original = false; if (update_original) { n->count -= count; if (n->count < 0) - n->count = 0; + n->count = 0; + n->max_bb_count -= new_node->max_bb_count; + if (n->max_bb_count < 0) + n->max_bb_count = 0; } FOR_EACH_VEC_ELT (redirect_callers, i, e) @@ -450,10 +480,9 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq, if (!e->callee || DECL_BUILT_IN_CLASS (e->callee->decl) != BUILT_IN_NORMAL || DECL_FUNCTION_CODE (e->callee->decl) != BUILT_IN_UNREACHABLE) - redirect_edge_duplicating_thunks (e, new_node, args_to_skip); + redirect_edge_duplicating_thunks (e, new_node); } - for (e = n->callees;e; e=e->next_callee) cgraph_clone_edge (e, new_node, e->call_stmt, e->lto_stmt_uid, count_scale, freq, update_original); @@ -562,7 +591,6 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, DECL_SECTION_NAME (new_node->decl) = NULL; set_new_clone_decl_and_node_flags (new_node); new_node->clone.tree_map = tree_map; - new_node->clone.args_to_skip = args_to_skip; /* Clones of global symbols or symbols with unique names are unique. 
*/ if ((TREE_PUBLIC (old_decl) @@ -574,32 +602,6 @@ cgraph_create_virtual_clone (struct cgraph_node *old_node, FOR_EACH_VEC_SAFE_ELT (tree_map, i, map) ipa_maybe_record_reference (new_node, map->new_tree, IPA_REF_ADDR, NULL); - if (!args_to_skip) - new_node->clone.combined_args_to_skip = old_node->clone.combined_args_to_skip; - else if (old_node->clone.combined_args_to_skip) - { - int newi = 0, oldi = 0; - tree arg; - bitmap new_args_to_skip = BITMAP_GGC_ALLOC (); - struct cgraph_node *orig_node; - for (orig_node = old_node; orig_node->clone_of; orig_node = orig_node->clone_of) - ; - for (arg = DECL_ARGUMENTS (orig_node->decl); - arg; arg = DECL_CHAIN (arg), oldi++) - { - if (bitmap_bit_p (old_node->clone.combined_args_to_skip, oldi)) - { - bitmap_set_bit (new_args_to_skip, oldi); - continue; - } - if (bitmap_bit_p (args_to_skip, newi)) - bitmap_set_bit (new_args_to_skip, oldi); - newi++; - } - new_node->clone.combined_args_to_skip = new_args_to_skip; - } - else - new_node->clone.combined_args_to_skip = args_to_skip; cgraph_call_node_duplication_hooks (old_node, new_node); @@ -883,6 +885,7 @@ cgraph_copy_node_for_versioning (struct cgraph_node *old_version, new_version->global = old_version->global; new_version->rtl = old_version->rtl; new_version->count = old_version->count; + new_version->max_bb_count = old_version->max_bb_count; for (e = old_version->callees; e; e=e->next_callee) if (!bbs_to_copy diff --git a/gcc-4.9/gcc/cgraphunit.c b/gcc-4.9/gcc/cgraphunit.c index 06283fc3f..83e436f76 100644 --- a/gcc-4.9/gcc/cgraphunit.c +++ b/gcc-4.9/gcc/cgraphunit.c @@ -193,6 +193,7 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "function.h" #include "ipa-prop.h" +#include "gcov-io.h" #include "tree-iterator.h" #include "tree-pass.h" #include "tree-dump.h" @@ -203,6 +204,7 @@ along with GCC; see the file COPYING3. 
If not see #include "ipa-inline.h" #include "ipa-utils.h" #include "lto-streamer.h" +#include "l-ipo.h" #include "except.h" #include "cfgloop.h" #include "regset.h" /* FIXME: For reg_obstack. */ @@ -244,7 +246,8 @@ decide_is_symbol_needed (symtab_node *node) /* Double check that no one output the function into assembly file early. */ gcc_checking_assert (!DECL_ASSEMBLER_NAME_SET_P (decl) - || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))); + || (L_IPO_COMP_MODE + || !TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))); if (!node->definition) return false; @@ -292,6 +295,12 @@ enqueue_node (symtab_node *node) queued_nodes = node; } +void +cgraph_enqueue_node (struct cgraph_node *node) +{ + enqueue_node ((symtab_node *) node); +} + /* Process CGRAPH_NEW_FUNCTIONS and perform actions necessary to add these functions into callgraph in a way so they look like ordinary reachable functions inserted into callgraph already at construction time. */ @@ -491,6 +500,7 @@ cgraph_add_new_function (tree fndecl, bool lowered) { gcc::pass_manager *passes = g->get_passes (); struct cgraph_node *node; + switch (cgraph_state) { case CGRAPH_STATE_PARSING: @@ -551,7 +561,6 @@ cgraph_add_new_function (tree fndecl, bool lowered) pop_cfun (); expand_function (node); break; - default: gcc_unreachable (); } @@ -972,7 +981,8 @@ analyze_functions (void) fprintf (cgraph_dump_file, "Trivially needed symbols:"); changed = true; if (cgraph_dump_file) - fprintf (cgraph_dump_file, " %s", node->asm_name ()); + fprintf (cgraph_dump_file, " %s/%d", node->asm_name (), + node->order); if (!changed && cgraph_dump_file) fprintf (cgraph_dump_file, "\n"); } @@ -1085,7 +1095,7 @@ analyze_functions (void) if (!node->aux && !referred_to_p (node)) { if (cgraph_dump_file) - fprintf (cgraph_dump_file, " %s", node->name ()); + fprintf (cgraph_dump_file, " %s/%d", node->name (), node->order); symtab_remove_node (node); continue; } @@ -1205,6 +1215,137 @@ handle_alias_pairs (void) vec_free (alias_pairs); } 
+/* Hash function for symbol (function) resolution. */ + +static hashval_t +hash_node_by_assembler_name (const void *p) +{ + const struct cgraph_node *n = (const struct cgraph_node *) p; + return (hashval_t) decl_assembler_name_hash ( + DECL_ASSEMBLER_NAME (n->decl)); +} + +/* Equality function for cgraph_node table. */ + +static int +eq_node_assembler_name (const void *p1, const void *p2) +{ + const struct cgraph_node *n1 = (const struct cgraph_node *) p1; + const_tree name = (const_tree)p2; + return (decl_assembler_name_equal (n1->decl, name)); +} + +/* In l-ipo mode compilation (light weight IPO), multiple bodies may + be available for the same inline declared function. cgraph linking + does not really merge them in order to keep the context (module info) + of each body. After inlining, the linkage of the function may require + them to be output (even if it is defined in an auxiliary module). This + in term may result in duplicate emission. */ + +static GTY((param_is (symtab_node))) htab_t output_node_hash = NULL; + +/* Add NODE that is expanded into the hashtable. */ + +static struct cgraph_node * +cgraph_add_output_node (struct cgraph_node *node) +{ + void **aslot; + tree name; + + if (!L_IPO_COMP_MODE) + return node; + + /* Never common non public names except for compiler + generated static functions. (they are not promoted + to globals either. 
*/ + if (!TREE_PUBLIC (node->decl) + && !(DECL_ARTIFICIAL (node->decl) + && DECL_ASSEMBLER_NAME_SET_P (node->decl))) + return node; + + if (!output_node_hash) + output_node_hash = + htab_create_ggc (10, hash_node_by_assembler_name, + eq_node_assembler_name, NULL); + + name = DECL_ASSEMBLER_NAME (node->decl); + + aslot = htab_find_slot_with_hash (output_node_hash, name, + decl_assembler_name_hash (name), + INSERT); + if (*aslot == NULL) + { + *aslot = node; + return node; + } + else + return (struct cgraph_node *)(*aslot); +} + +#if ENABLE_CHECKING +/* Return the cgraph_node if the function symbol for NODE is + expanded in the output. Returns NULL otherwise. */ + +static struct cgraph_node * +cgraph_find_output_node (struct cgraph_node *node) +{ + void **aslot; + tree name; + + if (!L_IPO_COMP_MODE) + return node; + + /* We do not track non-public functions. */ + if (!TREE_PUBLIC (node->decl)) + return NULL; + + /* Never added. */ + if (!output_node_hash) + return NULL; + + name = DECL_ASSEMBLER_NAME (node->decl); + + aslot = htab_find_slot_with_hash (output_node_hash, name, + decl_assembler_name_hash (name), + NO_INSERT); + if (!aslot) + return NULL; + + return (struct cgraph_node *)(*aslot); +} +#endif + + +#if ENABLE_CHECKING +/* A function used in validation. Return true if NODE was + not expanded and its body was not reclaimed. */ + +static bool +cgraph_node_expansion_skipped (struct cgraph_node *node) +{ + struct cgraph_node *output_node; + + if (!L_IPO_COMP_MODE) + return false; + + output_node = cgraph_find_output_node (node); + + if (output_node == node) + return false; + + if (output_node) + return true; + + /* No output, no duplicate being output, and the node is not + inlined (and reclaimed) either -- check if the caller node + is output/expanded or not. */ + if (node->global.inlined_to) + return cgraph_node_expansion_skipped (node->global.inlined_to); + + /* External functions not marked for output. 
*/ + return true; +} +#endif /* Figure out what functions we want to assemble. */ @@ -1235,8 +1376,10 @@ mark_functions_to_output (void) && !node->alias && !node->global.inlined_to && !TREE_ASM_WRITTEN (decl) - && !DECL_EXTERNAL (decl)) + && !(DECL_EXTERNAL (decl) || cgraph_is_aux_decl_external (node))) { + if (cgraph_add_output_node (node) == node) { + /* Do not fix indentation. */ node->process = 1; if (node->same_comdat_group) { @@ -1245,9 +1388,11 @@ mark_functions_to_output (void) next != node; next = cgraph (next->same_comdat_group)) if (!next->thunk.thunk_p && !next->alias + && cgraph_add_output_node (next) == next && !symtab_comdat_local_p (next)) next->process = 1; } + } } else if (node->same_comdat_group) { @@ -1266,6 +1411,7 @@ mark_functions_to_output (void) have analyzed node pointing to it. */ && !node->in_other_partition && !node->alias + && !cgraph_is_auxiliary (node->decl) && !node->clones && !DECL_EXTERNAL (decl)) { @@ -1278,13 +1424,14 @@ mark_functions_to_output (void) || node->in_other_partition || node->clones || DECL_ARTIFICIAL (decl) - || DECL_EXTERNAL (decl)); + || DECL_EXTERNAL (decl) + || cgraph_is_auxiliary (node->decl)); } } #ifdef ENABLE_CHECKING - if (check_same_comdat_groups) + if (check_same_comdat_groups && !L_IPO_COMP_MODE) FOR_EACH_FUNCTION (node) if (node->same_comdat_group && !node->process) { @@ -1297,7 +1444,8 @@ mark_functions_to_output (void) analyzed node pointing to it. */ && !node->in_other_partition && !node->clones - && !DECL_EXTERNAL (decl)) + && !(DECL_EXTERNAL (decl) || cgraph_is_aux_decl_external (node)) + && !L_IPO_COMP_MODE) { dump_cgraph_node (stderr, node); internal_error ("failed to reclaim unneeded function in same " @@ -1680,6 +1828,7 @@ expand_thunk (struct cgraph_node *node, bool output_asm_thunks) #ifdef ENABLE_CHECKING verify_flow_info (); #endif + free_dominance_info (CDI_DOMINATORS); /* Since we want to emit the thunk, we explicitly mark its name as referenced. 
*/ @@ -1963,6 +2112,8 @@ output_in_order (void) max = symtab_order; nodes = XCNEWVEC (struct cgraph_order_sort, max); + varpool_remove_duplicate_weak_decls (); + FOR_EACH_DEFINED_FUNCTION (pf) { if (pf->process && !pf->thunk.thunk_p && !pf->alias) @@ -2058,8 +2209,11 @@ ipa_passes (void) if (!in_lto_p) { - /* Generate coverage variables and constructors. */ - coverage_finish (); + /* Generate coverage variables and constructors. + In LIPO mode, delay this until direct call profiling + is done. */ + if (!flag_dyn_ipa) + coverage_finish (); /* Process new functions added. */ set_cfun (NULL); @@ -2165,6 +2319,12 @@ compile (void) fprintf (stderr, "Performing interprocedural optimizations\n"); cgraph_state = CGRAPH_STATE_IPA; + if (L_IPO_COMP_MODE) + { + cgraph_init_gid_map (); + cgraph_add_fake_indirect_call_edges (); + } + /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ if (flag_lto) lto_streamer_hooks_init (); @@ -2250,6 +2410,7 @@ compile (void) output_asm_statements (); expand_all_functions (); + varpool_remove_duplicate_weak_decls (); varpool_output_variables (); } @@ -2265,15 +2426,21 @@ compile (void) #ifdef ENABLE_CHECKING verify_symtab (); /* Double check that all inline clones are gone and that all - function bodies have been released from memory. */ + function bodies have been released from memory. + As an exception, allow inline clones in the callgraph if + they are auxiliary functions. This is because we don't + expand any of the auxiliary functions, which may result + in inline clones of some auxiliary functions to be left + in the callgraph. 
*/ if (!seen_error ()) { struct cgraph_node *node; bool error_found = false; FOR_EACH_DEFINED_FUNCTION (node) - if (node->global.inlined_to + if (((node->global.inlined_to && !cgraph_is_auxiliary (node->decl)) || gimple_has_body_p (node->decl)) + && !cgraph_node_expansion_skipped (node)) { error_found = true; dump_cgraph_node (stderr, node); diff --git a/gcc-4.9/gcc/cif-code.def b/gcc-4.9/gcc/cif-code.def index ce64d96b6..3e5296c2c 100644 --- a/gcc-4.9/gcc/cif-code.def +++ b/gcc-4.9/gcc/cif-code.def @@ -99,6 +99,14 @@ DEFCIFCODE(MISMATCHED_ARGUMENTS, CIF_FINAL_ERROR, DEFCIFCODE(ORIGINALLY_INDIRECT_CALL, CIF_FINAL_NORMAL, N_("originally indirect function call not considered for inlining")) +/* Cross module inlining not allowed in first early inline phase. */ +DEFCIFCODE(NO_INTERMODULE_INLINE, CIF_FINAL_ERROR, + N_("inter-module inlining is disabled")) + +/* Artificial edge. */ +DEFCIFCODE(ARTIFICIAL_EDGE, CIF_FINAL_ERROR, + N_("artificial call graph edge")) + /* Ths edge represents an indirect edge with a yet-undetermined callee . */ DEFCIFCODE(INDIRECT_UNKNOWN_CALL, CIF_FINAL_NORMAL, N_("indirect function call with a yet undetermined callee")) diff --git a/gcc-4.9/gcc/combine.c b/gcc-4.9/gcc/combine.c index f7a279e86..7c0045205 100644 --- a/gcc-4.9/gcc/combine.c +++ b/gcc-4.9/gcc/combine.c @@ -11987,7 +11987,7 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1) = (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1); op0 = simplify_gen_binary (AND, tmode, gen_lowpart (tmode, op0), - gen_int_mode (sign, mode)); + gen_int_mode (sign, tmode)); code = (code == LT) ? 
NE : EQ; break; } diff --git a/gcc-4.9/gcc/common.opt b/gcc-4.9/gcc/common.opt index 86489996d..6614228b8 100644 --- a/gcc-4.9/gcc/common.opt +++ b/gcc-4.9/gcc/common.opt @@ -292,6 +292,9 @@ Driver Joined Alias(L) -no-canonical-prefixes Driver Alias(no-canonical-prefixes) +-canonical-prefixes +Driver Alias(canonical-prefixes) + -no-standard-libraries Driver Alias(nostdlib) @@ -557,6 +560,10 @@ Wfatal-errors Common Var(flag_fatal_errors) Exit on the first error occurred +Wforce-warnings +Common Var(flag_force_warnings) +Disable promoting warnings to errors + Wframe-larger-than= Common RejectNegative Joined UInteger -Wframe-larger-than=<number> Warn if a function's stack frame requires more than <number> bytes @@ -599,6 +606,14 @@ Wpadded Common Var(warn_padded) Warning Warn when padding is required to align structure members +Wripa-opt-mismatch +Common Var(warn_ripa_opt_mismatch) Warning EnabledBy(Wall) +Warn if primary and auxiliary modules have mismatched command line options + +Wself-assign +Common Var(warn_self_assign) Init(0) Warning +Warn when a variable is assigned to itself + Wpedantic Common Var(pedantic) Warning Issue warnings needed for strict compliance to the standard @@ -607,6 +622,15 @@ Wshadow Common Var(warn_shadow) Warning Warn when one local variable shadows another +Wshadow-local +Common Var(warn_shadow_local) Warning +Warn when one local variable shadows another local variable or parameter + +Wshadow-compatible-local +Common Var(warn_shadow_compatible_local) Warning +Warn when one local variable shadows another local variable or parameter +of compatible type + Wstack-protector Common Var(warn_stack_protect) Warning Warn when not issuing stack smashing protection for some reason @@ -651,6 +675,50 @@ Wtrampolines Common Var(warn_trampolines) Warning Warn whenever a trampoline is generated +; FIXME. The following -Wthread-* flags are placeholders to prevent +; confusing the compiler when applications are built with these flags. 
+; Actual support for these flags is being implemented in the +; thread-annotations branch. +Wthread-safety +Common Var(warn_thread_safety) Warning +Warn about potential thread safety issues when the code is annotated with thread safety attributes + +Wthread-safety-analysis +Common Ignore +Does nothing. For compatibility with clang thread safety analysis. + +Wthread-safety-attributes +Common Ignore +Does nothing. For compatibility with clang thread safety analysis. + +Wthread-safety-precise +Common Ignore +Does nothing. For compatibility with clang thread safety analysis. + +Wthread-unguarded-var +Common Var(warn_thread_unguarded_var) Init(1) Warning +Warn about shared variables not properly protected by locks specified in the attributes + +Wthread-unguarded-func +Common Var(warn_thread_unguarded_func) Init(1) Warning +Warn about function calls not properly protected by locks specified in the attributes + +Wthread-mismatched-lock-order +Common Var(warn_thread_mismatched_lock_order) Init(1) Warning +Warn about lock acquisition order inconsistent with what specified in the attributes + +Wthread-mismatched-lock-acq-rel +Common Var(warn_thread_mismatched_lock_acq_rel) Init(1) Warning +Warn about mismatched lock acquisition and release + +Wthread-reentrant-lock +Common Var(warn_thread_reentrant_lock) Init(1) Warning +Warn about a lock being acquired recursively + +Wthread-unsupported-lock-name +Common Var(warn_unsupported_lock_name) Init(0) Warning +Warn about uses of unsupported lock names in attributes + Wtype-limits Common Var(warn_type_limits) Warning EnabledBy(Wextra) Warn if a comparison is always true or always false due to the limited range of the data type @@ -710,6 +778,9 @@ Warn when a vector operation is compiled outside the SIMD Xassembler Driver Separate +Xclang-only= +Driver Joined Ignore + Xlinker Driver Separate @@ -808,7 +879,7 @@ Driver Undocumented ; Additional positive integers will be assigned as new versions of ; the ABI become the default version 
of the ABI. fabi-version= -Common Joined RejectNegative UInteger Var(flag_abi_version) Init(2) +Common Joined RejectNegative UInteger Var(flag_abi_version) Init(0) faggressive-loop-optimizations Common Report Var(flag_aggressive_loop_optimizations) Optimization Init(1) @@ -870,6 +941,34 @@ fauto-inc-dec Common Report Var(flag_auto_inc_dec) Init(1) Generate auto-inc/dec instructions +fauto-profile +Common Report Var(flag_auto_profile) Optimization +Use sample profile information for call graph node weights. The default +profile file is fbdata.afdo in 'pwd'. + +fauto-profile= +Common Joined RejectNegative Var(auto_profile_file) +Use sample profile information for call graph node weights. The profile +file is specified in the argument. + +fauto-profile-accurate +Common Report Var(flag_auto_profile_accurate) Optimization +Whether to assume the sample profile is accurate. + +fauto-profile-record-coverage-in-elf +Common Report Var(flag_auto_profile_record_coverage_in_elf) Optimization +Whether to record annotation coverage info in elf. + +fcheck-branch-annotation +Common Report Var(flag_check_branch_annotation) +Compare branch prediction result and autofdo profile information, store the +result in a section in the generated elf file. + +fcheck-branch-annotation-threshold= +Common Joined UInteger Var(check_branch_annotation_threshold) Init(100) +The number of executions a basic block needs to reach before GCC dumps its +branch prediction information with -fcheck-branch-annotation. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. @@ -1111,6 +1210,27 @@ fdwarf2-cfi-asm Common Report Var(flag_dwarf2_cfi_asm) Init(HAVE_GAS_CFI_DIRECTIVE) Enable CFI tables via GAS assembler directives. +fripa +Common Report Var(flag_dyn_ipa) +Perform Dynamic Inter-Procedural Analysis. 
+ +fripa-disallow-asm-modules +Common Report Var(flag_ripa_disallow_asm_modules) +Don't import an auxiliary module if it contains asm statements + +fripa-disallow-opt-mismatch +Common Report Var(flag_ripa_disallow_opt_mismatch) +Don't import an auxiliary module if the command line options mismatch with the primary module + +fripa-no-promote-always-inline-func +Common Report Var(flag_ripa_no_promote_always_inline) Init(0) +Don't promote always inline static functions assuming they +will be inlined and no copy is needed. + +fripa-inc-path-sub= +Common Joined RejectNegative Var(lipo_inc_path_pattern) +Substitute substring in include paths with a new string to allow reuse profile data + fearly-inlining Common Report Var(flag_early_inlining) Init(1) Optimization Perform early inlining @@ -1631,6 +1751,10 @@ fpcc-struct-return Common Report Var(flag_pcc_struct_return,1) Init(DEFAULT_PCC_STRUCT_RETURN) Return small aggregates in memory, not registers +fpeel-codesize-limit +Common Report Var(flag_peel_codesize_limit) Init(1) Optimization +Limit non-const non-FP loop peeling under profile estimates of large code footprint + fpeel-loops Common Report Var(flag_peel_loops) Optimization Perform loop peeling @@ -1692,6 +1816,19 @@ fprofile-correction Common Report Var(flag_profile_correction) Enable correction of flow inconsistent profile data input +fprofile-dump +Common Report Var(flag_profile_dump) Init(0) Optimization +Dump CFG profile for comparison. + +; fprofile-generate-atomic=0: disable atomic updates. +; fprofile-generate-atomic=1: atomically update edge profile counters. +; fprofile-generate-atomic=2: atomically update value profile counters. +; fprofile-generate-atomic=3: atomically update edge and value profile counters. +; other values will be ignored (fall back to the default of 0). 
+fprofile-generate-atomic= +Common Joined UInteger Report Var(flag_profile_gen_atomic) Init(0) Optimization +fprofile-generate-atomic=[0..3] Atomically increments for profile counters. + fprofile-generate Common Enable common options for generating profile info for profile feedback directed optimizations @@ -1700,6 +1837,21 @@ fprofile-generate= Common Joined RejectNegative Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir= +fprofile-generate-sampling +Common Var(flag_profile_generate_sampling) +Turn on instrumentation sampling with -fprofile-generate with rate set by --param profile-generate-sampling-rate or environment variable GCOV_SAMPLING_RATE + +femit-function-names +Common Var(flag_emit_function_names) +Print to stderr the mapping from module name and function id to assembler +function name when -ftest-coverage, -fprofile-generate or -fprofile-use are +active, for use in correlating function ids in gcda files with the function +name. + +fprofile-strip= +Common Joined RejectNegative Var(profile_base_name_suffix_to_strip) +Specify a substring to be stripped from the profile base file name + fprofile-use Common Var(flag_profile_use) Enable common options for performing profile feedback directed optimizations @@ -1737,6 +1889,14 @@ frecord-gcc-switches Common Report Var(flag_record_gcc_switches) Record gcc command line switches in the object file. +; This option differs from frecord-gcc-switches in the way that it +; divide the command line options into several categories. And the +; section is not mergable so that linker can save gcc switches for +; each module. +frecord-compilation-info-in-elf +Common Report Var(flag_record_compilation_info_in_elf) +Record the compiler optimizations in a .gnu.switches.text section. 
+ freg-struct-return Common Report Var(flag_pcc_struct_return,0) Optimization Return small aggregates in registers @@ -1765,6 +1925,16 @@ freorder-functions Common Report Var(flag_reorder_functions) Optimization Reorder functions to improve code placement +freorder-functions= +Common Joined RejectNegative Enum(function_reorder) Var(flag_reorder_functions) Init(0) Optimization +-freorder-functions=[callgraph] Select the scheme for function reordering. This invokes a linker plugin. Generate .gnu.callgraph.text sections listing callees and edge counts. + +Enum +Name(function_reorder) Type(int) UnknownError(unrecognized function reorder value %qs) + +EnumValue +Enum(function_reorder) String(callgraph) Value(2) + frerun-cse-after-loop Common Report Var(flag_rerun_cse_after_loop) Optimization Add a common subexpression elimination pass after loop optimizations @@ -1923,6 +2093,10 @@ fsingle-precision-constant Common Report Var(flag_single_precision_constant) Optimization Convert floating point constants to single precision constants +fsized-delete +Common Report Var(flag_sized_delete) Optimization +Support delete operator with object's size as the second parameter. 
+ fsplit-ivs-in-unroller Common Report Var(flag_split_ivs_in_unroller) Init(1) Optimization Split lifetimes of induction variables when loops are unrolled @@ -1962,8 +2136,12 @@ fstack-protector Common Report Var(flag_stack_protect, 1) Use propolice as a stack protection method -fstack-protector-all +fstack-protector-strong Common Report RejectNegative Var(flag_stack_protect, 2) +Use a smart stack protection method for certain functions + +fstack-protector-all +Common Report RejectNegative Var(flag_stack_protect, 3) Use a stack protection method for every function fstack-protector-strong @@ -2209,6 +2387,10 @@ ftree-vrp Common Report Var(flag_tree_vrp) Init(0) Optimization Perform Value Range Propagation on trees +fstrict-enum-precision +Common Report Var(flag_strict_enum_precision) Init(1) Optimization +Perform transformations based on enum precision + funit-at-a-time Common Report Var(flag_unit_at_a_time) Init(1) Optimization Compile whole compilation unit at a time @@ -2221,6 +2403,10 @@ funroll-all-loops Common Report Var(flag_unroll_all_loops) Optimization Perform loop unrolling for all loops +funroll-codesize-limit +Common Report Var(flag_unroll_codesize_limit) Init(1) Optimization +Limit non-const non-FP loop unrolling under profile estimates of large code footprint + ; Nonzero means that loop optimizer may assume that the induction variables ; that control loops do not overflow and that the loops with nontrivial ; exit condition are not infinite @@ -2468,6 +2654,10 @@ grecord-gcc-switches Common RejectNegative Var(dwarf_record_gcc_switches,1) Record gcc command line switches in DWARF DW_AT_producer. 
+gmlt +Common RejectNegative +Generate debug information at level 1 with minimal line table + gno-split-dwarf Common Driver RejectNegative Var(dwarf_split_debug_info,0) Init(0) Don't generate debug information in separate .dwo files @@ -2476,6 +2666,12 @@ gsplit-dwarf Common Driver RejectNegative Var(dwarf_split_debug_info,1) Generate debug information in separate .dwo files +gfission +Common Driver Alias(gsplit-dwarf) + +gno-fission +Common Driver Alias(gno-split-dwarf) + gstabs Common JoinedOrMissing Negative(gstabs+) Generate debug information in STABS format @@ -2528,6 +2724,9 @@ Driver no-canonical-prefixes Driver +canonical-prefixes +Driver + nodefaultlibs Driver diff --git a/gcc-4.9/gcc/common/config/msp430/msp430-common.c b/gcc-4.9/gcc/common/config/msp430/msp430-common.c new file mode 100644 index 000000000..fc2c1f27d --- /dev/null +++ b/gcc-4.9/gcc/common/config/msp430/msp430-common.c @@ -0,0 +1,91 @@ +/* Common hooks for Texas Instruments MSP430. + Copyright (C) 2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic-core.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" + +/* Handle -mcpu= and -mmcu= here. 
We want to ensure that only one + of these two options - the last specified on the command line - + is passed on to the msp430 backend. */ + +static bool +msp430_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED, + struct gcc_options *opts_set ATTRIBUTE_UNUSED, + const struct cl_decoded_option *decoded, + location_t loc ATTRIBUTE_UNUSED) +{ + switch (decoded->opt_index) + { + case OPT_mcpu_: + if (strcasecmp (decoded->arg, "msp430x") == 0 + || strcasecmp (decoded->arg, "msp430xv2") == 0 + || strcasecmp (decoded->arg, "430x") == 0 + || strcasecmp (decoded->arg, "430xv2") == 0) + { + target_cpu = "msp430x"; + target_mcu = NULL; + } + else if (strcasecmp (decoded->arg, "msp430") == 0 + || strcasecmp (decoded->arg, "430") == 0) + { + target_cpu = "msp430"; + target_mcu = NULL; + } + else + { + error ("unrecognised argument of -mcpu: %s", decoded->arg); + return false; + } + break; + + case OPT_mmcu_: + /* For backwards compatibility we recognise two generic MCU + 430X names. However we want to be able to generate special C + preprocessor defines for them, which is why we set target_mcu + to NULL. */ + if (strcasecmp (decoded->arg, "msp430") == 0) + { + target_cpu = "msp430"; + target_mcu = NULL; + } + else if (strcasecmp (decoded->arg, "msp430x") == 0 + || strcasecmp (decoded->arg, "msp430xv2") == 0) + { + target_cpu = "msp430x"; + target_mcu = NULL; + } + else + target_cpu = NULL; + break; + } + + return true; +} + +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION msp430_handle_option + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc-4.9/gcc/common/config/nds32/nds32-common.c b/gcc-4.9/gcc/common/config/nds32/nds32-common.c index 7d9bf6704..6a2ef81a1 100644 --- a/gcc-4.9/gcc/common/config/nds32/nds32-common.c +++ b/gcc-4.9/gcc/common/config/nds32/nds32-common.c @@ -1,5 +1,5 @@ /* Common hooks of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2014 Free Software Foundation, Inc. 
+ Copyright (C) 2012-2013 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. diff --git a/gcc-4.9/gcc/config.gcc b/gcc-4.9/gcc/config.gcc index 79f57de55..c4851d37f 100644 --- a/gcc-4.9/gcc/config.gcc +++ b/gcc-4.9/gcc/config.gcc @@ -903,14 +903,19 @@ aarch64*-*-elf) TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'` ;; aarch64*-*-linux*) - tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h" + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h" tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h" extra_options="${extra_options} linux-android.opt" tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux" + default_gnu_indirect_function=yes + case $target in aarch64_be-*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ;; + aarch64*-*-linux-android*) + tm_file="${tm_file} aarch64/aarch64-linux-android.h" + ;; esac aarch64_multilibs="${with_multilib_list}" if test "$aarch64_multilibs" = "default"; then @@ -1022,9 +1027,30 @@ arm*-*-linux-*) # ARM GNU/Linux with ELF ;; esac tmake_file="${tmake_file} arm/t-arm arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi" - tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h arm/arm.h" + tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" # Define multilib configuration for arm-linux-androideabi. case ${target} in + arm*-*-linux-*eabi) + tmake_file="$tmake_file arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi" + # Define multilib configuration for arm-linux-androideabi. + case ${target} in + *-androideabi) + tmake_file="$tmake_file arm/t-linux-androideabi" + ;; + esac + # Pull in spec changes for GRTE configurations. + case ${target} in + *-grte*) + tm_file="${tm_file} linux-grte.h arm/linux-grte.h" + ;; + esac + # The BPABI long long divmod functions return a 128-bit value in + # registers r0-r3. 
Correctly modeling that requires the use of + # TImode. + need_64bit_hwint=yes + # The EABI requires the use of __cxa_atexit. + default_use_cxa_atexit=yes + ;; *-androideabi) tmake_file="$tmake_file arm/t-linux-androideabi" ;; @@ -1438,6 +1464,12 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i else tm_file="${tm_file} i386/gnu-user-common.h i386/gnu-user.h i386/linux-common.h i386/linux.h" fi + # Pull in spec changes for GRTE configurations. + case ${target} in + *-grte*) + tm_file="${tm_file} linux-grte.h" + ;; + esac ;; i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} i386/gnu-user-common.h i386/gnu-user.h knetbsd-gnu.h i386/knetbsd-gnu.h" @@ -1462,6 +1494,12 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu) extra_options="${extra_options} linux-android.opt" # Assume modern glibc default_gnu_indirect_function=yes + # Pull in spec changes for GRTE configurations. + case ${target} in + *-grte*) + tm_file="${tm_file} linux-grte.h" + ;; + esac ;; x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu64.h" @@ -2315,8 +2353,10 @@ powerpc-*-rtems*) powerpc*-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h freebsd-spec.h rs6000/sysv4.h" extra_options="${extra_options} rs6000/sysv4.opt" + extra_options="${extra_options} linux-android.opt" tmake_file="rs6000/t-fprules rs6000/t-ppcos ${tmake_file} rs6000/t-ppccomm" extra_objs="$extra_objs rs6000-linux.o" + default_gnu_indirect_function=yes case ${target} in powerpc*le-*-*) tm_file="${tm_file} rs6000/sysv4le.h" ;; @@ -2376,6 +2416,12 @@ powerpc*-*-linux*) if test x${enable_secureplt} = xyes; then tm_file="rs6000/secureplt.h ${tm_file}" fi + # Pull in spec changes for GRTE configurations. 
+ case ${target} in + *-grte*) + tm_file="${tm_file} rs6000/linux-grte.h" + ;; + esac ;; powerpc-wrs-vxworks|powerpc-wrs-vxworksae) tm_file="${tm_file} elfos.h freebsd-spec.h rs6000/sysv4.h" diff --git a/gcc-4.9/gcc/config.host b/gcc-4.9/gcc/config.host index bea17bcdd..c9916ccc0 100644 --- a/gcc-4.9/gcc/config.host +++ b/gcc-4.9/gcc/config.host @@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o host_can_use_collect2=yes use_long_long_for_widest_fast_int=no host_lto_plugin_soname=liblto_plugin.so +host_function_reordering_plugin_soname=libfunction_reordering_plugin.so # Unsupported hosts list. Generally, only include hosts known to fail here, # since we allow hosts not listed to be supported generically. diff --git a/gcc-4.9/gcc/config.in b/gcc-4.9/gcc/config.in index 4d57b87ca..a57dde115 100644 --- a/gcc-4.9/gcc/config.in +++ b/gcc-4.9/gcc/config.in @@ -45,6 +45,12 @@ #endif +/* Define to enable prefix canonicalization. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_CANONICAL_PREFIXES +#endif + + /* Define if you want more run-time sanity checks. This one gets a grab bag of miscellaneous but relatively cheap checks. */ #ifndef USED_FOR_TARGET @@ -1260,6 +1266,13 @@ #endif +/* Define if the linker supports clearing hardware capabilities via mapfile. + */ +#ifndef USED_FOR_TARGET +#undef HAVE_LD_CLEARCAP +#endif + + /* Define if your linker supports --demangle option. */ #ifndef USED_FOR_TARGET #undef HAVE_LD_DEMANGLE @@ -1714,6 +1727,11 @@ #undef LINKER_HASH_STYLE #endif +/* Define to the name of the function reordering plugin DSO that must be + passed to the linker's -plugin=LIB option. */ +#ifndef USED_FOR_TARGET +#undef FRPLUGINSONAME +#endif /* Define to the name of the LTO plugin DSO that must be passed to the linker's -plugin=LIB option. */ @@ -1919,6 +1937,12 @@ #endif +/* Define to be extra text for frame size warnings. 
*/ +#ifndef USED_FOR_TARGET +#undef WARN_FRAME_LARGER_THAN_EXTRA_TEXT +#endif + + /* Define to be the last component of the Windows registry key under which to look for installation paths. The full key used will be HKEY_LOCAL_MACHINE/SOFTWARE/Free Software Foundation/{WIN32_REGISTRY_KEY}. diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h new file mode 100644 index 000000000..91d235ff1 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux-android.h @@ -0,0 +1,59 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef GCC_AARCH64_LINUX_ANDROID_H +#define GCC_AARCH64_LINUX_ANDROID_H + + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#undef LINK_SPEC +#define LINK_SPEC \ + LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ + LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef CC1_SPEC +#define CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic")) + +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +#endif /* GCC_AARCH64_LINUX_ANDROID_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h index f32d19f16..f8a97c899 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -21,8 +21,10 @@ #ifndef GCC_AARCH64_LINUX_H #define GCC_AARCH64_LINUX_H -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" -#define BIONIC_DYNAMIC_LINKER "/system/bin/linker64" +#ifndef RUNTIME_ROOT_PREFIX +#define RUNTIME_ROOT_PREFIX "" +#endif +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" #define CPP_SPEC "%{pthread:-D_REENTRANT}" @@ -36,38 +38,13 @@ %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -maarch64linux%{mbig-endian:b}" +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + #define TARGET_OS_CPP_BUILTINS() \ do \ { \ GNU_USER_TARGET_OS_CPP_BUILTINS(); \ - ANDROID_TARGET_OS_CPP_BUILTINS(); \ } \ while (0) -#undef 
LINK_SPEC -#define LINK_SPEC \ - LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ - LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) - -#undef CC1_SPEC -#define CC1_SPEC \ - LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ - GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC("-fpic")) - -#define CC1PLUS_SPEC \ - LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) - -#undef LIB_SPEC -#define LIB_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) - -#undef ENDFILE_SPEC -#define ENDFILE_SPEC \ - LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) - #endif /* GCC_AARCH64_LINUX_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def index 1d2cc7679..f9c436948 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def +++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def @@ -31,6 +31,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ VECTOR_MODES (FLOAT, 8); /* V2SF. */ VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ +VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */ INT_MODE (OI, 32); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md index 73aee2c3d..1f827b57d 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -934,14 +934,22 @@ [(set_attr "type" "neon_minmax<q>")] ) -;; Move into low-half clearing high half to 0. +;; vec_concat gives a new vector with the low elements from operand 1, and +;; the high elements from operand 2. That is to say, given op1 = { a, b } +;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. 
+;; What that means, is that the RTL descriptions of the below patterns +;; need to change depending on endianness. -(define_insn "move_lo_quad_<mode>" +;; Move to the low architectural bits of the register. +;; On little-endian this is { operand, zeroes } +;; On big-endian this is { zeroes, operand } + +(define_insn "move_lo_quad_internal_<mode>" [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ (match_operand:<VHALF> 1 "register_operand" "w,r,r") (vec_duplicate:<VHALF> (const_int 0))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 @@ -952,7 +960,39 @@ (set_attr "length" "4")] ) -;; Move into high-half. +(define_insn "move_lo_quad_internal_be_<mode>" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (vec_duplicate:<VHALF> (const_int 0)) + (match_operand:<VHALF> 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +(define_expand "move_lo_quad_<mode>" + [(match_operand:VQ 0 "register_operand") + (match_operand:VQ 1 "register_operand")] + "TARGET_SIMD" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1])); + else + emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1])); + DONE; +} +) + +;; Move operand1 to the high architectural bits of the register, keeping +;; the low architectural bits of operand2. 
+;; For little-endian this is { operand2, operand1 } +;; For big-endian this is { operand1, operand2 } (define_insn "aarch64_simd_move_hi_quad_<mode>" [(set (match_operand:VQ 0 "register_operand" "+w,w") @@ -961,12 +1001,25 @@ (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) (match_operand:<VHALF> 1 "register_operand" "w,r")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ ins\\t%0.d[1], %1.d[0] ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins") - (set_attr "length" "4")] + [(set_attr "type" "neon_ins")] +) + +(define_insn "aarch64_simd_move_hi_quad_be_<mode>" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (match_operand:<VHALF> 1 "register_operand" "w,r") + (vec_select:<VHALF> + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins")] ) (define_expand "move_hi_quad_<mode>" @@ -974,9 +1027,13 @@ (match_operand:<VHALF> 1 "register_operand" "")] "TARGET_SIMD" { - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); - emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], - operands[1], p)); + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], + operands[1], p)); + else + emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], + operands[1], p)); DONE; }) @@ -2321,12 +2378,44 @@ (vec_concat:<VDBL> (match_operand:VDIC 1 "register_operand" "w") (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "mov\\t%0.8b, %1.8b" [(set_attr "type" "neon_move<q>")] ) -(define_insn_and_split "aarch64_combine<mode>" +(define_insn "*aarch64_combinez_be<mode>" + [(set (match_operand:<VDBL> 0 "register_operand" "=&w") + (vec_concat:<VDBL> + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz") + (match_operand:VDIC 1 
"register_operand" "w")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move<q>")] +) + +(define_expand "aarch64_combine<mode>" + [(match_operand:<VDBL> 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] + "TARGET_SIMD" +{ + rtx op1, op2; + if (BYTES_BIG_ENDIAN) + { + op1 = operands[2]; + op2 = operands[1]; + } + else + { + op1 = operands[1]; + op2 = operands[2]; + } + emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2)); + DONE; +} +) + +(define_insn_and_split "aarch64_combine_internal<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=&w") (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] @@ -2335,16 +2424,19 @@ "&& reload_completed" [(const_int 0)] { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + if (BYTES_BIG_ENDIAN) + aarch64_split_simd_combine (operands[0], operands[2], operands[1]); + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } [(set_attr "type" "multiple")] ) (define_expand "aarch64_simd_combine<mode>" - [(set (match_operand:<VDBL> 0 "register_operand" "=&w") - (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") - (match_operand:VDC 2 "register_operand" "w")))] + [(match_operand:<VDBL> 0 "register_operand") + (match_operand:VDC 1 "register_operand") + (match_operand:VDC 2 "register_operand")] "TARGET_SIMD" { emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); @@ -2633,7 +2725,41 @@ ;; sq<r>dmulh_lane -(define_insn "aarch64_sq<r>dmulh_lane<mode>" +(define_expand "aarch64_sqdmulh_lane<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = 
GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2649,7 +2775,41 @@ [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) -(define_insn "aarch64_sq<r>dmulh_laneq<mode>" +(define_expand "aarch64_sqdmulh_laneq<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCONQ> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq<mode>" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand:<VCONQ> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + 
emit_insn (gen_aarch64_sqrdmulh_laneq<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2659,24 +2819,56 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) -(define_insn "aarch64_sq<r>dmulh_lane<mode>" +(define_expand "aarch64_sqdmulh_lane<mode>" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane<mode>" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand:<VCOND> 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane<mode>_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sq<r>dmulh_lane<mode>_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") (vec_select:<VEL> - (match_operand:<VCONQ> 2 
"register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); - operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] ) @@ -2712,7 +2904,31 @@ (sign_extend:<VWIDE> (vec_duplicate:VD_HSI (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) )) (const_int 1))))] @@ -2735,7 +2951,30 @@ (match_operand:SD_HSI 2 "register_operand" "w")) (sign_extend:<VWIDE> (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, 
%<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) ) (const_int 1))))] @@ -2752,11 +2991,12 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2767,12 +3007,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal_laneq<mode>_internal (operands[0], 
operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2782,11 +3023,12 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2797,12 +3039,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "0") (match_operand:VSD_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); DONE; @@ -2890,7 +3133,33 @@ (sign_extend:<VWIDE> (vec_duplicate:<VHALF> (vec_select:<VEL> - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); + return + "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 
+ } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (SBINQOPS:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) )))) (const_int 1))))] @@ -2907,12 +3176,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2923,13 +3193,14 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds 
(operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -2939,12 +3210,13 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCOND> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2955,13 +3227,14 @@ [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:<VWIDE> 1 "register_operand" "w") (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:<VCON> 3 "register_operand" "<vwx>") + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (match_operand:SI 4 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); + emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; @@ -3041,7 +3314,28 @@ (sign_extend:<VWIDE> (vec_duplicate:VD_HSI (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + 
(match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3061,7 +3355,27 @@ (match_operand:SD_HSI 1 "register_operand" "w")) (sign_extend:<VWIDE> (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")])) )) (const_int 1)))] @@ -3076,11 +3390,12 @@ (define_expand "aarch64_sqdmull_lane<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VSD_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 
"register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3089,12 +3404,13 @@ (define_expand "aarch64_sqdmull_laneq<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VD_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode)); - emit_insn (gen_aarch64_sqdmull_lane<mode>_internal + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmull_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3])); DONE; }) @@ -3143,7 +3459,7 @@ (define_expand "aarch64_sqdmull2<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "w")] + (match_operand:VQ_HSI 2 "register_operand" "w")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); @@ -3165,7 +3481,30 @@ (sign_extend:<VWIDE> (vec_duplicate:<VHALF> (vec_select:<VEL> - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); + return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; + } + [(set_attr "type" 
"neon_sat_mul_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmull2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) (const_int 1)))] @@ -3180,12 +3519,13 @@ (define_expand "aarch64_sqdmull2_lane<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCOND> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3195,13 +3535,14 @@ (define_expand "aarch64_sqdmull2_laneq<mode>" [(match_operand:<VWIDE> 0 "register_operand" "=w") (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:<VCON> 2 "register_operand" "<vwx>") + (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_SIMD" { rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); - emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); + 
emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 7b6c2b38e..bf35031ec 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -1405,6 +1405,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); int ncrn, nvrn, nregs; bool allocate_ncrn, allocate_nvrn; + HOST_WIDE_INT size; /* We need to do this once per argument. */ if (pcum->aapcs_arg_processed) @@ -1412,6 +1413,11 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_arg_processed = true; + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ + size + = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), + UNITS_PER_WORD); + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, @@ -1462,9 +1468,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, } ncrn = pcum->aapcs_ncrn; - nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) - + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - + nregs = size / UNITS_PER_WORD; /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits @@ -1513,13 +1517,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, pcum->aapcs_nextncrn = NUM_ARG_REGS; /* The argument is passed on stack; record the needed number of words for - this argument (we can re-use NREGS) and align the total size if - necessary. */ + this argument and align the total size if necessary. 
*/ on_stack: - pcum->aapcs_stack_words = nregs; + pcum->aapcs_stack_words = size / UNITS_PER_WORD; if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, - 16 / UNITS_PER_WORD) + 1; + 16 / UNITS_PER_WORD); return; } @@ -6304,7 +6307,8 @@ aarch64_vector_mode_supported_p (enum machine_mode mode) || mode == V16QImode || mode == V2DImode || mode == V2SImode || mode == V4HImode || mode == V8QImode || mode == V2SFmode - || mode == V4SFmode || mode == V2DFmode)) + || mode == V4SFmode || mode == V2DFmode + || mode == V1DFmode)) return true; return false; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index c86a29d8e..df81045e9 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -2823,17 +2823,18 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_<mode>3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,rUs<cmode>")))] + (match_operand:GPI 1 "register_operand" "w,w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,0,rUs<cmode>")))] "" "@ sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 # + # asr\t%<w>0, %<w>1, %<w>2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")] + [(set_attr "simd" "yes,yes,yes,no") + (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>,shift_reg")] ) (define_split @@ -2842,11 +2843,13 @@ (match_operand:DI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:DI [(match_dup 1) 
(match_dup 2)] UNSPEC_SISD_SSHL))] - "" + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_SISD_SSHL))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_split @@ -2855,11 +2858,13 @@ (match_operand:SI 1 "aarch64_simd_register") (match_operand:QI 2 "aarch64_simd_register")))] "TARGET_SIMD && reload_completed" - [(set (match_dup 2) + [(set (match_dup 3) (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) (set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] - "" + (unspec:SI [(match_dup 1) (match_dup 3)] UNSPEC_SSHL_2S))] +{ + operands[3] = gen_lowpart (QImode, operands[0]); +} ) (define_insn "*aarch64_sisd_ushl" @@ -3608,6 +3613,7 @@ (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h index b03d11422..c01669b2c 100644 --- a/gcc-4.9/gcc/config/aarch64/arm_neon.h +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -21008,7 +21008,7 @@ vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); @@ -21030,8 +21030,7 @@ vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, 
__c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21059,7 +21058,7 @@ vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); @@ -21081,8 +21080,7 @@ vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21104,7 +21102,7 @@ vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); } @@ -21116,7 +21114,7 @@ vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); } @@ -21136,7 +21134,7 @@ vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, 
int16x8_t __c, +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); @@ -21158,8 +21156,7 @@ vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21187,7 +21184,7 @@ vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, int const __d) { return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); @@ -21209,8 +21206,7 @@ vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21232,7 +21228,7 @@ vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); 
} @@ -21244,7 +21240,7 @@ vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d) { return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); } @@ -21282,7 +21278,7 @@ vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); } @@ -21294,7 +21290,7 @@ vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); } @@ -21314,7 +21310,7 @@ vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); } @@ -21334,8 +21330,7 @@ vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) @@ -21363,7 +21358,7 @@ vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) } __extension__ 
static __inline int64x2_t __attribute__ ((__always_inline__)) -vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) { return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); } @@ -21383,8 +21378,7 @@ vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); - return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); + return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) @@ -21406,7 +21400,7 @@ vqdmullh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); } @@ -21418,7 +21412,7 @@ vqdmulls_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); } @@ -21594,7 +21588,7 @@ vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) -vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); } @@ -21606,7 +21600,7 @@ vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) -vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) 
+vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c) { return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); } diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md index f1339b8cc..e76e3ef10 100644 --- a/gcc-4.9/gcc/config/aarch64/iterators.md +++ b/gcc-4.9/gcc/config/aarch64/iterators.md @@ -396,14 +396,15 @@ (SI "SI") (HI "HI") (QI "QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI") +;; 64-bit container modes the inner or scalar source mode. +(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI") + (V4HI "V4HI") (V8HI "V4HI") (V2SI "V2SI") (V4SI "V2SI") (DI "DI") (V2DI "DI") (V2SF "V2SF") (V4SF "V2SF") (V2DF "DF")]) -;; Define container mode for lane selection. +;; 128-bit container modes the inner or scalar source mode. (define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") (V4HI "V8HI") (V8HI "V8HI") (V2SI "V4SI") (V4SI "V4SI") @@ -412,15 +413,6 @@ (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI")]) -;; Define container mode for lane selection. -(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI") - (V4HI "V8HI") (V8HI "V8HI") - (V2SI "V4SI") (V4SI "V4SI") - (DI "V2DI") (V2DI "V2DI") - (V2SF "V4SF") (V4SF "V4SF") - (V2DF "V2DF") (SI "V4SI") - (HI "V8HI") (QI "V16QI")]) - ;; Half modes of all vector modes. (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") (V4HI "V2HI") (V8HI "V4HI") diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c index dc07a02c0..d5c7908be 100644 --- a/gcc-4.9/gcc/config/alpha/alpha.c +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -8715,6 +8715,11 @@ alpha_handle_trap_shadows (void) } break; + case BARRIER: + /* __builtin_unreachable can expand to no code at all, + leaving (barrier) RTXes in the instruction stream. 
*/ + goto close_shadow_notrapb; + case JUMP_INSN: case CALL_INSN: case CODE_LABEL: @@ -8730,6 +8735,7 @@ alpha_handle_trap_shadows (void) n = emit_insn_before (gen_trapb (), i); PUT_MODE (n, TImode); PUT_MODE (i, TImode); + close_shadow_notrapb: trap_pending = 0; shadow.used.i = 0; shadow.used.fp = 0; diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h index 51d32a9d4..c8f4e45c6 100644 --- a/gcc-4.9/gcc/config/arm/aout.h +++ b/gcc-4.9/gcc/config/arm/aout.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef ASM_APP_ON diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def index 42f00b463..56041ec8b 100644 --- a/gcc-4.9/gcc/config/arm/arm-cores.def +++ b/gcc-4.9/gcc/config/arm/arm-cores.def @@ -14,8 +14,13 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Before using #include to read this file, define a macro: diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h index a8393975a..21902940e 100644 --- a/gcc-4.9/gcc/config/arm/arm-opts.h +++ b/gcc-4.9/gcc/config/arm/arm-opts.h @@ -13,8 +13,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #ifndef ARM_OPTS_H diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 83763555c..3c237cb6d 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -16739,11 +16739,12 @@ thumb1_reorg (void) rtx prev, insn = BB_END (bb); bool insn_clobbered = false; - while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn)) insn = PREV_INSN (insn); /* Find the last cbranchsi4_insn in basic block BB. */ - if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + if (insn == BB_HEAD (bb) + || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) continue; /* Get the register with which we are comparing. 
*/ @@ -28210,9 +28211,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, fputs (":\n", file); if (flag_pic) { - /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */ rtx tem = XEXP (DECL_RTL (function), 0); - tem = plus_constant (GET_MODE (tem), tem, -7); + /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC + pipeline offset is four rather than eight. Adjust the offset + accordingly. */ + tem = plus_constant (GET_MODE (tem), tem, + TARGET_THUMB1_ONLY ? -3 : -7); tem = gen_rtx_MINUS (GET_MODE (tem), tem, gen_rtx_SYMBOL_REF (Pmode, diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h index 4d9121436..ab5167a8b 100644 --- a/gcc-4.9/gcc/config/arm/arm.h +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -17,8 +17,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef GCC_ARM_H diff --git a/gcc-4.9/gcc/config/arm/arm.md b/gcc-4.9/gcc/config/arm/arm.md index 662465f2a..467f9ce4e 100644 --- a/gcc-4.9/gcc/config/arm/arm.md +++ b/gcc-4.9/gcc/config/arm/arm.md @@ -75,6 +75,8 @@ ] ) +;; UNSPEC_VOLATILE Usage: + ;;--------------------------------------------------------------------------- ;; Attributes @@ -8349,8 +8351,8 @@ (define_insn_and_split "*arm_cmpdi_unsigned" [(set (reg:CC_CZ CC_REGNUM) - (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") - (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,Di,rDi")))] "TARGET_32BIT" "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" @@ -8370,9 +8372,9 @@ operands[1] = gen_lowpart (SImode, operands[1]); } [(set_attr "conds" "set") - (set_attr "enabled_for_depr_it" "yes,yes,no") - (set_attr "arch" "t2,t2,*") - (set_attr "length" "6,6,8") + (set_attr "enabled_for_depr_it" "yes,yes,no,*") + (set_attr "arch" "t2,t2,t2,a") + (set_attr "length" "6,6,10,8") (set_attr "type" "multiple")] ) @@ -9860,6 +9862,7 @@ "TARGET_32BIT" "%i1%?\\t%0, %2, %4%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "4") (set_attr "arch" "a,t2,t2,a") ;; Thumb2 doesn't allow the stack pointer to be used for diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h index 37a6e611b..95735433d 100644 --- a/gcc-4.9/gcc/config/arm/arm_neon.h +++ b/gcc-4.9/gcc/config/arm/arm_neon.h @@ -1,5 +1,4 @@ -/* ARM NEON intrinsics include file. This file is generated automatically - using neon-gen.ml. Please do not edit manually. +/* ARM NEON intrinsics include file. Copyright (C) 2006-2014 Free Software Foundation, Inc. Contributed by CodeSourcery. 
@@ -7707,12 +7706,32 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); } +/* For big-endian, the shuffle masks for ZIP, UZP and TRN must be changed as + follows. (nelt = the number of elements within a vector.) + + Firstly, a value of N within a mask, becomes (N ^ (nelt - 1)), as gcc vector + extension's indexing scheme is reversed *within each vector* (relative to the + neon intrinsics view), but without changing which of the two vectors. + + Secondly, the elements within each mask are reversed, as the mask is itself a + vector, and will itself be loaded in reverse order (again, relative to the + neon intrinsics view, i.e. that would result from a "vld1" instruction). */ + __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vtrn_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7720,8 +7739,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vtrn_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7729,8 +7753,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vtrn_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7738,8 +7771,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vtrn_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7747,8 +7785,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vtrn_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = 
(poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7756,8 +7803,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vtrn_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7765,8 +7817,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vtrn_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7774,8 +7831,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vtrn_f32 (float32x2_t __a, float32x2_t 
__b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7783,8 +7845,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vtrn_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7792,8 +7859,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vtrnq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 
12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7801,8 +7877,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vtrnq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7810,8 +7895,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vtrnq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7819,8 +7909,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vtrnq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); 
+#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7828,8 +7923,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7837,8 +7941,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 
10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7846,8 +7959,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); +#endif return __rv; } @@ -7855,8 +7973,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); +#endif return __rv; } @@ -7864,8 +7991,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] 
= (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif return __rv; } @@ -7873,8 +8009,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vzip_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7882,8 +8027,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vzip_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7891,8 +8041,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vzip_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7900,8 +8059,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vzip_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7909,8 +8073,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vzip_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, 
(uint8x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7918,8 +8091,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vzip_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7927,8 +8105,13 @@ __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) vzip_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7936,8 +8119,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vzip_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle 
(__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7945,8 +8133,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vzip_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -7954,8 +8147,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vzipq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return 
__rv; } @@ -7963,8 +8165,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vzipq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -7972,8 +8183,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vzipq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7981,8 +8197,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vzipq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 
0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -7990,8 +8211,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vzipq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -7999,8 +8229,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vzipq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -8008,8 +8247,13 @@ 
__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vzipq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); +#endif return __rv; } @@ -8017,8 +8261,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vzipq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +#endif return __rv; } @@ -8026,8 +8279,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vzipq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) 
{ 4, 12, 5, 13, 6, 14, 7, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 12, 4, 13, 5, 14, 6, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 0, 9, 1, 10, 2, 11, 3 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif return __rv; } @@ -8035,8 +8297,17 @@ __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) vuzp_s8 (int8x8_t __a, int8x8_t __b) { int8x8x2_t __rv; - __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8044,8 +8315,13 @@ __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) vuzp_s16 (int16x4_t __a, int16x4_t __b) { int16x4x2_t __rv; - __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8053,8 +8329,13 @@ __extension__ static __inline int32x2x2_t __attribute__ 
((__always_inline__)) vuzp_s32 (int32x2_t __a, int32x2_t __b) { int32x2x2_t __rv; - __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8062,8 +8343,13 @@ __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vuzp_f32 (float32x2_t __a, float32x2_t __b) { float32x2x2_t __rv; - __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8071,8 +8357,17 @@ __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) vuzp_u8 (uint8x8_t __a, uint8x8_t __b) { uint8x8x2_t __rv; - __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); 
+#endif return __rv; } @@ -8080,8 +8375,13 @@ __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) vuzp_u16 (uint16x4_t __a, uint16x4_t __b) { uint16x4x2_t __rv; - __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8089,8 +8389,13 @@ __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) vuzp_u32 (uint32x2_t __a, uint32x2_t __b) { uint32x2x2_t __rv; - __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); - __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 3, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 2, 0 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); +#endif return __rv; } @@ -8098,8 +8403,17 @@ __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) vuzp_p8 (poly8x8_t __a, poly8x8_t __b) { poly8x8x2_t __rv; - __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, 
(uint8x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8107,8 +8421,13 @@ __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) vuzp_p16 (poly16x4_t __a, poly16x4_t __b) { poly16x4x2_t __rv; - __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8116,8 +8435,17 @@ __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vuzpq_s8 (int8x16_t __a, int8x16_t __b) { int8x16x2_t __rv; - __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8125,8 +8453,17 @@ __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vuzpq_s16 (int16x8_t __a, int16x8_t __b) { int16x8x2_t __rv; - 
__rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8134,8 +8471,13 @@ __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vuzpq_s32 (int32x4_t __a, int32x4_t __b) { int32x4x2_t __rv; - __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8143,8 +8485,13 @@ __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vuzpq_f32 (float32x4_t __a, float32x4_t __b) { float32x4x2_t __rv; - __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); 
+#endif return __rv; } @@ -8152,8 +8499,17 @@ __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) { uint8x16x2_t __rv; - __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8161,8 +8517,17 @@ __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) { uint16x8x2_t __rv; - __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } @@ -8170,8 +8535,13 @@ __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) { uint32x4x2_t __rv; - __rv.val[0] = (uint32x4_t) 
__builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); - __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); +#endif return __rv; } @@ -8179,8 +8549,17 @@ __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) { poly8x16x2_t __rv; - __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); - __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t) + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); +#endif return __rv; } @@ -8188,8 +8567,17 @@ __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) { poly16x8x2_t __rv; - __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); - __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 9, 11, 13, 15, 1, 3, 5, 7 }); + __rv.val[1] = 
__builtin_shuffle (__a, __b, (uint16x8_t) + { 8, 10, 12, 14, 0, 2, 4, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif return __rv; } diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h index bc223f8e3..7a576ac46 100644 --- a/gcc-4.9/gcc/config/arm/bpabi.h +++ b/gcc-4.9/gcc/config/arm/bpabi.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* Use the AAPCS ABI by default. */ diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h index 2edf520de..15a32fb8a 100644 --- a/gcc-4.9/gcc/config/arm/elf.h +++ b/gcc-4.9/gcc/config/arm/elf.h @@ -16,8 +16,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see <http://www.gnu.org/licenses/>. */ #ifndef OBJECT_FORMAT_ELF diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h index 4d42cbfc8..350639f32 100644 --- a/gcc-4.9/gcc/config/arm/linux-eabi.h +++ b/gcc-4.9/gcc/config/arm/linux-eabi.h @@ -68,8 +68,9 @@ GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ #undef GLIBC_DYNAMIC_LINKER -#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" -#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" + +#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.3" +#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT RUNTIME_ROOT_PREFIX "/lib/ld-linux-armhf.so.3" #define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT #define GLIBC_DYNAMIC_LINKER \ diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h index 5dc3328e8..e825ae48c 100644 --- a/gcc-4.9/gcc/config/arm/linux-elf.h +++ b/gcc-4.9/gcc/config/arm/linux-elf.h @@ -14,8 +14,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* elfos.h should have already been included. 
Now just override @@ -57,7 +62,7 @@ #define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2" #define LINUX_TARGET_LINK_SPEC "%{h*} \ %{static:-Bstatic} \ diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h index 52a739c26..1dd043782 100644 --- a/gcc-4.9/gcc/config/arm/linux-gas.h +++ b/gcc-4.9/gcc/config/arm/linux-gas.h @@ -15,8 +15,13 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ /* This is how we tell the assembler that a symbol is weak. diff --git a/gcc-4.9/gcc/config/arm/linux-grte.h b/gcc-4.9/gcc/config/arm/linux-grte.h new file mode 100644 index 000000000..7ee5806b7 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-grte.h @@ -0,0 +1,27 @@ +/* Definitions for ARM Linux-based GRTE (Google RunTime Environment). + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Chris Demetriou. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS LINUX_GRTE_EXTRA_SPECS diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml deleted file mode 100644 index 5788a533e..000000000 --- a/gcc-4.9/gcc/config/arm/neon-docgen.ml +++ /dev/null @@ -1,424 +0,0 @@ -(* ARM NEON documentation generator. - - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. 
- - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-docgen neon.cmo neon-docgen.ml - - Run with: - /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi -*) - -open Neon - -(* The combined "ops" and "reinterp" table. *) -let ops_reinterp = reinterp @ ops - -(* Helper functions for extracting things from the "ops" table. *) -let single_opcode desired_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - if opcode = desired_opcode then row :: got_so_far - else got_so_far - ) [] ops_reinterp - -let multiple_opcodes desired_opcodes () = - List.fold_left (fun got_so_far -> - fun desired_opcode -> - (single_opcode desired_opcode ()) @ got_so_far) - [] desired_opcodes - -let ldx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vldx n | Vldx_lane n | Vldx_dup n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let stx_opcode number () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vstx n | Vstx_lane n when n = number -> - row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbl_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbl _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -let tbx_opcode () = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (opcode, _, _, _, _, _) -> - match opcode with - Vtbx _ -> row :: got_so_far - | _ -> got_so_far - ) [] ops_reinterp - -(* The groups of intrinsics. 
*) -let intrinsic_groups = - [ "Addition", single_opcode Vadd; - "Multiplication", single_opcode Vmul; - "Multiply-accumulate", single_opcode Vmla; - "Multiply-subtract", single_opcode Vmls; - "Fused-multiply-accumulate", single_opcode Vfma; - "Fused-multiply-subtract", single_opcode Vfms; - "Round to integral (to nearest, ties to even)", single_opcode Vrintn; - "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta; - "Round to integral (towards +Inf)", single_opcode Vrintp; - "Round to integral (towards -Inf)", single_opcode Vrintm; - "Round to integral (towards 0)", single_opcode Vrintz; - "Subtraction", single_opcode Vsub; - "Comparison (equal-to)", single_opcode Vceq; - "Comparison (greater-than-or-equal-to)", single_opcode Vcge; - "Comparison (less-than-or-equal-to)", single_opcode Vcle; - "Comparison (greater-than)", single_opcode Vcgt; - "Comparison (less-than)", single_opcode Vclt; - "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; - "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; - "Comparison (absolute greater-than)", single_opcode Vcagt; - "Comparison (absolute less-than)", single_opcode Vcalt; - "Test bits", single_opcode Vtst; - "Absolute difference", single_opcode Vabd; - "Absolute difference and accumulate", single_opcode Vaba; - "Maximum", single_opcode Vmax; - "Minimum", single_opcode Vmin; - "Pairwise add", single_opcode Vpadd; - "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; - "Folding maximum", single_opcode Vpmax; - "Folding minimum", single_opcode Vpmin; - "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; - "Vector shift left", single_opcode Vshl; - "Vector shift left by constant", single_opcode Vshl_n; - "Vector shift right by constant", single_opcode Vshr_n; - "Vector shift right by constant and accumulate", single_opcode Vsra_n; - "Vector shift right and insert", single_opcode Vsri; - "Vector shift left and insert", single_opcode Vsli; - "Absolute 
value", single_opcode Vabs; - "Negation", single_opcode Vneg; - "Bitwise not", single_opcode Vmvn; - "Count leading sign bits", single_opcode Vcls; - "Count leading zeros", single_opcode Vclz; - "Count number of set bits", single_opcode Vcnt; - "Reciprocal estimate", single_opcode Vrecpe; - "Reciprocal square-root estimate", single_opcode Vrsqrte; - "Get lanes from a vector", single_opcode Vget_lane; - "Set lanes in a vector", single_opcode Vset_lane; - "Create vector from literal bit pattern", single_opcode Vcreate; - "Set all lanes to the same value", - multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; - "Combining vectors", single_opcode Vcombine; - "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; - "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; - "Move, single_opcode narrowing", single_opcode Vmovn; - "Move, single_opcode long", single_opcode Vmovl; - "Table lookup", tbl_opcode; - "Extended table lookup", tbx_opcode; - "Multiply, lane", single_opcode Vmul_lane; - "Long multiply, lane", single_opcode Vmull_lane; - "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; - "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; - "Multiply-accumulate, lane", single_opcode Vmla_lane; - "Multiply-subtract, lane", single_opcode Vmls_lane; - "Vector multiply by scalar", single_opcode Vmul_n; - "Vector long multiply by scalar", single_opcode Vmull_n; - "Vector saturating doubling long multiply by scalar", - single_opcode Vqdmull_n; - "Vector saturating doubling multiply high by scalar", - single_opcode Vqdmulh_n; - "Vector multiply-accumulate by scalar", single_opcode Vmla_n; - "Vector multiply-subtract by scalar", single_opcode Vmls_n; - "Vector extract", single_opcode Vext; - "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; - "Bit selection", single_opcode Vbsl; - "Transpose elements", single_opcode Vtrn; - "Zip elements", single_opcode Vzip; - "Unzip elements", single_opcode Vuzp; - "Element/structure loads, VLD1 
variants", ldx_opcode 1; - "Element/structure stores, VST1 variants", stx_opcode 1; - "Element/structure loads, VLD2 variants", ldx_opcode 2; - "Element/structure stores, VST2 variants", stx_opcode 2; - "Element/structure loads, VLD3 variants", ldx_opcode 3; - "Element/structure stores, VST3 variants", stx_opcode 3; - "Element/structure loads, VLD4 variants", ldx_opcode 4; - "Element/structure stores, VST4 variants", stx_opcode 4; - "Logical operations (AND)", single_opcode Vand; - "Logical operations (OR)", single_opcode Vorr; - "Logical operations (exclusive OR)", single_opcode Veor; - "Logical operations (AND-NOT)", single_opcode Vbic; - "Logical operations (OR-NOT)", single_opcode Vorn; - "Reinterpret casts", single_opcode Vreinterp ] - -(* Given an intrinsic shape, produce a string to document the corresponding - operand shapes. *) -let rec analyze_shape shape = - let rec n_things n thing = - match n with - 0 -> [] - | n -> thing :: (n_things (n - 1) thing) - in - let rec analyze_shape_elt reg_no elt = - match elt with - Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" - | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" - | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" - | Immed -> "#@var{0}" - | VecArray (1, elt) -> - let elt_regexp = analyze_shape_elt 0 elt in - "@{" ^ elt_regexp ^ "@}" - | VecArray (n, elt) -> - let rec f m = - match m with - 0 -> [] - | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) - in - let ops = List.rev (f n) in - "@{" ^ (commas (fun x -> x) ops "") ^ "@}" - | (PtrTo elt | CstPtrTo elt) -> - "[" ^ (analyze_shape_elt reg_no elt) ^ "]" - | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" - | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" - | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" - | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) - in - match shape with - All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" - | Long -> (analyze_shape_elt 0 
Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ - (analyze_shape_elt 0 elt) - | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Dreg) - | Wide_noreg elt -> analyze_shape (Long_noreg elt) - | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ - ", " ^ (analyze_shape_elt 0 Qreg) - | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" - | By_scalar Dreg -> - analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) - | By_scalar Qreg -> - analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) - | By_scalar _ -> assert false - | Wide_lane -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Wide_scalar -> - analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) - | Pair_result elt -> - let elt_regexp = analyze_shape_elt 0 elt in - let elt_regexp' = analyze_shape_elt 1 elt in - elt_regexp ^ ", " ^ elt_regexp' - | Unary_scalar _ -> "FIXME Unary_scalar" - | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) - | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) - | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) - -(* Document a single intrinsic. 
*) -let describe_intrinsic first chan - (elt_ty, (_, features, shape, name, munge, _)) = - let c_arity, new_elt_ty = munge shape elt_ty in - let c_types = strings_of_arity c_arity in - Printf.fprintf chan "@itemize @bullet\n"; - let item_code = if first then "@item" else "@itemx" in - Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) - (intrinsic_name name) (string_of_elt elt_ty); - Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); - if not (List.exists (fun feature -> feature = No_op) features) then - begin - let print_one_insn name = - Printf.fprintf chan "@code{"; - let no_suffix = (new_elt_ty = NoElts) in - let name_with_suffix = - if no_suffix then name - else name ^ "." ^ (string_of_elt_dots new_elt_ty) - in - let possible_operands = analyze_all_shapes features shape - analyze_shape - in - let rec print_one_possible_operand op = - Printf.fprintf chan "%s %s}" name_with_suffix op - in - (* If the intrinsic expands to multiple instructions, we assume - they are all of the same form. *) - print_one_possible_operand (List.hd possible_operands) - in - let rec print_insns names = - match names with - [] -> () - | [name] -> print_one_insn name - | name::names -> (print_one_insn name; - Printf.fprintf chan " @emph{or} "; - print_insns names) - in - let insn_names = get_insn_names features name in - Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; - print_insns insn_names; - Printf.fprintf chan "\n" - end; - Printf.fprintf chan "@end itemize\n"; - Printf.fprintf chan "\n\n" - -(* Document a group of intrinsics. *) -let document_group chan (group_title, group_extractor) = - (* Extract the rows in question from the ops table and then turn them - into a list of intrinsics. 
*) - let intrinsics = - List.fold_left (fun got_so_far -> - fun row -> - match row with - (_, _, _, _, _, elt_tys) -> - List.fold_left (fun got_so_far' -> - fun elt_ty -> - (elt_ty, row) :: got_so_far') - got_so_far elt_tys - ) [] (group_extractor ()) - in - (* Emit the title for this group. *) - Printf.fprintf chan "@subsubsection %s\n\n" group_title; - (* Emit a description of each intrinsic. *) - List.iter (describe_intrinsic true chan) intrinsics; - (* Close this group. *) - Printf.fprintf chan "\n\n" - -let gnu_header chan = - List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ - "@c Copyright (C) 2006-2014 Free Software Foundation, Inc."; - "@c This is part of the GCC manual."; - "@c For copying conditions, see the file gcc.texi."; - ""; - "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; - "@c Please do not edit manually."] - -let crypto_doc = -" -@itemize @bullet -@item poly128_t vldrq_p128(poly128_t const *) -@end itemize - -@itemize @bullet -@item void vstrq_p128(poly128_t *, poly128_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) -@end itemize - -@itemize @bullet -@item uint32_t vsha1h_u32 (uint32_t) -@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su0q_u32 
(uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}} -@end itemize - -@itemize @bullet -@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) -@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_p64 (poly64_t a, poly64_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize - -@itemize @bullet -@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) -@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} -@end itemize -" - -(* Program entry point. 
*) -let _ = - if Array.length Sys.argv <> 2 then - failwith "Usage: neon-docgen <output filename>" - else - let file = Sys.argv.(1) in - try - let chan = open_out file in - gnu_header chan; - List.iter (document_group chan) intrinsic_groups; - Printf.fprintf chan "%s\n" crypto_doc; - close_out chan - with Sys_error sys -> - failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml deleted file mode 100644 index f3dd86b0a..000000000 --- a/gcc-4.9/gcc/config/arm/neon-gen.ml +++ /dev/null @@ -1,520 +0,0 @@ -(* Auto-generate ARM Neon intrinsics header file. - Copyright (C) 2006-2014 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 3, or (at your option) any later - version. - - GCC is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. - - This is an O'Caml program. The O'Caml compiler is available from: - - http://caml.inria.fr/ - - Or from your favourite OS's friendly packaging system. Tested with version - 3.09.2, though other versions will probably work too. - - Compile with: - ocamlc -c neon.ml - ocamlc -o neon-gen neon.cmo neon-gen.ml - - Run with: - ./neon-gen > arm_neon.h -*) - -open Neon - -(* The format codes used in the following functions are documented at: - http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ - #6_printflikefunctionsforprettyprinting - (one line, remove the backslash.) 
-*) - -(* Following functions can be used to approximate GNU indentation style. *) -let start_function () = - Format.printf "@[<v 0>"; - ref 0 - -let end_function nesting = - match !nesting with - 0 -> Format.printf "@;@;@]" - | _ -> failwith ("Bad nesting (ending function at level " - ^ (string_of_int !nesting) ^ ")") - -let open_braceblock nesting = - begin match !nesting with - 0 -> Format.printf "@,@<0>{@[<v 2>@," - | _ -> Format.printf "@,@[<v 2> @<0>{@[<v 2>@," - end; - incr nesting - -let close_braceblock nesting = - decr nesting; - match !nesting with - 0 -> Format.printf "@]@,@<0>}" - | _ -> Format.printf "@]@,@<0>}@]" - -let print_function arity fnname body = - let ffmt = start_function () in - Format.printf "__extension__ static __inline "; - let inl = "__attribute__ ((__always_inline__))" in - begin match arity with - Arity0 ret -> - Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname - | Arity1 (ret, arg0) -> - Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname - (string_of_vectype arg0) - | Arity2 (ret, arg0, arg1) -> - Format.printf "%s %s@,%s (%s __a, %s __b)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) - | Arity3 (ret, arg0, arg1, arg2) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - | Arity4 (ret, arg0, arg1, arg2, arg3) -> - Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" - (string_of_vectype ret) inl fnname (string_of_vectype arg0) - (string_of_vectype arg1) (string_of_vectype arg2) - (string_of_vectype arg3) - end; - open_braceblock ffmt; - let rec print_lines = function - [] -> () - | "" :: lines -> print_lines lines - | [line] -> Format.printf "%s" line - | line::lines -> Format.printf "%s@," line ; print_lines lines in - print_lines body; - close_braceblock ffmt; - end_function ffmt - -let union_string num elts base = - let itype 
= inttype_for_array num elts in - let iname = string_of_inttype itype - and sname = string_of_vectype (T_arrayof (num, elts)) in - Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base - -let rec signed_ctype = function - T_uint8x8 | T_poly8x8 -> T_int8x8 - | T_uint8x16 | T_poly8x16 -> T_int8x16 - | T_uint16x4 | T_poly16x4 -> T_int16x4 - | T_uint16x8 | T_poly16x8 -> T_int16x8 - | T_uint32x2 -> T_int32x2 - | T_uint32x4 -> T_int32x4 - | T_uint64x1 -> T_int64x1 - | T_uint64x2 -> T_int64x2 - | T_poly64x2 -> T_int64x2 - (* Cast to types defined by mode in arm.c, not random types pulled in from - the <stdint.h> header in use. This fixes incompatible pointer errors when - compiling with C++. *) - | T_uint8 | T_int8 -> T_intQI - | T_uint16 | T_int16 -> T_intHI - | T_uint32 | T_int32 -> T_intSI - | T_uint64 | T_int64 -> T_intDI - | T_float16 -> T_floatHF - | T_float32 -> T_floatSF - | T_poly8 -> T_intQI - | T_poly16 -> T_intHI - | T_poly64 -> T_intDI - | T_poly128 -> T_intTI - | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) - | T_ptrto elt -> T_ptrto (signed_ctype elt) - | T_const elt -> T_const (signed_ctype elt) - | x -> x - -let add_cast ctype cval = - let stype = signed_ctype ctype in - if ctype <> stype then - Printf.sprintf "(%s) %s" (string_of_vectype stype) cval - else - cval - -let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" - -(* Return a tuple of a list of declarations to go at the start of the function, - and a list of statements needed to return THING. 
*) -let return arity thing = - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let uname = union_string num vec "__rv" in - [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] - | T_void -> - [], [thing ^ ";"] - | _ -> - [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] - end - -let mask_shape_for_shuffle = function - All (num, reg) -> All (num, reg) - | Pair_result reg -> All (2, reg) - | _ -> failwith "mask_for_shuffle" - -let mask_elems shuffle shape elttype part = - let elem_size = elt_width elttype in - let num_elems = - match regmap shape 0 with - Dreg -> 64 / elem_size - | Qreg -> 128 / elem_size - | _ -> failwith "mask_elems" in - shuffle elem_size num_elems part - -(* Return a tuple of a list of declarations 0and a list of statements needed - to implement an intrinsic using __builtin_shuffle. SHUFFLE is a function - which returns a list of elements suitable for using as a mask. 
*) - -let shuffle_fn shuffle shape arity elttype = - let mshape = mask_shape_for_shuffle shape in - let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in - let masktype_str = string_of_vectype masktype in - let shuffle_res = type_for_elt mshape elttype 0 in - let shuffle_res_str = string_of_vectype shuffle_res in - match arity with - Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) - | Arity4 (ret, _, _, _, _) -> - begin match ret with - T_arrayof (num, vec) -> - let elems1 = mask_elems shuffle mshape elttype `lo - and elems2 = mask_elems shuffle mshape elttype `hi in - let mask1 = (String.concat ", " (List.map string_of_int elems1)) - and mask2 = (String.concat ", " (List.map string_of_int elems2)) in - let shuf1 = Printf.sprintf - "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask1 - and shuf2 = Printf.sprintf - "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" - shuffle_res_str masktype_str mask2 in - [Printf.sprintf "%s __rv;" (string_of_vectype ret);], - [shuf1; shuf2; "return __rv;"] - | _ -> - let elems = mask_elems shuffle mshape elttype `lo in - let mask = (String.concat ", " (List.map string_of_int elems)) in - let shuf = Printf.sprintf - "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in - [""], - [shuf] - end - -let rec element_type ctype = - match ctype with - T_arrayof (_, v) -> element_type v - | _ -> ctype - -let params ps = - let pdecls = ref [] in - let ptype t p = - match t with - T_arrayof (num, elts) -> - let uname = union_string num elts (p ^ "u") in - let decl = Printf.sprintf "%s = { %s };" uname p in - pdecls := decl :: !pdecls; - p ^ "u.__o" - | _ -> add_cast t p in - let plist = match ps with - Arity0 _ -> [] - | Arity1 (_, t1) -> [ptype t1 "__a"] - | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] - | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] - | Arity4 (_, t1, 
t2, t3, t4) -> - [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in - !pdecls, plist - -let modify_params features plist = - let is_flipped = - List.exists (function Flipped _ -> true | _ -> false) features in - if is_flipped then |