Diffstat (limited to 'gcc-4.8/gcc/ipa-inline.c')
-rw-r--r-- | gcc-4.8/gcc/ipa-inline.c | 2137
1 file changed, 2137 insertions, 0 deletions
diff --git a/gcc-4.8/gcc/ipa-inline.c b/gcc-4.8/gcc/ipa-inline.c new file mode 100644 index 000000000..90b2a13fc --- /dev/null +++ b/gcc-4.8/gcc/ipa-inline.c @@ -0,0 +1,2137 @@ +/* Inlining decision heuristics. + Copyright (C) 2003-2013 Free Software Foundation, Inc. + Contributed by Jan Hubicka + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Inlining decision heuristics + + The implementation of inliner is organized as follows: + + inlining heuristics limits + + can_inline_edge_p allow to check that particular inlining is allowed + by the limits specified by user (allowed function growth, growth and so + on). + + Functions are inlined when it is obvious the result is profitable (such + as functions called once or when inlining reduce code size). + In addition to that we perform inlining of small functions and recursive + inlining. + + inlining heuristics + + The inliner itself is split into two passes: + + pass_early_inlining + + Simple local inlining pass inlining callees into current function. + This pass makes no use of whole unit analysis and thus it can do only + very simple decisions based on local properties. + + The strength of the pass is that it is run in topological order + (reverse postorder) on the callgraph. Functions are converted into SSA + form just before this pass and optimized subsequently. As a result, the + callees of the function seen by the early inliner was already optimized + and results of early inlining adds a lot of optimization opportunities + for the local optimization. + + The pass handle the obvious inlining decisions within the compilation + unit - inlining auto inline functions, inlining for size and + flattening. + + main strength of the pass is the ability to eliminate abstraction + penalty in C++ code (via combination of inlining and early + optimization) and thus improve quality of analysis done by real IPA + optimizers. + + Because of lack of whole unit knowledge, the pass can not really make + good code size/performance tradeoffs. It however does very simple + speculative inlining allowing code size to grow by + EARLY_INLINING_INSNS when callee is leaf function. In this case the + optimizations performed later are very likely to eliminate the cost. + + pass_ipa_inline + + This is the real inliner able to handle inlining with whole program + knowledge. It performs following steps: + + 1) inlining of small functions. This is implemented by greedy + algorithm ordering all inlinable cgraph edges by their badness and + inlining them in this order as long as inline limits allows doing so. + + This heuristics is not very good on inlining recursive calls. Recursive + calls can be inlined with results similar to loop unrolling. To do so, + special purpose recursive inliner is executed on function when + recursive edge is met as viable candidate. + + 2) Unreachable functions are removed from callgraph. 
Inlining leads + to devirtualization and other modification of callgraph so functions + may become unreachable during the process. Also functions declared as + extern inline or virtual functions are removed, since after inlining + we no longer need the offline bodies. + + 3) Functions called once and not exported from the unit are inlined. + This should almost always lead to reduction of code size by eliminating + the need for offline copy of the function. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tree-inline.h" +#include "langhooks.h" +#include "flags.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "gimple-pretty-print.h" +#include "params.h" +#include "fibheap.h" +#include "intl.h" +#include "tree-pass.h" +#include "coverage.h" +#include "ggc.h" +#include "rtl.h" +#include "tree-flow.h" +#include "ipa-prop.h" +#include "except.h" +#include "target.h" +#include "ipa-inline.h" +#include "ipa-utils.h" + +/* Statistics we collect about inlining algorithm. */ +static int overall_size; +static gcov_type max_count; + +/* Return false when inlining edge E would lead to violating + limits on function unit growth or stack usage growth. + + The relative function body growth limit is present generally + to avoid problems with non-linear behavior of the compiler. + To allow inlining huge functions into tiny wrapper, the limit + is always based on the bigger of the two functions considered. + + For stack growth limits we always base the growth in stack usage + of the callers. We want to prevent applications from segfaulting + on stack overflow when functions with huge stack frames gets + inlined. */ + +static bool +caller_growth_limits (struct cgraph_edge *e) +{ + struct cgraph_node *to = e->caller; + struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL); + int newsize; + int limit = 0; + HOST_WIDE_INT stack_size_limit = 0, inlined_stack; + struct inline_summary *info, *what_info, *outer_info = inline_summary (to); + + /* Look for function e->caller is inlined to. While doing + so work out the largest function body on the way. As + described above, we want to base our function growth + limits based on that. Not on the self size of the + outer function, not on the self size of inline code + we immediately inline to. This is the most relaxed + interpretation of the rule "do not grow large functions + too much in order to prevent compiler from exploding". */ + while (true) + { + info = inline_summary (to); + if (limit < info->self_size) + limit = info->self_size; + if (stack_size_limit < info->estimated_self_stack_size) + stack_size_limit = info->estimated_self_stack_size; + if (to->global.inlined_to) + to = to->callers->caller; + else + break; + } + + what_info = inline_summary (what); + + if (limit < what_info->self_size) + limit = what_info->self_size; + + limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; + + /* Check the size after inlining against the function limits. But allow + the function to shrink if it went over the limits by forced inlining. */ + newsize = estimate_size_after_inlining (to, e); + if (newsize >= info->size + && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) + && newsize > limit) + { + e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT; + return false; + } + + if (!what_info->estimated_stack_size) + return true; + + /* FIXME: Stack size limit often prevents inlining in Fortran programs + due to large i/o datastructures used by the Fortran front-end. 
+ We ought to ignore this limit when we know that the edge is executed + on every invocation of the caller (i.e. its call statement dominates + exit block). We do not track this information, yet. */ + stack_size_limit += ((gcov_type)stack_size_limit + * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100); + + inlined_stack = (outer_info->stack_frame_offset + + outer_info->estimated_self_stack_size + + what_info->estimated_stack_size); + /* Check new stack consumption with stack consumption at the place + stack is used. */ + if (inlined_stack > stack_size_limit + /* If function already has large stack usage from sibling + inline call, we can inline, too. + This bit overoptimistically assume that we are good at stack + packing. */ + && inlined_stack > info->estimated_stack_size + && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME)) + { + e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; + return false; + } + return true; +} + +/* Dump info about why inlining has failed. */ + +static void +report_inline_failed_reason (struct cgraph_edge *e) +{ + if (dump_file) + { + fprintf (dump_file, " not inlinable: %s/%i -> %s/%i, %s\n", + xstrdup (cgraph_node_name (e->caller)), e->caller->uid, + xstrdup (cgraph_node_name (e->callee)), e->callee->uid, + cgraph_inline_failed_string (e->inline_failed)); + } +} + +/* Decide if we can inline the edge and possibly update + inline_failed reason. + We check whether inlining is possible at all and whether + caller growth limits allow doing so. + + if REPORT is true, output reason to the dump file. */ + +static bool +can_inline_edge_p (struct cgraph_edge *e, bool report) +{ + bool inlinable = true; + enum availability avail; + struct cgraph_node *callee + = cgraph_function_or_thunk_node (e->callee, &avail); + tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->symbol.decl); + tree callee_tree + = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->symbol.decl) : NULL; + struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->symbol.decl); + struct function *callee_cfun + = callee ? DECL_STRUCT_FUNCTION (callee->symbol.decl) : NULL; + + if (!caller_cfun && e->caller->clone_of) + caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->symbol.decl); + + if (!callee_cfun && callee && callee->clone_of) + callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->symbol.decl); + + gcc_assert (e->inline_failed); + + if (!callee || !callee->analyzed) + { + e->inline_failed = CIF_BODY_NOT_AVAILABLE; + inlinable = false; + } + else if (!inline_summary (callee)->inlinable) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINABLE; + inlinable = false; + } + else if (avail <= AVAIL_OVERWRITABLE) + { + e->inline_failed = CIF_OVERWRITABLE; + return false; + } + else if (e->call_stmt_cannot_inline_p) + { + e->inline_failed = CIF_MISMATCHED_ARGUMENTS; + inlinable = false; + } + /* Don't inline if the functions have different EH personalities. */ + else if (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl) + && DECL_FUNCTION_PERSONALITY (callee->symbol.decl) + && (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl) + != DECL_FUNCTION_PERSONALITY (callee->symbol.decl))) + { + e->inline_failed = CIF_EH_PERSONALITY; + inlinable = false; + } + /* TM pure functions should not be inlined into non-TM_pure + functions. */ + else if (is_tm_pure (callee->symbol.decl) + && !is_tm_pure (e->caller->symbol.decl)) + { + e->inline_failed = CIF_UNSPECIFIED; + inlinable = false; + } + /* Don't inline if the callee can throw non-call exceptions but the + caller cannot. 
+ FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing. + Move the flag into cgraph node or mirror it in the inline summary. */ + else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions + && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions)) + { + e->inline_failed = CIF_NON_CALL_EXCEPTIONS; + inlinable = false; + } + /* Check compatibility of target optimization options. */ + else if (!targetm.target_option.can_inline_p (e->caller->symbol.decl, + callee->symbol.decl)) + { + e->inline_failed = CIF_TARGET_OPTION_MISMATCH; + inlinable = false; + } + /* Check if caller growth allows the inlining. */ + else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl) + && !lookup_attribute ("flatten", + DECL_ATTRIBUTES + (e->caller->global.inlined_to + ? e->caller->global.inlined_to->symbol.decl + : e->caller->symbol.decl)) + && !caller_growth_limits (e)) + inlinable = false; + /* Don't inline a function with a higher optimization level than the + caller. FIXME: this is really just tip of iceberg of handling + optimization attribute. */ + else if (caller_tree != callee_tree) + { + struct cl_optimization *caller_opt + = TREE_OPTIMIZATION ((caller_tree) + ? caller_tree + : optimization_default_node); + + struct cl_optimization *callee_opt + = TREE_OPTIMIZATION ((callee_tree) + ? callee_tree + : optimization_default_node); + + if (((caller_opt->x_optimize > callee_opt->x_optimize) + || (caller_opt->x_optimize_size != callee_opt->x_optimize_size)) + /* gcc.dg/pr43564.c. Look at forced inline even in -O0. */ + && !DECL_DISREGARD_INLINE_LIMITS (e->callee->symbol.decl)) + { + e->inline_failed = CIF_OPTIMIZATION_MISMATCH; + inlinable = false; + } + } + + if (!inlinable && report) + report_inline_failed_reason (e); + return inlinable; +} + + +/* Return true if the edge E is inlinable during early inlining. */ + +static bool +can_early_inline_edge_p (struct cgraph_edge *e) +{ + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, + NULL); + /* Early inliner might get called at WPA stage when IPA pass adds new + function. In this case we can not really do any of early inlining + because function bodies are missing. */ + if (!gimple_has_body_p (callee->symbol.decl)) + { + e->inline_failed = CIF_BODY_NOT_AVAILABLE; + return false; + } + /* In early inliner some of callees may not be in SSA form yet + (i.e. the callgraph is cyclic and we did not process + the callee by early inliner, yet). We don't have CIF code for this + case; later we will re-do the decision in the real inliner. */ + if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->symbol.decl)) + || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl))) + { + if (dump_file) + fprintf (dump_file, " edge not inlinable: not in SSA form\n"); + return false; + } + if (!can_inline_edge_p (e, true)) + return false; + return true; +} + + +/* Return number of calls in N. Ignore cheap builtins. */ + +static int +num_calls (struct cgraph_node *n) +{ + struct cgraph_edge *e; + int num = 0; + + for (e = n->callees; e; e = e->next_callee) + if (!is_inexpensive_builtin (e->callee->symbol.decl)) + num++; + return num; +} + + +/* Return true if we are interested in inlining small function. 
*/ + +static bool +want_early_inline_function_p (struct cgraph_edge *e) +{ + bool want_inline = true; + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); + + if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)) + ; + else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl) + && !flag_inline_small_functions) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE; + report_inline_failed_reason (e); + want_inline = false; + } + else + { + int growth = estimate_edge_growth (e); + int n; + + if (growth <= 0) + ; + else if (!cgraph_maybe_hot_edge_p (e) + && growth > 0) + { + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "call is cold and code would grow by %i\n", + xstrdup (cgraph_node_name (e->caller)), e->caller->uid, + xstrdup (cgraph_node_name (callee)), callee->uid, + growth); + want_inline = false; + } + else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) + { + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "growth %i exceeds --param early-inlining-insns\n", + xstrdup (cgraph_node_name (e->caller)), e->caller->uid, + xstrdup (cgraph_node_name (callee)), callee->uid, + growth); + want_inline = false; + } + else if ((n = num_calls (callee)) != 0 + && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) + { + if (dump_file) + fprintf (dump_file, " will not early inline: %s/%i->%s/%i, " + "growth %i exceeds --param early-inlining-insns " + "divided by number of calls\n", + xstrdup (cgraph_node_name (e->caller)), e->caller->uid, + xstrdup (cgraph_node_name (callee)), callee->uid, + growth); + want_inline = false; + } + } + return want_inline; +} + +/* Compute time of the edge->caller + edge->callee execution when inlining + does not happen. */ + +inline gcov_type +compute_uninlined_call_time (struct inline_summary *callee_info, + struct cgraph_edge *edge) +{ + gcov_type uninlined_call_time = + RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1), + CGRAPH_FREQ_BASE); + gcov_type caller_time = inline_summary (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to + : edge->caller)->time; + return uninlined_call_time + caller_time; +} + +/* Same as compute_uinlined_call_time but compute time when inlining + does happen. */ + +inline gcov_type +compute_inlined_call_time (struct cgraph_edge *edge, + int edge_time) +{ + gcov_type caller_time = inline_summary (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to + : edge->caller)->time; + gcov_type time = (caller_time + + RDIV (((gcov_type) edge_time + - inline_edge_summary (edge)->call_stmt_time) + * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE)); + /* Possible one roundoff error, but watch for overflows. */ + gcc_checking_assert (time >= INT_MIN / 2); + if (time < 0) + time = 0; + return time; +} + +/* Return true if the speedup for inlining E is bigger than + PARAM_MAX_INLINE_MIN_SPEEDUP. */ + +static bool +big_speedup_p (struct cgraph_edge *e) +{ + gcov_type time = compute_uninlined_call_time (inline_summary (e->callee), + e); + gcov_type inlined_time = compute_inlined_call_time (e, + estimate_edge_time (e)); + if (time - inlined_time + > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100)) + return true; + return false; +} + +/* Return true if we are interested in inlining small function. + When REPORT is true, report reason to dump file. 
*/ + +static bool +want_inline_small_function_p (struct cgraph_edge *e, bool report) +{ + bool want_inline = true; + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); + + if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)) + ; + else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl) + && !flag_inline_small_functions) + { + e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE; + want_inline = false; + } + else + { + int growth = estimate_edge_growth (e); + inline_hints hints = estimate_edge_hints (e); + bool big_speedup = big_speedup_p (e); + + if (growth <= 0) + ; + /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when + hints suggests that inlining given function is very profitable. */ + else if (DECL_DECLARED_INLINE_P (callee->symbol.decl) + && growth >= MAX_INLINE_INSNS_SINGLE + && !big_speedup + && !(hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations + | INLINE_HINT_array_index + | INLINE_HINT_loop_stride))) + { + e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; + want_inline = false; + } + /* Before giving up based on fact that caller size will grow, allow + functions that are called few times and eliminating the offline + copy will lead to overall code size reduction. + Not all of these will be handled by subsequent inlining of functions + called once: in particular weak functions are not handled or funcitons + that inline to multiple calls but a lot of bodies is optimized out. + Finally we want to inline earlier to allow inlining of callbacks. + + This is slightly wrong on aggressive side: it is entirely possible + that function is called many times with a context where inlining + reduces code size and few times with a context where inlining increase + code size. Resoluting growth estimate will be negative even if it + would make more sense to keep offline copy and do not inline into the + call sites that makes the code size grow. + + When badness orders the calls in a way that code reducing calls come + first, this situation is not a problem at all: after inlining all + "good" calls, we will realize that keeping the function around is + better. */ + else if (growth <= MAX_INLINE_INSNS_SINGLE + /* Unlike for functions called once, we play unsafe with + COMDATs. We can allow that since we know functions + in consideration are small (and thus risk is small) and + moreover grow estimates already accounts that COMDAT + functions may or may not disappear when eliminated from + current unit. With good probability making aggressive + choice in all units is going to make overall program + smaller. + + Consequently we ask cgraph_can_remove_if_no_direct_calls_p + instead of + cgraph_will_be_removed_from_program_if_no_direct_calls */ + && !DECL_EXTERNAL (callee->symbol.decl) + && cgraph_can_remove_if_no_direct_calls_p (callee) + && estimate_growth (callee) <= 0) + ; + else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl) + && !flag_inline_functions) + { + e->inline_failed = CIF_NOT_DECLARED_INLINED; + want_inline = false; + } + /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline + Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that + inlining given function is very profitable. */ + else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl) + && !big_speedup + && growth >= ((hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations + | INLINE_HINT_array_index + | INLINE_HINT_loop_stride)) + ? 
MAX (MAX_INLINE_INSNS_AUTO, + MAX_INLINE_INSNS_SINGLE) + : MAX_INLINE_INSNS_AUTO)) + { + e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; + want_inline = false; + } + /* If call is cold, do not inline when function body would grow. */ + else if (!cgraph_maybe_hot_edge_p (e)) + { + e->inline_failed = CIF_UNLIKELY_CALL; + want_inline = false; + } + } + if (!want_inline && report) + report_inline_failed_reason (e); + return want_inline; +} + +/* EDGE is self recursive edge. + We hand two cases - when function A is inlining into itself + or when function A is being inlined into another inliner copy of function + A within function B. + + In first case OUTER_NODE points to the toplevel copy of A, while + in the second case OUTER_NODE points to the outermost copy of A in B. + + In both cases we want to be extra selective since + inlining the call will just introduce new recursive calls to appear. */ + +static bool +want_inline_self_recursive_call_p (struct cgraph_edge *edge, + struct cgraph_node *outer_node, + bool peeling, + int depth) +{ + char const *reason = NULL; + bool want_inline = true; + int caller_freq = CGRAPH_FREQ_BASE; + int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO); + + if (DECL_DECLARED_INLINE_P (edge->caller->symbol.decl)) + max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH); + + if (!cgraph_maybe_hot_edge_p (edge)) + { + reason = "recursive call is cold"; + want_inline = false; + } + else if (max_count && !outer_node->count) + { + reason = "not executed in profile"; + want_inline = false; + } + else if (depth > max_depth) + { + reason = "--param max-inline-recursive-depth exceeded."; + want_inline = false; + } + + if (outer_node->global.inlined_to) + caller_freq = outer_node->callers->frequency; + + if (!want_inline) + ; + /* Inlining of self recursive function into copy of itself within other function + is transformation similar to loop peeling. + + Peeling is profitable if we can inline enough copies to make probability + of actual call to the self recursive function very small. Be sure that + the probability of recursion is small. + + We ensure that the frequency of recursing is at most 1 - (1/max_depth). + This way the expected number of recision is at most max_depth. */ + else if (peeling) + { + int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1) + / max_depth); + int i; + for (i = 1; i < depth; i++) + max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE; + if (max_count + && (edge->count * CGRAPH_FREQ_BASE / outer_node->count + >= max_prob)) + { + reason = "profile of recursive call is too large"; + want_inline = false; + } + if (!max_count + && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq + >= max_prob)) + { + reason = "frequency of recursive call is too large"; + want_inline = false; + } + } + /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion + depth is large. We reduce function call overhead and increase chances that + things fit in hardware return predictor. + + Recursive inlining might however increase cost of stack frame setup + actually slowing down functions whose recursion tree is wide rather than + deep. + + Deciding reliably on when to do recursive inlining without profile feedback + is tricky. For now we disable recursive inlining when probability of self + recursion is low. + + Recursive inlining of self recursive call within loop also results in large loop + depths that generally optimize badly. We may want to throttle down inlining + in those cases. 
In particular this seems to happen in one of libstdc++ rb tree + methods. */ + else + { + if (max_count + && (edge->count * 100 / outer_node->count + <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) + { + reason = "profile of recursive call is too small"; + want_inline = false; + } + else if (!max_count + && (edge->frequency * 100 / caller_freq + <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))) + { + reason = "frequency of recursive call is too small"; + want_inline = false; + } + } + if (!want_inline && dump_file) + fprintf (dump_file, " not inlining recursively: %s\n", reason); + return want_inline; +} + +/* Return true when NODE has caller other than EDGE. + Worker for cgraph_for_node_and_aliases. */ + +static bool +check_caller_edge (struct cgraph_node *node, void *edge) +{ + return (node->callers + && node->callers != edge); +} + + +/* Decide if inlining NODE would reduce unit size by eliminating + the offline copy of function. + When COLD is true the cold calls are considered, too. */ + +static bool +want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold) +{ + struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL); + struct cgraph_edge *e; + bool has_hot_call = false; + + /* Does it have callers? */ + if (!node->callers) + return false; + /* Already inlined? */ + if (function->global.inlined_to) + return false; + if (cgraph_function_or_thunk_node (node, NULL) != node) + return false; + /* Inlining into all callers would increase size? */ + if (estimate_growth (node) > 0) + return false; + /* Maybe other aliases has more direct calls. */ + if (cgraph_for_node_and_aliases (node, check_caller_edge, node->callers, true)) + return false; + /* All inlines must be possible. */ + for (e = node->callers; e; e = e->next_caller) + { + if (!can_inline_edge_p (e, true)) + return false; + if (!has_hot_call && cgraph_maybe_hot_edge_p (e)) + has_hot_call = 1; + } + + if (!cold && !has_hot_call) + return false; + return true; +} + +#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64) + +/* Return relative time improvement for inlining EDGE in range + 1...RELATIVE_TIME_BENEFIT_RANGE */ + +static inline int +relative_time_benefit (struct inline_summary *callee_info, + struct cgraph_edge *edge, + int edge_time) +{ + gcov_type relbenefit; + gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge); + gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time); + + /* Inlining into extern inline function is not a win. */ + if (DECL_EXTERNAL (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to->symbol.decl + : edge->caller->symbol.decl)) + return 1; + + /* Watch overflows. */ + gcc_checking_assert (uninlined_call_time >= 0); + gcc_checking_assert (inlined_call_time >= 0); + gcc_checking_assert (uninlined_call_time >= inlined_call_time); + + /* Compute relative time benefit, i.e. how much the call becomes faster. + ??? perhaps computing how much the caller+calle together become faster + would lead to more realistic results. */ + if (!uninlined_call_time) + uninlined_call_time = 1; + relbenefit = + RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE, + uninlined_call_time); + relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE); + gcc_checking_assert (relbenefit >= 0); + relbenefit = MAX (relbenefit, 1); + return relbenefit; +} + + +/* A cost model driving the inlining heuristics in a way so the edges with + smallest badness are inlined first. 
After each inlining is performed + the costs of all caller edges of nodes affected are recomputed so the + metrics may accurately depend on values such as number of inlinable callers + of the function or function body size. */ + +static int +edge_badness (struct cgraph_edge *edge, bool dump) +{ + gcov_type badness; + int growth, edge_time; + struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee, + NULL); + struct inline_summary *callee_info = inline_summary (callee); + inline_hints hints; + + if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)) + return INT_MIN; + + growth = estimate_edge_growth (edge); + edge_time = estimate_edge_time (edge); + hints = estimate_edge_hints (edge); + gcc_checking_assert (edge_time >= 0); + gcc_checking_assert (edge_time <= callee_info->time); + gcc_checking_assert (growth <= callee_info->size); + + if (dump) + { + fprintf (dump_file, " Badness calculation for %s/%i -> %s/%i\n", + xstrdup (cgraph_node_name (edge->caller)), + edge->caller->uid, + xstrdup (cgraph_node_name (callee)), + edge->callee->uid); + fprintf (dump_file, " size growth %i, time %i ", + growth, + edge_time); + dump_inline_hints (dump_file, hints); + if (big_speedup_p (edge)) + fprintf (dump_file, " big_speedup"); + fprintf (dump_file, "\n"); + } + + /* Always prefer inlining saving code size. */ + if (growth <= 0) + { + badness = INT_MIN / 2 + growth; + if (dump) + fprintf (dump_file, " %i: Growth %i <= 0\n", (int) badness, + growth); + } + + /* When profiling is available, compute badness as: + + relative_edge_count * relative_time_benefit + goodness = ------------------------------------------- + growth_f_caller + badness = -goodness + + The fraction is upside down, because on edge counts and time beneits + the bounds are known. Edge growth is essentially unlimited. */ + + else if (max_count) + { + int relbenefit = relative_time_benefit (callee_info, edge, edge_time); + badness = + ((int) + ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) * + relbenefit) / growth; + + /* Be sure that insanity of the profile won't lead to increasing counts + in the scalling and thus to overflow in the computation above. */ + gcc_assert (max_count >= edge->count); + if (dump) + { + fprintf (dump_file, + " %i (relative %f): profile info. Relative count %f" + " * Relative benefit %f\n", + (int) badness, (double) badness / INT_MIN, + (double) edge->count / max_count, + relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE); + } + } + + /* When function local profile is available. Compute badness as: + + relative_time_benefit + goodness = --------------------------------- + growth_of_caller * overall_growth + + badness = - goodness + + compensated by the inline hints. 
+ */ + else if (flag_guess_branch_prob) + { + badness = (relative_time_benefit (callee_info, edge, edge_time) + * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE)); + badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth))); + gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16); + if ((hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations + | INLINE_HINT_array_index + | INLINE_HINT_loop_stride)) + || callee_info->growth <= 0) + badness *= 8; + if (hints & (INLINE_HINT_same_scc)) + badness /= 16; + else if (hints & (INLINE_HINT_in_scc)) + badness /= 8; + else if (hints & (INLINE_HINT_cross_module)) + badness /= 2; + gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2); + if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32) + badness *= 16; + if (dump) + { + fprintf (dump_file, + " %i: guessed profile. frequency %f," + " benefit %f%%, time w/o inlining %i, time w inlining %i" + " overall growth %i (current) %i (original)\n", + (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE, + relative_time_benefit (callee_info, edge, edge_time) * 100.0 + / RELATIVE_TIME_BENEFIT_RANGE, + (int)compute_uninlined_call_time (callee_info, edge), + (int)compute_inlined_call_time (edge, edge_time), + estimate_growth (callee), + callee_info->growth); + } + } + /* When function local profile is not available or it does not give + useful information (ie frequency is zero), base the cost on + loop nest and overall size growth, so we optimize for overall number + of functions fully inlined in program. */ + else + { + int nest = MIN (inline_edge_summary (edge)->loop_depth, 8); + badness = growth * 256; + + /* Decrease badness if call is nested. */ + if (badness > 0) + badness >>= nest; + else + { + badness <<= nest; + } + if (dump) + fprintf (dump_file, " %i: no profile. nest %i\n", (int) badness, + nest); + } + + /* Ensure that we did not overflow in all the fixed point math above. */ + gcc_assert (badness >= INT_MIN); + gcc_assert (badness <= INT_MAX - 1); + /* Make recursive inlining happen always after other inlining is done. */ + if (cgraph_edge_recursive_p (edge)) + return badness + 1; + else + return badness; +} + +/* Recompute badness of EDGE and update its key in HEAP if needed. */ +static inline void +update_edge_key (fibheap_t heap, struct cgraph_edge *edge) +{ + int badness = edge_badness (edge, false); + if (edge->aux) + { + fibnode_t n = (fibnode_t) edge->aux; + gcc_checking_assert (n->data == edge); + + /* fibheap_replace_key only decrease the keys. + When we increase the key we do not update heap + and instead re-insert the element once it becomes + a minimum of heap. */ + if (badness < n->key) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " decreasing badness %s/%i -> %s/%i, %i to %i\n", + xstrdup (cgraph_node_name (edge->caller)), + edge->caller->uid, + xstrdup (cgraph_node_name (edge->callee)), + edge->callee->uid, + (int)n->key, + badness); + } + fibheap_replace_key (heap, n, badness); + gcc_checking_assert (n->key == badness); + } + } + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, + " enqueuing call %s/%i -> %s/%i, badness %i\n", + xstrdup (cgraph_node_name (edge->caller)), + edge->caller->uid, + xstrdup (cgraph_node_name (edge->callee)), + edge->callee->uid, + badness); + } + edge->aux = fibheap_insert (heap, badness, edge); + } +} + + +/* NODE was inlined. + All caller edges needs to be resetted because + size estimates change. 
Similarly callees needs reset + because better context may be known. */ + +static void +reset_edge_caches (struct cgraph_node *node) +{ + struct cgraph_edge *edge; + struct cgraph_edge *e = node->callees; + struct cgraph_node *where = node; + int i; + struct ipa_ref *ref; + + if (where->global.inlined_to) + where = where->global.inlined_to; + + /* WHERE body size has changed, the cached growth is invalid. */ + reset_node_growth_cache (where); + + for (edge = where->callers; edge; edge = edge->next_caller) + if (edge->inline_failed) + reset_edge_growth_cache (edge); + for (i = 0; ipa_ref_list_referring_iterate (&where->symbol.ref_list, + i, ref); i++) + if (ref->use == IPA_REF_ALIAS) + reset_edge_caches (ipa_ref_referring_node (ref)); + + if (!e) + return; + + while (true) + if (!e->inline_failed && e->callee->callees) + e = e->callee->callees; + else + { + if (e->inline_failed) + reset_edge_growth_cache (e); + if (e->next_callee) + e = e->next_callee; + else + { + do + { + if (e->caller == node) + return; + e = e->caller->callers; + } + while (!e->next_callee); + e = e->next_callee; + } + } +} + +/* Recompute HEAP nodes for each of caller of NODE. + UPDATED_NODES track nodes we already visited, to avoid redundant work. + When CHECK_INLINABLITY_FOR is set, re-check for specified edge that + it is inlinable. Otherwise check all edges. */ + +static void +update_caller_keys (fibheap_t heap, struct cgraph_node *node, + bitmap updated_nodes, + struct cgraph_edge *check_inlinablity_for) +{ + struct cgraph_edge *edge; + int i; + struct ipa_ref *ref; + + if ((!node->alias && !inline_summary (node)->inlinable) + || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE + || node->global.inlined_to) + return; + if (!bitmap_set_bit (updated_nodes, node->uid)) + return; + + for (i = 0; ipa_ref_list_referring_iterate (&node->symbol.ref_list, + i, ref); i++) + if (ref->use == IPA_REF_ALIAS) + { + struct cgraph_node *alias = ipa_ref_referring_node (ref); + update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for); + } + + for (edge = node->callers; edge; edge = edge->next_caller) + if (edge->inline_failed) + { + if (!check_inlinablity_for + || check_inlinablity_for == edge) + { + if (can_inline_edge_p (edge, false) + && want_inline_small_function_p (edge, false)) + update_edge_key (heap, edge); + else if (edge->aux) + { + report_inline_failed_reason (edge); + fibheap_delete_node (heap, (fibnode_t) edge->aux); + edge->aux = NULL; + } + } + else if (edge->aux) + update_edge_key (heap, edge); + } +} + +/* Recompute HEAP nodes for each uninlined call in NODE. + This is used when we know that edge badnesses are going only to increase + (we introduced new call site) and thus all we need is to insert newly + created edges into heap. */ + +static void +update_callee_keys (fibheap_t heap, struct cgraph_node *node, + bitmap updated_nodes) +{ + struct cgraph_edge *e = node->callees; + + if (!e) + return; + while (true) + if (!e->inline_failed && e->callee->callees) + e = e->callee->callees; + else + { + enum availability avail; + struct cgraph_node *callee; + /* We do not reset callee growth cache here. Since we added a new call, + growth chould have just increased and consequentely badness metric + don't need updating. 
*/ + if (e->inline_failed + && (callee = cgraph_function_or_thunk_node (e->callee, &avail)) + && inline_summary (callee)->inlinable + && cgraph_function_body_availability (callee) >= AVAIL_AVAILABLE + && !bitmap_bit_p (updated_nodes, callee->uid)) + { + if (can_inline_edge_p (e, false) + && want_inline_small_function_p (e, false)) + update_edge_key (heap, e); + else if (e->aux) + { + report_inline_failed_reason (e); + fibheap_delete_node (heap, (fibnode_t) e->aux); + e->aux = NULL; + } + } + if (e->next_callee) + e = e->next_callee; + else + { + do + { + if (e->caller == node) + return; + e = e->caller->callers; + } + while (!e->next_callee); + e = e->next_callee; + } + } +} + +/* Enqueue all recursive calls from NODE into priority queue depending on + how likely we want to recursively inline the call. */ + +static void +lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where, + fibheap_t heap) +{ + struct cgraph_edge *e; + enum availability avail; + + for (e = where->callees; e; e = e->next_callee) + if (e->callee == node + || (cgraph_function_or_thunk_node (e->callee, &avail) == node + && avail > AVAIL_OVERWRITABLE)) + { + /* When profile feedback is available, prioritize by expected number + of calls. */ + fibheap_insert (heap, + !max_count ? -e->frequency + : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))), + e); + } + for (e = where->callees; e; e = e->next_callee) + if (!e->inline_failed) + lookup_recursive_calls (node, e->callee, heap); +} + +/* Decide on recursive inlining: in the case function has recursive calls, + inline until body size reaches given argument. If any new indirect edges + are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES + is NULL. */ + +static bool +recursive_inlining (struct cgraph_edge *edge, + vec<cgraph_edge_p> *new_edges) +{ + int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO); + fibheap_t heap; + struct cgraph_node *node; + struct cgraph_edge *e; + struct cgraph_node *master_clone = NULL, *next; + int depth = 0; + int n = 0; + + node = edge->caller; + if (node->global.inlined_to) + node = node->global.inlined_to; + + if (DECL_DECLARED_INLINE_P (node->symbol.decl)) + limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); + + /* Make sure that function is small enough to be considered for inlining. */ + if (estimate_size_after_inlining (node, edge) >= limit) + return false; + heap = fibheap_new (); + lookup_recursive_calls (node, node, heap); + if (fibheap_empty (heap)) + { + fibheap_delete (heap); + return false; + } + + if (dump_file) + fprintf (dump_file, + " Performing recursive inlining on %s\n", + cgraph_node_name (node)); + + /* Do the inlining and update list of recursive call during process. */ + while (!fibheap_empty (heap)) + { + struct cgraph_edge *curr + = (struct cgraph_edge *) fibheap_extract_min (heap); + struct cgraph_node *cnode, *dest = curr->callee; + + if (!can_inline_edge_p (curr, true)) + continue; + + /* MASTER_CLONE is produced in the case we already started modified + the function. Be sure to redirect edge to the original body before + estimating growths otherwise we will be seeing growths after inlining + the already modified body. 
*/ + if (master_clone) + { + cgraph_redirect_edge_callee (curr, master_clone); + reset_edge_growth_cache (curr); + } + + if (estimate_size_after_inlining (node, curr) > limit) + { + cgraph_redirect_edge_callee (curr, dest); + reset_edge_growth_cache (curr); + break; + } + + depth = 1; + for (cnode = curr->caller; + cnode->global.inlined_to; cnode = cnode->callers->caller) + if (node->symbol.decl + == cgraph_function_or_thunk_node (curr->callee, NULL)->symbol.decl) + depth++; + + if (!want_inline_self_recursive_call_p (curr, node, false, depth)) + { + cgraph_redirect_edge_callee (curr, dest); + reset_edge_growth_cache (curr); + continue; + } + + if (dump_file) + { + fprintf (dump_file, + " Inlining call of depth %i", depth); + if (node->count) + { + fprintf (dump_file, " called approx. %.2f times per call", + (double)curr->count / node->count); + } + fprintf (dump_file, "\n"); + } + if (!master_clone) + { + /* We need original clone to copy around. */ + master_clone = cgraph_clone_node (node, node->symbol.decl, + node->count, CGRAPH_FREQ_BASE, + false, vNULL, true); + for (e = master_clone->callees; e; e = e->next_callee) + if (!e->inline_failed) + clone_inlined_nodes (e, true, false, NULL); + cgraph_redirect_edge_callee (curr, master_clone); + reset_edge_growth_cache (curr); + } + + inline_call (curr, false, new_edges, &overall_size, true); + lookup_recursive_calls (node, curr->callee, heap); + n++; + } + + if (!fibheap_empty (heap) && dump_file) + fprintf (dump_file, " Recursive inlining growth limit met.\n"); + fibheap_delete (heap); + + if (!master_clone) + return false; + + if (dump_file) + fprintf (dump_file, + "\n Inlined %i times, " + "body grown from size %i to %i, time %i to %i\n", n, + inline_summary (master_clone)->size, inline_summary (node)->size, + inline_summary (master_clone)->time, inline_summary (node)->time); + + /* Remove master clone we used for inlining. We rely that clones inlined + into master clone gets queued just before master clone so we don't + need recursion. */ + for (node = cgraph_first_function (); node != master_clone; + node = next) + { + next = cgraph_next_function (node); + if (node->global.inlined_to == master_clone) + cgraph_remove_node (node); + } + cgraph_remove_node (master_clone); + return true; +} + + +/* Given whole compilation unit estimate of INSNS, compute how large we can + allow the unit to grow. */ + +static int +compute_max_insns (int insns) +{ + int max_insns = insns; + if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) + max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); + + return ((HOST_WIDEST_INT) max_insns + * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100); +} + + +/* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */ + +static void +add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges) +{ + while (new_edges.length () > 0) + { + struct cgraph_edge *edge = new_edges.pop (); + + gcc_assert (!edge->aux); + if (edge->inline_failed + && can_inline_edge_p (edge, true) + && want_inline_small_function_p (edge, true)) + edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge); + } +} + + +/* We use greedy algorithm for inlining of small functions: + All inline candidates are put into prioritized heap ordered in + increasing badness. + + The inlining of small functions is bounded by unit growth parameters. 
*/ + +static void +inline_small_functions (void) +{ + struct cgraph_node *node; + struct cgraph_edge *edge; + fibheap_t edge_heap = fibheap_new (); + bitmap updated_nodes = BITMAP_ALLOC (NULL); + int min_size, max_size; + vec<cgraph_edge_p> new_indirect_edges = vNULL; + int initial_size = 0; + struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); + + if (flag_indirect_inlining) + new_indirect_edges.create (8); + + /* Compute overall unit size and other global parameters used by badness + metrics. */ + + max_count = 0; + ipa_reduced_postorder (order, true, true, NULL); + free (order); + + FOR_EACH_DEFINED_FUNCTION (node) + if (!node->global.inlined_to) + { + if (cgraph_function_with_gimple_body_p (node) + || node->thunk.thunk_p) + { + struct inline_summary *info = inline_summary (node); + struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->symbol.aux; + + if (!DECL_EXTERNAL (node->symbol.decl)) + initial_size += info->size; + info->growth = estimate_growth (node); + if (dfs && dfs->next_cycle) + { + struct cgraph_node *n2; + int id = dfs->scc_no + 1; + for (n2 = node; n2; + n2 = ((struct ipa_dfs_info *) node->symbol.aux)->next_cycle) + { + struct inline_summary *info2 = inline_summary (n2); + if (info2->scc_no) + break; + info2->scc_no = id; + } + } + } + + for (edge = node->callers; edge; edge = edge->next_caller) + if (max_count < edge->count) + max_count = edge->count; + } + ipa_free_postorder_info (); + initialize_growth_caches (); + + if (dump_file) + fprintf (dump_file, + "\nDeciding on inlining of small functions. Starting with size %i.\n", + initial_size); + + overall_size = initial_size; + max_size = compute_max_insns (overall_size); + min_size = overall_size; + + /* Populate the heeap with all edges we might inline. */ + + FOR_EACH_DEFINED_FUNCTION (node) + if (!node->global.inlined_to) + { + if (dump_file) + fprintf (dump_file, "Enqueueing calls of %s/%i.\n", + cgraph_node_name (node), node->uid); + + for (edge = node->callers; edge; edge = edge->next_caller) + if (edge->inline_failed + && can_inline_edge_p (edge, true) + && want_inline_small_function_p (edge, true) + && edge->inline_failed) + { + gcc_assert (!edge->aux); + update_edge_key (edge_heap, edge); + } + } + + gcc_assert (in_lto_p + || !max_count + || (profile_info && flag_branch_probabilities)); + + while (!fibheap_empty (edge_heap)) + { + int old_size = overall_size; + struct cgraph_node *where, *callee; + int badness = fibheap_min_key (edge_heap); + int current_badness; + int cached_badness; + int growth; + + edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap); + gcc_assert (edge->aux); + edge->aux = NULL; + if (!edge->inline_failed) + continue; + + /* Be sure that caches are maintained consistent. + We can not make this ENABLE_CHECKING only because it cause different + updates of the fibheap queue. */ + cached_badness = edge_badness (edge, false); + reset_edge_growth_cache (edge); + reset_node_growth_cache (edge->callee); + + /* When updating the edge costs, we only decrease badness in the keys. + Increases of badness are handled lazilly; when we see key with out + of date value on it, we re-insert it now. 
*/ + current_badness = edge_badness (edge, false); + gcc_assert (cached_badness == current_badness); + gcc_assert (current_badness >= badness); + if (current_badness != badness) + { + edge->aux = fibheap_insert (edge_heap, current_badness, edge); + continue; + } + + if (!can_inline_edge_p (edge, true)) + continue; + + callee = cgraph_function_or_thunk_node (edge->callee, NULL); + growth = estimate_edge_growth (edge); + if (dump_file) + { + fprintf (dump_file, + "\nConsidering %s with %i size\n", + cgraph_node_name (callee), + inline_summary (callee)->size); + fprintf (dump_file, + " to be inlined into %s in %s:%i\n" + " Estimated growth after inlined into all is %+i insns.\n" + " Estimated badness is %i, frequency %.2f.\n", + cgraph_node_name (edge->caller), + flag_wpa ? "unknown" + : gimple_filename ((const_gimple) edge->call_stmt), + flag_wpa ? -1 + : gimple_lineno ((const_gimple) edge->call_stmt), + estimate_growth (callee), + badness, + edge->frequency / (double)CGRAPH_FREQ_BASE); + if (edge->count) + fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", + edge->count); + if (dump_flags & TDF_DETAILS) + edge_badness (edge, true); + } + + if (overall_size + growth > max_size + && !DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)) + { + edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT; + report_inline_failed_reason (edge); + continue; + } + + if (!want_inline_small_function_p (edge, true)) + continue; + + /* Heuristics for inlining small functions works poorly for + recursive calls where we do efect similar to loop unrolling. + When inliing such edge seems profitable, leave decision on + specific inliner. */ + if (cgraph_edge_recursive_p (edge)) + { + where = edge->caller; + if (where->global.inlined_to) + where = where->global.inlined_to; + if (!recursive_inlining (edge, + flag_indirect_inlining + ? &new_indirect_edges : NULL)) + { + edge->inline_failed = CIF_RECURSIVE_INLINING; + continue; + } + reset_edge_caches (where); + /* Recursive inliner inlines all recursive calls of the function + at once. Consequently we need to update all callee keys. */ + if (flag_indirect_inlining) + add_new_edges_to_heap (edge_heap, new_indirect_edges); + update_callee_keys (edge_heap, where, updated_nodes); + } + else + { + struct cgraph_node *outer_node = NULL; + int depth = 0; + + /* Consider the case where self recursive function A is inlined into B. + This is desired optimization in some cases, since it leads to effect + similar of loop peeling and we might completely optimize out the + recursive call. However we must be extra selective. */ + + where = edge->caller; + while (where->global.inlined_to) + { + if (where->symbol.decl == callee->symbol.decl) + outer_node = where, depth++; + where = where->callers->caller; + } + if (outer_node + && !want_inline_self_recursive_call_p (edge, outer_node, + true, depth)) + { + edge->inline_failed + = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl) + ? 
CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED); + continue; + } + else if (depth && dump_file) + fprintf (dump_file, " Peeling recursion with depth %i\n", depth); + + gcc_checking_assert (!callee->global.inlined_to); + inline_call (edge, true, &new_indirect_edges, &overall_size, true); + if (flag_indirect_inlining) + add_new_edges_to_heap (edge_heap, new_indirect_edges); + + reset_edge_caches (edge->callee); + reset_node_growth_cache (callee); + + update_callee_keys (edge_heap, where, updated_nodes); + } + where = edge->caller; + if (where->global.inlined_to) + where = where->global.inlined_to; + + /* Our profitability metric can depend on local properties + such as number of inlinable calls and size of the function body. + After inlining these properties might change for the function we + inlined into (since it's body size changed) and for the functions + called by function we inlined (since number of it inlinable callers + might change). */ + update_caller_keys (edge_heap, where, updated_nodes, NULL); + bitmap_clear (updated_nodes); + + if (dump_file) + { + fprintf (dump_file, + " Inlined into %s which now has time %i and size %i," + "net change of %+i.\n", + cgraph_node_name (edge->caller), + inline_summary (edge->caller)->time, + inline_summary (edge->caller)->size, + overall_size - old_size); + } + if (min_size > overall_size) + { + min_size = overall_size; + max_size = compute_max_insns (min_size); + + if (dump_file) + fprintf (dump_file, "New minimal size reached: %i\n", min_size); + } + } + + free_growth_caches (); + new_indirect_edges.release (); + fibheap_delete (edge_heap); + if (dump_file) + fprintf (dump_file, + "Unit growth for small function inlining: %i->%i (%i%%)\n", + initial_size, overall_size, + initial_size ? overall_size * 100 / (initial_size) - 100: 0); + BITMAP_FREE (updated_nodes); +} + +/* Flatten NODE. Performed both during early inlining and + at IPA inlining time. */ + +static void +flatten_function (struct cgraph_node *node, bool early) +{ + struct cgraph_edge *e; + + /* We shouldn't be called recursively when we are being processed. */ + gcc_assert (node->symbol.aux == NULL); + + node->symbol.aux = (void *) node; + + for (e = node->callees; e; e = e->next_callee) + { + struct cgraph_node *orig_callee; + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); + + /* We've hit cycle? It is time to give up. */ + if (callee->symbol.aux) + { + if (dump_file) + fprintf (dump_file, + "Not inlining %s into %s to avoid cycle.\n", + xstrdup (cgraph_node_name (callee)), + xstrdup (cgraph_node_name (e->caller))); + e->inline_failed = CIF_RECURSIVE_INLINING; + continue; + } + + /* When the edge is already inlined, we just need to recurse into + it in order to fully flatten the leaves. */ + if (!e->inline_failed) + { + flatten_function (callee, early); + continue; + } + + /* Flatten attribute needs to be processed during late inlining. For + extra code quality we however do flattening during early optimization, + too. */ + if (!early + ? !can_inline_edge_p (e, true) + : !can_early_inline_edge_p (e)) + continue; + + if (cgraph_edge_recursive_p (e)) + { + if (dump_file) + fprintf (dump_file, "Not inlining: recursive call.\n"); + continue; + } + + if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->symbol.decl)) + != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl))) + { + if (dump_file) + fprintf (dump_file, "Not inlining: SSA form does not match.\n"); + continue; + } + + /* Inline the edge and flatten the inline clone. 
Avoid + recursing through the original node if the node was cloned. */ + if (dump_file) + fprintf (dump_file, " Inlining %s into %s.\n", + xstrdup (cgraph_node_name (callee)), + xstrdup (cgraph_node_name (e->caller))); + orig_callee = callee; + inline_call (e, true, NULL, NULL, false); + if (e->callee != orig_callee) + orig_callee->symbol.aux = (void *) node; + flatten_function (e->callee, early); + if (e->callee != orig_callee) + orig_callee->symbol.aux = NULL; + } + + node->symbol.aux = NULL; + if (!node->global.inlined_to) + inline_update_overall_summary (node); +} + +/* Decide on the inlining. We do so in the topological order to avoid + expenses on updating data structures. */ + +static unsigned int +ipa_inline (void) +{ + struct cgraph_node *node; + int nnodes; + struct cgraph_node **order = + XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); + int i; + + if (in_lto_p && optimize) + ipa_update_after_lto_read (); + + if (dump_file) + dump_inline_summaries (dump_file); + + nnodes = ipa_reverse_postorder (order); + + FOR_EACH_FUNCTION (node) + node->symbol.aux = 0; + + if (dump_file) + fprintf (dump_file, "\nFlattening functions:\n"); + + /* In the first pass handle functions to be flattened. Do this with + a priority so none of our later choices will make this impossible. */ + for (i = nnodes - 1; i >= 0; i--) + { + node = order[i]; + + /* Handle nodes to be flattened. + Ideally when processing callees we stop inlining at the + entry of cycles, possibly cloning that entry point and + try to flatten itself turning it into a self-recursive + function. */ + if (lookup_attribute ("flatten", + DECL_ATTRIBUTES (node->symbol.decl)) != NULL) + { + if (dump_file) + fprintf (dump_file, + "Flattening %s\n", cgraph_node_name (node)); + flatten_function (node, false); + } + } + + inline_small_functions (); + symtab_remove_unreachable_nodes (false, dump_file); + free (order); + + /* Inline functions with a property that after inlining into all callers the + code size will shrink because the out-of-line copy is eliminated. + We do this regardless on the callee size as long as function growth limits + are met. */ + if (flag_inline_functions_called_once) + { + int cold; + if (dump_file) + fprintf (dump_file, + "\nDeciding on functions to be inlined into all callers:\n"); + + /* Inlining one function called once has good chance of preventing + inlining other function into the same callee. Ideally we should + work in priority order, but probably inlining hot functions first + is good cut without the extra pain of maintaining the queue. + + ??? this is not really fitting the bill perfectly: inlining function + into callee often leads to better optimization of callee due to + increased context for optimization. + For example if main() function calls a function that outputs help + and then function that does the main optmization, we should inline + the second with priority even if both calls are cold by themselves. + + We probably want to implement new predicate replacing our use of + maybe_hot_edge interpreted as maybe_hot_edge || callee is known + to be hot. 
*/ + for (cold = 0; cold <= 1; cold ++) + { + FOR_EACH_DEFINED_FUNCTION (node) + { + if (want_inline_function_to_all_callers_p (node, cold)) + { + int num_calls = 0; + struct cgraph_edge *e; + for (e = node->callers; e; e = e->next_caller) + num_calls++; + while (node->callers && !node->global.inlined_to) + { + struct cgraph_node *caller = node->callers->caller; + + if (dump_file) + { + fprintf (dump_file, + "\nInlining %s size %i.\n", + cgraph_node_name (node), + inline_summary (node)->size); + fprintf (dump_file, + " Called once from %s %i insns.\n", + cgraph_node_name (node->callers->caller), + inline_summary (node->callers->caller)->size); + } + + inline_call (node->callers, true, NULL, NULL, true); + if (dump_file) + fprintf (dump_file, + " Inlined into %s which now has %i size\n", + cgraph_node_name (caller), + inline_summary (caller)->size); + if (!num_calls--) + { + if (dump_file) + fprintf (dump_file, "New calls found; giving up.\n"); + break; + } + } + } + } + } + } + + /* Free ipa-prop structures if they are no longer needed. */ + if (optimize) + ipa_free_all_structures_after_iinln (); + + if (dump_file) + fprintf (dump_file, + "\nInlined %i calls, eliminated %i functions\n\n", + ncalls_inlined, nfunctions_inlined); + + if (dump_file) + dump_inline_summaries (dump_file); + /* In WPA we use inline summaries for partitioning process. */ + if (!flag_wpa) + inline_free_summary (); + return 0; +} + +/* Inline always-inline function calls in NODE. */ + +static bool +inline_always_inline_functions (struct cgraph_node *node) +{ + struct cgraph_edge *e; + bool inlined = false; + + for (e = node->callees; e; e = e->next_callee) + { + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); + if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)) + continue; + + if (cgraph_edge_recursive_p (e)) + { + if (dump_file) + fprintf (dump_file, " Not inlining recursive call to %s.\n", + cgraph_node_name (e->callee)); + e->inline_failed = CIF_RECURSIVE_INLINING; + continue; + } + + if (!can_early_inline_edge_p (e)) + continue; + + if (dump_file) + fprintf (dump_file, " Inlining %s into %s (always_inline).\n", + xstrdup (cgraph_node_name (e->callee)), + xstrdup (cgraph_node_name (e->caller))); + inline_call (e, true, NULL, NULL, false); + inlined = true; + } + if (inlined) + inline_update_overall_summary (node); + + return inlined; +} + +/* Decide on the inlining. We do so in the topological order to avoid + expenses on updating data structures. */ + +static bool +early_inline_small_functions (struct cgraph_node *node) +{ + struct cgraph_edge *e; + bool inlined = false; + + for (e = node->callees; e; e = e->next_callee) + { + struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL); + if (!inline_summary (callee)->inlinable + || !e->inline_failed) + continue; + + /* Do not consider functions not declared inline. 
+      /* Do not consider functions not declared inline.  */
+      if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
+          && !flag_inline_small_functions
+          && !flag_inline_functions)
+        continue;
+
+      if (dump_file)
+        fprintf (dump_file, "Considering inline candidate %s.\n",
+                 cgraph_node_name (callee));
+
+      if (!can_early_inline_edge_p (e))
+        continue;
+
+      if (cgraph_edge_recursive_p (e))
+        {
+          if (dump_file)
+            fprintf (dump_file, " Not inlining: recursive call.\n");
+          continue;
+        }
+
+      if (!want_early_inline_function_p (e))
+        continue;
+
+      if (dump_file)
+        fprintf (dump_file, " Inlining %s into %s.\n",
+                 xstrdup (cgraph_node_name (callee)),
+                 xstrdup (cgraph_node_name (e->caller)));
+      inline_call (e, true, NULL, NULL, true);
+      inlined = true;
+    }
+
+  return inlined;
+}
+
+/* Do inlining of small functions.  Doing so early helps profiling and other
+   passes to be somewhat more effective and avoids some code duplication in
+   the later real inlining pass for testcases with very many function
+   calls.  */
+static unsigned int
+early_inliner (void)
+{
+  struct cgraph_node *node = cgraph_get_node (current_function_decl);
+  struct cgraph_edge *edge;
+  unsigned int todo = 0;
+  int iterations = 0;
+  bool inlined = false;
+
+  if (seen_error ())
+    return 0;
+
+  /* Do nothing if the data structures for the ipa-inliner are already
+     computed.  This happens when some pass decides to construct a new
+     function and cgraph_add_new_function calls lowering passes and early
+     optimization on it.  This may confuse us when the early inliner decides
+     to inline a call to a function clone, because function clones don't
+     have a parameter list in ipa-prop matching their signature.  */
+  if (ipa_node_params_vector.exists ())
+    return 0;
+
+#ifdef ENABLE_CHECKING
+  verify_cgraph_node (node);
+#endif
+
+  /* Even when not optimizing or not inlining, inline always-inline
+     functions.  */
+  inlined = inline_always_inline_functions (node);
+
+  if (!optimize
+      || flag_no_inline
+      || !flag_early_inlining
+      /* Never inline regular functions into always-inline functions
+         during incremental inlining.  This is undesirable because functions
+         calling always-inline functions will get less optimized, but at the
+         same time inlining functions calling always-inline functions into
+         an always-inline function might introduce cycles of edges to be
+         always inlined in the callgraph.
+
+         We might want to be smarter and just avoid this type of inlining.  */
+      || DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
+    ;
+  else if (lookup_attribute ("flatten",
+                             DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
+    {
+      /* When the function is marked to be flattened, recursively inline
+         all calls in it.  */
+      if (dump_file)
+        fprintf (dump_file,
+                 "Flattening %s\n", cgraph_node_name (node));
+      flatten_function (node, true);
+      inlined = true;
+    }
+  else
+    {
+      /* We iterate incremental inlining to get trivial cases of indirect
+         inlining.  */
+      while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
+             && early_inline_small_functions (node))
+        {
+          timevar_push (TV_INTEGRATION);
+          todo |= optimize_inline_calls (current_function_decl);
+
+          /* Technically we ought to recompute inline parameters so the new
+             iteration of the early inliner works as expected.  However, the
+             values are approximately right, so we only need to update edge
+             info that might be cleared out for newly discovered edges.  */
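+          /* Re-estimate the size and time of each call statement and
+             re-check that the argument types still match the callee.  */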
+          for (edge = node->callees; edge; edge = edge->next_callee)
+            {
+              struct inline_edge_summary *es = inline_edge_summary (edge);
+              es->call_stmt_size
+                = estimate_num_insns (edge->call_stmt, &eni_size_weights);
+              es->call_stmt_time
+                = estimate_num_insns (edge->call_stmt, &eni_time_weights);
+              if (edge->callee->symbol.decl
+                  && !gimple_check_call_matching_types (edge->call_stmt,
+                                                        edge->callee->symbol.decl))
+                edge->call_stmt_cannot_inline_p = true;
+            }
+          timevar_pop (TV_INTEGRATION);
+          iterations++;
+          inlined = false;
+        }
+      if (dump_file)
+        fprintf (dump_file, "Iterations: %i\n", iterations);
+    }
+
+  if (inlined)
+    {
+      timevar_push (TV_INTEGRATION);
+      todo |= optimize_inline_calls (current_function_decl);
+      timevar_pop (TV_INTEGRATION);
+    }
+
+  cfun->always_inline_functions_inlined = true;
+
+  return todo;
+}
+
+struct gimple_opt_pass pass_early_inline =
+{
+ {
+  GIMPLE_PASS,
+  "einline",                            /* name */
+  OPTGROUP_INLINE,                      /* optinfo_flags */
+  NULL,                                 /* gate */
+  early_inliner,                        /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_EARLY_INLINING,                    /* tv_id */
+  PROP_ssa,                             /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  0                                     /* todo_flags_finish */
+ }
+};
+
+
+/* When to run IPA inlining.  Inlining of always-inline functions
+   happens during early inlining.
+
+   Enable inlining unconditionally at -flto.  We need size estimates to
+   drive partitioning.  */
+
+static bool
+gate_ipa_inline (void)
+{
+  return optimize || flag_lto || flag_wpa;
+}
+
+struct ipa_opt_pass_d pass_ipa_inline =
+{
+ {
+  IPA_PASS,
+  "inline",                             /* name */
+  OPTGROUP_INLINE,                      /* optinfo_flags */
+  gate_ipa_inline,                      /* gate */
+  ipa_inline,                           /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_IPA_INLINING,                      /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  TODO_remove_functions,                /* todo_flags_start */
+  TODO_dump_symtab
+  | TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
+ },
+ inline_generate_summary,               /* generate_summary */
+ inline_write_summary,                  /* write_summary */
+ inline_read_summary,                   /* read_summary */
+ NULL,                                  /* write_optimization_summary */
+ NULL,                                  /* read_optimization_summary */
+ NULL,                                  /* stmt_fixup */
+ 0,                                     /* TODOs */
+ inline_transform,                      /* function_transform */
+ NULL,                                  /* variable_transform */
+};
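
For orientation, the decision paths implemented above correspond to a few source-level markers. The fragment below is illustrative only; it is not part of ipa-inline.c or of this change, and the function names are invented. add_one is an ordinary small inline candidate (early_inline_small_functions and inline_small_functions), twice carries the always_inline attribute handled by inline_always_inline_functions, and kernel carries the flatten attribute handled by flatten_function.

    /* Illustrative example only; not part of ipa-inline.c.  */

    static inline int
    add_one (int x)                 /* ordinary small-function candidate */
    {
      return x + 1;
    }

    static inline __attribute__ ((always_inline)) int
    twice (int x)                   /* always-inline path */
    {
      return 2 * x;
    }

    __attribute__ ((flatten)) int
    kernel (int x)                  /* flattening path: inline the whole call tree */
    {
      return twice (add_one (x));
    }

Compiling such a unit with, say, -O2, optionally adding -finline-functions-called-once or --param max-early-inliner-iterations=N, exercises the corresponding heuristics.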