aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc/mversn-dispatch.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.9/gcc/mversn-dispatch.c')
-rw-r--r--gcc-4.9/gcc/mversn-dispatch.c1769
1 files changed, 1769 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/mversn-dispatch.c b/gcc-4.9/gcc/mversn-dispatch.c
new file mode 100644
index 000000000..17b2ae8db
--- /dev/null
+++ b/gcc-4.9/gcc/mversn-dispatch.c
@@ -0,0 +1,1769 @@
+/* Mulitversion Dispatch Pass.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This pass processes __builtin_dispatch calls to call multi-versioned
+ functions. Only two versions are supported now. Example use :
+
+ int popcnt_sse4(unsigned int x) __attribute__((__target__("popcnt")));
+ int popcnt_sse4(unsigned int x)
+ {
+ int count = __builtin_popcount(x);
+ return count;
+ }
+
+ int popcnt(unsigned int x) __attribute__((__target__("no-popcnt")));
+ int popcnt(unsigned int x)
+ {
+ int count = __builtin_popcount(x);
+ return count;
+ }
+
+ int testsse() __attribute__((version_selector));
+ int main ()
+ {
+ ...
+ ret = __builtin_dispatch (testsse, (void*)popcnt_sse4, (void*)popcnt, 25);
+ ...
+ }
+
+ There are two passes that are run to achieve multi-versioning.
+ "pass_ipa_multiversion_dispatch" is an ipa pass that decides which functions
+ have to be cloned and hoists the feature-test calls appropriately. This
+ pass can be enabled with the flag "-fclone-hot-version-paths" and disabled
+ with "-fno-clone-hot-version-paths".
+
+ "pass_tree_convert_builtin_dispatch" does the lowering. It is a
+ function-level pass. Functions marked with attribute "version_selector" are
+ also handled by this pass. This pass is always on.
+
+ How to use __builtin_dispatch ?
+ -----------------------------
+
+ __builtin_dispatch takes 3 mandatory arguments :
+
+ __builtin_dispatch (arg1, arg2, arg3, <arg4>, <arg5>, ...);
+
+ arg1 is the pointer to the feature-test function.
+ arg2 is the ( void *) cast pointer to the versioned function that is
+ executed when the feature test returns 1.
+ arg3 is the ( void *) cast pointer to the versioned function that is
+ executed when the feature test returns 0.
+ arg4, arg5, ... are optional. They are the arguments to the versioned
+ functions. Both versions must accept the same number of arguments.
+ The __builtin_dispatch function returns the value returned by the
+ versioned function that gets executed. The versioned function arg2
+ is executed when the feature_test function arg1 returns 1 and arg3
+ is executed when the feature_test function arg1 returns 0. arg1
+ could be marked as a "version_selector" function if it is a pure
+ function with no side-effects, returns a constant at run-time and
+ can be evaluated at any point in the execution.
+
+ When to use the "version_selector" attribute ?
+ -----------------------------------------------
+
+ Functions are marked with attribute "version_selector" only if
+ they are run-time constants. Example of such functions would
+ be those that test if a specific feature is available on a
+ particular architecture. Such functions must return a positive
+ integer. For two-way functions, those that test if a feature
+ is present or not must return 1 or 0 respectively.
+
+
+ The code is organized into five parts. The first part has the functionality
+ to detect and handle functions marked with attribute "version_selector". The
+ second part is the analysis phase where we find calls to __builtin_dispatch
+ and mark all functions that are hot and have a call-graph path to a
+ __builtin_dispatch call. The third part decides which functions
+ to clone. This is based on the number of clones that have to be created for
+ the functions marked in the analysis phase. Only two clones are allowed for
+ a function currently. The fourth part is where the actual cloning happens.
+ The fifth part contains the implementation to lower the __builtin_dispatch
+ calls.
+
+ Flags : -fclone-hot-version-paths does function unswitching via cloning.
+ --param=num-mversn-clones=<num> allows to specify the number of
+ functions that should be cloned.
+ --param=mversn-clone-depth=<num> allows to specify the length of
+ the call graph path that should be cloned. num = 0 implies only
+ leaf node that contains the __builtin_dispatch statement must be
+ cloned. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tree-inline.h"
+#include "langhooks.h"
+#include "flags.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "toplev.h"
+#include "timevar.h"
+#include "params.h"
+#include "fibheap.h"
+#include "intl.h"
+#include "tree-pass.h"
+#include "hashtab.h"
+#include "coverage.h"
+#include "ggc.h"
+#include "tree-flow.h"
+#include "rtl.h"
+#include "ipa-prop.h"
+#include "basic-block.h"
+#include "toplev.h"
+#include "dbgcnt.h"
+#include "tree-dump.h"
+#include "output.h"
+#include "vecprim.h"
+#include "gimple-pretty-print.h"
+#include "ipa-inline.h"
+
+typedef struct cgraph_node* NODEPTR;
+DEF_VEC_P (NODEPTR);
+DEF_VEC_ALLOC_P (NODEPTR, heap);
+
+/* Store the decl of __builtin_dispatch */
+static tree builtin_function_decl = NULL;
+
+/* Hash to map name to a decl. Used for variables and functions. */
+static htab_t name_decl_htab = NULL;
+
+/* Hashtable helpers for name_decl_htab. */
+
+static hashval_t
+name_decl_htab_hash_descriptor (const void *p)
+{
+ const_tree t = (const_tree) p;
+ const char *name
+ = (IDENTIFIER_POINTER (DECL_NAME (t)));
+ return htab_hash_string(name);
+}
+
+/* Hashtable helper for name_decl_htab. */
+
+static int
+name_decl_htab_eq_descriptor (const void *p1, const void *p2)
+{
+ const_tree t1 = (const_tree) p1;
+ const char *c1 = IDENTIFIER_POINTER (DECL_NAME (t1));
+ const char *c2 = (const char *)p2;
+
+ return (strcmp (c1, c2) == 0);
+}
+
+/* Return true if NODE is a hot function. It is a hot function
+ if its execution frequency is determined to be hot or
+ if any of its incoming or outgoing call-graph edges is hot. */
+
+static bool
+hot_function_p (struct cgraph_node *node)
+{
+ struct cgraph_edge *edge;
+
+ if (node->frequency == NODE_FREQUENCY_HOT)
+ return true;
+
+ for (edge = node->callees; edge; edge = edge->next_callee)
+ if (cgraph_maybe_hot_edge_p (edge))
+ return true;
+
+ for (edge = node->callers; edge; edge = edge->next_caller)
+ if (cgraph_maybe_hot_edge_p (edge))
+ return true;
+
+ return false;
+}
+
+/* Return the number of arguments that a function has. */
+
+static int
+function_args_count (tree fntype)
+{
+ function_args_iterator args_iter;
+ tree t;
+ int num = 0;
+
+ if (fntype)
+ {
+ FOREACH_FUNCTION_ARGS(fntype, t, args_iter)
+ {
+ num++;
+ }
+ }
+
+ return num;
+}
+
+/* Return the variable name (global/constructor) to use for the
+ version_selector function with name of DECL by appending SUFFIX. */
+
+static char *
+make_name (tree decl, const char *suffix)
+{
+ char *global_var_name;
+ int name_len;
+ const char *name;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+
+ name_len = strlen (name) + strlen (suffix) + 2;
+ global_var_name = (char *) xmalloc (name_len);
+ snprintf (global_var_name, name_len, "%s_%s", name, suffix);
+ return global_var_name;
+}
+
+/* Code for handling version_selector attribute functions. Such functions are
+ run-time constants and need to be executed only once. They are hoisted
+ to a static constructor and their result is stored in a global.
+ */
+
+
+/* This function returns the global variable / constructor name created
+ for feature-test functions marked with attribute "version_selector".
+ The name returned is the DECL name appended with
+ "version_selector_global" for the variable and
+ "version_selector_constructor" for the constructor. */
+
+static char*
+make_feature_test_global_name (tree decl, bool is_constructor)
+{
+ if (is_constructor)
+ return make_name (decl, "version_selector_constructor");
+
+ return make_name (decl, "version_selector_global");
+}
+
+/* This function creates a new VAR_DECL with attributes set
+ using the parameters. PUBLIK corresponds to TREE_PUBLIC,
+ EXTERNAL corresponds to DECL_EXTERNAL and comdat is
+ for DECL_ONE_ONLY. The global variable will have the
+ same status as the version_selector function.*/
+
+static tree
+allocate_new_var (const char *name, int publik,
+ int external, int comdat)
+{
+ tree new_global_var;
+ struct varpool_node *vnode;
+
+ new_global_var = build_decl (UNKNOWN_LOCATION,
+ VAR_DECL,
+ get_identifier (name),
+ integer_type_node);
+
+ DECL_EXTERNAL (new_global_var) = external;
+ TREE_STATIC (new_global_var) = 1;
+ TREE_PUBLIC (new_global_var) = publik;
+ DECL_INITIAL (new_global_var) = 0;
+ DECL_ARTIFICIAL (new_global_var) = 1;
+ DECL_PRESERVE_P (new_global_var) = 1;
+
+ if (comdat)
+ make_decl_one_only (new_global_var, DECL_ASSEMBLER_NAME (new_global_var));
+ assemble_variable (new_global_var, 0, 0, 0);
+
+ vnode = varpool_node (new_global_var);
+ gcc_assert (vnode != NULL);
+ /* Set finalized to 1, otherwise it asserts in function "write_symbol" in
+ lto-streamer-out.c. */
+ vnode->finalized = 1;
+
+ return new_global_var;
+}
+
+/* Make a new constructor function here to call a feature-test function
+ and set its body to CONSTRUCTOR_BODY. Its public and comdat
+ attributes are set from the parameters, PUBLIK, and COMDAT.
+ VERSION_SELECTOR_VAR is the global decl that saves the result of the
+ feature-test function in the constructor. */
+
+static tree
+make_constructor_function (char *name, gimple constructor_body, int publik,
+ int comdat, tree version_selector_var)
+{
+ tree decl, type, t;
+ gimple_seq seq;
+ basic_block new_bb;
+ tree old_current_function_decl;
+
+ type = build_function_type_list (void_type_node, NULL_TREE);
+
+ if (dump_file)
+ fprintf (dump_file, "Name of new constructor function = %s\n", name);
+
+ decl = build_fn_decl (name, type);
+
+ DECL_NAME (decl) = get_identifier (name);
+ SET_DECL_ASSEMBLER_NAME (decl, DECL_NAME (decl));
+ gcc_assert (cgraph_get_create_node (decl) != NULL);
+
+ TREE_USED (decl) = 1;
+ DECL_ARTIFICIAL (decl) = 1;
+ DECL_IGNORED_P (decl) = 0;
+ TREE_PUBLIC (decl) = publik;
+ DECL_UNINLINABLE (decl) = 1;
+ DECL_EXTERNAL (decl) = 0;
+ DECL_CONTEXT (decl) = NULL_TREE;
+ DECL_INITIAL (decl) = make_node (BLOCK);
+ DECL_STATIC_CONSTRUCTOR (decl) = 1;
+ TREE_READONLY (decl) = 0;
+ DECL_PURE_P (decl) = 0;
+
+ if (comdat)
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+
+ /* Build result decl and add to function_decl. */
+ t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_IGNORED_P (t) = 1;
+ DECL_RESULT (decl) = t;
+
+ gimplify_function_tree (decl);
+
+ /* Build CFG for this function. */
+
+ old_current_function_decl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (decl));
+ current_function_decl = decl;
+ init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
+ cfun->curr_properties |=
+ (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
+ PROP_ssa);
+ new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+ make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+
+ /* XXX: Not sure if the edge commented below is necessary. If I add this
+ edge, it fails in gimple_verify_flow_info in tree-cfg.c in condition :
+ " if (e->flags & EDGE_FALLTHRU)"
+ during -fprofile-generate.
+ Otherwise, it is fine. Deleting this edge does not break anything.
+ Commenting this so that it is clear I am intentionally not doing this.*/
+ /* make_edge (new_bb, EXIT_BLOCK_PTR, EDGE_FALLTHRU); */
+
+ seq = gimple_seq_alloc_with_stmt (constructor_body);
+
+ set_bb_seq (new_bb, seq);
+ gimple_set_bb (constructor_body, new_bb);
+
+ /* Set the lexical block of the constructor body. Fails the inliner
+ other wise. */
+ gimple_set_block (constructor_body, DECL_INITIAL (decl));
+
+ /* This call is very important if this pass runs when the IR is in
+ SSA form. It breaks things in strange ways otherwise. */
+ init_tree_ssa (DECL_STRUCT_FUNCTION (decl));
+ add_referenced_var (version_selector_var);
+
+ cgraph_add_new_function (decl, true);
+ cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
+ cgraph_mark_needed_node (cgraph_get_create_node (decl));
+
+ if (dump_file)
+ dump_function_to_file (decl, dump_file, TDF_BLOCKS);
+
+ pop_cfun ();
+ current_function_decl = old_current_function_decl;
+ return decl;
+}
+
+/* If the current function is marked with attribute
+ "version_selector" then it is the predicate (feature-test) function
+ for multi-versioning. Call this function in a constructor and assign
+ the return value to a global variable.
+ The constructor's name is the decl name suffixed
+ "version_selector_constructor" and the global variable's name is the
+ decl name suffixed with "version_selector_global"
+
+ For example, feature-test function isSSE4 marked with attribute
+ version_selector is converted to
+
+ void isSSE4_version_selector_constructor ()
+ {
+ isSSE4_version_selector_global = isSSE4 ();
+ }
+
+ This function returns the decl of the global variable.
+
+ THIS_DECL is the function decl of the "version_selector" function.
+ */
+
+static tree
+handle_version_selector_attr_function (tree this_decl)
+{
+ char *global_var_name;
+ tree version_selector_var = NULL;
+ void **slot;
+
+ gcc_assert (!flag_lto);
+
+ if (dump_file)
+ fprintf (dump_file, "Creating constructor/global for function %s\n",
+ IDENTIFIER_POINTER (DECL_NAME (this_decl)));
+
+ global_var_name = make_feature_test_global_name (this_decl,
+ false);
+
+ slot = htab_find_slot_with_hash (name_decl_htab, global_var_name,
+ htab_hash_string (global_var_name),
+ INSERT);
+ if (*slot == NULL)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Creating global variable %s\n",
+ global_var_name);
+ *slot = allocate_new_var (global_var_name,
+ TREE_PUBLIC (this_decl),
+ DECL_EXTERNAL (this_decl),
+ DECL_ONE_ONLY (this_decl));
+ }
+ else
+ {
+ free (global_var_name);
+ return (tree) *slot;
+ }
+
+ version_selector_var = (tree) *slot;
+
+ /* If the feature-test function is not external, create a constructor and
+ call this function in the constructor. */
+
+ if (!DECL_EXTERNAL (this_decl))
+ {
+ char *constructor_name;
+ gimple constructor_body;
+ tree constructor_decl;
+
+ constructor_name
+ = make_feature_test_global_name (this_decl, true);
+
+ constructor_body = gimple_build_call (this_decl, 0);
+
+ gimple_call_set_lhs (constructor_body, version_selector_var);
+
+ if (dump_file)
+ print_gimple_stmt (dump_file, constructor_body, 0, TDF_VOPS);
+
+ constructor_decl =
+ make_constructor_function (constructor_name, constructor_body,
+ TREE_PUBLIC (this_decl),
+ DECL_ONE_ONLY (this_decl),
+ version_selector_var);
+
+ gcc_assert (constructor_decl != NULL_TREE);
+ free (constructor_name);
+ }
+
+ free (global_var_name);
+ return version_selector_var;
+}
+
+/* Start Analysis phase. Mark all functions that are hot and have a call-graph
+ path to a __builtin_dispatch call. */
+
+/* This function returns the address of the feature test function.
+ If the address of the function is saved to a temporary,
+ this function traverses the gimple statements before BUILTIN_STMT
+ and finds an assignment whose rhs is the feature test function.
+ If the feature test function is specified as a function pointer
+ whose function value is unknown, this funcition returns NULL. */
+
+static tree
+find_version_selector_func_addr (gimple builtin_stmt)
+{
+ tree cond_func_addr = NULL;
+ gimple def_stmt = NULL;
+
+ cond_func_addr = gimple_call_arg (builtin_stmt, 0);
+
+ gcc_assert (TREE_CODE (cond_func_addr) == ADDR_EXPR
+ || TREE_CODE (cond_func_addr) == SSA_NAME);
+
+ if (TREE_CODE (cond_func_addr) == ADDR_EXPR)
+ return cond_func_addr;
+
+ /* TREE_CODE (cond_func_addr) == SSA_NAME
+ This means a new function pointer variable is created and assigned the
+ address of the feature-test function. Traverse the statements backwards
+ and find the assignment to get the RHS. */
+
+ def_stmt = SSA_NAME_DEF_STMT (cond_func_addr);
+
+ gcc_assert (def_stmt
+ && gimple_assign_lhs (def_stmt) == cond_func_addr);
+
+ cond_func_addr = gimple_assign_rhs1 (def_stmt);
+
+ /* If the cond_func_addr is still not an ADDR_EXPR, it means that the
+ feature-test function is specified as a pointer. In this case, we
+ return NULL, since the feature-test function decl is not known. */
+
+ if (cond_func_addr == NULL
+ || TREE_CODE (cond_func_addr) != ADDR_EXPR)
+ return NULL;
+
+ /* If the operand of the ADDR_EXPR is not a function_decl, return NULL
+ as this still means the feature-test function is specified as a
+ function pointer. */
+
+ if (TREE_CODE (TREE_OPERAND (cond_func_addr, 0)) != FUNCTION_DECL)
+ return NULL;
+
+ return cond_func_addr;
+}
+
+/* Finds the gimple calls to __builtin_dispatch in function pointed
+ to by the call graph NODE and populates the vector VEC. Returns
+ true if at least one statement was found where the feature test
+ function is marked as "version_selector". Otherwise, there is no
+ question of hoisting it. */
+
+static bool
+is_builtin_dispatch_stmt_present (struct cgraph_node *node,
+ VEC (tree,heap) **vec)
+{
+ struct cgraph_edge *edge;
+ bool present = false;
+
+ gcc_assert (!flag_lto);
+
+ for (edge = node->callees; edge; edge = edge->next_callee)
+ {
+ if (edge->callee->decl == builtin_function_decl)
+ {
+ tree cond_func_decl;
+ tree cond_func_addr;
+ gcc_assert (*vec != NULL);
+ cond_func_addr = find_version_selector_func_addr (edge->call_stmt);
+
+ if (cond_func_addr == NULL)
+ continue;
+
+ cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
+
+ /* Do not consider for hoisting if "version_selector" attribute is
+ not set. */
+ if (lookup_attribute ("version_selector",
+ DECL_ATTRIBUTES (cond_func_decl)) == NULL)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Not hoisting builtin_dispatch as "
+ "feature_test function not version_selector :\n");
+ print_gimple_stmt (dump_file, edge->call_stmt, 0, TDF_VOPS);
+ }
+ continue;
+ }
+
+ present = true;
+ VEC_safe_push (tree, heap, *vec, cond_func_decl);
+ }
+ }
+ return present;
+}
+
+/* Updates the list of feature-test function decls reaching the cgraph
+ function NODE. */
+
+static void
+update_reachable_decls_list (struct cgraph_node *node,
+ VEC (tree, heap) *predicate_decls)
+{
+ VEC (tree, heap) **decl_list = NULL;
+ tree cond_func_decl;
+ int ix;
+
+ if (node->aux == NULL)
+ {
+ decl_list = (VEC (tree, heap) **) xmalloc (sizeof (VEC (tree, heap) *));
+ *decl_list = VEC_alloc (tree, heap, 1);
+ node->aux = decl_list;
+ }
+ else
+ decl_list = (VEC (tree, heap) **) node->aux;
+
+ for (ix = 0; VEC_iterate (tree, predicate_decls, ix, cond_func_decl); ++ix)
+ VEC_safe_push (tree, heap, *decl_list, cond_func_decl);
+}
+
+/* Propagate the __builtin_dispatch stmt (s) called from node to its
+ callers, PREDICATE_DECLS is the decls list of the predicate functions. */
+
+static unsigned int
+mark_reachable_functions (struct cgraph_node *this_node,
+ VEC (tree, heap) *predicate_decls)
+{
+ VEC (NODEPTR, heap) *work_list;
+ VEC (int, heap) *depth_list;
+ struct cgraph_edge *e;
+ htab_t node_htab = NULL;
+ void **slot = NULL;
+
+ /* Use a work-list style algorithm to mark functions in any call-graph
+ path to the current function. */
+
+ work_list = VEC_alloc (NODEPTR, heap, 8);
+ depth_list = VEC_alloc (int, heap, 8);
+
+ VEC_safe_push (NODEPTR, heap, work_list, this_node);
+ VEC_safe_push (int, heap, depth_list, 0);
+
+ node_htab = htab_create (10, htab_hash_pointer,
+ htab_eq_pointer, NULL);
+
+ slot = htab_find_slot (node_htab, this_node, INSERT);
+
+ gcc_assert (*slot == NULL);
+ *slot = this_node;
+
+ while (!VEC_empty (NODEPTR, work_list))
+ {
+ struct cgraph_node *node = VEC_pop (NODEPTR, work_list);
+ int depth = VEC_pop (int, depth_list);
+
+ if (dump_file)
+ fprintf (dump_file, "%s has a depth = %d callgraph path to %s\n",
+ cgraph_node_name (node), depth,
+ cgraph_node_name (this_node));
+
+ update_reachable_decls_list (node, predicate_decls);
+
+ gcc_assert (node->aux != NULL);
+
+ if (depth >= PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not propogating __builtin_dispatch... "
+ "maximum cloning depth = %d reached\n",
+ PARAM_VALUE (PARAM_MVERSN_CLONE_CGRAPH_DEPTH));
+ continue;
+ }
+
+ for (e = node->callers; e; e = e->next_caller)
+ {
+ slot = htab_find_slot (node_htab, e->caller, INSERT);
+ if (*slot != NULL)
+ continue;
+ *slot = e->caller;
+ if (!hot_function_p (e->caller))
+ continue;
+
+ VEC_safe_push (NODEPTR, heap, work_list, e->caller);
+ VEC_safe_push (int, heap, depth_list, (depth + 1));
+ }
+ }
+
+ htab_delete (node_htab);
+ VEC_free (NODEPTR, heap, work_list);
+ VEC_free (int, heap, depth_list);
+ return 0;
+}
+
+/* Scan the call graph and detect hot functions that have __builtin_dispatch
+ calls. Then, propogate this information to its callers. Returns true if
+ a suitable __builtin_dispatch was found. */
+
+static bool
+perform_analysis_phase (void)
+{
+ struct cgraph_node *node;
+ VEC(tree, heap) *builtin_predicates_vec = NULL;
+ bool flag = false;
+
+ builtin_predicates_vec = VEC_alloc (tree, heap, 1);
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ /* if the body of this decl is from outside, do nothing. */
+ if (DECL_EXTERNAL (node->decl))
+ continue;
+
+ if (!hot_function_p (node))
+ continue;
+
+ if (!is_builtin_dispatch_stmt_present (node, &builtin_predicates_vec))
+ continue;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "%s calls __builtin_dispatch atleast once.\n",
+ cgraph_node_name (node));
+
+ fprintf (dump_file, "%s is a hot function, consider cloning ...\n",
+ cgraph_node_name (node));
+ }
+
+ flag = true;
+ mark_reachable_functions (node, builtin_predicates_vec);
+ VEC_truncate (tree, builtin_predicates_vec, 0);
+ }
+
+ VEC_free (tree, heap, builtin_predicates_vec);
+ return flag;
+}
+
+/* End Analysis phase. */
+
+/* Decide Cloning Phase.
+
+ In this phase, we go through each function and decide if it should be
+ cloned or not. */
+
+/* This function counts the number of unique decls in the DECL_LIST.*/
+
+static int
+count_predicate_functions (VEC (tree,heap) *decl_list)
+{
+ int ix;
+ int count = 0;
+ tree cond_func_decl = NULL;
+ htab_t dup_decl_htab = NULL;
+
+ if (VEC_length (tree, decl_list) == 1)
+ return 1;
+
+ dup_decl_htab = htab_create (2, htab_hash_pointer, htab_eq_pointer, NULL);
+
+ for (ix = 0; VEC_iterate (tree, decl_list, ix, cond_func_decl); ++ix)
+ {
+ void **slot = NULL;
+ slot = htab_find_slot (dup_decl_htab, cond_func_decl, INSERT);
+
+ if (*slot != NULL)
+ continue;
+ count++;
+ *slot = cond_func_decl;
+ }
+
+ htab_delete (dup_decl_htab);
+ return count;
+}
+
+/* This function decides which functions to clone based on the number of
+ feature_test decls reaching it. Currently, only one feature_test decl
+ is allowed. */
+
+static bool
+decide_cloning_phase (void)
+{
+ struct cgraph_node *node;
+ int count;
+ bool run_cloning_phase = false;
+ int num_funcs_cloned = 0;
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ tree cond_func_decl = NULL;
+ VEC (tree, heap) *vec;
+ if (node->aux == NULL)
+ continue;
+
+ if (num_funcs_cloned >= PARAM_VALUE (PARAM_NUMBER_OF_MVERSN_CLONES))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Reached cloning limit specified "
+ "by \"num-mversn-clones\" for %s\n",
+ cgraph_node_name (node));
+
+ free (node->aux);
+ node->aux = NULL;
+ continue;
+ }
+
+ vec = *(VEC (tree,heap) **) node->aux;
+ count = count_predicate_functions (vec);
+ gcc_assert (count >= 1);
+ cond_func_decl = VEC_index (tree, vec, 0);
+ gcc_assert (cond_func_decl != NULL);
+ VEC_free (tree, heap, vec);
+ free (node->aux);
+ node->aux = NULL;
+
+ if (count > 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, "%s has %d predicates, Not cloning for > 1\n",
+ cgraph_node_name (node), count);
+ continue;
+ }
+ /* Set the node's aux value to be that of the predicate decl. */
+ node->aux = cond_func_decl;
+ run_cloning_phase = true;
+ num_funcs_cloned++;
+ }
+ return run_cloning_phase;
+}
+
+/* End Decide Cloning Phase. */
+
+/* Cloning Phase. */
+
+/* Deletes all basic-blocks and leaves function with :
+ ENTRY_BLOCK ---> (new empty basic block) ---> EXIT_BLOCK
+*/
+
+static basic_block
+empty_function_body (tree fndecl)
+{
+ basic_block bb, new_bb;
+ edge e;
+ tree old_current_function_decl;
+
+ old_current_function_decl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (fndecl));
+ current_function_decl = fndecl;
+
+ clear_edges ();
+ for (bb = ENTRY_BLOCK_PTR; bb != NULL;)
+ {
+ basic_block bb_next;
+ bb_next = bb->next_bb;
+ if (bb != EXIT_BLOCK_PTR
+ && bb != ENTRY_BLOCK_PTR)
+ {
+ if (bb_seq (bb) != NULL)
+ {
+ gimple_stmt_iterator i;
+ for (i = gsi_start_bb (bb); !gsi_end_p (i);)
+ {
+ gimple stmt = gsi_stmt (i);
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&i, true);
+ release_defs (stmt);
+ }
+ }
+ bb->il.gimple = NULL;
+ bb->prev_bb = NULL;
+ bb->next_bb = NULL;
+ SET_BASIC_BLOCK (bb->index, NULL);
+ n_basic_blocks--;
+ }
+ bb = bb_next;
+ }
+ ENTRY_BLOCK_PTR->next_bb = EXIT_BLOCK_PTR;
+ new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+ e = make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+ gcc_assert (e != NULL);
+ /* XXX:Is this edge necessary ? */
+ e = make_edge (new_bb, EXIT_BLOCK_PTR, 0);
+ gcc_assert (e != NULL);
+
+ current_function_decl = old_current_function_decl;
+ pop_cfun ();
+ return new_bb;
+}
+
+/* Takes function with decl ORIG_FNDECL and clones it. The
+ name of the clone is the original name suffixed with
+ NAME_SUFFIX. Code is adapted from cgraph_function_versioning
+ in cgraphunit.c */
+
+static tree
+clone_function (tree orig_fndecl, const char *name_suffix)
+{
+ tree new_decl;
+ char *new_name;
+ struct cgraph_node *new_version;
+ struct cgraph_node *old_version;
+ void **slot;
+ tree old_current_function_decl;
+
+ new_name = make_name (orig_fndecl, name_suffix);
+ new_decl = copy_node (orig_fndecl);
+
+
+ slot = htab_find_slot_with_hash (name_decl_htab, new_name,
+ htab_hash_string (new_name), INSERT);
+
+ gcc_assert (*slot == NULL);
+ *slot = new_decl;
+
+ /* Code adapted from cgraph_function_versioning in cgraphuinit.c */
+
+ new_version = cgraph_get_create_node (new_decl);
+ old_version = cgraph_get_create_node (orig_fndecl);
+
+ new_version->local = old_version->local;
+ new_version->global = old_version->global;
+ new_version->rtl = old_version->rtl;
+ new_version->reachable = true;
+ new_version->count = old_version->count;
+
+ /* Set the name of the new function. */
+ DECL_NAME (new_decl) = get_identifier (new_name);
+ SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
+ SET_DECL_RTL (new_decl, NULL);
+
+ tree_function_versioning (orig_fndecl, new_decl, NULL /*tree_map*/,
+ false, NULL /*args_to_skip*/,
+ false, /* skip return */
+ NULL /* blocks_to_copy */ ,
+ NULL /* new_entry */);
+
+
+ old_current_function_decl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (new_decl));
+ current_function_decl = new_decl;
+
+ TREE_READONLY (new_decl) = TREE_READONLY (orig_fndecl);
+ TREE_STATIC (new_decl) = TREE_STATIC (orig_fndecl);
+ TREE_USED (new_decl) = TREE_USED (orig_fndecl);
+ DECL_ARTIFICIAL (new_decl) = 1;
+ DECL_IGNORED_P (new_decl) = 0;
+ TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_fndecl);
+ DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_fndecl);
+
+ DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_fndecl);
+ DECL_COMDAT (new_decl) = DECL_COMDAT (orig_fndecl);
+ DECL_COMDAT_GROUP (new_decl) = DECL_COMDAT_GROUP (orig_fndecl);
+ DECL_VIRTUAL_P (new_decl) = DECL_VIRTUAL_P (orig_fndecl);
+ DECL_WEAK (new_decl) = DECL_WEAK (orig_fndecl);
+
+ /* Always inline the clones. Why are we cloning otherwise? */
+ DECL_DECLARED_INLINE_P (new_decl) = 1;
+ DECL_UNINLINABLE (new_decl) = 0;
+ new_version->local.externally_visible
+ = old_version->local.externally_visible;
+ new_version->local.local
+ = old_version->local.local;
+
+ new_version->analyzed = true;
+ new_version->lowered = true;
+
+ if (dump_file)
+ dump_function_to_file (new_decl, dump_file, TDF_BLOCKS);
+
+ cgraph_add_new_function (new_decl, true);
+
+ cgraph_call_function_insertion_hooks (new_version);
+ cgraph_mark_needed_node (new_version);
+
+ pop_cfun ();
+ current_function_decl = old_current_function_decl;
+
+ return new_decl;
+}
+
+/* This function populates the vector *VEC with the args in the gimple
+ call statement STMT. SKIP_ARGS is the number of args to skip.*/
+
+static void
+get_function_args (gimple stmt, int num_args, VEC (tree, heap) **vec,
+ int skip_args)
+{
+ int i;
+
+ if (num_args == 0) return;
+
+ *vec = VEC_alloc (tree, heap, num_args);
+ /* The number of args in a function is 1 plus the actual number of
+ args. Also, there are 3 special args reserved, so the first arg
+ starts from 3. */
+ for (i = 0; i <= num_args - 2; ++i)
+ VEC_quick_push (tree, *vec, gimple_call_arg (stmt, (skip_args + i)));
+}
+
+/* Given ret = __builtin_dispatch (pred, fn1, fn2, arg1, ....)
+ get ret = fn1 (arg1, ...) or ret = fn2 (arg1, ....)
+ depending on the value of SIDE == 0 or 1. */
+
+static gimple
+make_specialized_call_from_builtin (gimple builtin_stmt, int side)
+{
+ tree func_addr;
+ int num_func_args = 0;
+ VEC (tree, heap) *nargs = NULL;
+ tree lhs_stmt;
+ gimple specialized_call_stmt;
+
+ if (side == 0)
+ func_addr = gimple_call_arg (builtin_stmt, 1);
+ else
+ func_addr = gimple_call_arg (builtin_stmt, 2);
+
+ num_func_args
+ = function_args_count (TREE_TYPE (TREE_OPERAND (func_addr, 0)));
+
+ get_function_args (builtin_stmt, num_func_args, &nargs, 3);
+
+ specialized_call_stmt = gimple_build_call_vec (func_addr, nargs);
+
+ lhs_stmt = gimple_call_lhs (builtin_stmt);
+
+ if (lhs_stmt != NULL_TREE)
+ gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
+
+ if (nargs != NULL)
+ VEC_free (tree, heap, nargs);
+
+ return specialized_call_stmt;
+}
+
+/* Given a call (GENERIC_STMT) to a function that is cloned, substitute
+ with a call to the correct clone. */
+
+static gimple
+make_specialized_call_to_clone (gimple generic_stmt, int side)
+{
+ tree new_decl;
+ char *new_name;
+ tree generic_fndecl;
+ gimple specialized_call_stmt;
+ void **slot;
+ int num_func_args;
+ tree lhs_stmt;
+ VEC (tree, heap) *nargs= NULL;
+
+ generic_fndecl = gimple_call_fndecl (generic_stmt);
+ gcc_assert (generic_fndecl != NULL);
+
+ if (side == 0)
+ new_name = make_name (generic_fndecl, "clone_0");
+ else
+ new_name = make_name (generic_fndecl, "clone_1");
+
+ slot = htab_find_slot_with_hash (name_decl_htab, new_name,
+ htab_hash_string (new_name), NO_INSERT);
+ gcc_assert (slot != NULL);
+ new_decl = (tree) *slot;
+ gcc_assert (new_decl);
+
+ num_func_args = function_args_count (TREE_TYPE (generic_fndecl));
+ get_function_args (generic_stmt, num_func_args, &nargs, 0);
+ specialized_call_stmt = gimple_build_call_vec (new_decl, nargs);
+
+ lhs_stmt = gimple_call_lhs (generic_stmt);
+
+ if (lhs_stmt != NULL_TREE)
+ gimple_call_set_lhs (specialized_call_stmt, lhs_stmt);
+
+ if (nargs != NULL)
+ VEC_free (tree, heap, nargs);
+
+ return specialized_call_stmt;
+}
+
+/* Returns true if STMT is a call to __builtin_dispatch and its
+ predicate feature-test function is marked with attribute
+ "version_selector". */
+
+static bool
+is_builtin_with_predicate_version_selector (gimple stmt)
+{
+ tree cond_func_addr, cond_func_decl;
+
+ gcc_assert (!flag_lto);
+
+ if (gimple_call_fndecl (stmt) != builtin_function_decl)
+ return false;
+
+ cond_func_addr = find_version_selector_func_addr (stmt);
+
+ if (cond_func_addr == NULL)
+ return false;
+
+ cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
+ if (lookup_attribute ("version_selector",
+ DECL_ATTRIBUTES (cond_func_decl)) != NULL)
+ return true;
+
+ return false;
+}
+
+/* Find calls to __builtin_dispatch or to functions that are versioned
+ in CLONE_DECL and substitute the call with the correct version based
+ on the value of SIDE. */
+
+static void
+specialize_call (tree clone_decl, int side)
+{
+ basic_block bb;
+ tree old_current_function_decl;
+
+ old_current_function_decl = current_function_decl;
+ push_cfun (DECL_STRUCT_FUNCTION (clone_decl));
+ current_function_decl = clone_decl;
+
+ /* Iterate over call edges and find out if there is
+ a call to __builtin_dispatch or a cloned function.
+ We cannot iterate over call graph edges as there are
+ no edges for the clones yet. */
+
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (clone_decl))
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ tree callee_decl;
+ struct cgraph_node *callee_node;
+ gimple specialized_call_stmt;
+ gimple stmt = gsi_stmt (gsi);
+
+ if (!is_gimple_call (stmt))
+ continue;
+
+ callee_decl = gimple_call_fndecl (stmt);
+
+ if (callee_decl == NULL)
+ continue;
+
+ callee_node = cgraph_get_create_node (callee_decl);
+
+ /* For a __builtin_dispatch stmt, only specialize if
+ version_selector attribute is set. Otherwise, it is
+ not hoisted, so no specialization. */
+
+ if (is_builtin_with_predicate_version_selector (stmt))
+ {
+ specialized_call_stmt =
+ make_specialized_call_from_builtin (stmt, side);
+ }
+ else if (callee_node->aux != NULL)
+ {
+ specialized_call_stmt =
+ make_specialized_call_to_clone (stmt, side);
+ }
+ else
+ continue;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Specialize stmt : \n");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
+ fprintf (dump_file, "Specialized stmt : \n");
+ print_gimple_stmt (dump_file, specialized_call_stmt,
+ 0, TDF_VOPS);
+ }
+
+ gimple_set_block (specialized_call_stmt, gimple_block (stmt));
+ gsi_insert_before_without_update (&gsi, specialized_call_stmt,
+ GSI_SAME_STMT);
+
+
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&gsi, true);
+ mark_symbols_for_renaming (specialized_call_stmt);
+
+ /* After removing make sure gsi is set correctly to not skip
+ a statememt. */
+ gsi = gsi_for_stmt (specialized_call_stmt);
+ }
+ }
+ current_function_decl = old_current_function_decl;
+ pop_cfun ();
+}
+
+/* When a function is version cloned, its body is replaced to call one
+ of the versions with the feature-test function acting as a predicate.
+ This is done with __builtin_dispatch which is later expanded. */
+
+static gimple
+make_builtin_call_to_clones (tree orig_fndecl, tree clone_0_addr,
+ tree clone_1_addr, tree cond_func_addr)
+{
+ gimple new_builtin_call;
+ VEC(tree, heap) *vargs = VEC_alloc (tree, heap, 4);
+ tree arg;
+
+ VEC_quick_push (tree, vargs, cond_func_addr);
+ VEC_quick_push (tree, vargs, clone_0_addr);
+ VEC_quick_push (tree, vargs, clone_1_addr);
+
+ for (arg = DECL_ARGUMENTS (orig_fndecl); arg; arg = TREE_CHAIN (arg))
+ {
+ VEC_safe_push (tree, heap, vargs, arg);
+ /* Again, this add_referenced_var is very very important. It broke
+ a build where a cloned function's arguments where never
+ referenced. Missing this statement in places asserts at
+ tree-dfa.c:589, in function referenced_var_lookup at
+ "gcc_assert (h || uid == 0);" and is very difficult to triage. */
+ add_referenced_var (arg);
+ }
+
+ new_builtin_call = gimple_build_call_vec (builtin_function_decl, vargs);
+ mark_symbols_for_renaming (new_builtin_call);
+
+
+ if (dump_file)
+ print_gimple_stmt (dump_file, new_builtin_call, 0, TDF_VOPS);
+
+ VEC_free (tree, heap, vargs);
+
+ return new_builtin_call;
+}
+
+/* This clones a dispatch function whose callee-graph path has a function
+ which calls __builtin_dispatch. This function is cloned and the
+ original function branches to the right clone. */
+
+static int
+clone_and_dispatch_function (struct cgraph_node *orig_node, tree *clone_0,
+ tree *clone_1)
+{
+ tree clone_0_decl, clone_1_decl;
+ gimple new_builtin_call = NULL;
+ gimple new_return_stmt = NULL;
+ gimple_seq seq = NULL;
+ basic_block new_bb;
+ tree orig_fndecl;
+ tree return_var = NULL;
+ tree return_type;
+ tree old_current_function_decl;
+
+ old_current_function_decl = current_function_decl;
+ orig_fndecl = orig_node->decl;
+ push_cfun (DECL_STRUCT_FUNCTION (orig_fndecl));
+ current_function_decl = orig_fndecl;
+
+ /* Make 2 clones for true and false function. */
+ clone_0_decl = clone_function (orig_fndecl, "clone_0");
+ clone_1_decl = clone_function (orig_fndecl, "clone_1");
+ *clone_0 = clone_0_decl;
+ *clone_1 = clone_1_decl;
+
+ new_bb = empty_function_body (orig_fndecl);
+
+ new_builtin_call = make_builtin_call_to_clones (
+ orig_fndecl,
+ build_fold_addr_expr (clone_0_decl),
+ build_fold_addr_expr (clone_1_decl),
+ build_fold_addr_expr ((tree)orig_node->aux));
+
+ return_type = TREE_TYPE (TREE_TYPE (orig_fndecl));
+
+ if (!TREE_ADDRESSABLE (return_type) && COMPLETE_TYPE_P (return_type))
+ {
+ tree tmp_var;
+ tmp_var = create_tmp_var (return_type, NULL);
+ add_referenced_var (tmp_var);
+ return_var = make_ssa_name (tmp_var, new_builtin_call);
+ gimple_call_set_lhs (new_builtin_call, return_var);
+ }
+
+ mark_symbols_for_renaming (new_builtin_call);
+ new_return_stmt = gimple_build_return (return_var);
+ mark_symbols_for_renaming (new_return_stmt);
+ gimple_seq_add_stmt (&seq, new_builtin_call);
+ gimple_seq_add_stmt (&seq, new_return_stmt);
+ set_bb_seq (new_bb, seq);
+ gimple_set_bb (new_builtin_call, new_bb);
+ gimple_set_bb (new_return_stmt, new_bb);
+
+ gimple_set_block (new_builtin_call, DECL_INITIAL (orig_fndecl));
+ gimple_set_block (new_return_stmt, DECL_INITIAL (orig_fndecl));
+
+ if (dump_file)
+ dump_function_to_file (orig_fndecl, dump_file, TDF_BLOCKS);
+
+ /* This update_ssa is necessary here for the following reason. SSA uses
+ a global syms_to_rename bitmap that stores syms that must be renamed.
+ So, if we accumulate the syms from one function in IPA but move to
+ a different function without updating SSA, then we could be
+ accumulating syms from many functions. This would assert in
+ referenced_var_lookup because the hashtab storing the syms is
+ function local. This is horrible. gcc-4.6 makes this bitmap a
+ global. */
+ update_ssa (TODO_update_ssa);
+
+ compute_inline_parameters (cgraph_get_create_node (orig_fndecl), false);
+ DECL_DECLARED_INLINE_P (orig_fndecl) = 1;
+ DECL_UNINLINABLE (orig_fndecl) = 0;
+ current_function_decl = old_current_function_decl;
+ pop_cfun ();
+ return 0;
+}
+
+/* Clone all functions marked for cloning by the earlier phase. */
+
+static void
+perform_cloning_phase (void)
+{
+ struct cgraph_node *node;
+ int ix;
+ VEC (tree, heap) *cloned_decl_list = NULL;
+ tree cloned_decl = NULL;
+
+ cloned_decl_list = VEC_alloc (tree, heap, 2);
+
+ /* First clone, then specialize the clones. */
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ tree clone_0_decl, clone_1_decl;
+ if (node->aux == NULL)
+ continue;
+ if (dump_file)
+ {
+ fprintf (dump_file, "%s will be cloned\n", cgraph_node_name (node));
+ dump_function_to_file (node->decl, dump_file, TDF_BLOCKS);
+ }
+ clone_and_dispatch_function (node, &clone_0_decl, &clone_1_decl);
+ VEC_safe_push (tree, heap, cloned_decl_list, clone_0_decl);
+ VEC_safe_push (tree, heap, cloned_decl_list, clone_1_decl);
+ continue;
+ }
+
+ /* Specialize the clones now. */
+ for (ix = 0; VEC_iterate (tree, cloned_decl_list, ix, cloned_decl); ++ix)
+ {
+ int which_clone = ix % 2;
+ specialize_call (cloned_decl, which_clone);
+ }
+
+ VEC_free (tree, heap, cloned_decl_list);
+}
+
+/* End Cloning phase. */
+
+/* Checks if there is atleast one call to __builtin_dispatch. */
+
+static bool
+find_builtin_decl (void)
+{
+ struct cgraph_node *node;
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ if (strstr (cgraph_node_name (node), "__builtin_dispatch") != NULL)
+ {
+ builtin_function_decl = node->decl;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Set the aux fields of all nodes and edges in the call graph to be NULL. */
+
+static void
+cleanup_aux_field (void)
+{
+ struct cgraph_node *node;
+ struct cgraph_edge *edge;
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ node->aux = NULL;
+ for (edge = node->callees; edge; edge = edge->next_callee)
+ edge->aux = NULL;
+ }
+}
+
+/* Main driver function. It scans the __builtin_dispatch calls and
+ figures out which functions to clone. It then clones the functions. */
+
+static unsigned int
+builtin_dispatch_ipa_clone (void)
+{
+ cleanup_aux_field ();
+
+ /* Allocate hashtab mapping name to decl. */
+ name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
+ name_decl_htab_eq_descriptor, NULL);
+
+ /* Turn it on for O1 and above. At -O0, there is a SSA alias bug
+ with create_tmp_var. Cloning and hoisting is not necessary at
+ -O0 anyways. Also, guard it with the flag
+ "-fclone-hot-version-paths".
+ Disabled for LTO as it needs more work. */
+ if (optimize == 0
+ || profile_arc_flag
+ || !flag_clone_hot_version_paths
+ || flag_lto)
+ return 0;
+
+ if (!find_builtin_decl ())
+ return 0;
+
+ gcc_assert (builtin_function_decl != NULL);
+
+ if (!perform_analysis_phase ())
+ {
+ cleanup_aux_field ();
+ return 0;
+ }
+
+ if (decide_cloning_phase ())
+ perform_cloning_phase ();
+
+ cleanup_aux_field ();
+
+ return 0;
+}
+
+static bool
+gate_handle_builtin_dispatch (void)
+{
+ return true;
+}
+
+struct simple_ipa_opt_pass pass_ipa_multiversion_dispatch =
+{
+ {
+ SIMPLE_IPA_PASS,
+ "multiversion_dispatch", /* name */
+ gate_handle_builtin_dispatch, /* gate */
+ builtin_dispatch_ipa_clone, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_MVERSN_DISPATCH, /* tv_id */
+ 0, /* properties_required */
+ PROP_cfg, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | /* todo_flags_finish */
+ TODO_update_ssa
+ }
+};
+
+/* Lowering of the __builtin_dispatch calls. */
+
+
+/* This function converts STMT which is a __builtin_dispatch
+ call of the form :
+ ret = __builtin_dispatch (predicate, foo, bar, arg1, ...)
+ into :
+ var_1 = predicate
+ if (var_1)
+ var_2 = foo (arg1, ...);
+ else
+ var_3 = bar (arg1, ...);
+ var_4 = phi (var_2, var_3)
+ ret = var_4
+
+ var_? are ssa names for variable var.
+*/
+
+static unsigned int
+convert_builtin_dispatch (gimple stmt)
+{
+ tree cond_func_addr, if_func_addr, else_func_addr;
+ tree cond_func_decl = NULL;
+ gimple if_part, else_part, if_else_stmt;
+ basic_block bb1, bb2, bb3, bb4;
+ gimple bb1end, bb2end, bb3end;
+ edge e12, e13, e23, e24, e34;
+ VEC(tree, heap) *nargs = NULL;
+ int num_func_args = 0, i;
+ tree version_selector_var;
+ tree lhs_result;
+ gimple_stmt_iterator gsi;
+ basic_block bb;
+ gimple feature_test_call = NULL;
+ tree tmp_var = NULL;
+ gimple init_stmt = NULL;
+ tree ssa_if_name, ssa_else_name;
+ gimple phinode = NULL;
+ tree tmp_result_var, ssa_result_var;
+
+ gsi = gsi_for_stmt (stmt);
+ bb = gsi_bb (gsi);
+
+ cond_func_addr = find_version_selector_func_addr (stmt);
+ if (cond_func_addr != NULL)
+ {
+ cond_func_decl = TREE_OPERAND (cond_func_addr, 0);
+ gcc_assert (cond_func_decl);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Before Converting __builtin_dispatch :\n");
+ dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
+ }
+
+ if_func_addr = gimple_call_arg (stmt, 1);
+ else_func_addr = gimple_call_arg (stmt, 2);
+
+ tmp_result_var = create_tmp_var (integer_type_node, NULL);
+ add_referenced_var (tmp_result_var);
+
+ if (flag_lto
+ || cond_func_decl == NULL
+ || lookup_attribute ("version_selector",
+ DECL_ATTRIBUTES (cond_func_decl)) == NULL)
+ {
+ tree arg = gimple_call_arg (stmt, 0);
+ /* This means the feature-test function is not set with attribute
+ version_selector or it is a function pointer or in LTO. So,
+ explicitly call it. */
+ feature_test_call = gimple_build_call (arg, 0);
+ ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
+ gimple_call_set_lhs (feature_test_call, ssa_result_var);
+ mark_symbols_for_renaming (feature_test_call);
+ version_selector_var = ssa_result_var;
+ }
+ else
+ {
+ /* Get the global corresponding to the "version_selector" function. */
+ version_selector_var
+ = handle_version_selector_attr_function (cond_func_decl);
+ gcc_assert (version_selector_var);
+ add_referenced_var (version_selector_var);
+ feature_test_call = gimple_build_assign (tmp_result_var,
+ version_selector_var);
+ ssa_result_var = make_ssa_name (tmp_result_var, feature_test_call);
+ gimple_assign_set_lhs (feature_test_call, ssa_result_var);
+ mark_symbols_for_renaming (feature_test_call);
+ version_selector_var = ssa_result_var;
+ }
+
+ if_else_stmt = gimple_build_cond (GT_EXPR,
+ version_selector_var,
+ integer_zero_node,
+ NULL_TREE, NULL_TREE);
+
+ mark_symbols_for_renaming (if_else_stmt);
+
+ num_func_args = function_args_count (
+ TREE_TYPE (TREE_OPERAND (if_func_addr, 0)));
+
+ nargs = VEC_alloc (tree, heap, num_func_args);
+
+ /* The arguments to the feature test function start from the 4th argument
+ in __builtin_dispatch. The first 3 arguments are mandatory. */
+
+ for (i = 0; i <= num_func_args - 2; ++i)
+ VEC_quick_push (tree, nargs,
+ gimple_call_arg (stmt, (3 + i)));
+
+ if_part = gimple_build_call_vec (if_func_addr, nargs);
+ else_part = gimple_build_call_vec (else_func_addr, nargs);
+
+ lhs_result = gimple_call_lhs (stmt);
+
+ if (lhs_result != NULL_TREE)
+ {
+ tree ssa_var;
+ tree return_type;
+ return_type = TREE_TYPE (lhs_result);
+ tmp_var = create_tmp_var (return_type, NULL);
+ add_referenced_var (tmp_var);
+
+ init_stmt = gimple_build_assign (tmp_var, build_zero_cst (return_type));
+ ssa_var = make_ssa_name (tmp_var, init_stmt);
+ gimple_assign_set_lhs (init_stmt, ssa_var);
+ mark_symbols_for_renaming (init_stmt);
+
+ ssa_if_name = make_ssa_name (tmp_var, init_stmt);
+ ssa_else_name = make_ssa_name (tmp_var, init_stmt);
+ gimple_call_set_lhs (if_part, ssa_if_name);
+ gimple_call_set_lhs (else_part, ssa_else_name);
+ }
+ mark_symbols_for_renaming (if_part);
+ mark_symbols_for_renaming (else_part);
+
+ /* Set the lexical block to be the same as the dispatch call. */
+ gcc_assert (feature_test_call);
+ gimple_set_block (feature_test_call, gimple_block (stmt));
+
+ if (init_stmt)
+ gimple_set_block (init_stmt, gimple_block (stmt));
+
+ gimple_set_block (if_else_stmt, gimple_block (stmt));
+ gimple_set_block (if_part, gimple_block (stmt));
+ gimple_set_block (else_part, gimple_block (stmt));
+
+ gsi_insert_before_without_update (&gsi, feature_test_call, GSI_SAME_STMT);
+ gimple_set_bb (feature_test_call, bb);
+
+ if (init_stmt)
+ {
+ gsi_insert_before_without_update (&gsi, init_stmt,
+ GSI_SAME_STMT);
+ gimple_set_bb (init_stmt, bb);
+ }
+
+ gsi_insert_before_without_update (&gsi, if_else_stmt, GSI_SAME_STMT);
+ gsi_insert_before_without_update (&gsi, if_part, GSI_SAME_STMT);
+ gsi_insert_before_without_update (&gsi, else_part, GSI_SAME_STMT);
+
+ /* Remove the builtin_dispatch call after the expansion. */
+ unlink_stmt_vdef (stmt);
+ gsi_remove (&gsi, true);
+
+ bb1end = if_else_stmt;
+ bb2end = if_part;
+ bb3end = else_part;
+ bb1 = bb;
+ e12 = split_block (bb1, bb1end);
+ bb2 = e12->dest;
+ e23 = split_block (bb2, bb2end);
+ bb3 = e23->dest;
+ e34 = split_block (bb3, bb3end);
+ bb4 = e34->dest;
+
+ e12->flags &= ~EDGE_FALLTHRU;
+ e12->flags |= EDGE_TRUE_VALUE;
+ e13 = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+ gcc_assert (e13);
+ e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
+ gcc_assert (e24);
+ remove_edge (e23);
+
+ if (tmp_var)
+ {
+ gimple assign_stmt;
+ phinode = create_phi_node (tmp_var, bb4);
+ add_phi_arg (phinode, ssa_if_name, e24, UNKNOWN_LOCATION);
+ add_phi_arg (phinode, ssa_else_name, e34, UNKNOWN_LOCATION);
+ mark_symbols_for_renaming (phinode);
+ gcc_assert (lhs_result);
+ assign_stmt
+ = gimple_build_assign (lhs_result, gimple_phi_result (phinode));
+ mark_symbols_for_renaming (assign_stmt);
+ gsi = gsi_start_bb (bb4);
+ gsi_insert_before_without_update (&gsi, assign_stmt, GSI_SAME_STMT);
+ gimple_set_bb (assign_stmt, bb4);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Converted __builtin_dispatch :\n");
+ dump_function_to_file (current_function_decl, dump_file, TDF_BLOCKS);
+ }
+
+ return 0;
+}
+
+/* This function does two things.
+
+ 1) For a feature-test function marked with attribute "version_selector",
+ it creates a constructor that calls the feature-test function and a
+ global that holds the result. The global's result will be used
+ to lower any __builtin_dispatch statement that refers to this feature
+ test function. The __builtin_dispatch statement and the feature test
+ function can be in different modules.
+
+ 2) It lowers __builtin_dispatch statements. */
+
+static unsigned int
+do_convert_builtin_dispatch (void)
+{
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+ VEC (gimple, heap) *builtin_stmt_list = NULL;
+ int ix;
+ gimple builtin_stmt;
+
+ /* Allocate hashtab mapping name to decl. */
+ if (name_decl_htab == NULL)
+ name_decl_htab = htab_create (10, name_decl_htab_hash_descriptor,
+ name_decl_htab_eq_descriptor, NULL);
+
+ /* Look for functions with attribute "version_selector" and make a
+ constructor which calls the function and saves the result in a
+ global. Disabled for LTO as it needs more work. */
+
+ if (!flag_lto
+ && lookup_attribute ("version_selector",
+ DECL_ATTRIBUTES (current_function_decl)) != NULL)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Function with version_selector attribute found :"
+ " %s. Making constructor for it.\n",
+ current_function_name ());
+
+ handle_version_selector_attr_function (current_function_decl);
+ /* Assume there are no __builtin_dispatch calls in feature test
+ functions. So it is safe to return. */
+ return 0;
+ }
+
+ /* Find and lower __builtin_dispatch calls. */
+
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (current_function_decl))
+ {
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ tree call_decl;
+
+ if (!is_gimple_call (stmt))
+ continue;
+
+ call_decl = gimple_call_fndecl (stmt);
+
+ if (call_decl == NULL)
+ continue;
+
+ if (DECL_NAME (call_decl) == NULL_TREE)
+ continue;
+
+ if (strstr (IDENTIFIER_POINTER (DECL_NAME (call_decl)),
+ "__builtin_dispatch") == NULL)
+ continue;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Converting __builtin_dispatch stmt in:%s\n",
+ current_function_name ());
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS);
+ }
+
+ if (builtin_stmt_list == NULL)
+ builtin_stmt_list = VEC_alloc (gimple, heap, 2);
+
+ gcc_assert (builtin_stmt_list != NULL);
+ VEC_safe_push (gimple, heap, builtin_stmt_list, stmt);
+ }
+ }
+
+ if (!builtin_stmt_list)
+ return 0;
+
+ for (ix = 0; VEC_iterate (gimple, builtin_stmt_list, ix, builtin_stmt);
+ ++ix)
+ convert_builtin_dispatch (builtin_stmt);
+
+ compute_inline_parameters (cgraph_get_create_node (current_function_decl),
+ false);
+
+ VEC_free (gimple, heap, builtin_stmt_list);
+
+ return 0;
+}
+
+static bool
+gate_convert_builtin_dispatch (void)
+{
+ return true;
+}
+
+struct gimple_opt_pass pass_tree_convert_builtin_dispatch =
+{
+ {
+ GIMPLE_PASS,
+ "convert_builtin_dispatch", /* name */
+ gate_convert_builtin_dispatch, /* gate */
+ do_convert_builtin_dispatch, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_MVERSN_DISPATCH, /* tv_id */
+ PROP_cfg, /* properties_required */
+ PROP_cfg, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | /* todo_flags_finish */
+ TODO_cleanup_cfg | TODO_dump_cgraph |
+ TODO_update_ssa | TODO_verify_ssa
+ }
+};