diff options
Diffstat (limited to 'gcc-4.4.3/gcc/tree-profile.c')
-rw-r--r-- | gcc-4.4.3/gcc/tree-profile.c | 757 |
1 files changed, 745 insertions, 12 deletions
diff --git a/gcc-4.4.3/gcc/tree-profile.c b/gcc-4.4.3/gcc/tree-profile.c index cebf93cab..75fc515b4 100644 --- a/gcc-4.4.3/gcc/tree-profile.c +++ b/gcc-4.4.3/gcc/tree-profile.c @@ -48,10 +48,20 @@ along with GCC; see the file COPYING3. If not see #include "ggc.h" #include "cgraph.h" #include "target.h" +#include "params.h" +#include "profile.h" + +/* Number of statements inserted for each edge counter increment. */ +#define EDGE_COUNTER_STMT_COUNT 3 + +/* When -D__KERNEL__ is in the option list, we assume this is a + compilation for the Linux kernel. */ +bool is_kernel_build; static GTY(()) tree gcov_type_node; static GTY(()) tree gcov_type_tmp_var; static GTY(()) tree tree_interval_profiler_fn; +static GTY(()) tree tree_one_float_value_profiler_fn; static GTY(()) tree tree_pow2_profiler_fn; static GTY(()) tree tree_one_value_profiler_fn; static GTY(()) tree tree_indirect_call_profiler_fn; @@ -94,7 +104,7 @@ tree_init_ic_make_global_vars (void) ptr_void); TREE_PUBLIC (ic_void_ptr_var) = 1; DECL_EXTERNAL (ic_void_ptr_var) = 1; - if (targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (ic_void_ptr_var) = decl_default_tls_model (ic_void_ptr_var); @@ -107,7 +117,7 @@ tree_init_ic_make_global_vars (void) gcov_type_ptr); TREE_PUBLIC (ic_gcov_type_ptr_var) = 1; DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1; - if (targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (ic_gcov_type_ptr_var) = decl_default_tls_model (ic_gcov_type_ptr_var); @@ -122,7 +132,7 @@ tree_init_ic_make_global_vars (void) TREE_STATIC (ic_void_ptr_var) = 1; TREE_PUBLIC (ic_void_ptr_var) = 0; DECL_INITIAL (ic_void_ptr_var) = NULL; - if (targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (ic_void_ptr_var) = decl_default_tls_model (ic_void_ptr_var); @@ -136,7 +146,7 @@ tree_init_ic_make_global_vars (void) TREE_STATIC (ic_gcov_type_ptr_var) = 1; TREE_PUBLIC (ic_gcov_type_ptr_var) = 0; DECL_INITIAL (ic_gcov_type_ptr_var) = NULL; - if 
(targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (ic_gcov_type_ptr_var) = decl_default_tls_model (ic_gcov_type_ptr_var); @@ -149,12 +159,181 @@ tree_init_ic_make_global_vars (void) assemble_variable (ic_gcov_type_ptr_var, 0, 0, 0); } +/* A set of the first statement in each block of statements that need to + be wrapped in a sampling check. */ +static htab_t instrumentation_to_be_sampled = NULL; + +/* extern __thread gcov_unsigned_t __gcov_sample_counter */ +static tree gcov_sample_counter_decl = NULL_TREE; + +/* extern gcov_unsigned_t __gcov_sampling_rate */ +static tree gcov_sampling_rate_decl = NULL_TREE; + +/* Forward declaration. */ +void tree_init_instrumentation_sampling (void); + +/* Insert STMT_IF around given sequence of consecutive statements in the + same basic block starting with STMT_START, ending with STMT_END. + STMT_IF is inserted immediately before STMT_START. The block is then + split after STMT_IF and again after STMT_END; the edge into the guarded + statements is marked EDGE_TRUE_VALUE and a new EDGE_FALSE_VALUE edge + is added from the block holding STMT_IF to the block after STMT_END. */ + +static void +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if) +{ + gimple_stmt_iterator gsi; + basic_block bb_original, bb_before_if, bb_then, bb_after_if; + edge e_if_taken, e_then_join, e_if_not_taken; + + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before_without_update (&gsi, stmt_if, GSI_SAME_STMT); + bb_original = gsi_bb (gsi); + /* First split: the statements after STMT_IF go to a new block. */ e_if_taken = split_block (bb_original, stmt_if); + /* The edge produced by the split becomes the "condition true" edge. */ e_if_taken->flags &= ~EDGE_FALLTHRU; + e_if_taken->flags |= EDGE_TRUE_VALUE; + /* Second split: the statements after STMT_END go to the join block. */ e_then_join = split_block (e_if_taken->dest, stmt_end); + bb_before_if = e_if_taken->src; + bb_then = e_if_taken->dest; + bb_after_if = e_then_join->dest; + /* The false edge bypasses the guarded block. BB_THEN and E_IF_NOT_TAKEN are assigned but not read again in this function. */ e_if_not_taken = make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE); +} + +/* Transform: + + ORIGINAL CODE + + Into: + + __gcov_sample_counter++; + if (__gcov_sample_counter >= __gcov_sampling_rate) + { + __gcov_sample_counter = 0; + ORIGINAL CODE + } + + The original code block starts with STMT_START, is made of STMT_COUNT + consecutive statements in the same basic block.
*/ + +static void +add_sampling_wrapper (gimple stmt_start, int stmt_count) +{ + int i; + tree zero, one, tmp_var; + gimple stmt_block_end; + gimple stmt_inc_counter1, stmt_inc_counter2, stmt_inc_counter3; + gimple stmt_reset_counter, stmt_if; + gimple_stmt_iterator gsi; + + tmp_var = create_tmp_var (get_gcov_unsigned_t (), "PROF_sample_counter"); + + /* Create all the new statements needed. */ + stmt_inc_counter1 = gimple_build_assign (tmp_var, gcov_sample_counter_decl); + one = build_int_cst (get_gcov_unsigned_t (), 1); + stmt_inc_counter2 = gimple_build_assign_with_ops ( + PLUS_EXPR, tmp_var, tmp_var, one); + stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp_var); + zero = build_int_cst (get_gcov_unsigned_t (), 0); + stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero); + stmt_if = gimple_build_cond (GE_EXPR, tmp_var, gcov_sampling_rate_decl, + NULL_TREE, NULL_TREE); + + /* Insert them for now in the original basic block. */ + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before_without_update (&gsi, stmt_inc_counter1, GSI_SAME_STMT); + gsi_insert_before_without_update (&gsi, stmt_inc_counter2, GSI_SAME_STMT); + gsi_insert_before_without_update (&gsi, stmt_inc_counter3, GSI_SAME_STMT); + gsi_insert_before_without_update (&gsi, stmt_reset_counter, GSI_SAME_STMT); + + /* Move to last statement. */ + for (i = 0; i < stmt_count - 1; i++) + gsi_next (&gsi); + + stmt_block_end = gsi_stmt (gsi); + gcc_assert (stmt_block_end); + + /* Insert IF block. */ + insert_if_then (stmt_reset_counter, stmt_block_end, stmt_if); +} + +/* Return whether STMT is the beginning of an instrumentation block to be + applied sampling. */ + +static bool +is_instrumentation_to_be_sampled (gimple stmt) +{ + return (htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt, + htab_hash_pointer (stmt), NO_INSERT) + != NULL); +} + +/* Add sampling wrappers around edge counter code in current function. 
*/
+
+void
+add_sampling_to_edge_counters (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  /* The set is only created by tree_init_instrumentation_sampling when
+     sampling is enabled.  Check it before the loop, because the lookup
+     below passes it to the hash table routines.  */
+  if (!instrumentation_to_be_sampled)
+    return;
+
+  FOR_EACH_BB_REVERSE (bb)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      {
+        gimple stmt = gsi_stmt (gsi);
+        if (is_instrumentation_to_be_sampled (stmt))
+          {
+            add_sampling_wrapper (stmt, EDGE_COUNTER_STMT_COUNT);
+            /* The wrapper splits this block, so stop scanning it.  */
+            break;
+          }
+      }
+
+  /* Empty the set of statements performing the edge counter increment.  */
+  htab_empty (instrumentation_to_be_sampled);
+}
+
+/* Create the declarations used by sampled instrumentation.
+   __gcov_sampling_rate is defined on the first call.  When
+   flag_profile_generate_sampling is set, the set of statements to be
+   sampled and the external __gcov_sample_counter declaration are created
+   as well, also only once.  */
+
+void
+tree_init_instrumentation_sampling (void)
+{
+  if (!gcov_sampling_rate_decl)
+    {
+      /* Define __gcov_sampling_rate regardless of
+         -fprofile-generate-sampling.  Otherwise the extern reference to
+         it from libgcov becomes unmatched.  */
+      gcov_sampling_rate_decl = build_decl (
+          VAR_DECL,
+          get_identifier ("__gcov_sampling_rate"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sampling_rate_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sampling_rate_decl) = 1;
+      DECL_ONE_ONLY (gcov_sampling_rate_decl) = 1;
+      TREE_STATIC (gcov_sampling_rate_decl) = 1;
+      /* The initial value comes from the sampling-rate parameter.  */
+      DECL_INITIAL (gcov_sampling_rate_decl) = build_int_cst (
+          get_gcov_unsigned_t (),
+          PARAM_VALUE (PARAM_PROFILE_GENERATE_SAMPLING_RATE));
+      assemble_variable (gcov_sampling_rate_decl, 0, 0, 0);
+    }
+
+  if (flag_profile_generate_sampling && !instrumentation_to_be_sampled)
+    {
+      instrumentation_to_be_sampled = htab_create (100, htab_hash_pointer,
+                                                   htab_eq_pointer, NULL);
+      gcov_sample_counter_decl = build_decl (
+          VAR_DECL,
+          get_identifier ("__gcov_sample_counter"),
+          get_gcov_unsigned_t ());
+      TREE_PUBLIC (gcov_sample_counter_decl) = 1;
+      DECL_EXTERNAL (gcov_sample_counter_decl) = 1;
+      DECL_ARTIFICIAL (gcov_sample_counter_decl) = 1;
+      /* The counter is thread-local where TLS is available, except in
+         kernel builds.  */
+      if (targetm.have_tls && !is_kernel_build)
+        DECL_TLS_MODEL (gcov_sample_counter_decl) =
+          decl_default_tls_model (gcov_sample_counter_decl);
+      assemble_variable (gcov_sample_counter_decl, 0, 0, 0);
+    }
+}
+
 static void
 tree_init_edge_profiler (void)
 {
   tree interval_profiler_fn_type;
tree pow2_profiler_fn_type; tree one_value_profiler_fn_type; + tree one_float_value_profiler_fn_type; tree gcov_type_ptr; tree ic_profiler_fn_type; tree ic_topn_profiler_fn_type; @@ -199,8 +378,17 @@ tree_init_edge_profiler (void) = build_fn_decl ("__gcov_one_value_profiler", one_value_profiler_fn_type); + /* void (*) (gcov_type *, gcov_float_t) NOTE(review): the second parameter type built below is gcov_type_node, not a float type -- confirm this matches the runtime's __gcov_one_float_value_profiler. */ + one_float_value_profiler_fn_type + = build_function_type_list (void_type_node, + gcov_type_ptr, gcov_type_node, + NULL_TREE); + tree_one_float_value_profiler_fn + = build_fn_decl ("__gcov_one_float_value_profiler", + one_float_value_profiler_fn_type); + tree_init_ic_make_global_vars (); - + /* void (*) (gcov_type *, gcov_type, void *, void *) */ ic_profiler_fn_type = build_function_type_list (void_type_node, @@ -261,6 +449,7 @@ add_abnormal_goto_call_edges (gimple_stmt_iterator gsi) static void tree_gen_edge_profiler (int edgeno, edge e) { + void** slot; tree ref, one; gimple stmt1, stmt2, stmt3; @@ -274,6 +463,15 @@ tree_gen_edge_profiler (int edgeno, edge e) stmt2 = gimple_build_assign_with_ops (PLUS_EXPR, gcov_type_tmp_var, gcov_type_tmp_var, one); stmt3 = gimple_build_assign (unshare_expr (ref), gcov_type_tmp_var); + + /* Record the first of the three counter-update statements in the set that add_sampling_to_edge_counters consults; each statement may be recorded only once. */ if (flag_profile_generate_sampling) + { + slot = htab_find_slot_with_hash (instrumentation_to_be_sampled, stmt1, + htab_hash_pointer (stmt1), INSERT); + gcc_assert (!*slot); + *slot = stmt1; + } + gsi_insert_on_edge (e, stmt1); gsi_insert_on_edge (e, stmt2); gsi_insert_on_edge (e, stmt3); @@ -350,7 +548,7 @@ tree_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base) tree ref_ptr = tree_coverage_counter_addr (tag, base); gimple call; tree val; - + ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, true, NULL_TREE, true, GSI_SAME_STMT); val = prepare_instrumented_value (&gsi, value); @@ -359,6 +557,33 @@ tree_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base) add_abnormal_goto_call_edges (gsi); } +/* Output instructions as GIMPLE trees for code to find the
most common value. + VALUE is the expression whose value is profiled. TAG is the tag of the + section for counters, BASE is offset of the counter position. */ + +static void +tree_gen_one_float_value_profiler (histogram_value value, unsigned tag, + unsigned base) +{ + gimple stmt = value->hvalue.stmt; + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + tree ref_ptr = tree_coverage_counter_addr (tag, base); + gimple call; + tree val; + + /* Turn the counter address into a valid GIMPLE operand, emitting any needed statements before STMT. */ ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, + true, NULL_TREE, true, GSI_SAME_STMT); + + /* Convert the profiled value to the gcov float type before handing it to the profiler. */ val = force_gimple_operand_gsi (&gsi, fold_convert (get_gcov_float_t (), + value->hvalue.value), + true, NULL_TREE, true, GSI_SAME_STMT); + + /* Call __gcov_one_float_value_profiler (counter address, value) ahead of the profiled statement. */ call = gimple_build_call (tree_one_float_value_profiler_fn, 2, ref_ptr, + val); + gsi_insert_before (&gsi, call, GSI_NEW_STMT); + add_abnormal_goto_call_edges (gsi); +} + /* Output instructions as GIMPLE trees for code to find the most common called function in indirect call. @@ -450,7 +675,7 @@ tree_gen_ic_func_profiler (void) gsi = gsi_start_bb (bb); /* Set __gcov_indirect_call_callee to 0, so that calls from other modules won't get misattributed - to the last caller of the current callee. */ + to the last caller of the current callee. */ void0 = build_int_cst (build_pointer_type (void_type_node), 0); stmt2 = gimple_build_assign (ic_void_ptr_var, void0); gsi_insert_after (&gsi, stmt2, GSI_NEW_STMT); @@ -524,7 +749,7 @@ tree_gen_dc_profiler (unsigned base, gimple call_stmt) gimple stmt1, stmt2, stmt3; gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt); tree tmp1, tmp2, tmp3, callee = gimple_call_fn (call_stmt); - + /* Insert code: __gcov_direct_call_counters = get_relevant_counter_ptr (); __gcov_callee = (void *) callee; @@ -650,6 +875,509 @@ tree_gen_ior_profiler (histogram_value value, unsigned tag, unsigned base) add_abnormal_goto_call_edges (gsi); } +/* String operation substitution record. For each operation, e.g., memcpy, + we keep up to four declarations, e.g., libopt__memcpy__{0,1,2,3}.
+ They correspond to memcpy versions in which memory access is nontemporal + in neither, first, second or both arguments (dst, src) respectively. */ + +struct stringop_subst +{ + const char* original_name; /* E.g., "memcpy". */ + int num_args; /* Number of args, 3 for memcpy. */ + int num_ptr_args; /* Number of pointer args, 2 for memcpy. */ + tree instr_fun; /* E.g., declaration of instrument_memcpy. */ + tree nt_ops[4]; /* E.g., libopt__memcpy__{0,1,2,3}. */ +}; +typedef struct stringop_subst* stringop_subst_t; + +/* Substitution database. XXX: switch to hash table. */ + +static struct stringop_subst stringop_decl[] = +{ + {"memcpy", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"memset", 3, 1, NULL, {NULL, NULL, NULL, NULL}}, + {"memmove", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"memcmp", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"bcmp", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strlen", 1, 1, NULL, {NULL, NULL, NULL, NULL}}, + {"strcpy", 2, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strncpy", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strcat", 2, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strncat", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strdup", 1, 1, NULL, {NULL, NULL, NULL, NULL}}, + {"strndup", 2, 1, NULL, {NULL, NULL, NULL, NULL}}, + {"strcmp", 2, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strncmp", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strcasecmp", 2, 2, NULL, {NULL, NULL, NULL, NULL}}, + {"strncasecmp", 3, 2, NULL, {NULL, NULL, NULL, NULL}}, + {NULL, 0, 0, NULL, {NULL, NULL, NULL, NULL}} +}; + +/* Get the corresponding element in STRINGOP_DECL for NAME. */ + +static stringop_subst_t +get_stringop_subst (const char* name) +{ + stringop_subst_t it; + for (it = stringop_decl; it->original_name; it++) + if (strcmp (name, it->original_name) == 0) + return it; + return 0; +} + +/* Return the matching substitution if call site STMT is worth replacing. 
*/ + +static stringop_subst_t +reusedist_is_interesting_call (gimple stmt) +{ + tree fndecl, name; + + if (gimple_code (stmt) != GIMPLE_CALL) + return 0; + + fndecl = gimple_call_fndecl (stmt); + + if (fndecl == NULL_TREE) + return 0; + + name = DECL_NAME (fndecl); + + if (name == NULL_TREE) + return 0; + + return get_stringop_subst (IDENTIFIER_POINTER (name)); +} + +/* Make up an instrumentation function name for string operation OP. */ + +static void +reusedist_instr_func_name (const char* op, char result[], int size) +{ + int written; + + written = snprintf (result, size, "reusedist_instr_%s", op); + + gcc_assert (written < size); +} + +/* Create a declaration for an instr. function if not already done. + Use TEMPLATE_STMT to figure out argument types. */ + +static tree +reusedist_get_instr_decl (gimple template_stmt, stringop_subst_t subst) +{ + if (!subst->instr_fun) + { + tree args; + char name[64]; + + if (!ptr_void) + ptr_void = build_pointer_type (void_type_node); + + reusedist_instr_func_name (subst->original_name, name, 64); + + switch (subst->num_args) + { + case 1: + args = build_function_type_list ( + void_type_node, ptr_void, + TREE_TYPE (gimple_call_arg (template_stmt, 0)), + NULL_TREE); + break; + case 2: + args = build_function_type_list ( + void_type_node, ptr_void, + TREE_TYPE (gimple_call_arg (template_stmt, 0)), + TREE_TYPE (gimple_call_arg (template_stmt, 1)), + NULL_TREE); + break; + case 3: + args = build_function_type_list ( + void_type_node, ptr_void, + TREE_TYPE (gimple_call_arg (template_stmt, 0)), + TREE_TYPE (gimple_call_arg (template_stmt, 1)), + TREE_TYPE (gimple_call_arg (template_stmt, 2)), + NULL_TREE); + break; + default: + gcc_assert (false); + } + subst->instr_fun = build_fn_decl (name, args); + } + + return subst->instr_fun; +} + +/* Return call to instrumentation function for string op call site STMT. + Given a call to memcpy (dst, src, len), it will return a call to + reusedist_instrument_memcpy (counters, dst, src, len). 
*/ + +static gimple +reusedist_make_instr_call (gimple stmt, stringop_subst_t subst, tree counters) +{ + tree profiler_fn; + + if (!subst) + return 0; + + profiler_fn = reusedist_get_instr_decl (stmt, subst); + + switch (subst->num_args) + { + case 1: + return gimple_build_call (profiler_fn, 1 + subst->num_args, counters, + gimple_call_arg (stmt, 0)); + case 2: + return gimple_build_call (profiler_fn, 1 + subst->num_args, counters, + gimple_call_arg (stmt, 0), + gimple_call_arg (stmt, 1)); + case 3: + return gimple_build_call (profiler_fn, 1 + subst->num_args, counters, + gimple_call_arg (stmt, 0), + gimple_call_arg (stmt, 1), + gimple_call_arg (stmt, 2)); + default: + gcc_assert (false); + } +} + +/* Reuse distance information for a single memory block at a single site. + For some operations, such as memcpy, there will be two such descriptors, + one of the source and one for the destination. + We're keeping the average reuse distance + (e.g., distance from a MEMCPY call until the memory written is first used). + We're also keeping the average operation size (e.g., memcpy size). + These averages are measured over all dynamic invocations of the same + static site. We're also storing the dynamic operation count. + + We're also keeping a measure named dist_x_size, which is the sum of + products (distance * size) across all dynamic instances. This is meant + to account for some information loss through aggregation. For instance, + consider two scenarios. + A: 50% of operations have large reuse distance but are very short. + 50% of operations have short reuse distance but are very long. + B: 50% of operations have large reuse distance and are large. + 50% of operations have short reuse distance and are short. + Without the dist_x_size measure, these scenarios can't be told apart + from the other three measures. With the dist_x_size measure, scenario B + will look like a better candidate. */ + +struct reusedist_t { + gcov_type mean_dist; /* Average reuse distance. 
*/ + gcov_type mean_size; /* Average size of memory referenced. */ + gcov_type count; /* Operation count. */ + gcov_type dist_x_size; /* Sum of (distance * size >> 12) across all ops. */ +}; + +typedef struct reusedist_t reusedist_t; + +/* Number of gcov counters for one reuse distance measurement. */ + +const int RD_NUM_COUNTERS = sizeof(reusedist_t) / sizeof(gcov_type); + +/* Initialize RD from gcov COUNTERS. */ + +static void +reusedist_from_counters (const gcov_type* counters, + reusedist_t* rd) +{ + memcpy (rd, counters, RD_NUM_COUNTERS * sizeof (gcov_type)); +} + +/* Instrument current function to collect reuse distance for string ops. + The heavy lifting is done by an external library. The interface + to this library is functions like this: + + void reusedist_instr_memcpy(gcov_type *counters, + void *dst, void *src, size_t len); + + This function will measure the reuse distance for the given operations + DST with offset LEN, and store values in COUNTERS for one or two pointer + arguments. E.g., for memcpy 2 * RD_NUM_COUNTERS counters will be set, + first RD_NUM_COUNTERS for DST and last RD_NUM_COUNTERS for SRC. + For strlen, only RD_NUM_COUNTERS counters will be allocated thus the + runtime is expected to set only RD_NUM_COUNTERS counters. + The counters will record: + - mean reuse distance + - mean operation size + - call count + - sum(reuse distance * operation size) across all calls + To avoid overflow, each product is first scaled down by a factor of 2^12. + + All reuse distance measurements for dynamic executions of the same static + string operation will be aggregated into a single set of counters. + The reuse distance library uses the passed COUNTERS pointer as index + in its internal tables. 
*/ + +static void +tree_gen_reusedist (void) +{ + basic_block bb; + gimple_stmt_iterator gsi; + + if (DECL_STATIC_CONSTRUCTOR (current_function_decl)) + return; + + tree_init_edge_profiler (); + + FOR_EACH_BB (bb) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + stringop_subst_t subst = reusedist_is_interesting_call (stmt); + + if (subst + && coverage_counter_alloc ( + GCOV_COUNTER_REUSE_DIST, + subst->num_ptr_args * RD_NUM_COUNTERS)) + { + location_t locus; + tree counters = tree_coverage_counter_addr ( + GCOV_COUNTER_REUSE_DIST, 0); + + counters = force_gimple_operand_gsi ( + &gsi, counters, true, NULL_TREE, true, GSI_SAME_STMT); + + gsi_insert_after ( + &gsi, + reusedist_make_instr_call (stmt, subst, counters), + GSI_NEW_STMT); + + locus = (stmt != NULL) + ? gimple_location (stmt) + : DECL_SOURCE_LOCATION (current_function_decl); + inform (locus, + "inserted reuse distance instrumentation for %qs, using " + "%d gcov counters", subst->original_name, + subst->num_ptr_args * RD_NUM_COUNTERS); + } + } +} + +/* Make up a nontemporal substitution name, e.g., "libopt__memcpy__3". */ + +static void +nt_op_name (const char* name, int suffix, char result[], int size) +{ + int written; + + written = snprintf (result, size, "libopt__%s__%d", name, suffix); + + gcc_assert (written < size); +} + +/* Get size threshold for reusedist substitution decisions. */ + +static gcov_type +reusedist_get_size_threshold (const char* name) +{ + if (!strcmp (name, "memcpy")) + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMCPY_SIZE_THRESH); + + if (!strcmp (name, "memset")) + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMSET_SIZE_THRESH); + + /* Use memcpy threshold as default for unspecified operations. */ + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEMCPY_SIZE_THRESH); +} + +/* Get distance threshold for reusedist substitution decisions. 
*/ + +static gcov_type +reusedist_get_distance_large_threshold (void) +{ + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEAN_DIST_LARGE_THRESH); +} + +/* Get distance threshold for reusedist substitution decisions. */ + +static gcov_type +reusedist_get_distance_small_threshold (void) +{ + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_MEAN_DIST_SMALL_THRESH); +} + +/* Get call count threshold for reusedist substitution decisions. */ + +static gcov_type +reusedist_get_count_threshold (void) +{ + return (gcov_type)PARAM_VALUE (PARAM_REUSEDIST_CALL_COUNT_THRESH); +} + +/* Return whether switching to nontemporal string operation is worth it. + NAME is the function name, such as "memcpy". + COUNTERS is a pointer to gcov counters for this operation site. + Return 1 if worth it, -1 if not worth it and 0 if not sure. */ + +static int +reusedist_nt_is_worth_it (const char* name, const gcov_type* counters) +{ + reusedist_t rd; + + reusedist_from_counters (counters, &rd); + + /* XXX: Need to add check for dist_x_size. */ + + if (rd.mean_size < reusedist_get_size_threshold (name) + || rd.count < reusedist_get_count_threshold ()) + /* If the size of the operation is small, don't substitute. */ + return 0; + + if (rd.mean_dist >= reusedist_get_distance_large_threshold ()) + /* Enforce non-temporal. */ + return 1; + else if (rd.mean_dist <= reusedist_get_distance_small_threshold ()) + /* Enforce temporal. */ + return -1; + else + /* Not conclusive. */ + return 0; +} + +/* Create a declaration for a nontemporal version if not already done. + INDEX is the index of the version in list [first, second, both]. 
*/ + +static tree +reusedist_get_nt_decl (tree template_decl, stringop_subst_t subst, int index) +{ + if (!subst->nt_ops[index]) + { + char nt_name[256]; + nt_op_name (subst->original_name, index, nt_name, 256); + subst->nt_ops[index] = build_fn_decl (nt_name, + TREE_TYPE (template_decl)); + } + + return subst->nt_ops[index]; +} + +/* Issue notes with reuse distance values in COUNTERS for given ARG. */ + +static void +maybe_issue_profile_use_note (location_t locus, gcov_type* counters, int arg) +{ + reusedist_t rd; + + reusedist_from_counters (counters, &rd); + + if (rd.count) + inform (locus, "reuse distance counters for arg %d: %lld %lld %lld %lld", + arg, (long long int)rd.mean_dist, (long long int)rd.mean_size, + (long long int)rd.count, (long long int)rd.dist_x_size); +} + +/* Substitute with nontemporal version when profitable. */ + +static void +reusedist_maybe_replace_with_nt_version (gimple stmt, + gcov_type* counters, + stringop_subst_t subst) +{ + int first, second, suffix; + tree subst_decl; + const char* name = subst->original_name; + location_t locus; + + locus = (stmt != NULL) + ? gimple_location (stmt) + : DECL_SOURCE_LOCATION (current_function_decl); + + gcc_assert (1 == subst->num_ptr_args || 2 == subst->num_ptr_args); + + maybe_issue_profile_use_note (locus, counters, 1); + first = reusedist_nt_is_worth_it (name, counters); + + if (2 == subst->num_ptr_args) + { + maybe_issue_profile_use_note (locus, counters + RD_NUM_COUNTERS, 2); + second = reusedist_nt_is_worth_it (name, counters + RD_NUM_COUNTERS); + } + else + second = 0; + + if (first > 0) + /* Nontemporal in first arg. */ + { + /* The operation on the first arg should be nontemporal. */ + if (second > 0) + suffix = 3; + else + suffix = 1; + } + else if (first < 0) + /* Temporal in first arg. */ + { + if (second > 0) + suffix = 2; + else if (second < 0) + suffix = 0; + else + suffix = -1; + } + else + /* Don't know about the first arg. 
*/ + { + if (second > 0) + suffix = 2; + else + suffix = -1; + } + + if (suffix == -1) + return; + + subst_decl = reusedist_get_nt_decl (gimple_call_fndecl (stmt), subst, + suffix); + gimple_call_set_fndecl (stmt, subst_decl); + inform (locus, "replaced %qs with non-temporal %qs", + subst->original_name, + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (subst_decl))); +} + +/* Replace string operations with equivalent nontemporal, when profitable. */ + +static void +optimize_reusedist (void) +{ + basic_block bb; + gimple_stmt_iterator gsi; + unsigned n_counters; + unsigned counter_index = 0; + gcov_type *counters = get_coverage_counts_no_warn ( + DECL_STRUCT_FUNCTION (current_function_decl), + GCOV_COUNTER_REUSE_DIST, &n_counters); + + if (!n_counters || DECL_STATIC_CONSTRUCTOR (current_function_decl)) + return; + + gcc_assert (!(n_counters % RD_NUM_COUNTERS)); + + FOR_EACH_BB (bb) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + stringop_subst_t subst = reusedist_is_interesting_call (stmt); + + if (subst) + { + if (counter_index < n_counters) + reusedist_maybe_replace_with_nt_version ( + stmt, &counters[counter_index], subst); + counter_index += subst->num_ptr_args * RD_NUM_COUNTERS; + } + } + + if (counter_index != n_counters) + { + warning (0, "coverage mismatch for reuse distance counters " + "in function %qs", IDENTIFIER_POINTER + (DECL_ASSEMBLER_NAME (current_function_decl))); + inform (input_location, "number of counters is %u instead of %u", + n_counters, counter_index); + } +} + /* Return 1 if tree-based profiling is in effect, else 0. If it is, set up hooks for tree-based profiling. Gate for pass_tree_profile. 
*/ @@ -657,7 +1385,8 @@ tree_gen_ior_profiler (histogram_value value, unsigned tag, unsigned base) static bool do_tree_profiling (void) { - if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities) + if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities + || flag_profile_reusedist || flag_optimize_locality) { tree_register_profile_hooks (); gimple_register_value_prof_hooks (); @@ -736,7 +1465,7 @@ direct_call_profiling (void) build_pointer_type (gcov_type_node)); DECL_ARTIFICIAL (dc_gcov_type_ptr_var) = 1; DECL_EXTERNAL (dc_gcov_type_ptr_var) = 1; - if (targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (dc_gcov_type_ptr_var) = decl_default_tls_model (dc_gcov_type_ptr_var); @@ -746,7 +1475,7 @@ direct_call_profiling (void) ptr_void); DECL_ARTIFICIAL (dc_void_ptr_var) = 1; DECL_EXTERNAL (dc_void_ptr_var) = 1; - if (targetm.have_tls) + if (targetm.have_tls && !is_kernel_build) { DECL_TLS_MODEL (dc_void_ptr_var) = decl_default_tls_model (dc_void_ptr_var); @@ -766,6 +1495,7 @@ direct_call_profiling (void) /* Check if this is a direct call, and not a builtin call. */ if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fndecl (stmt) == NULL_TREE + || DECL_BUILT_IN (gimple_call_fndecl (stmt)) || DECL_IS_BUILTIN (gimple_call_fndecl (stmt))) continue; @@ -832,11 +1562,14 @@ struct profile_hooks tree_profile_hooks = tree_gen_interval_profiler, /* gen_interval_profiler */ tree_gen_pow2_profiler, /* gen_pow2_profiler */ tree_gen_one_value_profiler, /* gen_one_value_profiler */ + tree_gen_one_float_value_profiler, /* gen_one_float_value_profiler */ tree_gen_const_delta_profiler, /* gen_const_delta_profiler */ tree_gen_ic_profiler, /* gen_ic_profiler */ tree_gen_dc_profiler, /* gen_dc_profiler */ tree_gen_average_profiler, /* gen_average_profiler */ - tree_gen_ior_profiler /* gen_ior_profiler */ + tree_gen_ior_profiler, /* gen_ior_profiler */ + tree_gen_reusedist, /* inserts reuse distance instrumentation; hook field name is declared outside this diff */ + optimize_reusedist /* substitutes nontemporal string operations using the profile; hook field name is declared outside this diff */ }; #include "gt-tree-profile.h" |