/* Performance monitoring unit (PMU) profiler. If available, use an
   external tool to collect hardware performance counter data and write
   it in the .gcda files.
   Copyright (C) 2010. Free Software Foundation, Inc.
   Contributed by Sharad Singhai.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.
*/ #include "tconfig.h" #include "tsystem.h" #include "coretypes.h" #include "tm.h" #if (defined (__x86_64__) || defined (__i386__)) #include "cpuid.h" #endif #if defined(inhibit_libc) #define IN_LIBGCOV (-1) #else #include #include #define IN_LIBGCOV 1 #if defined(L_gcov) #define GCOV_LINKAGE /* nothing */ #endif #endif #include "gcov-io.h" #ifdef TARGET_POSIX_IO #include #include #include #include #endif #if defined(inhibit_libc) #else #include #include #include #include #include #include #define XNEWVEC(type,ne) (type *)calloc((ne),sizeof(type)) #define XNEW(type) (type *)malloc(sizeof(type)) #define XDELETEVEC(p) free(p) #define XDELETE(p) free(p) #define PFMON_CMD "/usr/bin/pfmon" #define ADDR2LINE_CMD "/usr/bin/addr2line" #define PMU_TOOL_MAX_ARGS (20) static char default_addr2line[] = "??:0"; static const char pfmon_ll_header[] = "# counts %self %cum " "<10 <32 <64 <256 <1024 >=1024 %wself " "code addr symbol\n"; static const char pfmon_bm_header[] = "# counts %self %cum code addr symbol\n"; const char *pfmon_intel_ll_args[PMU_TOOL_MAX_ARGS] = { PFMON_CMD, "--aggregate-results", "--follow-all", "--with-header", "--smpl-module=pebs-ll", "--ld-lat-threshold=4", "--pebs-ll-dcmiss-code", "--resolve-addresses", "-emem_inst_retired:LATENCY_ABOVE_THRESHOLD", "--long-smpl-periods=4000", 0 /* terminating NULL must be present */ }; const char *pfmon_amd_ll_args[PMU_TOOL_MAX_ARGS] = { PFMON_CMD, "--aggregate-results", "--follow-all", "-uk", "--with-header", "--smpl-module=ibs", "--resolve-addresses", "-eibsop_event:uops", "--ibs-dcmiss-code", "--long-smpl-periods=4000", 0 /* terminating NULL must be present */ }; const char *pfmon_intel_brm_args[PMU_TOOL_MAX_ARGS] = { PFMON_CMD, "--aggregate-results", "--follow-all", "--with-header", "--resolve-addresses", "-eMISPREDICTED_BRANCH_RETIRED", "--long-smpl-periods=10000", 0 /* terminating NULL must be present */ }; const char *pfmon_amd_brm_args[PMU_TOOL_MAX_ARGS] = { PFMON_CMD, "--aggregate-results", "--follow-all", 
"--with-header", "--resolve-addresses", "-eRETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", "--long-smpl-periods=10000", 0 /* terminating NULL must be present */ }; const char *addr2line_args[PMU_TOOL_MAX_ARGS] = { ADDR2LINE_CMD, "-e", 0 /* terminating NULL must be present */ }; enum pmu_tool_type { PTT_PFMON, PTT_LAST }; enum pmu_event_type { PET_INTEL_LOAD_LATENCY, PET_AMD_LOAD_LATENCY, PET_INTEL_BRANCH_MISPREDICT, PET_AMD_BRANCH_MISPREDICT, PET_LAST }; typedef struct pmu_tool_fns { const char *name; /* name of the pmu tool */ /* pmu tool commandline argument. */ const char **arg_array; /* Initialize pmu module. */ void *(*init_pmu_module) (void); /* Start profililing. */ void (*start_pmu_module) (pid_t ppid, char *tmpfile, const char **args); /* Stop profililing. */ void (*stop_pmu_module) (void); /* How to parse the output generated by the PMU tool. */ int (*parse_pmu_output) (char *filename, void *pmu_data); /* How to write parsed pmu data into gcda file. */ void (*gcov_write_pmu_data) (void *data); /* How to cleanup any data structure created during parsing. */ void (*cleanup_pmu_data) (void *data); /* How to initialize symbolizer for the PPID. */ int (*start_symbolizer) (pid_t ppid); void (*end_symbolizer) (void); char *(*symbolize) (void *addr); } pmu_tool_fns; enum pmu_state { PMU_NONE, /* Not configurated at all. */ PMU_INITIALIZED, /* Configured and initialized. */ PMU_ERROR, /* Configuration error. Cannot recover. */ PMU_ON, /* Currently profiling. */ PMU_OFF /* Currently stopped, but can be restarted. */ }; enum cpu_vendor_signature { CPU_VENDOR_UKNOWN = 0, CPU_VENDOR_INTEL = 0x756e6547, /* Genu */ CPU_VENDOR_AMD = 0x68747541 /* Auth */ }; /* Info about pmu tool during the run time. */ struct pmu_tool_info { /* Current pmu tool. */ enum pmu_tool_type tool; /* Current event. */ enum pmu_event_type event; /* filename for storing the pmu profile. */ char *pmu_profile_filename; /* Intermediate file where the tool stores the PMU data. 
*/ char *raw_pmu_profile_filename; /* Where PMU tool's stderr should be stored. */ char *tool_stderr_filename; enum pmu_state pmu_profiling_state; enum cpu_vendor_signature cpu_vendor; /* as discovered by cpuid */ pid_t pmu_tool_pid; /* process id of the pmu tool */ pid_t symbolizer_pid; /* process id of the symbolizer */ int symbolizer_to_pipefd[2]; /* pipe for writing to the symbolizer */ int symbolizer_from_pipefd[2]; /* pipe for reading from the symbolizer */ void *pmu_data; /* an opaque pointer for the tool to store pmu data */ int verbose; /* turn on additional debugging */ unsigned top_n_address; /* how many addresses to symbolize */ pmu_tool_fns *tool_details; /* list of functions how to start/stop/parse */ }; /* Global struct for recordkeeping. */ static struct pmu_tool_info *the_pmu_tool_info; /* Additional info is printed if these are non-zero. */ static int tool_debug = 0; static int sym_debug = 0; static int parse_load_latency_line (char *line, gcov_pmu_ll_info_t *ll_info); static int parse_branch_mispredict_line (char *line, gcov_pmu_brm_info_t *brm_info); static unsigned convert_pct_to_unsigned (float pct); static void start_pfmon_module (pid_t ppid, char *tmpfile, const char **pfmon_args); static void *init_pmu_load_latency (void); static void *init_pmu_branch_mispredict (void); static void destroy_load_latency_infos (void *info); static void destroy_branch_mispredict_infos (void *info); static int parse_pfmon_load_latency (char *filename, void *pmu_data); static int parse_pfmon_branch_mispredicts (char *filename, void *pmu_data); static gcov_unsigned_t gcov_tag_pmu_tool_header_length (gcov_pmu_tool_header_t *header); static void gcov_write_tool_header (gcov_pmu_tool_header_t *header); static void gcov_write_load_latency_infos (void *info); static void gcov_write_branch_mispredict_infos (void *info); static void gcov_write_ll_line (const gcov_pmu_ll_info_t *ll_info); static void gcov_write_branch_mispredict_line (const gcov_pmu_brm_info_t 
*brm_info); static int start_addr2line_symbolizer (pid_t pid); static void end_addr2line_symbolizer (void); static char *symbolize_addr2line (void *p); static void reset_symbolizer_parent_pipes (void); static void reset_symbolizer_child_pipes (void); /* parse and cache relevant tool info. */ static int parse_pmu_profile_options (const char *options); static gcov_pmu_tool_header_t *parse_pfmon_tool_header (FILE *fp, const char *end_header); /* How to access the necessary functions for the PMU tools. */ pmu_tool_fns all_pmu_tool_fns[PTT_LAST][PET_LAST] = { { { "intel-load-latency", /* name */ pfmon_intel_ll_args, /* tool args */ init_pmu_load_latency, /* initialization */ start_pfmon_module, /* start */ 0, /* stop */ parse_pfmon_load_latency, /* parse */ gcov_write_load_latency_infos, /* write */ destroy_load_latency_infos, /* cleanup */ start_addr2line_symbolizer, /* start symbolizer */ end_addr2line_symbolizer, /* end symbolizer */ symbolize_addr2line, /* symbolize */ }, { "amd-load-latency", /* name */ pfmon_amd_ll_args, /* tool args */ init_pmu_load_latency, /* initialization */ start_pfmon_module, /* start */ 0, /* stop */ parse_pfmon_load_latency, /* parse */ gcov_write_load_latency_infos, /* write */ destroy_load_latency_infos, /* cleanup */ start_addr2line_symbolizer, /* start symbolizer */ end_addr2line_symbolizer, /* end symbolizer */ symbolize_addr2line, /* symbolize */ }, { "intel-branch-mispredict", /* name */ pfmon_intel_brm_args, /* tool args */ init_pmu_branch_mispredict, /* initialization */ start_pfmon_module, /* start */ 0, /* stop */ parse_pfmon_branch_mispredicts, /* parse */ gcov_write_branch_mispredict_infos,/* write */ destroy_branch_mispredict_infos, /* cleanup */ start_addr2line_symbolizer, /* start symbolizer */ end_addr2line_symbolizer, /* end symbolizer */ symbolize_addr2line, /* symbolize */ }, { "amd-branch-mispredict", /* name */ pfmon_amd_brm_args, /* tool args */ init_pmu_branch_mispredict, /* initialization */ start_pfmon_module, /* 
start */ 0, /* stop */ parse_pfmon_branch_mispredicts, /* parse */ gcov_write_branch_mispredict_infos,/* write */ destroy_branch_mispredict_infos, /* cleanup */ start_addr2line_symbolizer, /* start symbolizer */ end_addr2line_symbolizer, /* end symbolizer */ symbolize_addr2line, /* symbolize */ } } }; /* Determine the CPU vendor. Currently only distinguishes x86 based cpus where the vendor is either Intel or AMD. Returns one of the enum cpu_vendor_signatures. */ static unsigned int get_x86cpu_vendor (void) { unsigned int vendor = CPU_VENDOR_UKNOWN; #if (defined (__x86_64__) || defined (__i386__)) if (__get_cpuid_max (0, &vendor) < 1) return CPU_VENDOR_UKNOWN; /* Cannot determine cpu type. */ #endif if (vendor == CPU_VENDOR_INTEL || vendor == CPU_VENDOR_AMD) return vendor; else return CPU_VENDOR_UKNOWN; } /* Parse PMU tool option string provided on the command line and store information in global structure. Return 0 on success, otherwise return 1. Any changes to this should be synced with check_pmu_profile_options() which does compile time check. */ static int parse_pmu_profile_options (const char *options) { enum pmu_tool_type ptt = the_pmu_tool_info->tool; enum pmu_event_type pet = PET_LAST; const char *pmutool_path; the_pmu_tool_info->cpu_vendor = get_x86cpu_vendor (); /* Determine the platform we are running on. */ if (the_pmu_tool_info->cpu_vendor == CPU_VENDOR_UKNOWN) { /* Cpuid failed or uknown vendor. */ the_pmu_tool_info->pmu_profiling_state = PMU_ERROR; return 1; } /* Validate the options. */ if (strcmp(options, "load-latency") && strcmp(options, "load-latency-verbose") && strcmp(options, "branch-mispredict") && strcmp(options, "branch-mispredict-verbose")) return 1; /* Check if are aksed to collect load latency PMU data. 
*/ if (!strcmp(options, "load-latency") || !strcmp(options, "load-latency-verbose")) { if (the_pmu_tool_info->cpu_vendor == CPU_VENDOR_INTEL) pet = PET_INTEL_LOAD_LATENCY; else pet = PET_AMD_LOAD_LATENCY; if (!strcmp(options, "load-latency-verbose")) the_pmu_tool_info->verbose = 1; } /* Check if are aksed to collect branch mispredict PMU data. */ if (!strcmp(options, "branch-mispredict") || !strcmp(options, "branch-mispredict-verbose")) { if (the_pmu_tool_info->cpu_vendor == CPU_VENDOR_INTEL) pet = PET_INTEL_BRANCH_MISPREDICT; else pet = PET_AMD_BRANCH_MISPREDICT; if (!strcmp(options, "branch-mispredict-verbose")) the_pmu_tool_info->verbose = 1; } the_pmu_tool_info->tool_details = &all_pmu_tool_fns[ptt][pet]; the_pmu_tool_info->event = pet; /* Allow users to override the default tool path. */ pmutool_path = getenv ("GCOV_PMUTOOL_PATH"); if (pmutool_path && strlen (pmutool_path)) the_pmu_tool_info->tool_details->arg_array[0] = pmutool_path; return 0; } /* Do the initialization of addr2line symbolizer for the process id given by TASK_PID. It forks an addr2line process and creates two pipes where addresses can be written and source_filename:line_num entries can be read. Returns 0 on success, non-zero otherwise. */ static int start_addr2line_symbolizer (pid_t task_pid) { pid_t pid; char *addr2line_path; /* Allow users to override the default addr2line path. 
*/ addr2line_path = getenv ("GCOV_ADDR2LINE_PATH"); if (addr2line_path && strlen (addr2line_path)) addr2line_args[0] = addr2line_path; if (pipe (the_pmu_tool_info->symbolizer_from_pipefd) == -1) { fprintf (stderr, "Cannot create symbolizer write pipe.\n"); return 1; } if (pipe (the_pmu_tool_info->symbolizer_to_pipefd) == -1) { fprintf (stderr, "Cannot create symbolizer read pipe.\n"); return 1; } pid = fork (); if (pid == -1) { /* error condition */ fprintf (stderr, "Cannot create symbolizer process.\n"); reset_symbolizer_parent_pipes (); reset_symbolizer_child_pipes (); return 1; } if (pid == 0) { /* child does an exec and then connects to/from the pipe */ unsigned n_args = 0; char proc_exe_buf[128]; int new_write_fd, new_read_fd; int i; /* Go over the current addr2line args. */ for (i = 0; i < PMU_TOOL_MAX_ARGS && addr2line_args[i]; ++i) n_args++; /* we are going to add one more arg for the /proc/pid/exe */ if (n_args >= (PMU_TOOL_MAX_ARGS - 1)) { fprintf (stderr, "too many addr2line args: %d\n", n_args); _exit (0); } snprintf (proc_exe_buf, sizeof (proc_exe_buf) - 1, "/proc/%d/exe", task_pid); /* add the extra arg */ addr2line_args[n_args] = proc_exe_buf; n_args++; addr2line_args[n_args] = (const char *)NULL; /* add terminating NULL */ if (sym_debug) { fprintf (stderr, "addr2line args:"); for (i = 0; i < PMU_TOOL_MAX_ARGS && addr2line_args[i]; ++i) fprintf (stderr, " %s", addr2line_args[i]); fprintf (stderr, "\n"); } /* close unused ends of the two pipes. */ reset_symbolizer_child_pipes (); /* connect the pipes to stdin/stdout of the child process. 
*/ new_read_fd = dup2 (the_pmu_tool_info->symbolizer_to_pipefd[0], 0); new_write_fd = dup2 (the_pmu_tool_info->symbolizer_from_pipefd[1], 1); if (new_read_fd == -1 || new_write_fd == -1) { fprintf (stderr, "could not dup symbolizer fds\n"); reset_symbolizer_parent_pipes (); reset_symbolizer_child_pipes (); _exit (0); } the_pmu_tool_info->symbolizer_to_pipefd[0] = new_read_fd; the_pmu_tool_info->symbolizer_from_pipefd[1] = new_write_fd; /* Do execve with NULL env. */ execve (addr2line_args[0], (char * const*)addr2line_args, (char * const*)NULL); /* exec returned, an error condition. */ fprintf (stderr, "could not create symbolizer process: %s\n", addr2line_args[0]); reset_symbolizer_parent_pipes (); reset_symbolizer_child_pipes (); _exit (0); } else { /* parent */ the_pmu_tool_info->symbolizer_pid = pid; /* close unused ends of the two pipes. */ reset_symbolizer_parent_pipes (); return 0; } return 0; } /* close unused write end of the FROM-pipe and read end of the TO-pipe. */ static void reset_symbolizer_parent_pipes (void) { if (the_pmu_tool_info->symbolizer_from_pipefd[1] != -1) { close (the_pmu_tool_info->symbolizer_from_pipefd[1]); the_pmu_tool_info->symbolizer_from_pipefd[1] = -1; } if (the_pmu_tool_info->symbolizer_to_pipefd[0] != -1) { close (the_pmu_tool_info->symbolizer_to_pipefd[0]); the_pmu_tool_info->symbolizer_to_pipefd[0] = -1; } } /* Close unused write end of the TO-pipe and read end of the FROM-pipe. */ static void reset_symbolizer_child_pipes (void) { if (the_pmu_tool_info->symbolizer_to_pipefd[1] != -1) { close (the_pmu_tool_info->symbolizer_to_pipefd[1]); the_pmu_tool_info->symbolizer_to_pipefd[1] = -1; } if (the_pmu_tool_info->symbolizer_from_pipefd[0] != -1) { close (the_pmu_tool_info->symbolizer_from_pipefd[0]); the_pmu_tool_info->symbolizer_from_pipefd[0] = -1; } } /* Perform cleanup for the symbolizer process. 
*/ static void end_addr2line_symbolizer (void) { int pid_status; int wait_status; pid_t pid = the_pmu_tool_info->symbolizer_pid; /* symbolizer was not running */ if (!pid) return; reset_symbolizer_parent_pipes (); reset_symbolizer_child_pipes (); kill (pid, SIGTERM); wait_status = waitpid (pid, &pid_status, 0); if (sym_debug) { if (wait_status == pid) fprintf (stderr, "Normal exit. symbolizer terminated.\n"); else fprintf (stderr, "Abnormal exit. symbolizer status, %d.\n", pid_status); } the_pmu_tool_info->symbolizer_pid = 0; /* symoblizer no longer running */ } /* Given an address ADDR, return a string containing source_filename:line_num entries. */ static char * symbolize_addr2line (void *addr) { char buf[32]; /* holds the ascii version of address */ int write_count; int read_count; char *srcfile_linenum; size_t max_length = 1024; if (!the_pmu_tool_info->symbolizer_pid) return default_addr2line; /* symbolizer is not running */ write_count = snprintf (buf, sizeof (buf) - 1, "%p\n", addr); /* write the address into the pipe */ if (write (the_pmu_tool_info->symbolizer_to_pipefd[1], buf, write_count) < write_count) { if (sym_debug) fprintf (stderr, "Cannot write symbolizer pipe.\n"); return default_addr2line; } srcfile_linenum = XNEWVEC (char, max_length); read_count = read (the_pmu_tool_info->symbolizer_from_pipefd[0], srcfile_linenum, max_length); if (read_count == -1) { if (sym_debug) fprintf (stderr, "Cannot read symbolizer pipe.\n"); XDELETEVEC (srcfile_linenum); return default_addr2line; } srcfile_linenum[read_count] = 0; if (sym_debug) fprintf (stderr, "symbolizer: for address %p, read_count %d, got %s\n", addr, read_count, srcfile_linenum); return srcfile_linenum; } /* Start monitoring PPID process via pfmon tool using TMPFILE as a file to store the raw data and using PFMON_ARGS as the command line arguments. 
*/
/* Appends --attach-task, --smpl-outfile and --smpl-show-top (plus
   --verbose when requested) to PFMON_ARGS and then execs the tool with
   an empty environment.  On success this does not return.  It returns
   to the caller when an argument cannot be formatted or when execve
   fails, and it calls _exit when PFMON_ARGS has no room for the extra
   arguments.  The appended arguments point into this function's stack
   frame, so PFMON_ARGS must not be used after a return.  */

static void
start_pfmon_module (pid_t ppid, char *tmpfile, const char **pfmon_args)
{
  int i;
  unsigned int n_args = 0;
  int n_chars;
  char pid_buf[64];
  char filename_buf[1024];
  /* "--smpl-show-top=" is 16 characters and an unsigned value may need
     10 digits, so 24 bytes would reject large counts.  */
  char top_n_buf[32];
  unsigned extra_args;

  /* go over the current pfmon args */
  for (i = 0; i < PMU_TOOL_MAX_ARGS && pfmon_args[i]; ++i)
    n_args++;

  if (the_pmu_tool_info->verbose)
    extra_args = 4; /* account for additional --verbose */
  else
    extra_args = 3;

  /* we are going to add args */
  if (n_args >= (PMU_TOOL_MAX_ARGS - extra_args))
    {
      fprintf (stderr, "too many pfmon args: %u\n", n_args);
      _exit (0);
    }

  n_chars = snprintf (pid_buf, sizeof (pid_buf), "--attach-task=%ld",
                      (long)ppid);
  if (n_chars < 0 || (size_t) n_chars >= sizeof (pid_buf))
    {
      fprintf (stderr, "pfmon task id too long: %s\n", pid_buf);
      return;
    }
  pfmon_args[n_args] = pid_buf;
  n_args++;

  n_chars = snprintf (filename_buf, sizeof (filename_buf),
                      "--smpl-outfile=%s", tmpfile);
  if (n_chars < 0 || (size_t) n_chars >= sizeof (filename_buf))
    {
      fprintf (stderr, "pfmon filename too long: %s\n", filename_buf);
      return;
    }
  pfmon_args[n_args] = filename_buf;
  n_args++;

  n_chars = snprintf (top_n_buf, sizeof (top_n_buf), "--smpl-show-top=%u",
                      the_pmu_tool_info->top_n_address);
  if (n_chars < 0 || (size_t) n_chars >= sizeof (top_n_buf))
    {
      fprintf (stderr, "pfmon option too long: %s\n", top_n_buf);
      return;
    }
  pfmon_args[n_args] = top_n_buf;
  n_args++;

  if (the_pmu_tool_info->verbose)
    {
      /* Add --verbose as well.  */
      pfmon_args[n_args] = "--verbose";
      n_args++;
    }
  pfmon_args[n_args] = (char *)NULL;

  if (tool_debug)
    {
      fprintf (stderr, "pfmon args:");
      for (i = 0; i < PMU_TOOL_MAX_ARGS && pfmon_args[i]; ++i)
        fprintf (stderr, " %s", pfmon_args[i]);
      fprintf (stderr, "\n");
    }

  /* Do execve with NULL env.  Control only comes back on failure.  */
  execve (pfmon_args[0], (char *const *)pfmon_args, (char * const*)NULL);
}

/* Convert a fractional PERCENT to an unsigned integer after multiplying
   by 100.
*/ static unsigned convert_pct_to_unsigned (float pct) { return (unsigned)(pct * 100.0); } /* Parse the load latency info pointed by LINE and save it into LL_INFO. Returns 0 if the line was parsed successfully, non-zero otherwise. An example header+line look like these: "counts %self %cum <10 <32 <64 <256 <1024 >=1024 %wself code addr symbol" "218 24.06% 24.06% 100.00% 0.00% 0.00% 0.00% 0.00% 0.00% 22.70% 0x0000000000413e75 CalcSSIM(...)+965" */ static int parse_load_latency_line (char *line, gcov_pmu_ll_info_t *ll_info) { unsigned counts; /* These are percentages parsed as floats, but then converted to ints after multiplying by 100. */ float self, cum, lt_10, lt_32, lt_64, lt_256, lt_1024, gt_1024, wself; long unsigned int p; int n_values; pmu_tool_fns *tool_details = the_pmu_tool_info->tool_details; n_values = sscanf (line, "%u%f%%%f%%%f%%%f%%%f%%%f%%%f%%%f%%%f%%%lx", &counts, &self, &cum, <_10, <_32, <_64, <_256, <_1024, >_1024, &wself, &p); if (n_values != 11) return 1; /* Values read successfully. Do the assignment after converting * percentages into ints. */ ll_info->counts = counts; ll_info->self = convert_pct_to_unsigned (self); ll_info->cum = convert_pct_to_unsigned (cum); ll_info->lt_10 = convert_pct_to_unsigned (lt_10); ll_info->lt_32 = convert_pct_to_unsigned (lt_32); ll_info->lt_64 = convert_pct_to_unsigned (lt_64); ll_info->lt_256 = convert_pct_to_unsigned (lt_256); ll_info->lt_1024 = convert_pct_to_unsigned (lt_1024); ll_info->gt_1024 = convert_pct_to_unsigned (gt_1024); ll_info->wself = convert_pct_to_unsigned (wself); ll_info->code_addr = p; /* run the raw address through the symbolizer */ if (tool_details->symbolize) { char *sym_info = tool_details->symbolize ((void *)p); /* sym_info is of the form src_filename:linenum. Descriminator is currently not supported by addr2line. 
*/ char *sep = strchr (sym_info, ':'); if (!sep) { /* assume entire string is srcfile */ ll_info->filename = (char *)sym_info; ll_info->line = 0; } else { /* terminate the filename string at the separator */ *sep = 0; ll_info->filename = (char *)sym_info; /* convert rest of the sym info to a line number */ ll_info->line = atol (sep+1); } ll_info->discriminator = 0; } else { /* no symbolizer available */ ll_info->filename = NULL; ll_info->line = 0; ll_info->discriminator = 0; } return 0; } /* Parse the branch mispredict info pointed by LINE and save it into BRM_INFO. Returns 0 if the line was parsed successfully, non-zero otherwise. An example header+line look like these: "counts %self %cum code addr symbol" "6869 37.67% 37.67% 0x00000000004007e5 sum(std::vector > const&)+51" */ static int parse_branch_mispredict_line (char *line, gcov_pmu_brm_info_t *brm_info) { unsigned counts; /* These are percentages parsed as floats, but then converted to ints after multiplying by 100. */ float self, cum; long unsigned int p; int n_values; pmu_tool_fns *tool_details = the_pmu_tool_info->tool_details; n_values = sscanf (line, "%u%f%%%f%%%lx", &counts, &self, &cum, &p); if (n_values != 4) return 1; /* Values read successfully. Do the assignment after converting * percentages into ints. */ brm_info->counts = counts; brm_info->self = convert_pct_to_unsigned (self); brm_info->cum = convert_pct_to_unsigned (cum); brm_info->code_addr = p; /* run the raw address through the symbolizer */ if (tool_details->symbolize) { char *sym_info = tool_details->symbolize ((void *)p); /* sym_info is of the form src_filename:linenum. Descriminator is currently not supported by addr2line. 
*/ char *sep = strchr (sym_info, ':'); if (!sep) { /* assume entire string is srcfile */ brm_info->filename = sym_info; brm_info->line = 0; } else { /* terminate the filename string at the separator */ *sep = 0; brm_info->filename = sym_info; /* convert rest of the sym info to a line number */ brm_info->line = atol (sep+1); } brm_info->discriminator = 0; } else { /* no symbolizer available */ brm_info->filename = NULL; brm_info->line = 0; brm_info->discriminator = 0; } return 0; } /* Delete load latency info structures INFO. */ static void destroy_load_latency_infos (void *info) { unsigned i; ll_infos_t* ll_infos = (ll_infos_t *)info; /* delete each element */ for (i = 0; i < ll_infos->ll_count; ++i) XDELETE (ll_infos->ll_array[i]); /* delete the array itself */ XDELETE (ll_infos->ll_array); __destroy_pmu_tool_header (ll_infos->pmu_tool_header); free (ll_infos->pmu_tool_header); ll_infos->ll_array = 0; ll_infos->ll_count = 0; } /* Delete branch mispredict structure INFO. */ static void destroy_branch_mispredict_infos (void *info) { unsigned i; brm_infos_t* brm_infos = (brm_infos_t *)info; /* delete each element */ for (i = 0; i < brm_infos->brm_count; ++i) XDELETE (brm_infos->brm_array[i]); /* delete the array itself */ XDELETE (brm_infos->brm_array); __destroy_pmu_tool_header (brm_infos->pmu_tool_header); free (brm_infos->pmu_tool_header); brm_infos->brm_array = 0; brm_infos->brm_count = 0; } /* Parse FILENAME for load latency lines into a structure PMU_DATA. Returns 0 on on success. Returns non-zero on failure. 
*/
/* PMU_DATA must point to an ll_infos_t.  Every line after the pfmon
   header that parses as a load latency record is appended to its
   ll_array, which grows geometrically; other lines are skipped.  On
   success the parsed tool header is stored in PMU_DATA.  On failure the
   file is closed, the header is released, and the records appended so
   far stay in PMU_DATA.  */

static int
parse_pfmon_load_latency (char *filename, void *pmu_data)
{
  FILE *fp;
  size_t buflen = 2*1024;
  char *buf;
  ll_infos_t *load_latency_infos = (ll_infos_t *)pmu_data;
  gcov_pmu_tool_header_t *tool_header = 0;

  if ((fp = fopen (filename, "r")) == NULL)
    {
      fprintf (stderr, "cannot open pmu data file: %s\n", filename);
      return 1;
    }

  if (!(tool_header = parse_pfmon_tool_header (fp, pfmon_ll_header)))
    {
      fprintf (stderr, "cannot parse pmu data file header: %s\n", filename);
      fclose (fp);
      return 1;
    }

  buf = (char *) malloc (buflen);
  if (!buf)
    goto out_of_memory;

  while (fgets (buf, buflen, fp))
    {
      gcov_pmu_ll_info_t *ll_info = XNEW (gcov_pmu_ll_info_t);

      if (!ll_info)
        goto out_of_memory;

      if (parse_load_latency_line (buf, ll_info))
        {
          /* invalid line, delete it */
          XDELETE (ll_info);
          continue;
        }

      /* valid line, add to the array */
      if (load_latency_infos->ll_count >= load_latency_infos->alloc_ll_count)
        {
          /* Need to grow.  The size passed to realloc is in bytes, and
             the old array must stay reachable if realloc fails.  */
          size_t new_alloc = load_latency_infos->alloc_ll_count
            ? 2 * (size_t) load_latency_infos->alloc_ll_count : 64;
          gcov_pmu_ll_info_t **grown
            = realloc (load_latency_infos->ll_array,
                       new_alloc * sizeof (*grown));

          if (grown == NULL)
            {
              XDELETE (ll_info);
              goto out_of_memory;
            }
          load_latency_infos->ll_array = grown;
          load_latency_infos->alloc_ll_count = new_alloc;
        }
      load_latency_infos->ll_array[load_latency_infos->ll_count] = ll_info;
      load_latency_infos->ll_count++;
    }

  free (buf);
  fclose (fp);
  load_latency_infos->pmu_tool_header = tool_header;
  return 0;

 out_of_memory:
  fprintf (stderr, "Cannot allocate load latency memory.\n");
  /* Release the header contents and then the header struct itself, the
     same way destroy_load_latency_infos does.  */
  __destroy_pmu_tool_header (tool_header);
  free (tool_header);
  free (buf);
  fclose (fp);
  return 1;
}

/* Parse open file FP until END_HEADER is seen.  The data matching
   gcov_pmu_tool_header_t fields is saved and returned in a new struct.
   In case of failure, it returns NULL.
*/ static gcov_pmu_tool_header_t * parse_pfmon_tool_header (FILE *fp, const char *end_header) { static const char tag_hostname[] = "# hostname: "; static const char tag_kversion[] = "# kernel version: "; static const char tag_hostcpu[] = "# host CPUs: "; static const char tag_column_desc_start[] = "# description of columns:"; static const char tag_column_desc_end[] = "# other columns are self-explanatory"; size_t buflen = 4*1024; char *buf, *buf_start, *buf_end; gcov_pmu_tool_header_t *tool_header = XNEWVEC (gcov_pmu_tool_header_t, 1); char *hostname = 0; char *kversion = 0; char *hostcpu = 0; char *column_description = 0; char *column_desc_start = 0; char *column_desc_end = 0; const char *column_header = 0; int got_hostname = 0; int got_kversion = 0 ; int got_hostcpu = 0; int got_end_header = 0; int got_column_description = 0; buf = (char *) malloc (buflen); buf_start = buf; buf_end = buf + buflen; while (buf < (buf_end - 1) && fgets (buf, buf_end - buf, fp)) { if (strncmp (end_header, buf, buf_end - buf) == 0) { got_end_header = 1; break; } if (!got_hostname && strncmp (buf, tag_hostname, strlen (tag_hostname)) == 0) { size_t len = strlen (buf) - strlen (tag_hostname); hostname = (char *)malloc (len); memcpy (hostname, buf + strlen (tag_hostname), len); hostname[len - 1] = 0; tool_header->hostname = hostname; got_hostname = 1; } if (!got_kversion && strncmp (buf, tag_kversion, strlen (tag_kversion)) == 0) { size_t len = strlen (buf) - strlen (tag_kversion); kversion = (char *)malloc (len); memcpy (kversion, buf + strlen (tag_kversion), len); kversion[len - 1] = 0; tool_header->kernel_version = kversion; got_kversion = 1; } if (!got_hostcpu && strncmp (buf, tag_hostcpu, strlen (tag_hostcpu)) == 0) { size_t len = strlen (buf) - strlen (tag_hostcpu); hostcpu = (char *)malloc (len); memcpy (hostcpu, buf + strlen (tag_hostcpu), len); hostcpu[len - 1] = 0; tool_header->host_cpu = hostcpu; got_hostcpu = 1; } if (!got_column_description && strncmp (buf, 
tag_column_desc_start, strlen (tag_column_desc_start)) == 0) { column_desc_start = buf; column_desc_end = 0; /* continue reading until end of the column descriptor */ while (buf < (buf_end - 1) && fgets (buf, buf_end - buf, fp)) { if (strncmp (buf, tag_column_desc_end, strlen (tag_column_desc_end)) == 0) { column_desc_end = buf + strlen (tag_column_desc_end); break; } buf += strlen (buf); } if (column_desc_end) { /* found the end, copy it into a new string */ column_description = (char *)malloc (column_desc_end - column_desc_start + 1); got_column_description = 1; strcpy (column_description, column_desc_start); tool_header->column_description = column_description; } } /* advance buf */ buf += strlen (buf); } /* If we are missing any of the fields, return NULL. */ if (!got_end_header || !got_hostname || !got_kversion || !got_hostcpu || !got_column_description) { if (hostname) free (hostname); if (kversion) free (kversion); if (hostcpu) free (hostcpu); if (column_description) free (column_description); free (buf_start); free (tool_header); return NULL; } switch (the_pmu_tool_info->event) { case PET_INTEL_LOAD_LATENCY: case PET_AMD_LOAD_LATENCY: column_header = pfmon_ll_header; break; case PET_INTEL_BRANCH_MISPREDICT: case PET_AMD_BRANCH_MISPREDICT: column_header = pfmon_bm_header; break; default: break; } tool_header->column_header = strdup (column_header); tool_header->full_header = buf_start; return tool_header; } /* Parse FILENAME for branch mispredict lines into a structure PMU_DATA. Returns 0 on on success. Returns non-zero on failure. 
*/ static int parse_pfmon_branch_mispredicts (char *filename, void *pmu_data) { FILE *fp; size_t buflen = 2*1024; char *buf; brm_infos_t *brm_infos = (brm_infos_t *)pmu_data; gcov_pmu_tool_header_t *tool_header = 0; if ((fp = fopen (filename, "r")) == NULL) { fprintf (stderr, "cannot open pmu data file: %s\n", filename); return 1; } if (!(tool_header = parse_pfmon_tool_header (fp, pfmon_bm_header))) { fprintf (stderr, "cannot parse pmu data file header: %s\n", filename); return 1; } buf = (char *) malloc (buflen); while (fgets (buf, buflen, fp)) { gcov_pmu_brm_info_t *brm = XNEW (gcov_pmu_brm_info_t); if (!parse_branch_mispredict_line (buf, brm)) { /* valid line, add to the array */ brm_infos->brm_count++; if (brm_infos->brm_count >= brm_infos->alloc_brm_count) { /* need to realloc */ brm_infos->brm_array = realloc (brm_infos->brm_array, 2 * brm_infos->alloc_brm_count); if (brm_infos->brm_array == NULL) { fprintf (stderr, "Cannot allocate memory for br mispredicts.\n"); __destroy_pmu_tool_header (tool_header); free (buf); fclose (fp); return 1; } } brm_infos->brm_array[brm_infos->brm_count - 1] = brm; } else /* invalid line, delete it */ XDELETE (brm); } free (buf); fclose (fp); brm_infos->pmu_tool_header = tool_header; return 0; } /* Start the monitoring process using pmu tool. Return 0 on success, non-zero otherwise. */ static int pmu_start (void) { pid_t pid; /* no start function */ if (!the_pmu_tool_info->tool_details->start_pmu_module) return 1; pid = fork (); if (pid == -1) { /* error condition */ fprintf (stderr, "Cannot create PMU profiling process, exiting.\n"); return 1; } else if (pid == 0) { /* child */ pid_t ppid = getppid(); char *tmpfile = the_pmu_tool_info->raw_pmu_profile_filename; const char **pfmon_args = the_pmu_tool_info->tool_details->arg_array; int new_stderr_fd; /* Redirect stderr from the child process into a separate file. 
*/ new_stderr_fd = creat (the_pmu_tool_info->tool_stderr_filename, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); if (new_stderr_fd != -1) dup2 (new_stderr_fd, 2); /* The following does an exec and thus is not expected to return. */ the_pmu_tool_info->tool_details->start_pmu_module(ppid, tmpfile, pfmon_args); /* exec returned, an error condition. */ fprintf (stderr, "could not create profiling process: %s\n", the_pmu_tool_info->tool_details->arg_array[0]); _exit (0); } else { /* parent */ the_pmu_tool_info->pmu_tool_pid = pid; return 0; } } /* Allocate and initialize pmu load latency structure. */ static void * init_pmu_load_latency (void) { ll_infos_t *load_latency = XNEWVEC (ll_infos_t, 1); load_latency->ll_count = 0; load_latency->alloc_ll_count = 64; load_latency->ll_array = XNEWVEC (gcov_pmu_ll_info_t *, load_latency->alloc_ll_count); return (void *)load_latency; } /* Allocate and initialize pmu branch mispredict structure. */ static void * init_pmu_branch_mispredict (void) { brm_infos_t *brm_info = XNEWVEC (brm_infos_t, 1); brm_info->brm_count = 0; brm_info->alloc_brm_count = 64; brm_info->brm_array = XNEWVEC (gcov_pmu_brm_info_t *, brm_info->alloc_brm_count); return (void *)brm_info; } /* Initialize pmu tool based upon PMU_INFO. Sets the appropriate tool type in the global the_pmu_tool_info. 
*/
/* Resets every field of the global the_pmu_tool_info to its idle
   value, copies the top-N address setting from PMU_INFO, and then lets
   parse_pmu_profile_options interpret the requested tool string.  When
   the selected tool provides an init hook, its result becomes the
   tool's private pmu_data.  Returns 0 on success and 1 if the options
   are rejected or the profiling state ends up as PMU_ERROR.  */

static int
init_pmu_tool (struct gcov_pmu_info *pmu_info)
{
  int end;

  the_pmu_tool_info->pmu_profiling_state = PMU_NONE;
  the_pmu_tool_info->verbose = 0;
  /* we support only pfmon */
  the_pmu_tool_info->tool = PTT_PFMON;
  the_pmu_tool_info->pmu_tool_pid = 0;
  the_pmu_tool_info->top_n_address = pmu_info->pmu_top_n_address;
  the_pmu_tool_info->symbolizer_pid = 0;

  /* Both ends of both symbolizer pipes start out closed.  */
  for (end = 0; end < 2; end++)
    {
      the_pmu_tool_info->symbolizer_to_pipefd[end] = -1;
      the_pmu_tool_info->symbolizer_from_pipefd[end] = -1;
    }

  if (parse_pmu_profile_options (pmu_info->pmu_tool) != 0)
    return 1;

  if (the_pmu_tool_info->pmu_profiling_state == PMU_ERROR)
    {
      fprintf (stderr, "Unsupported PMU module: %s, disabling PMU profiling.\n",
               pmu_info->pmu_tool);
      return 1;
    }

  /* A module without an init hook needs no private data.  */
  if (!the_pmu_tool_info->tool_details->init_pmu_module)
    return 0;

  /* initialize module */
  the_pmu_tool_info->pmu_data =
    the_pmu_tool_info->tool_details->init_pmu_module();
  return 0;
}

/* Initialize PMU profiling based upon the information passed in PMU_INFO and use PMU_PROFILE_FILENAME as the file to store the PMU profile. This is called multiple times from libgcov, once per object file. We need to make sure to do the necessary initialization only the first time. For subsequent invocations it behaves as a NOOP. 
*/
/* Details: does nothing if PMU_INFO, its profile filename, or its tool
   name is missing.  The first call allocates the global
   the_pmu_tool_info and runs init_pmu_tool on it; if that fails the
   global is released again.  While the state is PMU_NONE, the profile
   filename is copied and two companion names are derived from it by
   replacing its last four characters: "pmud" for the raw tool output
   and "stderr" for the tool's stderr.  The state then becomes
   PMU_INITIALIZED.  A filename shorter than four characters cannot be
   rewritten this way and puts the profiler into PMU_ERROR.  Running
   out of memory for the names terminates the process with exit (1).  */

void
__gcov_init_pmu_profiler (struct gcov_pmu_info *pmu_info)
{
  char *raw_pmu_profile_filename;
  char *tool_stderr_filename;
  size_t name_len;

  if (!pmu_info || !pmu_info->pmu_profile_filename || !pmu_info->pmu_tool)
    return;

  /* allocate the global structure on first invocation */
  if (!the_pmu_tool_info)
    {
      the_pmu_tool_info = XNEWVEC (struct pmu_tool_info, 1);
      if (!the_pmu_tool_info)
        {
          fprintf (stderr, "Error allocating memory for PMU tool\n");
          return;
        }
      if (init_pmu_tool (pmu_info))
        {
          /* Initialization error */
          XDELETE (the_pmu_tool_info);
          the_pmu_tool_info = 0;
          return;
        }
    }

  switch (the_pmu_tool_info->pmu_profiling_state)
    {
    case PMU_NONE:
      name_len = strlen (pmu_info->pmu_profile_filename);
      if (name_len < 4)
        {
          /* The suffix substitution below would write in front of the
             buffer.  */
          fprintf (stderr, "Invalid PMU profile filename: %s\n",
                   pmu_info->pmu_profile_filename);
          the_pmu_tool_info->pmu_profiling_state = PMU_ERROR;
          break;
        }

      the_pmu_tool_info->pmu_profile_filename =
        strdup (pmu_info->pmu_profile_filename);
      raw_pmu_profile_filename = strdup (pmu_info->pmu_profile_filename);
      /* "stderr" is two characters longer than the "gcda" it replaces,
         plus one for the terminating NUL.  */
      tool_stderr_filename = XNEWVEC (char, name_len + 1 + 2);
      if (the_pmu_tool_info->pmu_profile_filename == NULL
          || raw_pmu_profile_filename == NULL
          || tool_stderr_filename == NULL)
        {
          fprintf (stderr, "Cannot allocate memory\n");
          exit (1);
        }

      /* Construct an intermediate filename by substituting trailing
         '.gcda' with '.pmud'.  */
      strcpy (raw_pmu_profile_filename + name_len - 4, "pmud");

      /* Construct a filename for collecting PMU tool's stderr by
         substituting trailing '.gcda' with '.stderr'.  */
      strcpy (tool_stderr_filename, pmu_info->pmu_profile_filename);
      strcpy (tool_stderr_filename + name_len - 4, "stderr");

      the_pmu_tool_info->raw_pmu_profile_filename = raw_pmu_profile_filename;
      the_pmu_tool_info->tool_stderr_filename = tool_stderr_filename;
      the_pmu_tool_info->pmu_profiling_state = PMU_INITIALIZED;
      break;

    case PMU_INITIALIZED:
    case PMU_OFF:
    case PMU_ON:
    case PMU_ERROR:
      /* Already set up (or failed) by an earlier call.  */
      break;

    default:
      break;
    }
}

/* Start PMU profiling. It updates the current state. 
*/ void __gcov_start_pmu_profiler (void) { if (!the_pmu_tool_info) return; switch (the_pmu_tool_info->pmu_profiling_state) { case PMU_INITIALIZED: if (!pmu_start ()) the_pmu_tool_info->pmu_profiling_state = PMU_ON; else the_pmu_tool_info->pmu_profiling_state = PMU_ERROR; break; case PMU_NONE: /* PMU was not properly initialized, don't attempt start it. */ the_pmu_tool_info->pmu_profiling_state = PMU_ERROR; break; case PMU_OFF: /* Restarting PMU is not yet supported. */ case PMU_ON: /* Do nothing. */ case PMU_ERROR: break; default: break; } } /* Stop PMU profiling. Currently it doesn't do anything except bookkeeping. */ void __gcov_stop_pmu_profiler (void) { if (!the_pmu_tool_info) return; if (the_pmu_tool_info->tool_details->stop_pmu_module) the_pmu_tool_info->tool_details->stop_pmu_module(); if (the_pmu_tool_info->pmu_profiling_state == PMU_ON) the_pmu_tool_info->pmu_profiling_state = PMU_OFF; } /* Write the load latency information LL_INFO into the gcda file. */ static void gcov_write_ll_line (const gcov_pmu_ll_info_t *ll_info) { gcov_unsigned_t len = GCOV_TAG_PMU_LOAD_LATENCY_LENGTH (ll_info->filename); gcov_write_tag_length (GCOV_TAG_PMU_LOAD_LATENCY_INFO, len); gcov_write_unsigned (ll_info->counts); gcov_write_unsigned (ll_info->self); gcov_write_unsigned (ll_info->cum); gcov_write_unsigned (ll_info->lt_10); gcov_write_unsigned (ll_info->lt_32); gcov_write_unsigned (ll_info->lt_64); gcov_write_unsigned (ll_info->lt_256); gcov_write_unsigned (ll_info->lt_1024); gcov_write_unsigned (ll_info->gt_1024); gcov_write_unsigned (ll_info->wself); gcov_write_counter (ll_info->code_addr); gcov_write_unsigned (ll_info->line); gcov_write_unsigned (ll_info->discriminator); gcov_write_string (ll_info->filename); } /* Write the branch mispredict information BRM_INFO into the gcda file. 
*/ static void gcov_write_branch_mispredict_line (const gcov_pmu_brm_info_t *brm_info) { gcov_unsigned_t len = GCOV_TAG_PMU_BRANCH_MISPREDICT_LENGTH ( brm_info->filename); gcov_write_tag_length (GCOV_TAG_PMU_BRANCH_MISPREDICT_INFO, len); gcov_write_unsigned (brm_info->counts); gcov_write_unsigned (brm_info->self); gcov_write_unsigned (brm_info->cum); gcov_write_counter (brm_info->code_addr); gcov_write_unsigned (brm_info->line); gcov_write_unsigned (brm_info->discriminator); gcov_write_string (brm_info->filename); } /* Write load latency information INFO into the gcda file. The gcda file has already been opened and is available for writing. */ static void gcov_write_load_latency_infos (void *info) { unsigned i; const ll_infos_t *ll_infos = (const ll_infos_t *)info; gcov_unsigned_t stamp = 0; /* don't use stamp as we don't support merge */ /* We don't support merge, and instead always rewrite the file. */ gcov_rewrite (); gcov_write_tag_length (GCOV_DATA_MAGIC, GCOV_VERSION); gcov_write_unsigned (stamp); if (ll_infos->pmu_tool_header) gcov_write_tool_header (ll_infos->pmu_tool_header); for (i = 0; i < ll_infos->ll_count; ++i) { /* write each line */ gcov_write_ll_line (ll_infos->ll_array[i]); } gcov_truncate (); } /* Write branch mispredict information INFO into the gcda file. The gcda file has already been opened and is available for writing. */ static void gcov_write_branch_mispredict_infos (void *info) { unsigned i; const brm_infos_t *brm_infos = (const brm_infos_t *)info; gcov_unsigned_t stamp = 0; /* don't use stamp as we don't support merge */ /* We don't support merge, and instead always rewrite the file. 
*/ gcov_rewrite (); gcov_write_tag_length (GCOV_DATA_MAGIC, GCOV_VERSION); gcov_write_unsigned (stamp); if (brm_infos->pmu_tool_header) gcov_write_tool_header (brm_infos->pmu_tool_header); for (i = 0; i < brm_infos->brm_count; ++i) { /* write each line */ gcov_write_branch_mispredict_line (brm_infos->brm_array[i]); } gcov_truncate (); } /* Compute TOOL_HEADER length for writing into the gcov file. */ static gcov_unsigned_t gcov_tag_pmu_tool_header_length (gcov_pmu_tool_header_t *header) { gcov_unsigned_t len = 0; if (header) { len += gcov_string_length (header->host_cpu); len += gcov_string_length (header->hostname); len += gcov_string_length (header->kernel_version); len += gcov_string_length (header->column_header); len += gcov_string_length (header->column_description); len += gcov_string_length (header->full_header); } return len; } /* Write tool header into the gcda file. It assumes that the gcda file has already been opened and is available for writing. */ static void gcov_write_tool_header (gcov_pmu_tool_header_t *header) { gcov_unsigned_t len = gcov_tag_pmu_tool_header_length (header); gcov_write_tag_length (GCOV_TAG_PMU_TOOL_HEADER, len); gcov_write_string (header->host_cpu); gcov_write_string (header->hostname); gcov_write_string (header->kernel_version); gcov_write_string (header->column_header); gcov_write_string (header->column_description); gcov_write_string (header->full_header); } /* End PMU profiling and write data into appropriate gcda file. 
*/ void __gcov_end_pmu_profiler (void) { int pid_status; int wait_status; pid_t pid; pmu_tool_fns *tool_details; if (!the_pmu_tool_info) return; tool_details = the_pmu_tool_info->tool_details; pid = the_pmu_tool_info->pmu_tool_pid; if (pid) { if (tool_debug) fprintf (stderr, "terminating PMU profiling process %ld\n", (long)pid); kill (pid, SIGTERM); if (tool_debug) fprintf (stderr, "parent: waiting for pmu process to end\n"); wait_status = waitpid (pid, &pid_status, 0); if (tool_debug) { if (wait_status == pid) fprintf (stderr, "Normal exit. Child terminated.\n"); else fprintf (stderr, "Abnormal exit. child status, %d.\n", pid_status); } } if (the_pmu_tool_info->pmu_profiling_state != PMU_OFF) { /* nothing to do */ fprintf (stderr, "__gcov_dump_pmu_profile: incorrect pmu state: %d, pid: %ld\n", the_pmu_tool_info->pmu_profiling_state, (unsigned long)pid); return; } if (!tool_details->parse_pmu_output) return; /* Since we are going to parse the output, we also need symbolizer. */ if (tool_details->start_symbolizer) tool_details->start_symbolizer (getpid ()); if (!tool_details->parse_pmu_output (the_pmu_tool_info->raw_pmu_profile_filename, the_pmu_tool_info->pmu_data)) { if (tool_details->gcov_write_pmu_data) /* write tool output into the gcda file. */ tool_details->gcov_write_pmu_data (the_pmu_tool_info->pmu_data); } if (tool_details->end_symbolizer) tool_details->end_symbolizer (); if (tool_details->cleanup_pmu_data) tool_details->cleanup_pmu_data (the_pmu_tool_info->pmu_data); } #endif