diff options
-rw-r--r-- | tools/perf/arch/x86/util/intel-pt.c | 4 | ||||
-rw-r--r-- | tools/perf/util/intel-pt.c | 75 |
2 files changed, 79 insertions, 0 deletions
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6ce451293634..082e5f2a415a 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } } + if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel && + perf_can_record_text_poke_events() && perf_can_record_cpu_wide()) + opts->text_poke = true; + if (intel_pt_evsel) { /* * To obtain the auxtrace buffer file descriptor, the auxtrace diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index cb3c1e569a2d..6c7e44ac444d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -520,6 +520,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, + u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return; + + auxtrace_cache__remove(dso->auxtrace_cache, offset); +} + static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) { return ip >= pt->kernel_start ? @@ -2594,6 +2605,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!thread__find_map(thread, cpumode, addr, al)) + return -1; + } + + return 0; +} + +/* Invalidate all instruction cache entries that overlap the text poke */ +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; + /* Assume text poke begins in a basic block no more than 4096 bytes */ + int cnt = 4096 + event->text_poke.new_len; + struct thread *thread = pt->unknown_thread; + struct addr_location al = { .map = NULL }; + struct machine *machine = pt->machine; + struct intel_pt_cache_entry *e; + u64 offset; + + if (!event->text_poke.new_len) + return 0; + + for (; cnt; cnt--, addr--) { + if (intel_pt_find_map(thread, cpumode, addr, &al)) { + if (addr < event->text_poke.addr) + return 0; + continue; + } + + if (!al.map->dso || !al.map->dso->auxtrace_cache) + continue; + + offset = al.map->map_ip(al.map, addr); + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (!e) + continue; + + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { + /* + * No overlap. Working backwards there cannot be another + * basic block that overlaps the text poke if there is a + * branch instruction before the text poke address. + */ + if (e->branch != INTEL_PT_BR_NO_BRANCH) + return 0; + } else { + intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", + al.map->dso->long_name, addr); + } + } + + return 0; +} + static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -2662,6 +2734,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) + err = intel_pt_text_poke(pt, event); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", event->header.type, sample->cpu, sample->time, timestamp); intel_pt_log_event(event); |