From 48c65bda95d692076de7e5eae3188ddae8635dca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Feb 2014 10:32:53 +0900 Subject: perf annotate: Check availability of annotate when processing samples The TUI of perf report and top supports annotation, but stdio and GTK don't. So it should be checked before calling hist_entry__inc_addr_samples() to avoid wasting resources that will never be used. perf annotate needs it regardless of UI and sort keys, so the check of whether to allocate resources should be on the tools that have annotate as an option in the TUI, 'report' and 'top', not on the function called by all of them. It caused perf annotate on ppc64 to produce zero output, since the buckets were not being allocated. Reported-by: Anton Blanchard Signed-off-by: Namhyung Kim Cc: Anton Blanchard Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392859976-32760-1-git-send-email-namhyung@kernel.org [ Renamed (report,top)__needs_annotate() to ui__has_annotation() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 40 ++++++++++++++++++++++++---------------- tools/perf/builtin-top.c | 6 ++++-- tools/perf/util/annotate.c | 9 ++++++++- tools/perf/util/annotate.h | 2 ++ 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c53ec268fbc..02f985f3a396 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -113,14 +113,16 @@ static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_locati if (!he) return -ENOMEM; - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; + if (ui__has_annotation()) { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (err) + goto out; - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); - if (err) - goto out; + mx = he->mem_info; + err = 
addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); + if (err) + goto out; + } evsel->hists.stats.total_period += cost; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); @@ -164,14 +166,18 @@ static int report__add_branch_hist_entry(struct perf_tool *tool, struct addr_loc he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, 1, 1, 0); if (he) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, evsel->idx); - if (err) - goto out; + if (ui__has_annotation()) { + bx = he->branch_info; + err = addr_map_symbol__inc_samples(&bx->from, + evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bx->to, + evsel->idx); + if (err) + goto out; + } evsel->hists.stats.total_period += 1; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); @@ -205,7 +211,9 @@ static int report__add_hist_entry(struct perf_tool *tool, struct perf_evsel *evs if (err) goto out; - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (ui__has_annotation()) + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + evsel->hists.stats.total_period += sample->period; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); out: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 76cd510d34d0..5f989a7d8bc2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -176,7 +176,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, { struct annotation *notes; struct symbol *sym; - int err; + int err = 0; if (he == NULL || he->ms.sym == NULL || ((top->sym_filter_entry == NULL || @@ -190,7 +190,9 @@ static void perf_top__record_precise_ip(struct perf_top *top, return; ip = he->ms.map->map_ip(he->ms.map, ip); - err = hist_entry__inc_addr_samples(he, counter, ip); + + if (ui__has_annotation()) + err = hist_entry__inc_addr_samples(he, counter, ip); pthread_mutex_unlock(&notes->lock); diff --git 
a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 469eb679fb9d..3aa555ff9d89 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -8,6 +8,8 @@ */ #include "util.h" +#include "ui/ui.h" +#include "sort.h" #include "build-id.h" #include "color.h" #include "cache.h" @@ -489,7 +491,7 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, { struct annotation *notes; - if (sym == NULL || use_browser != 1 || !sort__has_sym) + if (sym == NULL) return 0; notes = symbol__annotation(sym); @@ -1399,3 +1401,8 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) { return symbol__annotate(he->ms.sym, he->ms.map, privsize); } + +bool ui__has_annotation(void) +{ + return use_browser == 1 && sort__has_sym; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b2aef59d6bb2..56ad4f5287de 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -151,6 +151,8 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void disasm__purge(struct list_head *head); +bool ui__has_annotation(void); + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines); -- cgit v1.2.3 From 98e9f03bbf2cb21a60f94b8b700eb5d38470819d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Feb 2014 10:32:54 +0900 Subject: perf symbols: Destroy unused symsrcs Stephane reported that perf report and annotate failed to process data using lots of (> 500) shared libraries. It was because of the limit on the number of open files (ulimit -n). Currently when perf loads a DSO, it'll look for normal and dynamic symbol tables. And if it fails to find both tables, it'll iterate over all possible symtab types. 
But many of them are useless since they have no additional information and the problem is that it's not closing those files even though they're not used. Fix it. Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Cody P Schafer Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392859976-32760-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a9d758a3b371..e89afc097d8a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1336,6 +1336,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (syms_ss && runtime_ss) break; + } else { + symsrc__destroy(ss); } } -- cgit v1.2.3 From 26e61e8939b1fe8729572dabe9a9e97d930dd4f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Feb 2014 16:03:12 +0100 Subject: perf/x86: Fix event scheduling Vince "Super Tester" Weaver reported a new round of syscall fuzzing (Trinity) failures, with perf WARN_ON()s triggering. He also provided traces of the failures. This is I think the relevant bit: > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: x86_pmu_disable > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, n_added: 0, n_txn: 1 > pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 
147.926166: collect_events: Adding event: 1 (ffff880119ec8800) So we add the insn:p event (fd[23]). At this point we should have: n_events = 2, n_added = 1, n_txn = 1 > pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: 0 (ffff8800c9e01800) > pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: 4 (ffff8800cbab2c00) We try and add the {BP,cycles,br_insn} group (fd[3], fd[4], fd[15]). These events are 0:cycles and 4:br_insn, the BP event isn't x86_pmu so that's not visible. group_sched_in() pmu->start_txn() /* nop - BP pmu */ event_sched_in() event->pmu->add() So here we should end up with: 0: n_events = 3, n_added = 2, n_txn = 2 4: n_events = 4, n_added = 3, n_txn = 3 But seeing the below state on x86_pmu_enable(), they must have failed, because the 0 and 4 events aren't there anymore. Looking at group_sched_in(), since the BP is the leader, its event_sched_in() must have succeeded, for otherwise we would not have seen the sibling adds. But since neither 0 nor 4 are in the below state; their event_sched_in() must have failed; but I don't see why, the complete state: 0,0,1:p,4 fits perfectly fine on a core2. However, since we try and schedule 4 it means the 0 event must have succeeded! Therefore the 4 event must have failed, its failure will have put group_sched_in() into the fail path, which will call: event_sched_out() event->pmu->del() on 0 and the BP event. Now x86_pmu_del() will reduce n_events; but it will not reduce n_added; giving what we see below: n_events = 2, n_added = 2, n_txn = 2 > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 
147.926184: x86_pmu_state: n_events: 2, n_added: 2, n_txn: 2 > pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 config: 1 (ffff880119ec8800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 So the problem is that x86_pmu_del(), when called from a group_sched_in() that fails (for whatever reason), and without x86_pmu TXN support (because the leader is !x86_pmu), will corrupt the n_added state. Reported-and-Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Stephane Eranian Cc: Dave Jones Cc: Link: http://lkml.kernel.org/r/20140221150312.GF3104@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 895604f2e916..79f9f848bee4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, event); -- cgit v1.2.3 From e3703f8cdfcf39c25c4338c3ad8e68891cca3731 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Feb 2014 12:06:12 +0100 Subject: perf: Fix hotplug splat Drew Richardson reported that he could make the kernel go *boom* when hotplugging while having perf events active. It turned out that when you have a group event, the code in __perf_event_exit_context() fails to remove the group siblings from the context. 
We then proceed with destroying and freeing the event, and when you re-plug the CPU and try and add another event to that CPU, things go *boom* because you've still got dead entries there. Reported-by: Drew Richardson Signed-off-by: Peter Zijlstra Cc: Will Deacon Cc: Link: http://lkml.kernel.org/n/tip-k6v5wundvusvcseqj1si0oz0@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 56003c6edfd3..fa0b2d4ad83c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7856,14 +7856,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { struct perf_event_context *ctx = __info; - struct perf_event *event, *tmp; + struct perf_event *event; perf_pmu_rotate_stop(ctx->pmu); - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - __perf_remove_from_context(event); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) __perf_remove_from_context(event); + rcu_read_unlock(); } static void perf_event_exit_cpu_context(int cpu) @@ -7887,11 +7887,11 @@ static void perf_event_exit_cpu(int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); + perf_event_exit_cpu_context(cpu); + mutex_lock(&swhash->hlist_mutex); swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); - - perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } -- cgit v1.2.3 From 280e7c48c3b873e4987a63da276ecab25383f494 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 11 Jan 2014 11:42:51 -0800 Subject: perf tools: fix BFD detection on opensuse opensuse libbfd requires -lz -liberty to build. Add those to the BFD feature detection. 
Signed-off-by: Andi Kleen Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1389469379-13340-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/config/Makefile | 2 +- tools/perf/config/feature-checks/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c48d44958172..0331ea2701a3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -478,7 +478,7 @@ else endif ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd + EXTLIBS += -lbfd -lz -liberty endif ifdef NO_DEMANGLE diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 12e551346fa6..523b7bc10553 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -121,7 +121,7 @@ test-libpython-version.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) test-libbfd.bin: - $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -- cgit v1.2.3 From b39c2a57a00a841f057a75b41df4c26173288b66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Feb 2014 18:14:26 +0100 Subject: perf tools: Fix strict alias issue for find_first_bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling perf tool code with gcc 4.4.7 I'm getting following error: CC util/session.o cc1: warnings being treated as errors util/session.c: In function ‘perf_session_deliver_event’: tools/perf/util/include/linux/bitops.h:109: error: dereferencing pointer ‘p’ does break strict-aliasing rules tools/perf/util/include/linux/bitops.h:101: error: dereferencing pointer ‘p’ does break strict-aliasing rules util/session.c:697: note: initialized 
from here tools/perf/util/include/linux/bitops.h:101: note: initialized from here make[1]: *** [util/session.o] Error 1 make: *** [util/session.o] Error 2 The aliased types here are u64 and unsigned long pointers, which is safe for the find_first_bit processing. This error shows up for me only for gcc 4.4 on 32bit x86, even for -Wstrict-aliasing=3, while newer gcc are quiet and scream here for -Wstrict-aliasing={2,1}. Looks like newer gcc changed the rules for strict alias warnings. The gcc documentation offers workaround for valid aliasing by using __may_alias__ attribute: http://gcc.gnu.org/onlinedocs/gcc-4.4.0/gcc/Type-Attributes.html Using this workaround for the find_first_bit function. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393434867-20271-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/bitops.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 45cf10a562bd..dadfa7e54287 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -87,13 +87,15 @@ static __always_inline unsigned long __ffs(unsigned long word) return num; } +typedef const unsigned long __attribute__((__may_alias__)) long_alias_t; + /* * Find the first set bit in a memory region. */ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; + long_alias_t *p = (long_alias_t *) addr; unsigned long result = 0; unsigned long tmp; -- cgit v1.2.3