From 1844dbcbe78503e0f4a8996d69da725d5e7a5177 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 28 May 2014 14:12:18 +0900 Subject: perf tools: Introduce hists__inc_nr_samples() There're some duplicate code for counting number of samples. Add hists__inc_nr_samples() and reuse it. Suggested-by: Jiri Olsa Signed-off-by: Namhyung Kim Link: http://lkml.kernel.org/r/1401335910-16832-2-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 4 +--- tools/perf/builtin-sched.c | 2 +- tools/perf/builtin-top.c | 5 +---- tools/perf/tests/hists_filter.c | 4 +--- tools/perf/util/hist.c | 7 +++++++ tools/perf/util/hist.h | 1 + 7 files changed, 13 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d30d2c2e2a7a..bf52461a88bd 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -70,7 +70,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return -ENOMEM; ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); return ret; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc0eec1ce4be..4a3b84dd4f41 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -92,9 +92,7 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he) * counted in perf_session_deliver_event(). The dump_trace * requires this info is ready before going to the output tree. 
*/ - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; + hists__inc_nr_samples(he->hists, he->filtered); } static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d7176830b9b2..c38d06c04775 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ int err = 0; evsel->hists.stats.total_period += sample->period; - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5b389ce4cd15..51309264d210 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -252,10 +252,7 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, if (he == NULL) return NULL; - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - evsel->hists.stats.nr_non_filtered_samples++; - + hists__inc_nr_samples(&evsel->hists, he->filtered); return he; } diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index c5ba924a3581..0a71ef4b9158 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -85,9 +85,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) fake_samples[i].map = al.map; fake_samples[i].sym = al.sym; - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; + hists__inc_nr_samples(he->hists, he->filtered); } } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b262b44b7a65..5943ba60f193 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -800,6 +800,13 @@ void 
hists__inc_nr_events(struct hists *hists, u32 type) events_stats__inc(&hists->stats, type); } +void hists__inc_nr_samples(struct hists *hists, bool filtered) +{ + events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); + if (!filtered) + hists->stats.nr_non_filtered_samples++; +} + static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a8418d19808d..03ae1dbb1b15 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -119,6 +119,7 @@ u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists, u32 type); +void hists__inc_nr_samples(struct hists *hists, bool filtered); void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); -- cgit v1.2.3 From 69bcb019fc809874f518559c8e5b0a90176f0532 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 09:40:34 +0900 Subject: perf tools: Introduce struct hist_entry_iter There is some duplicated code for adding hist entries. The variants differ in that some carry branch info or mem info, but they generally do the same thing. So introduce a new struct hist_entry_iter and add callbacks to customize each case in a general way. The new perf_evsel__add_entry() function will look like: iter->prepare_entry(); iter->add_single_entry(); while (iter->next_entry()) iter->add_next_entry(); iter->finish_entry(); This will help further work like the cumulative callchain patchset.
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: David Ahern Cc: Frederic Weisbecker Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1401335910-16832-3-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-report.c | 192 ++++---------------------- tools/perf/tests/hists_filter.c | 16 ++- tools/perf/tests/hists_output.c | 11 +- tools/perf/util/hist.c | 299 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 33 +++++ 5 files changed, 372 insertions(+), 179 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4a3b84dd4f41..3201bdfa8c3f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -76,163 +76,16 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, struct hist_entry *he) +static void report__inc_stats(struct report *rep, + struct hist_entry *he __maybe_unused) { /* - * The @he is either of a newly created one or an existing one - * merging current sample. We only want to count a new one so - * checking ->nr_events being 1. + * We cannot access @he at this time. Just assume it's a new entry. + * It'll be fixed once we have a callback mechanism in hist_iter. */ - if (he->stat.nr_events == 1) - rep->nr_entries++; - - /* - * Only counts number of samples at this stage as it's more - * natural to do it here and non-sample events are also - * counted in perf_session_deliver_event(). The dump_trace - * requires this info is ready before going to the output tree. 
- */ - hists__inc_nr_samples(he->hists, he->filtered); -} - -static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) -{ - struct symbol *parent = NULL; - struct hist_entry *he; - struct mem_info *mi, *mx; - uint64_t cost; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; - - mi = sample__resolve_mem(sample, al); - if (!mi) - return -ENOMEM; - - if (rep->hide_unresolved && !al->sym) - return 0; - - cost = sample->weight; - if (!cost) - cost = 1; - - /* - * must pass period=weight in order to get the correct - * sorting from hists__collapse_resort() which is solely - * based on periods. We want sorting be done on nr_events * weight - * and this is indirectly achieved by passing period=weight here - * and the he_stat__add_period() function. - */ - he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi, - cost, cost, 0); - if (!he) - return -ENOMEM; - - if (ui__has_annotation()) { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; - - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); - if (err) - goto out; - } - - report__inc_stats(rep, he); - - err = hist_entry__append_callchain(he, sample); -out: - return err; -} - -static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) -{ - struct symbol *parent = NULL; - unsigned i; - struct hist_entry *he; - struct branch_info *bi, *bx; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; - - bi = sample__resolve_bstack(sample, al); - if (!bi) - return -ENOMEM; - - for (i = 0; i < sample->branch_stack->nr; i++) { - if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) - continue; - - err = -ENOMEM; - - /* overwrite the 'al' to branch-to info */ - al->map = 
bi[i].to.map; - al->sym = bi[i].to.sym; - al->addr = bi[i].to.addr; - /* - * The report shows the percentage of total branches captured - * and not events sampled. Thus we use a pseudo period of 1. - */ - he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, - 1, 1, 0); - if (he) { - if (ui__has_annotation()) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, - evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, - evsel->idx); - if (err) - goto out; - } - report__inc_stats(rep, he); - } else - goto out; - } - err = 0; -out: - free(bi); - return err; + rep->nr_entries++; } -static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, - struct addr_location *al, struct perf_sample *sample) -{ - struct symbol *parent = NULL; - struct hist_entry *he; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; - - he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction); - if (he == NULL) - return -ENOMEM; - - err = hist_entry__append_callchain(he, sample); - if (err) - goto out; - - if (ui__has_annotation()) - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - - report__inc_stats(rep, he); - -out: - return err; -} - - static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -241,6 +94,9 @@ static int process_sample_event(struct perf_tool *tool, { struct report *rep = container_of(tool, struct report, tool); struct addr_location al; + struct hist_entry_iter iter = { + .hide_unresolved = rep->hide_unresolved, + }; int ret; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { @@ -255,22 +111,22 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (sort__mode == SORT_MODE__BRANCH) { - ret = 
report__add_branch_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding lbr entry, skipping event\n"); - } else if (rep->mem_mode == 1) { - ret = report__add_mem_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding mem entry, skipping event\n"); - } else { - if (al.map != NULL) - al.map->dso->hit = 1; - - ret = report__add_hist_entry(rep, evsel, &al, sample); - if (ret < 0) - pr_debug("problem incrementing symbol period, skipping event\n"); - } + if (sort__mode == SORT_MODE__BRANCH) + iter.ops = &hist_iter_branch; + else if (rep->mem_mode) + iter.ops = &hist_iter_mem; + else + iter.ops = &hist_iter_normal; + + if (al.map != NULL) + al.map->dso->hit = 1; + + report__inc_stats(rep, NULL); + + ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack); + if (ret < 0) + pr_debug("problem adding hist entry, skipping event\n"); + return ret; } diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 0a71ef4b9158..76b02e1de701 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -42,11 +42,11 @@ static struct sample fake_samples[] = { { .pid = 300, .ip = 0xf0000 + 800, }, }; -static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) +static int add_hist_entries(struct perf_evlist *evlist, + struct machine *machine __maybe_unused) { struct perf_evsel *evsel; struct addr_location al; - struct hist_entry *he; struct perf_sample sample = { .cpu = 0, }; size_t i; @@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; /* make sure it has no filter at first */ evsel->hists.thread_filter = NULL; @@ -71,21 +75,19 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; 
sample.ip = fake_samples[i].ip; + sample.period = 100; if (perf_event__preprocess_sample(&event, machine, &al, &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 100, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH) < 0) goto out; fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; fake_samples[i].sym = al.sym; - - hists__inc_nr_samples(he->hists, he->filtered); } } diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a16850551797..1308f88a9169 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -46,7 +46,7 @@ static struct sample fake_samples[] = { static int add_hist_entries(struct hists *hists, struct machine *machine) { struct addr_location al; - struct hist_entry *he; + struct perf_evsel *evsel = hists_to_evsel(hists); struct perf_sample sample = { .period = 100, }; size_t i; @@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; @@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(hists, &al, NULL, NULL, NULL, - sample.period, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH) < 0) goto out; fake_samples[i].thread = al.thread; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 5943ba60f193..d8662356de20 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -4,6 +4,7 @@ #include "session.h" #include "sort.h" #include "evsel.h" +#include "annotate.h" #include static bool hists__filter_entry_by_dso(struct hists *hists, @@ -429,6 +430,304 @@ struct hist_entry *__hists__add_entry(struct 
hists *hists, return add_hist_entry(hists, &entry, al); } +static int +iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_sample *sample = iter->sample; + struct mem_info *mi; + + mi = sample__resolve_mem(sample, al); + if (mi == NULL) + return -ENOMEM; + + iter->priv = mi; + return 0; +} + +static int +iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + u64 cost; + struct mem_info *mi = iter->priv; + struct hist_entry *he; + + if (mi == NULL) + return -EINVAL; + + cost = iter->sample->weight; + if (!cost) + cost = 1; + + /* + * must pass period=weight in order to get the correct + * sorting from hists__collapse_resort() which is solely + * based on periods. We want sorting be done on nr_events * weight + * and this is indirectly achieved by passing period=weight here + * and the he_stat__add_period() function. 
+ */ + he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi, + cost, cost, 0); + if (!he) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct hist_entry *he = iter->he; + struct mem_info *mx; + int err = -EINVAL; + + if (he == NULL) + goto out; + + if (ui__has_annotation()) { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (err) + goto out; + + mx = he->mem_info; + err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); + if (err) + goto out; + } + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + err = hist_entry__append_callchain(he, iter->sample); + +out: + /* + * We don't need to free iter->priv (mem_info) here since + * the mem info was either already freed in add_hist_entry() or + * passed to a new hist entry by hist_entry__new(). + */ + iter->priv = NULL; + + iter->he = NULL; + return err; +} + +static int +iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi; + struct perf_sample *sample = iter->sample; + + bi = sample__resolve_bstack(sample, al); + if (!bi) + return -ENOMEM; + + iter->curr = 0; + iter->total = sample->branch_stack->nr; + + iter->priv = bi; + return 0; +} + +static int +iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi = iter->priv; + int i = iter->curr; + + if (bi == NULL) + return 0; + + if (iter->curr >= iter->total) + return 0; + + al->map = bi[i].to.map; + al->sym = bi[i].to.sym; + al->addr = bi[i].to.addr; + return 1; +} + +static int +iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi, *bx; + struct perf_evsel *evsel = 
iter->evsel; + struct hist_entry *he = NULL; + int i = iter->curr; + int err = 0; + + bi = iter->priv; + + if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + goto out; + + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL, + 1, 1, 0); + if (he == NULL) + return -ENOMEM; + + if (ui__has_annotation()) { + bx = he->branch_info; + err = addr_map_symbol__inc_samples(&bx->from, evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bx->to, evsel->idx); + if (err) + goto out; + } + + hists__inc_nr_samples(&evsel->hists, he->filtered); + +out: + iter->he = he; + iter->curr++; + return err; +} + +static int +iter_finish_branch_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + zfree(&iter->priv); + iter->he = NULL; + + return iter->curr >= iter->total ? 0 : -1; +} + +static int +iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + int err; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + + if (he == NULL) + return 0; + + iter->he = NULL; + + if (ui__has_annotation()) { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (err) + return err; + } + + 
hists__inc_nr_samples(&evsel->hists, he->filtered); + + return hist_entry__append_callchain(he, sample); +} + +const struct hist_iter_ops hist_iter_mem = { + .prepare_entry = iter_prepare_mem_entry, + .add_single_entry = iter_add_single_mem_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_mem_entry, +}; + +const struct hist_iter_ops hist_iter_branch = { + .prepare_entry = iter_prepare_branch_entry, + .add_single_entry = iter_add_single_branch_entry, + .next_entry = iter_next_branch_entry, + .add_next_entry = iter_add_next_branch_entry, + .finish_entry = iter_finish_branch_entry, +}; + +const struct hist_iter_ops hist_iter_normal = { + .prepare_entry = iter_prepare_normal_entry, + .add_single_entry = iter_add_single_normal_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_normal_entry, +}; + +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth) +{ + int err, err2; + + err = sample__resolve_callchain(sample, &iter->parent, evsel, al, + max_stack_depth); + if (err) + return err; + + iter->evsel = evsel; + iter->sample = sample; + + err = iter->ops->prepare_entry(iter, al); + if (err) + goto out; + + err = iter->ops->add_single_entry(iter, al); + if (err) + goto out; + + while (iter->ops->next_entry(iter, al)) { + err = iter->ops->add_next_entry(iter, al); + if (err) + break; + } + +out: + err2 = iter->ops->finish_entry(iter, al); + if (!err) + err = err2; + + return err; +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 03ae1dbb1b15..8894f184357c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -96,12 +96,45 @@ struct hists { u16 col_len[HISTC_NR_COLS]; }; +struct hist_entry_iter; + +struct hist_iter_ops { + 
int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *); + int (*next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*finish_entry)(struct hist_entry_iter *, struct addr_location *); +}; + +struct hist_entry_iter { + int total; + int curr; + + bool hide_unresolved; + + struct perf_evsel *evsel; + struct perf_sample *sample; + struct hist_entry *he; + struct symbol *parent; + void *priv; + + const struct hist_iter_ops *ops; +}; + +extern const struct hist_iter_ops hist_iter_normal; +extern const struct hist_iter_ops hist_iter_branch; +extern const struct hist_iter_ops hist_iter_mem; + struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, struct branch_info *bi, struct mem_info *mi, u64 period, u64 weight, u64 transaction); +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth); + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); -- cgit v1.2.3 From f8be1c8c48c8469d1ce95ccdc77b1e2c6a29700e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 11 Sep 2012 13:15:07 +0900 Subject: perf hists: Add support for accumulated stat of hist entry Maintain accumulated stat information in hist_entry->stat_acc if symbol_conf.cumulate_callchain is set. Fields in ->stat_acc have the same values initially, and will be updated as the callchain is processed later.
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-4-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/hist.c | 28 ++++++++++++++++++++++++++-- tools/perf/util/sort.h | 1 + tools/perf/util/symbol.h | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d8662356de20..dfff2ee8effb 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -232,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return true; he_stat__decay(&he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__decay(he->stat_acc); diff = prev_period - he->stat.period; @@ -279,12 +281,26 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) static struct hist_entry *hist_entry__new(struct hist_entry *template) { - size_t callchain_size = symbol_conf.use_callchain ? 
sizeof(struct callchain_root) : 0; - struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); + size_t callchain_size = 0; + struct hist_entry *he; + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + callchain_size = sizeof(struct callchain_root); + + he = zalloc(sizeof(*he) + callchain_size); if (he != NULL) { *he = *template; + if (symbol_conf.cumulate_callchain) { + he->stat_acc = malloc(sizeof(he->stat)); + if (he->stat_acc == NULL) { + free(he); + return NULL; + } + memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); + } + if (he->ms.map) he->ms.map->referenced = true; @@ -296,6 +312,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) */ he->branch_info = malloc(sizeof(*he->branch_info)); if (he->branch_info == NULL) { + free(he->stat_acc); free(he); return NULL; } @@ -359,6 +376,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!cmp) { he_stat__add_period(&he->stat, period, weight); + if (symbol_conf.cumulate_callchain) + he_stat__add_period(he->stat_acc, period, weight); /* * This mem info was allocated from sample__resolve_mem @@ -394,6 +413,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, rb_insert_color(&he->rb_node_in, hists->entries_in); out: he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (symbol_conf.cumulate_callchain) + he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); return he; } @@ -768,6 +789,7 @@ void hist_entry__free(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); + zfree(&he->stat_acc); free_srcline(he->srcline); free(he); } @@ -793,6 +815,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 
5f38d925e92f..c9ffa031becd 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -82,6 +82,7 @@ struct hist_entry { struct list_head head; } pairs; struct he_stat stat; + struct he_stat *stat_acc; struct map_symbol ms; struct thread *thread; struct comm *comm; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 33ede53fa6b9..615c752dd767 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -109,6 +109,7 @@ struct symbol_conf { show_nr_samples, show_total_period, use_callchain, + cumulate_callchain, exclude_other, show_cpu_utilization, initialized, -- cgit v1.2.3 From a0b51af367a6831330564c96dc4cc1ac63413701 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 11 Sep 2012 13:34:27 +0900 Subject: perf hists: Check if accumulated when adding a hist entry To support callchain accumulation, add_hist_entry() needs to know whether @entry is an accumulated entry when it is called. The period of an accumulated entry should be added to ->stat_acc but not ->stat. Add @sample_self arg for that.
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-5-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-annotate.c | 3 ++- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/tests/hists_link.c | 4 ++-- tools/perf/util/hist.c | 29 ++++++++++++++++++----------- tools/perf/util/hist.h | 3 ++- 6 files changed, 26 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index bf52461a88bd..1ec429fef2be 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -65,7 +65,8 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0, + true); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8bff543acaab..9a5a035cb426 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists, u64 weight, u64 transaction) { if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, - transaction) != NULL) + transaction, true) != NULL) return 0; return -ENOMEM; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 51309264d210..12e2e1227e47 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -247,7 +247,7 @@ static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, pthread_mutex_lock(&evsel->hists.lock); he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, sample->period, sample->weight, - sample->transaction); + sample->transaction, true); pthread_mutex_unlock(&evsel->hists.lock); if (he == NULL) return NULL; diff --git a/tools/perf/tests/hists_link.c 
b/tools/perf/tests/hists_link.c index 5ffa2c3eb77d..ca6693b37cd7 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; @@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index dfff2ee8effb..b9facf33b224 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -279,7 +279,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) * histogram, sorted on item, collects periods */ -static struct hist_entry *hist_entry__new(struct hist_entry *template) +static struct hist_entry *hist_entry__new(struct hist_entry *template, + bool sample_self) { size_t callchain_size = 0; struct hist_entry *he; @@ -299,6 +300,8 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) return NULL; } memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); + if (!sample_self) + memset(&he->stat, 0, sizeof(he->stat)); } if (he->ms.map) @@ -351,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent) static struct hist_entry *add_hist_entry(struct hists *hists, struct hist_entry *entry, - struct addr_location *al) + struct addr_location *al, + bool sample_self) { struct rb_node **p; struct rb_node *parent = NULL; @@ -375,7 +379,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - he_stat__add_period(&he->stat, period, weight); + if (sample_self) + he_stat__add_period(&he->stat, period, weight); if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, 
period, weight); @@ -405,14 +410,15 @@ static struct hist_entry *add_hist_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(entry); + he = hist_entry__new(entry, sample_self); if (!he) return NULL; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: - he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (sample_self) + he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); return he; @@ -423,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, - u64 period, u64 weight, u64 transaction) + u64 period, u64 weight, u64 transaction, + bool sample_self) { struct hist_entry entry = { .thread = al->thread, @@ -448,7 +455,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .transaction = transaction, }; - return add_hist_entry(hists, &entry, al); + return add_hist_entry(hists, &entry, al, sample_self); } static int @@ -501,7 +508,7 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al * and the he_stat__add_period() function. */ he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi, - cost, cost, 0); + cost, cost, 0, true); if (!he) return -ENOMEM; @@ -608,7 +615,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a * and not events sampled. Thus we use a pseudo period of 1. 
*/ he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL, - 1, 1, 0); + 1, 1, 0, true); if (he == NULL) return -ENOMEM; @@ -657,7 +664,7 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, sample->period, sample->weight, - sample->transaction); + sample->transaction, true); if (he == NULL) return -ENOMEM; @@ -1161,7 +1168,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(pair); + he = hist_entry__new(pair, true); if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8894f184357c..bedb24d3643c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -130,7 +130,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct symbol *parent, struct branch_info *bi, struct mem_info *mi, u64 period, - u64 weight, u64 transaction); + u64 weight, u64 transaction, + bool sample_self); int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_evsel *evsel, struct perf_sample *sample, int max_stack_depth); -- cgit v1.2.3 From 7a13aa28aa268359cee006059731f49bcd1f839e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 11 Sep 2012 14:13:04 +0900 Subject: perf hists: Accumulate hist entry stat based on the callchain Call __hists__add_entry() for each callchain node to get an accumulated stat for an entry. Introduce new cumulative_iter ops to process them properly. 
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-6-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-report.c | 2 + tools/perf/util/callchain.c | 3 +- tools/perf/util/hist.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 1 + 4 files changed, 101 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3201bdfa8c3f..e8fa9fea341f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -115,6 +115,8 @@ static int process_sample_event(struct perf_tool *tool, iter.ops = &hist_iter_branch; else if (rep->mem_mode) iter.ops = &hist_iter_mem; + else if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; else iter.ops = &hist_iter_normal; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9a42382b3921..2af69c47b725 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent if (sample->callchain == NULL) return 0; - if (symbol_conf.use_callchain || sort__has_parent) { + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || + sort__has_parent) { return machine__resolve_callchain(al->machine, evsel, al->thread, sample, parent, al, max_stack); } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b9facf33b224..6079b5acfb6d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -696,6 +696,94 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) return hist_entry__append_callchain(he, sample); } +static int +iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + callchain_cursor_commit(&callchain_cursor); + return 0; +} + +static int 
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry *he; + int err = 0; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, true); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + + /* + * The iter->he will be over-written after ->add_next_entry() + * called so inc stats for the original entry now. + */ + if (ui__has_annotation()) + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + return err; +} + +static int +iter_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct callchain_cursor_node *node; + + node = callchain_cursor_current(&callchain_cursor); + if (node == NULL) + return 0; + + al->map = node->map; + al->sym = node->sym; + if (node->map) + al->addr = node->map->map_ip(node->map, node->ip); + else + al->addr = node->ip; + + if (iter->hide_unresolved && al->sym == NULL) + return 0; + + callchain_cursor_advance(&callchain_cursor); + return 1; +} + +static int +iter_add_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, false); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + + return 0; +} + +static int +iter_finish_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + iter->he = NULL; + return 0; +} + const struct hist_iter_ops hist_iter_mem = { .prepare_entry = iter_prepare_mem_entry, .add_single_entry = iter_add_single_mem_entry, @@ -720,6 +808,14 @@ const struct hist_iter_ops hist_iter_normal = { 
.finish_entry = iter_finish_normal_entry, }; +const struct hist_iter_ops hist_iter_cumulative = { + .prepare_entry = iter_prepare_cumulative_entry, + .add_single_entry = iter_add_single_cumulative_entry, + .next_entry = iter_next_cumulative_entry, + .add_next_entry = iter_add_next_cumulative_entry, + .finish_entry = iter_finish_cumulative_entry, +}; + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_evsel *evsel, struct perf_sample *sample, int max_stack_depth) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bedb24d3643c..78409f95d012 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -124,6 +124,7 @@ struct hist_entry_iter { extern const struct hist_iter_ops hist_iter_normal; extern const struct hist_iter_ops hist_iter_branch; extern const struct hist_iter_ops hist_iter_mem; +extern const struct hist_iter_ops hist_iter_cumulative; struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, -- cgit v1.2.3 From c7405d85d7a354b8ba49e2db7c4b027e6cb997c1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 Oct 2013 13:58:30 +0900 Subject: perf tools: Update cpumode for each cumulative entry The cpumode and level in struct addr_location were set for a sample but not updated as cumulative callchains were added. This led to non-matching symbol and cpumode in the output. Update them accordingly, based on whether the map is a part of the kernel or not. This is the reverse of what thread__find_addr_map() does.
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-7-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/callchain.c | 42 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 2 ++ tools/perf/util/hist.c | 13 ++----------- 3 files changed, 46 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2af69c47b725..48b6d3f50012 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -630,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp return 0; return callchain_append(he->callchain, &callchain_cursor, sample->period); } + +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved) +{ + al->map = node->map; + al->sym = node->sym; + if (node->map) + al->addr = node->map->map_ip(node->map, node->ip); + else + al->addr = node->ip; + + if (al->sym == NULL) { + if (hide_unresolved) + return 0; + if (al->map == NULL) + goto out; + } + + if (al->map->groups == &al->machine->kmaps) { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_KERNEL; + al->level = 'k'; + } else { + al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + al->level = 'g'; + } + } else { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_USER; + al->level = '.'; + } else if (perf_guest) { + al->cpumode = PERF_RECORD_MISC_GUEST_USER; + al->level = 'u'; + } else { + al->cpumode = PERF_RECORD_MISC_HYPERVISOR; + al->level = 'H'; + } + } + +out: + return 1; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index bde2b0cc24cf..24a53d562d0a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -162,6 +162,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent struct 
perf_evsel *evsel, struct addr_location *al, int max_stack); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved); extern const char record_callchain_help[]; int parse_callchain_report_opt(const char *arg); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6079b5acfb6d..37c28fc13dc3 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -743,18 +743,9 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, if (node == NULL) return 0; - al->map = node->map; - al->sym = node->sym; - if (node->map) - al->addr = node->map->map_ip(node->map, node->ip); - else - al->addr = node->ip; - - if (iter->hide_unresolved && al->sym == NULL) - return 0; - callchain_cursor_advance(&callchain_cursor); - return 1; + + return fill_callchain_info(al, node, iter->hide_unresolved); } static int -- cgit v1.2.3 From b4d3c8bd86c4eda08456691121f83b4e1db46866 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 Oct 2013 10:05:29 +0900 Subject: perf report: Cache cumulative callchains It is possible that a callchain has cycles or recursive calls. In that case it'll end up having entries with more than 100% overhead in the output. In order to prevent such entries, cache each callchain node and skip it if the same entry was already cumulated.
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-8-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/hist.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 37c28fc13dc3..bf03db528db6 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -700,7 +700,22 @@ static int iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) { + struct hist_entry **he_cache; + callchain_cursor_commit(&callchain_cursor); + + /* + * This is for detecting cycles or recursions so that they're + * cumulated only one time to prevent entries more than 100% + * overhead. + */ + he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + if (he_cache == NULL) + return -ENOMEM; + + iter->priv = he_cache; + iter->curr = 0; + return 0; } @@ -710,6 +725,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, { struct perf_evsel *evsel = iter->evsel; struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; struct hist_entry *he; int err = 0; @@ -720,6 +736,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, return -ENOMEM; iter->he = he; + he_cache[iter->curr++] = he; /* * The iter->he will be over-written after ->add_next_entry() @@ -754,7 +771,29 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, { struct perf_evsel *evsel = iter->evsel; struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; struct hist_entry *he; + struct hist_entry he_tmp = { + .cpu = al->cpu, + .thread = al->thread, + .comm = thread__comm(al->thread), + .ip = al->addr, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .parent = iter->parent, + }; + int i; + + /* + * Check if there's duplicate 
entries in the callchain. + * It's possible that it has cycles or recursive calls. + */ + for (i = 0; i < iter->curr; i++) { + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) + return 0; + } he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, sample->period, sample->weight, @@ -763,6 +802,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, return -ENOMEM; iter->he = he; + he_cache[iter->curr++] = he; return 0; } @@ -771,7 +811,9 @@ static int iter_finish_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { + zfree(&iter->priv); iter->he = NULL; + return 0; } -- cgit v1.2.3 From be1f13e30862ab6b0fffaecd556856a965cefa0c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 10 Sep 2012 13:38:00 +0900 Subject: perf callchain: Add callchain_cursor_snapshot() The callchain_cursor_snapshot() is for saving current status of the callchain. It'll be used to accumulate callchain information for each node. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-9-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/callchain.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 24a53d562d0a..8f84423a75da 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -167,4 +167,13 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * extern const char record_callchain_help[]; int parse_callchain_report_opt(const char *arg); + +static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, + struct callchain_cursor *src) +{ + *dest = *src; + + dest->first = src->curr; + dest->nr -= src->pos; +} #endif /* __PERF_CALLCHAIN_H */ -- cgit v1.2.3 From be7f855a3eebe07f797b9e4a43bf59bab8ca3dbe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 26 
Dec 2013 17:44:10 +0900 Subject: perf tools: Save callchain info for each cumulative entry When accumulating callchain entry, also save current snapshot of the chain so that it can show the rest of the chain. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-10-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/hist.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index bf03db528db6..c6f5f5251aad 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -738,6 +738,14 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; + callchain_append(he->callchain, &callchain_cursor, sample->period); + + /* + * We need to re-initialize the cursor since callchain_append() + * advanced the cursor to the end. + */ + callchain_cursor_commit(&callchain_cursor); + /* * The iter->he will be over-written after ->add_next_entry() * called so inc stats for the original entry now. @@ -760,8 +768,6 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, if (node == NULL) return 0; - callchain_cursor_advance(&callchain_cursor); - return fill_callchain_info(al, node, iter->hide_unresolved); } @@ -785,6 +791,11 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .parent = iter->parent, }; int i; + struct callchain_cursor cursor; + + callchain_cursor_snapshot(&cursor, &callchain_cursor); + + callchain_cursor_advance(&callchain_cursor); /* * Check if there's duplicate entries in the callchain. 
@@ -804,6 +815,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; + callchain_append(he->callchain, &cursor, sample->period); return 0; } -- cgit v1.2.3 From 594dcbf3186e2e1e5c08fa21e8826b90d347f23f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 16:06:59 +0900 Subject: perf ui/hist: Add support to accumulated hist stat Print accumulated stat of a hist entry if requested. To do that, add new HPP_PERCENT_ACC_FNS macro and generate a perf_hpp_fmt using it. The __hpp__sort_acc() function sorts entries by accumulated period value. When accumulated periods of two entries are same (i.e. single path callchain) put the caller above since accumulation tends to put callers on higher position for obvious reason. Also add "overhead_children" output field to be selected by user. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-11-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/hist.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 4 ++ tools/perf/util/sort.c | 1 + 3 files changed, 104 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4484f5bd1b14..0ce3e79b2ca7 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, return ret; } +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent) +{ + if (!symbol_conf.cumulate_callchain) { + return snprintf(hpp->buf, hpp->size, "%*s", + fmt_percent ? 
8 : 12, "N/A"); + } + + return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent); +} + static int field_cmp(u64 field_a, u64 field_b) { if (field_a > field_b) @@ -160,6 +172,24 @@ out: return ret; } +static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field) +{ + s64 ret = 0; + + if (symbol_conf.cumulate_callchain) { + /* + * Put caller above callee when they have equal period. + */ + ret = field_cmp(get_field(a), get_field(b)); + if (ret) + return ret; + + ret = b->callchain->max_depth - a->callchain->max_depth; + } + return ret; +} + #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, \ @@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ return __hpp__sort(a, b, he_get_##_field); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + hpp_color_scnprintf, true); \ +} + +#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + const char *fmt = symbol_conf.field_sep ? 
" %.2f" : " %6.2f%%"; \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \ + hpp_entry_scnprintf, true); \ +} + +#define __HPP_SORT_ACC_FN(_type, _field) \ +static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +{ \ + return __hpp__sort_acc(a, b, he_get_acc_##_field); \ +} + #define __HPP_ENTRY_RAW_FN(_type, _field) \ static u64 he_get_raw_##_field(struct hist_entry *he) \ { \ @@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \ __HPP_ENTRY_PERCENT_FN(_type, _field) \ __HPP_SORT_FN(_type, _field) +#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\ +__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +__HPP_SORT_ACC_FN(_type, _field) + #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ __HPP_ENTRY_RAW_FN(_type, _field) \ __HPP_SORT_RAW_FN(_type, _field) +__HPP_HEADER_FN(overhead_self, "Self", 8, 8) HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) +HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8) HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) HPP_RAW_FNS(period, "Period", period, 12, 12) @@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, .sort = hpp__sort_ ## _name, \ } +#define HPP__COLOR_ACC_PRINT_FNS(_name) \ + { \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name, \ + .cmp = hpp__nop_cmp, \ + .collapse = hpp__nop_cmp, \ + .sort = hpp__sort_ ## 
_name, \ + } + #define HPP__PRINT_FNS(_name) \ { \ .header = hpp__header_ ## _name, \ @@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = { HPP__COLOR_PRINT_FNS(overhead_us), HPP__COLOR_PRINT_FNS(overhead_guest_sys), HPP__COLOR_PRINT_FNS(overhead_guest_us), + HPP__COLOR_ACC_PRINT_FNS(overhead_acc), HPP__PRINT_FNS(samples), HPP__PRINT_FNS(period) }; @@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list); #undef HPP__COLOR_PRINT_FNS +#undef HPP__COLOR_ACC_PRINT_FNS #undef HPP__PRINT_FNS #undef HPP_PERCENT_FNS +#undef HPP_PERCENT_ACC_FNS #undef HPP_RAW_FNS #undef __HPP_HEADER_FN #undef __HPP_WIDTH_FN #undef __HPP_COLOR_PERCENT_FN #undef __HPP_ENTRY_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN +#undef __HPP_ENTRY_ACC_PERCENT_FN #undef __HPP_ENTRY_RAW_FN +#undef __HPP_SORT_FN +#undef __HPP_SORT_ACC_FN +#undef __HPP_SORT_RAW_FN void perf_hpp__init(void) @@ -361,6 +447,13 @@ void perf_hpp__init(void) if (field_order) return; + if (symbol_conf.cumulate_callchain) { + perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC); + + perf_hpp__format[PERF_HPP__OVERHEAD].header = + hpp__header_overhead_self; + } + perf_hpp__column_enable(PERF_HPP__OVERHEAD); if (symbol_conf.show_cpu_utilization) { @@ -383,6 +476,12 @@ void perf_hpp__init(void) list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; if (list_empty(list)) list_add(list, &perf_hpp__sort_list); + + if (symbol_conf.cumulate_callchain) { + list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list; + if (list_empty(list)) + list_add(list, &perf_hpp__sort_list); + } } void perf_hpp__column_register(struct perf_hpp_fmt *format) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 78409f95d012..efd73e489027 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -228,6 +228,7 @@ enum { PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, + PERF_HPP__OVERHEAD_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, @@ -254,6 +255,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp 
*hpp, const char *fmt, ...); int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 901b9bece2ee..9da8931d2394 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), + DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), }; -- cgit v1.2.3 From 0434ddd21466a61cfc539ffc3a4cb3bdc67d82ec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 16:12:59 +0900 Subject: perf ui/browser: Add support to accumulated hist stat Print accumulated stat of a hist entry if requested. 
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-12-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/browsers/hists.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1c331b934ffc..2dcbe3d15a5f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -651,13 +651,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ __hpp__slsmg_color_printf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 __hpp_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int \ +hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + if (!symbol_conf.cumulate_callchain) { \ + int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \ + slsmg_printf("%s", hpp->buf); \ + \ + return ret; \ + } \ + return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \ + __hpp__slsmg_color_printf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN void hist_browser__init_hpp(void) { @@ -671,6 +694,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = hist_browser__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + hist_browser__hpp_color_overhead_acc; } static int hist_browser__show_entry(struct hist_browser *browser, 
-- cgit v1.2.3 From b09955b2a3d5fd02ed31d279f8c0ac29b32abe83 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 16:15:23 +0900 Subject: perf ui/gtk: Add support to accumulated hist stat Print accumulated stat of a hist entry if requested. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-13-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/gtk/hists.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 9d90683914d4..7e5da4af98d8 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, __percent_color_snprintf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + __percent_color_snprintf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN @@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = perf_gtk__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + perf_gtk__hpp_color_overhead_acc; } static void callchain_list__sym_name(struct callchain_list *cl, -- cgit v1.2.3 From 
14135663f1d770bb057f8bf345e5436c985eb29c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 Oct 2013 10:17:39 +0900 Subject: perf tools: Apply percent-limit to cumulative percentage If -g cumulative option is given, it needs to show entries which don't have self overhead. So apply percent-limit to accumulated overhead percentage in this case. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-14-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/browsers/hists.c | 40 +++++++++++----------------------------- tools/perf/ui/gtk/hists.c | 6 ++---- tools/perf/ui/stdio/hist.c | 4 ++-- tools/perf/util/sort.h | 17 ++++++++++++++++- 4 files changed, 31 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 2dcbe3d15a5f..5905acde5f1d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt); static bool hist_browser__has_filter(struct hist_browser *hb) @@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) struct hists *hists = browser->hists; for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); hist_entry__set_folding(he, unfold); @@ -808,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) for (nd = browser->top; nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = 
hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; @@ -829,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) } static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -850,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, } static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -888,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser, switch (whence) { case SEEK_SET: nd = hists__filter_entries(rb_first(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); break; case SEEK_CUR: nd = browser->top; goto do_offset; case SEEK_END: nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); first = false; break; default: @@ -938,8 +924,7 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->hists, - hb->min_pcnt); + nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); if (nd == NULL) break; --offset; @@ -972,7 +957,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, + nd = 
hists__filter_prev_entries(rb_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1151,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) { struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), - browser->hists, browser->min_pcnt); int printed = 0; @@ -1159,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->hists, - browser->min_pcnt); + nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); } return printed; @@ -1397,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) return; } - while ((nd = hists__filter_entries(nd, hb->hists, - hb->min_pcnt)) != NULL) { + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; nd = rb_next(nd); } diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 7e5da4af98d8..03d6812d25dd 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -226,14 +226,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; u64 total = hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9f57991025a9..475d2f5c7e16 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -461,12 +461,12 @@ print_entries: for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period 
* 100.0 / - hists->stats.total_period; + float percent; if (h->filtered) continue; + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c9ffa031becd..426b873e16ff 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -20,7 +20,7 @@ #include "parse-options.h" #include "parse-events.h" - +#include "hist.h" #include "thread.h" extern regex_t parent_regex; @@ -131,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair, list_add_tail(&pair->pairs.node, &he->pairs.head); } +static inline float hist_entry__get_percent_limit(struct hist_entry *he) +{ + u64 period = he->stat.period; + u64 total_period = hists__total_period(he->hists); + + if (unlikely(total_period == 0)) + return 0; + + if (symbol_conf.cumulate_callchain) + period = he->stat_acc->period; + + return period * 100.0 / total_period; +} + + enum sort_mode { SORT_MODE__NORMAL, SORT_MODE__BRANCH, -- cgit v1.2.3 From 77284de326e6d8c3b8e866cda5b415c86b522e61 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 16 Dec 2013 16:55:13 +0900 Subject: perf tools: Add more hpp helper functions Sometimes it needs to disable some columns at runtime. Add help functions to support that. 
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-15-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/hist.c | 17 +++++++++++++++++ tools/perf/util/hist.h | 4 ++++ 2 files changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 0ce3e79b2ca7..8ca638754acc 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -489,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format) list_add_tail(&format->list, &perf_hpp__list); } +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +{ + list_del(&format->list); +} + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) { list_add_tail(&format->sort_list, &perf_hpp__sort_list); @@ -500,6 +505,18 @@ void perf_hpp__column_enable(unsigned col) perf_hpp__column_register(&perf_hpp__format[col]); } +void perf_hpp__column_disable(unsigned col) +{ + BUG_ON(col >= PERF_HPP__MAX_INDEX); + perf_hpp__column_unregister(&perf_hpp__format[col]); +} + +void perf_hpp__cancel_cumulate(void) +{ + perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); + perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead; +} + void perf_hpp__setup_output_field(void) { struct perf_hpp_fmt *fmt; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index efd73e489027..99ad3cb433fb 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -237,7 +237,11 @@ enum { void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); +void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +void perf_hpp__column_disable(unsigned col); +void perf_hpp__cancel_cumulate(void); + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); void perf_hpp__setup_output_field(void); void perf_hpp__reset_output_field(void); -- cgit v1.2.3 From 
793aaaabb79803a0154fc6a98c472a29bb6d5cc9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 17:05:55 +0900 Subject: perf report: Add --children option The --children option is for showing accumulated overhead (period) value as well as self overhead. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-16-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/Documentation/perf-report.txt | 7 ++++++- tools/perf/builtin-report.c | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a1b5185402d5..cefdf430d1b4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -111,7 +111,7 @@ OPTIONS --fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in -F will be appended @@ -163,6 +163,11 @@ OPTIONS Default: fractal,0.5,callee,function. +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires callchains are recorded. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. 
This is a trade-off diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e8fa9fea341f..f27a8aad6a3f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -185,6 +185,14 @@ static int report__setup_sample_type(struct report *rep) } } + if (symbol_conf.cumulate_callchain) { + /* Silently ignore if callchain is missing */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + } + if (sort__mode == SORT_MODE__BRANCH) { if (!is_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { @@ -568,6 +576,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. 
" @@ -660,8 +670,10 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (branch_mode == -1 && has_br_stack) + if (branch_mode == -1 && has_br_stack) { sort__mode = SORT_MODE__BRANCH; + symbol_conf.cumulate_callchain = false; + } if (report.mem_mode) { if (sort__mode == SORT_MODE__BRANCH) { @@ -669,6 +681,7 @@ repeat: goto error; } sort__mode = SORT_MODE__MEMORY; + symbol_conf.cumulate_callchain = false; } if (setup_sorting() < 0) { -- cgit v1.2.3 From 8d8e645ceafd726b8317949f899e4b3acfb20d29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 22 Jan 2013 18:09:46 +0900 Subject: perf report: Add report.children config option Add report.children config option for setting default value of callchain accumulation. It affects the report output only if perf.data contains callchain info. A user can write .perfconfig file like below to enable accumulation by default: $ cat ~/.perfconfig [report] children = true And it can be disabled through command line: $ perf report --no-children Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-17-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-report.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f27a8aad6a3f..6cac509212ee 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb) rep->min_percent = strtof(value, NULL); return 0; } + if (!strcmp(var, "report.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } -- cgit v1.2.3 From 2bf1a12340bda1bf621f27b9892094a51b1297fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Mar 2014 09:10:29 +0900 Subject: perf tools: Do not 
auto-remove Children column if --fields given Depending on the configuration perf inserts/removes the Children column in the output automatically. But it might not be what user wants if [s]he give --fields option explicitly. Signed-off-by: Namhyung Kim Tested-by: Rodrigo Campos Cc: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-18-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/hist.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 8ca638754acc..498adb23c02e 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -513,6 +513,9 @@ void perf_hpp__column_disable(unsigned col) void perf_hpp__cancel_cumulate(void) { + if (field_order) + return; + perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead; } -- cgit v1.2.3 From 9d3c02d7188866299eebe3c4a652c08140a71f40 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 7 Jan 2014 17:02:25 +0900 Subject: perf tools: Add callback function to hist_entry_iter The new ->add_entry_cb() will be called after an entry was added to the histogram. It's used for code sharing between perf report and perf top. Note that ops->add_*_entry() should set iter->he properly in order to call the ->add_entry_cb. Also pass @arg to the callback function. It'll be used by perf top later. 
Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/87k393g999.fsf@sejong.aot.lge.com Signed-off-by: Jiri Olsa --- tools/perf/builtin-report.c | 61 ++++++++++++++++++++++++++++++++----- tools/perf/tests/hists_filter.c | 2 +- tools/perf/tests/hists_output.c | 2 +- tools/perf/util/hist.c | 67 +++++++++++++++-------------------------- tools/perf/util/hist.h | 5 ++- 5 files changed, 84 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6cac509212ee..21d830bafff3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -80,14 +80,59 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, - struct hist_entry *he __maybe_unused) +static void report__inc_stats(struct report *rep, struct hist_entry *he) { /* - * We cannot access @he at this time. Just assume it's a new entry. - * It'll be fixed once we have a callback mechanism in hist_iter. + * The @he is either of a newly created one or an existing one + * merging current sample. We only want to count a new one so + * checking ->nr_events being 1. 
*/ - rep->nr_entries++; + if (he->stat.nr_events == 1) + rep->nr_entries++; +} + +static int hist_iter__report_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) +{ + int err = 0; + struct report *rep = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct mem_info *mi; + struct branch_info *bi; + + report__inc_stats(rep, he); + + if (!ui__has_annotation()) + return 0; + + if (sort__mode == SORT_MODE__BRANCH) { + bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bi->to, evsel->idx); + + } else if (rep->mem_mode) { + mi = he->mem_info; + err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx); + if (err) + goto out; + + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + + } else if (symbol_conf.cumulate_callchain) { + if (single) + err = hist_entry__inc_addr_samples(he, evsel->idx, + al->addr); + } else { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + } + +out: + return err; } static int process_sample_event(struct perf_tool *tool, @@ -100,6 +145,7 @@ static int process_sample_event(struct perf_tool *tool, struct addr_location al; struct hist_entry_iter iter = { .hide_unresolved = rep->hide_unresolved, + .add_entry_cb = hist_iter__report_callback, }; int ret; @@ -127,9 +173,8 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - report__inc_stats(rep, NULL); - - ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack); + ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, + rep); if (ret < 0) pr_debug("problem adding hist entry, skipping event\n"); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 76b02e1de701..3539403bbad4 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -82,7 +82,7 @@ static int 
add_hist_entries(struct perf_evlist *evlist, goto out; if (hist_entry_iter__add(&iter, &al, evsel, &sample, - PERF_MAX_STACK_DEPTH) < 0) + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 1308f88a9169..d40461ecd210 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -71,7 +71,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) goto out; if (hist_entry_iter__add(&iter, &al, evsel, &sample, - PERF_MAX_STACK_DEPTH) < 0) + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c6f5f5251aad..5a0a4b2cadc4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -517,27 +517,16 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al } static int -iter_finish_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +iter_finish_mem_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) { struct perf_evsel *evsel = iter->evsel; struct hist_entry *he = iter->he; - struct mem_info *mx; int err = -EINVAL; if (he == NULL) goto out; - if (ui__has_annotation()) { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; - - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); - if (err) - goto out; - } - hists__inc_nr_samples(&evsel->hists, he->filtered); err = hist_entry__append_callchain(he, iter->sample); @@ -575,6 +564,9 @@ static int iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) { + /* to avoid calling callback function */ + iter->he = NULL; + return 0; } @@ -599,7 +591,7 @@ iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) static int iter_add_next_branch_entry(struct hist_entry_iter *iter, 
struct addr_location *al) { - struct branch_info *bi, *bx; + struct branch_info *bi; struct perf_evsel *evsel = iter->evsel; struct hist_entry *he = NULL; int i = iter->curr; @@ -619,17 +611,6 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a if (he == NULL) return -ENOMEM; - if (ui__has_annotation()) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, evsel->idx); - if (err) - goto out; - } - hists__inc_nr_samples(&evsel->hists, he->filtered); out: @@ -673,9 +654,9 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location } static int -iter_finish_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) +iter_finish_normal_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) { - int err; struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; struct perf_sample *sample = iter->sample; @@ -685,12 +666,6 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) iter->he = NULL; - if (ui__has_annotation()) { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - return err; - } - hists__inc_nr_samples(&evsel->hists, he->filtered); return hist_entry__append_callchain(he, sample); @@ -746,13 +721,6 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, */ callchain_cursor_commit(&callchain_cursor); - /* - * The iter->he will be over-written after ->add_next_entry() - * called so inc stats for the original entry now. - */ - if (ui__has_annotation()) - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - hists__inc_nr_samples(&evsel->hists, he->filtered); return err; @@ -802,8 +770,11 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, * It's possible that it has cycles or recursive calls. 
*/ for (i = 0; i < iter->curr; i++) { - if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { + /* to avoid calling callback function */ + iter->he = NULL; return 0; + } } he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, @@ -863,7 +834,7 @@ const struct hist_iter_ops hist_iter_cumulative = { int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_evsel *evsel, struct perf_sample *sample, - int max_stack_depth) + int max_stack_depth, void *arg) { int err, err2; @@ -883,10 +854,22 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) goto out; + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, true, arg); + if (err) + goto out; + } + while (iter->ops->next_entry(iter, al)) { err = iter->ops->add_next_entry(iter, al); if (err) break; + + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, false, arg); + if (err) + goto out; + } } out: diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 99ad3cb433fb..82b28ff98062 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -119,6 +119,9 @@ struct hist_entry_iter { void *priv; const struct hist_iter_ops *ops; + /* user-defined callback function (optional) */ + int (*add_entry_cb)(struct hist_entry_iter *iter, + struct addr_location *al, bool single, void *arg); }; extern const struct hist_iter_ops hist_iter_normal; @@ -135,7 +138,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, bool sample_self); int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_evsel *evsel, struct perf_sample *sample, - int max_stack_depth); + int max_stack_depth, void *arg); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); -- cgit v1.2.3 From 
7c50391f536ea6ed1e75b0f4d90922a2606da3de Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 7 Jan 2014 17:41:03 +0900 Subject: perf top: Convert to hist_entry_iter Reuse hist_entry_iter__add() function to share the similar code with perf report. Note that it needs to be called with hists.lock so tweak some internal functions not to deadlock or hold the lock too long. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Link: http://lkml.kernel.org/r/1401335910-16832-20-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-top.c | 76 ++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 12e2e1227e47..b1cb5f589ade 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top, pthread_mutex_unlock(&notes->lock); + /* + * This function is now called with he->hists->lock held. + * Release it before going to sleep. 
+ */ + pthread_mutex_unlock(&he->hists->lock); + if (err == -ERANGE && !he->ms.map->erange_warned) ui__warn_map_erange(he->ms.map, sym, ip); else if (err == -ENOMEM) { @@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top, sym->name); sleep(1); } + + pthread_mutex_lock(&he->hists->lock); } static void perf_top__show_details(struct perf_top *top) @@ -238,24 +246,6 @@ out_unlock: pthread_mutex_unlock(&notes->lock); } -static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, - struct addr_location *al, - struct perf_sample *sample) -{ - struct hist_entry *he; - - pthread_mutex_lock(&evsel->hists.lock); - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, - sample->period, sample->weight, - sample->transaction, true); - pthread_mutex_unlock(&evsel->hists.lock); - if (he == NULL) - return NULL; - - hists__inc_nr_samples(&evsel->hists, he->filtered); - return he; -} - static void perf_top__print_sym_table(struct perf_top *top) { char bf[160]; @@ -659,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) return 0; } +static int hist_iter__top_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) +{ + struct perf_top *top = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + + if (sort__has_sym && single) { + u64 ip = al->addr; + + if (al->map) + ip = al->map->unmap_ip(al->map, ip); + + perf_top__record_precise_ip(top, he, evsel->idx, ip); + } + + return 0; +} + static void perf_event__process_sample(struct perf_tool *tool, const union perf_event *event, struct perf_evsel *evsel, @@ -666,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool, struct machine *machine) { struct perf_top *top = container_of(tool, struct perf_top, tool); - struct symbol *parent = NULL; - u64 ip = sample->ip; struct addr_location al; int err; @@ -742,25 +750,23 @@ static void perf_event__process_sample(struct 
perf_tool *tool, } if (al.sym == NULL || !al.sym->ignore) { - struct hist_entry *he; + struct hist_entry_iter iter = { + .add_entry_cb = hist_iter__top_callback, + }; - err = sample__resolve_callchain(sample, &parent, evsel, &al, - top->max_stack); - if (err) - return; + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; - he = perf_evsel__add_hist_entry(evsel, &al, sample); - if (he == NULL) { - pr_err("Problem incrementing symbol period, skipping event\n"); - return; - } + pthread_mutex_lock(&evsel->hists.lock); - err = hist_entry__append_callchain(he, sample); - if (err) - return; + err = hist_entry_iter__add(&iter, &al, evsel, sample, + top->max_stack, top); + if (err < 0) + pr_err("Problem incrementing symbol period, skipping event\n"); - if (sort__has_sym) - perf_top__record_precise_ip(top, he, evsel->idx, ip); + pthread_mutex_unlock(&evsel->hists.lock); } return; -- cgit v1.2.3 From 1432ec342ece6a7ef78825ae3a9ba1c91686f71d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 30 Oct 2013 17:05:55 +0900 Subject: perf top: Add --children option The --children option is for showing accumulated overhead (period) value as well as self overhead. It should be used with one of -g or --call-graph option. Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-21-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/Documentation/perf-top.txt | 8 +++++++- tools/perf/builtin-top.c | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index dcfa54c851e9..180ae02137a5 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -119,7 +119,7 @@ Default is to monitor all CPUS. 
--fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in --field will be appended @@ -161,6 +161,12 @@ Default is to monitor all CPUS. Setup and enable call-graph (stack chain/backtrace) recording, implies -g. +--children:: + Accumulate callchain of children to parent entry so that then can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires -g/--call-graph option + enabled. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. This is a trade-off diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b1cb5f589ade..fea55e3fc931 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1098,6 +1098,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK(0, "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the callchain. 
" "Default: " __stringify(PERF_MAX_STACK_DEPTH)), @@ -1203,6 +1205,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) top.sym_evsel = perf_evlist__first(top.evlist); + if (!symbol_conf.use_callchain) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); -- cgit v1.2.3 From 104ac991bd821773cba6f262f97a4a752ed76dd5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 22 Jan 2013 18:09:46 +0900 Subject: perf top: Add top.children config option Add top.children config option for setting default value of callchain accumulation. It affects the output only if one of -g or --call-graph option is given as well. A user can write .perfconfig file like below to enable accumulation by default: $ cat ~/.perfconfig [top] children = true And it can be disabled through command line: $ perf top --no-children Signed-off-by: Namhyung Kim Tested-by: Arun Sharma Tested-by: Rodrigo Campos Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-22-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-top.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fea55e3fc931..377971dc89a3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1004,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb) if (!strcmp(var, "top.call-graph")) return record_parse_callchain(value, &top->record_opts); + if (!strcmp(var, "top.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } -- cgit v1.2.3 From e511db5e94f056083e821aa3ab74b03ad1216e14 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 24 Dec 2013 16:19:25 +0900 Subject: perf tools: Enable --children option by 
default Now perf top and perf report will show children column by default if it has callchain information. Requested-by: Ingo Molnar Signed-off-by: Namhyung Kim Tested-by: Rodrigo Campos Tested-by: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-23-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/symbol.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 95e249779931..7b9096f29cdb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,11 +29,12 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { - .use_modules = true, - .try_vmlinux_path = true, - .annotate_src = true, - .demangle = true, - .symfs = "", + .use_modules = true, + .try_vmlinux_path = true, + .annotate_src = true, + .demangle = true, + .cumulate_callchain = true, + .symfs = "", }; static enum dso_binary_type binary_type_symtab[] = { -- cgit v1.2.3 From 56772ad4750e23460a4b80f7ece5377d8c922ee1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 May 2014 18:31:52 +0900 Subject: perf ui/stdio: Fix invalid percentage value of cumulated hist entries On stdio, there's a problem that it shows invalid values for callchains in cumulated hist entries. It's because it only cares about the self period. But with --children behavior, we always add callchain info to the cumulated entries so it should use the value in that case. Before: # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ................ # 61.22% 0.32% swapper [kernel.kallsyms] [k] cpu_idle | --- cpu_idle | |--16530.76%-- start_secondary | |--2758.70%-- rest_init | start_kernel | x86_64_start_reservations | x86_64_start_kernel --6837850969203030.00%-- [...] After: # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ................ 
# 61.22% 0.32% swapper [kernel.kallsyms] [k] cpu_idle | --- cpu_idle | |--85.70%-- start_secondary | --14.30%-- rest_init start_kernel x86_64_start_reservations x86_64_start_kernel Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-24-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/stdio/hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 475d2f5c7e16..90122abd3721 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, { switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, + return callchain__fprintf_graph(fp, &he->sorted_chain, + symbol_conf.cumulate_callchain ? + he->stat_acc->period : he->stat.period, left_margin); break; case CHAIN_GRAPH_ABS: -- cgit v1.2.3 From e4cf6f886f3158061fb589df9ed452f9b30f67f1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 May 2014 18:49:33 +0900 Subject: perf ui/gtk: Fix callchain display With current output field change, GTK browser cannot display callchain information correctly since it couldn't determine where the symbol column is. This is a problem - just for now I changed to use the last column since it'll work for most cases. Also it has a same problem of the percentage as stdio code. 
Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-25-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/ui/gtk/hists.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 03d6812d25dd..6ca60e482cdc 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -198,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (perf_hpp__should_skip(fmt)) continue; + /* + * XXX no way to determine where symcol column is.. + * Just use last column for now. + */ + if (perf_hpp__is_sort_entry(fmt)) + sym_col = col_idx; + fmt->header(fmt, &hpp, hists_to_evsel(hists)); gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), @@ -253,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (symbol_conf.use_callchain && sort__has_sym) { if (callchain_param.mode == CHAIN_GRAPH_REL) - total = h->stat.period; + total = symbol_conf.cumulate_callchain ? + h->stat_acc->period : h->stat.period; perf_gtk__add_callchain(&h->sorted_chain, store, &iter, sym_col, total); -- cgit v1.2.3 From d69b2962a0aebd431cdda939f4418dd606e2f77e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 May 2014 10:59:01 +0900 Subject: perf tools: Reset output/sort order to default When reset_output_field() is called, also reset field/sort order to NULL so that it can have the default values. It's needed for testing. 
Signed-off-by: Namhyung Kim CC: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-26-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/sort.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9da8931d2394..254f583a52ab 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1582,6 +1582,9 @@ void reset_output_field(void) sort__has_sym = 0; sort__has_dso = 0; + field_order = NULL; + sort_order = NULL; + reset_dimensions(); perf_hpp__reset_output_field(); } -- cgit v1.2.3 From a1891aa4805fa77d98db44ec6e1d93e2921828fb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 May 2014 14:59:57 +0900 Subject: perf tests: Define and use symbolic names for fake symbols In various histogram test cases, fake symbols are used as raw numbers. Define macros for each pid, map, symbols so that it can increase readability somewhat. Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-27-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/tests/hists_common.c | 47 ++++++++++++++++++++++------------------- tools/perf/tests/hists_common.h | 32 ++++++++++++++++++++++++++-- tools/perf/tests/hists_filter.c | 23 ++++++++++---------- tools/perf/tests/hists_link.c | 32 ++++++++++++++-------------- tools/perf/tests/hists_output.c | 20 +++++++++--------- 5 files changed, 92 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index e4e01aadc3be..e4e120d3c16f 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -12,9 +12,9 @@ static struct { u32 pid; const char *comm; } fake_threads[] = { - { 100, "perf" }, - { 200, "perf" }, - { 300, "bash" }, + { FAKE_PID_PERF1, "perf" }, + { FAKE_PID_PERF2, "perf" }, + { FAKE_PID_BASH, "bash" }, }; static 
struct { @@ -22,15 +22,15 @@ static struct { u64 start; const char *filename; } fake_mmap_info[] = { - { 100, 0x40000, "perf" }, - { 100, 0x50000, "libc" }, - { 100, 0xf0000, "[kernel]" }, - { 200, 0x40000, "perf" }, - { 200, 0x50000, "libc" }, - { 200, 0xf0000, "[kernel]" }, - { 300, 0x40000, "bash" }, - { 300, 0x50000, "libc" }, - { 300, 0xf0000, "[kernel]" }, + { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" }, + { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" }, }; struct fake_sym { @@ -40,27 +40,30 @@ struct fake_sym { }; static struct fake_sym perf_syms[] = { - { 700, 100, "main" }, - { 800, 100, "run_command" }, - { 900, 100, "cmd_record" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" }, }; static struct fake_sym bash_syms[] = { - { 700, 100, "main" }, - { 800, 100, "xmalloc" }, - { 900, 100, "xfree" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" }, }; static struct fake_sym libc_syms[] = { { 700, 100, "malloc" }, { 800, 100, "free" }, { 900, 100, "realloc" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" }, }; static struct fake_sym kernel_syms[] = { - { 700, 100, "schedule" }, - { 800, 100, "page_fault" }, - { 900, 100, "sys_perf_event_open" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" }, }; static struct { @@ 
-102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines) .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, - .len = 0x1000ULL, + .len = FAKE_MAP_LENGTH, .pgoff = 0ULL, }, }; diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h index 1415ae69d7b6..888254e8665c 100644 --- a/tools/perf/tests/hists_common.h +++ b/tools/perf/tests/hists_common.h @@ -4,6 +4,34 @@ struct machine; struct machines; +#define FAKE_PID_PERF1 100 +#define FAKE_PID_PERF2 200 +#define FAKE_PID_BASH 300 + +#define FAKE_MAP_PERF 0x400000 +#define FAKE_MAP_BASH 0x400000 +#define FAKE_MAP_LIBC 0x500000 +#define FAKE_MAP_KERNEL 0xf00000 +#define FAKE_MAP_LENGTH 0x100000 + +#define FAKE_SYM_OFFSET1 700 +#define FAKE_SYM_OFFSET2 800 +#define FAKE_SYM_OFFSET3 900 +#define FAKE_SYM_LENGTH 100 + +#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1 +#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2 +#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3 +#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1 +#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2 +#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3 +#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1 +#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2 +#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3 +#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1 +#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2 +#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3 + /* * The setup_fake_machine() provides a test environment which consists * of 3 processes that have 3 mappings and in turn, have 3 symbols @@ -13,7 +41,7 @@ struct machines; * ............. ............. ................... 
* perf: 100 perf main * perf: 100 perf run_command - * perf: 100 perf comd_record + * perf: 100 perf cmd_record * perf: 100 libc malloc * perf: 100 libc free * perf: 100 libc realloc @@ -22,7 +50,7 @@ struct machines; * perf: 100 [kernel] sys_perf_event_open * perf: 200 perf main * perf: 200 perf run_command - * perf: 200 perf comd_record + * perf: 200 perf cmd_record * perf: 200 libc malloc * perf: 200 libc free * perf: 200 libc realloc diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 3539403bbad4..821f581fd930 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -21,25 +21,25 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 100, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* perf [kernel] page_fault() */ - { .pid = 200, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .pid = 300, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, 
}, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; static int add_hist_entries(struct perf_evlist *evlist, @@ -47,7 +47,7 @@ static int add_hist_entries(struct perf_evlist *evlist, { struct perf_evsel *evsel; struct addr_location al; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 100, }; size_t i; /* @@ -75,7 +75,6 @@ static int add_hist_entries(struct perf_evlist *evlist, sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - sample.period = 100; if (perf_event__preprocess_sample(&event, machine, &al, &sample) < 0) diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index ca6693b37cd7..d4b34b0f50a2 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -21,41 +21,41 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_common_samples[] = { /* perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, }; static struct sample fake_samples[][5] = { { /* perf [perf] run_command() */ - { .pid = 100, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [kernel] page_fault() */ - { .pid = 100, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = 
FAKE_IP_KERNEL_PAGE_FAULT, }, /* perf [kernel] sys_perf_event_open() */ - { .pid = 200, .ip = 0xf0000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, }, /* bash [libc] free() */ - { .pid = 300, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, }, }, { /* perf [libc] free() */ - { .pid = 200, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */ /* bash [bash] xfee() */ - { .pid = 300, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, }, /* bash [libc] realloc() */ - { .pid = 300, .ip = 0x50000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }, }; @@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct perf_evsel *evsel; struct addr_location al; struct hist_entry *he; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 1, }; size_t i = 0, k; /* diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index d40461ecd210..e3bbd6c54c1b 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -22,25 +22,25 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, + { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = 
FAKE_IP_PERF_CMD_RECORD, }, /* perf [libc] malloc() */ - { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [libc] free() */ - { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, /* perf [perf] main() */ - { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [kernel] page_fault() */ - { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, + { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash [bash] xmalloc() */ - { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, + { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [kernel] page_fault() */ - { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, + { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; static int add_hist_entries(struct hists *hists, struct machine *machine) -- cgit v1.2.3 From 0506aecce999d4370b979892f88cf1118cfe8dcb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 23 May 2014 18:04:42 +0900 Subject: perf tests: Add a test case for cumulating callchains Now it adds a new testcase to verify --children option working correctly. 
Signed-off-by: Namhyung Kim Cc: Arun Sharma Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1401335910-16832-28-git-send-email-namhyung@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/Makefile.perf | 1 + tools/perf/tests/builtin-test.c | 4 + tools/perf/tests/hists_common.c | 5 +- tools/perf/tests/hists_cumulate.c | 726 ++++++++++++++++++++++++++++++++++++++ tools/perf/tests/tests.h | 1 + 5 files changed, 735 insertions(+), 2 deletions(-) create mode 100644 tools/perf/tests/hists_cumulate.c (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 02f0a4dd1a80..67f7c0575b26 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_link.o LIB_OBJS += $(OUTPUT)tests/hists_filter.o LIB_OBJS += $(OUTPUT)tests/hists_output.o +LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o LIB_OBJS += $(OUTPUT)tests/python-use.o LIB_OBJS += $(OUTPUT)tests/bp_signal.o LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 831f52cae197..802e3cd50f6f 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -139,6 +139,10 @@ static struct test { .desc = "Test output sorting of hist entries", .func = test__hists_output, }, + { + .desc = "Test cumulation of child hist entries", + .func = test__hists_cumulate, + }, { .func = NULL, }, diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index e4e120d3c16f..a62c09134516 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -196,10 +196,11 @@ void print_hists_out(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); if (!he->filtered) { - pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", + pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n", i, thread__comm_str(he->thread), he->thread->tid, 
he->ms.map->dso->short_name, - he->ms.sym->name, he->stat.period); + he->ms.sym->name, he->stat.period, + he->stat_acc ? he->stat_acc->period : 0); } i++; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c new file mode 100644 index 000000000000..0ac240db2e24 --- /dev/null +++ b/tools/perf/tests/hists_cumulate.c @@ -0,0 +1,726 @@ +#include "perf.h" +#include "util/debug.h" +#include "util/symbol.h" +#include "util/sort.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/thread.h" +#include "util/parse-events.h" +#include "tests/tests.h" +#include "tests/hists_common.h" + +struct sample { + u32 pid; + u64 ip; + struct thread *thread; + struct map *map; + struct symbol *sym; +}; + +/* For the numbers, see hists_common.c */ +static struct sample fake_samples[] = { + /* perf [kernel] schedule() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [perf] cmd_record() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, + /* perf [libc] malloc() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, + /* perf [libc] free() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [kernel] page_fault() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + /* bash [bash] main() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, + /* bash [bash] xmalloc() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, + /* bash [kernel] page_fault() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, +}; + +/* + * Will be casted to struct ip_callchain which has all 64 bit entries + * of nr and ips[]. 
+ */ +static u64 fake_callchains[][10] = { + /* schedule => run_command => main */ + { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* cmd_record => run_command => main */ + { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* malloc => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* free => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* page_fault => sys_perf_event_open => run_command => main */ + { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, + FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_BASH_MAIN, }, + /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */ + { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, + FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, }, + /* page_fault => malloc => main */ + { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, }, +}; + +static int add_hist_entries(struct hists *hists, struct machine *machine) +{ + struct addr_location al; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct perf_sample sample = { .period = 1000, }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { + const union perf_event event = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + }; + struct hist_entry_iter iter = { + .hide_unresolved = false, + }; + + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; + + sample.pid = fake_samples[i].pid; + sample.tid = fake_samples[i].pid; + sample.ip = fake_samples[i].ip; + sample.callchain = (struct ip_callchain *)fake_callchains[i]; + + if (perf_event__preprocess_sample(&event, 
machine, &al, + &sample) < 0) + goto out; + + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) + goto out; + + fake_samples[i].thread = al.thread; + fake_samples[i].map = al.map; + fake_samples[i].sym = al.sym; + } + + return TEST_OK; + +out: + pr_debug("Not enough memory for adding a hist entry\n"); + return TEST_FAIL; +} + +static void del_hist_entries(struct hists *hists) +{ + struct hist_entry *he; + struct rb_root *root_in; + struct rb_root *root_out; + struct rb_node *node; + + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + + root_out = &hists->entries; + + while (!RB_EMPTY_ROOT(root_out)) { + node = rb_first(root_out); + + he = rb_entry(node, struct hist_entry, rb_node); + rb_erase(node, root_out); + rb_erase(&he->rb_node_in, root_in); + hist_entry__free(he); + } +} + +typedef int (*test_fn_t)(struct perf_evsel *, struct machine *); + +#define COMM(he) (thread__comm_str(he->thread)) +#define DSO(he) (he->ms.map->dso->short_name) +#define SYM(he) (he->ms.sym->name) +#define CPU(he) (he->cpu) +#define PID(he) (he->thread->tid) +#define DEPTH(he) (he->callchain->max_depth) +#define CDSO(cl) (cl->ms.map->dso->short_name) +#define CSYM(cl) (cl->ms.sym->name) + +struct result { + u64 children; + u64 self; + const char *comm; + const char *dso; + const char *sym; +}; + +struct callchain_result { + u64 nr; + struct { + const char *dso; + const char *sym; + } node[10]; +}; + +static int do_test(struct hists *hists, struct result *expected, size_t nr_expected, + struct callchain_result *expected_callchain, size_t nr_callchain) +{ + char buf[32]; + size_t i, c; + struct hist_entry *he; + struct rb_root *root; + struct rb_node *node; + struct callchain_node *cnode; + struct callchain_list *clist; + + /* + * adding and deleting hist entries must be done outside of this + * function since TEST_ASSERT_VAL() returns in case of failure. 
+ */ + hists__collapse_resort(hists, NULL); + hists__output_resort(hists); + + if (verbose > 2) { + pr_info("use callchain: %d, cumulate callchain: %d\n", + symbol_conf.use_callchain, + symbol_conf.cumulate_callchain); + print_hists_out(hists); + } + + root = &hists->entries; + for (node = rb_first(root), i = 0; + node && (he = rb_entry(node, struct hist_entry, rb_node)); + node = rb_next(node), i++) { + scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i); + + TEST_ASSERT_VAL("Incorrect number of hist entry", + i < nr_expected); + TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self && + !strcmp(COMM(he), expected[i].comm) && + !strcmp(DSO(he), expected[i].dso) && + !strcmp(SYM(he), expected[i].sym)); + + if (symbol_conf.cumulate_callchain) + TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children); + + if (!symbol_conf.use_callchain) + continue; + + /* check callchain entries */ + root = &he->callchain->node.rb_root; + cnode = rb_entry(rb_first(root), struct callchain_node, rb_node); + + c = 0; + list_for_each_entry(clist, &cnode->val, list) { + scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c); + + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c < expected_callchain[i].nr); + TEST_ASSERT_VAL(buf, + !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) && + !strcmp(CSYM(clist), expected_callchain[i].node[c].sym)); + c++; + } + /* TODO: handle multiple child nodes properly */ + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c <= expected_callchain[i].nr); + } + TEST_ASSERT_VAL("Incorrect number of hist entry", + i == nr_expected); + TEST_ASSERT_VAL("Incorrect number of callchain entry", + !symbol_conf.use_callchain || nr_expected == nr_callchain); + return 0; +} + +/* NO callchain + NO children */ +static int test1(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== 
======= ============= ============== + * 20.00% perf perf [.] main + * 10.00% bash [kernel] [k] page_fault + * 10.00% bash bash [.] main + * 10.00% bash bash [.] xmalloc + * 10.00% perf [kernel] [k] page_fault + * 10.00% perf [kernel] [k] schedule + * 10.00% perf libc [.] free + * 10.00% perf libc [.] malloc + * 10.00% perf perf [.] cmd_record + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callchain + NO children */ +static int test2(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== ======= ============= ============== + * 20.00% perf perf [.] main + * | + * --- main + * + * 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% bash bash [.] main + * | + * --- main + * + * 10.00% bash bash [.] xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + * 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% perf libc [.]
free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% perf perf [.] cmd_record + * | + * --- cmd_record + * run_command + * main + * + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + }; + + symbol_conf.use_callchain = true; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + 
del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* NO callchain + children */ +static int test3(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * 50.00% 0.00% perf perf [.] run_command + * 30.00% 10.00% bash bash [.] main + * 30.00% 10.00% perf perf [.] cmd_record + * 20.00% 0.00% bash libc [.] malloc + * 10.00% 10.00% bash [kernel] [k] page_fault + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% perf [kernel] [k] page_fault + * 10.00% 10.00% perf libc [.] free + * 10.00% 10.00% perf libc [.] malloc + * 10.00% 10.00% bash bash [.] xmalloc + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callchain + children */ +static int test4(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * 
expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * | + * --- main + * + * 50.00% 0.00% perf perf [.] run_command + * | + * --- run_command + * main + * + * 30.00% 10.00% bash bash [.] main + * | + * --- main + * + * 30.00% 10.00% perf perf [.] cmd_record + * | + * --- cmd_record + * run_command + * main + * + * 20.00% 0.00% bash libc [.] malloc + * | + * --- malloc + * | + * |--50.00%-- xmalloc + * | main + * --50.00%-- main + * + * 10.00% 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * | + * --- sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf libc [.] free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% 10.00% bash bash [.] 
xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 2, { { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + }; + + symbol_conf.use_callchain 
= true; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +int test__hists_cumulate(void) +{ + int err = TEST_FAIL; + struct machines machines; + struct machine *machine; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(); + size_t i; + test_fn_t testcases[] = { + test1, + test2, + test3, + test4, + }; + + TEST_ASSERT_VAL("No memory", evlist); + + err = parse_events(evlist, "cpu-clock"); + if (err) + goto out; + + machines__init(&machines); + + /* setup threads/dso/map/symbols also */ + machine = setup_fake_machine(&machines); + if (!machine) + goto out; + + if (verbose > 1) + machine__fprintf(machine, stderr); + + evsel = perf_evlist__first(evlist); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + err = testcases[i](evsel, machine); + if (err < 0) + break; + } + +out: + /* tear down everything */ + perf_evlist__delete(evlist); + machines__exit(&machines); + + return err; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d76c0e2e6635..022bb68fd9c7 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -45,6 +45,7 @@ int test__hists_filter(void); int test__mmap_thread_lookup(void); int test__thread_mg_share(void); int test__hists_output(void); +int test__hists_cumulate(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT -- cgit v1.2.3 From 1b5726220fe16ac38eb2db43e7bc82e69f449fca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 May 2014 16:02:29 -0300 Subject: perf trace: Warn the user when not available When the audit-libs devel package is not found at build time we disable the 'trace' command, as we are not 
able to map syscall numbers to strings, but then the message the user is presented is cryptic: [root@zoo linux]# trace ls perf: 'ls' is not a perf-command. See 'perf --help'. Fix it by presenting a more helpful message: [root@zoo linux]# trace l trace command not available: missing audit-libs devel package at build time. Signed-off-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-uxeunqetd0sgxyibusapen9a@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/perf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 431798a4110d..78f7b920e548 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -481,14 +481,18 @@ int main(int argc, const char **argv) fprintf(stderr, "cannot handle %s internally", cmd); goto out; } -#ifdef HAVE_LIBAUDIT_SUPPORT if (!prefixcmp(cmd, "trace")) { +#ifdef HAVE_LIBAUDIT_SUPPORT set_buildid_dir(); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); - } +#else + fprintf(stderr, + "trace command not available: missing audit-libs devel package at build time.\n"); + goto out; #endif + } /* Look for flags.. */ argv++; argc--; -- cgit v1.2.3 From 6a2f2543a1f3aa0e7766e27c30b93d164771e892 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 May 2014 16:02:30 -0300 Subject: perf tools: Add warning when disabling perl scripting support due to missing devel files We were just showing "libperl: OFF", unlike other features where we present the user with a message helping have a feature built in. Fix it by adding the following message: config/Makefile:450: Missing perl devel files. 
Disabling perl scripting support, consider installing perl-ExtUtils-Embed Signed-off-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-t7yeud34ehimlfi6pklb29p7@git.kernel.org Signed-off-by: Jiri Olsa --- tools/perf/config/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 729bbdf5cec7..319426f632fc 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -447,6 +447,7 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 + msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) -- cgit v1.2.3 From 16a6433615c14097fd8d406b10ce6393aebb7017 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 May 2014 10:19:18 +0200 Subject: perf tools: Consider header files outside perf directory in tags target This fixes lookups like "vi -t event_format" Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/20140528081918.GA28567@linutronix.de Signed-off-by: Jiri Olsa --- tools/perf/Makefile.perf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67f7c0575b26..6286e13adc2e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -815,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) +TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol +TAG_FILES= ../../include/uapi/linux/perf_event.h + TAGS: $(RM) TAGS - $(FIND) . 
-name '*.[hcS]' -print | xargs etags -a + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES) tags: $(RM) tags - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES) cscope: $(RM) cscope* - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) ### Detect prefix changes TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ -- cgit v1.2.3 From f2d9627b2b31506204417bb6842a7ea88970b700 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Tue, 27 May 2014 17:21:56 -0700 Subject: perf tools: Allow overriding sysfs and proc finding with env var SYSFS_PATH and PROC_PATH environment variables now let the user override the detection of sysfs and proc locations for testing purposes. Signed-off-by: Cody P Schafer Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1401236684-10579-2-git-send-email-dev@codyps.com Signed-off-by: Jiri Olsa --- tools/lib/api/fs/fs.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 5b5eb788996e..c1b49c36a951 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -1,8 +1,10 @@ /* TODO merge/factor in debugfs.c here */ +#include #include #include #include +#include #include #include @@ -96,12 +98,51 @@ static bool fs__check_mounts(struct fs *fs) return false; } +static void mem_toupper(char *f, size_t len) +{ + while (len) { + *f = toupper(*f); + f++; + len--; + } +} + +/* + * Check for "NAME_PATH" environment variable to override fs location (for + * testing). This matches the recommendation in Documentation/sysfs-rules.txt + * for SYSFS_PATH. 
+ */ +static bool fs__env_override(struct fs *fs) +{ + char *override_path; + size_t name_len = strlen(fs->name); + /* name + "_PATH" + '\0' */ + char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); + mem_toupper(upper_name, name_len); + strcpy(&upper_name[name_len], "_PATH"); + + override_path = getenv(upper_name); + if (!override_path) + return false; + + fs->found = true; + strncpy(fs->path, override_path, sizeof(fs->path)); + return true; +} + static const char *fs__get_mountpoint(struct fs *fs) { + if (fs__env_override(fs)) + return fs->path; + if (fs__check_mounts(fs)) return fs->path; - return fs__read_mounts(fs) ? fs->path : NULL; + if (fs__read_mounts(fs)) + return fs->path; + + return NULL; } static const char *fs__mountpoint(int idx) -- cgit v1.2.3 From 7f3e508ee1e6cc1b5865edcbf04a14a76ff1534c Mon Sep 17 00:00:00 2001 From: zhangdianfang Date: Fri, 30 May 2014 08:53:58 +0800 Subject: perf tools: Fix "==" into "=" in ui_browser__warning assignment Convert "==" into "=" in ui_browser__warning assignment. 
Bug description: https://bugzilla.kernel.org/show_bug.cgi?id=76751 Reported-by: David Binderman Signed-off-by: Dianfang Zhang Acked-by: Arnaldo Carvalho de Melo Cc: Jean Delvare Link: http://lkml.kernel.org/r/20140530154709.GC1202@kernel.org [ changed the changelog a bit ] Signed-off-by: Jiri Olsa --- tools/perf/ui/browser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d11541d4d7d7..3ccf6e14f89b 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -194,7 +194,7 @@ int ui_browser__warning(struct ui_browser *browser, int timeout, ui_helpline__vpush(format, args); va_end(args); } else { - while ((key == ui__question_window("Warning!", text, + while ((key = ui__question_window("Warning!", text, "Press any key...", timeout)) == K_RESIZE) ui_browser__handle_resize(browser); -- cgit v1.2.3 From 2ec85c628c4cecef0f82d177279c579aed0f9706 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 23 May 2014 17:15:46 +0200 Subject: perf tools: Remove elide setup for SORT_MODE__MEMORY mode There's no need to set up elide of the sort_dso sort entry again with the symbol_conf.dso_list list. The only difference was the list names of memory mode data, which does not make much sense to me.
Acked-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1400858147-7155-2-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/util/sort.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 254f583a52ab..2aba620a86f6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1412,19 +1412,6 @@ void sort__setup_elide(FILE *output) sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", output); - } else if (sort__mode == SORT_MODE__MEMORY) { - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "symbol_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "dso_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "mem", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "local_weight", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "tlb", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "snoop", output); } /* -- cgit v1.2.3 From f29984226978313039d7dfe9b45eaa55a3aad03d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 23 May 2014 17:15:47 +0200 Subject: perf tools: Move elide bool into perf_hpp_fmt struct After output/sort fields refactoring, it's expensive to check the elide bool in its current location inside the 'struct sort_entry'. The perf_hpp__should_skip function gets highly noticeable in workloads with a high number of output/sort fields, like for: $ perf report -i perf-test.data -F overhead,sample,period,comm,pid,dso,symbol,cpu --stdio Performance report: 9.70% perf [.] perf_hpp__should_skip Moving the elide bool into the 'struct perf_hpp_fmt', which makes the perf_hpp__should_skip just a single struct read.
Got speedup of around 22% for my test perf.data workload. The change should not harm any other workload types. Performance counter stats for (10 runs): before: 358,319,732,626 cycles ( +- 0.55% ) 467,129,581,515 instructions # 1.30 insns per cycle ( +- 0.00% ) 150.943975206 seconds time elapsed ( +- 0.62% ) now: 278,785,972,990 cycles ( +- 0.12% ) 370,146,797,640 instructions # 1.33 insns per cycle ( +- 0.00% ) 116.416670507 seconds time elapsed ( +- 0.31% ) Acked-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20140601142622.GA9131@krava.brq.redhat.com Signed-off-by: Jiri Olsa --- tools/perf/ui/browsers/hists.c | 8 ++-- tools/perf/util/hist.h | 8 +++- tools/perf/util/sort.c | 90 ++++++++++++++++++++++++++---------------- tools/perf/util/sort.h | 2 +- 4 files changed, 68 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 5905acde5f1d..52c03fbbba17 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1706,14 +1706,14 @@ zoom_dso: zoom_out_dso: ui_helpline__pop(); browser->hists->dso_filter = NULL; - sort_dso.elide = false; + perf_hpp__set_elide(HISTC_DSO, false); } else { if (dso == NULL) continue; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", dso->kernel ? 
"the Kernel" : dso->short_name); browser->hists->dso_filter = dso; - sort_dso.elide = true; + perf_hpp__set_elide(HISTC_DSO, true); pstack__push(fstack, &browser->hists->dso_filter); } hists__filter_by_dso(hists); @@ -1725,13 +1725,13 @@ zoom_thread: zoom_out_thread: ui_helpline__pop(); browser->hists->thread_filter = NULL; - sort_thread.elide = false; + perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); browser->hists->thread_filter = thread; - sort_thread.elide = true; + perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } hists__filter_by_thread(hists); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 82b28ff98062..d2bf03575d5f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -205,6 +205,7 @@ struct perf_hpp_fmt { struct list_head list; struct list_head sort_list; + bool elide; }; extern struct list_head perf_hpp__list; @@ -252,7 +253,12 @@ void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); -bool perf_hpp__should_skip(struct perf_hpp_fmt *format); + +static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +{ + return format->elide; +} + void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); typedef u64 (*hpp_field_fn)(struct hist_entry *he); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2aba620a86f6..45512baaab67 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1157,6 +1157,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); + hse->hpp.elide = false; return hse; } @@ -1364,27 +1365,64 @@ static int __setup_sorting(void) return ret; } -bool 
perf_hpp__should_skip(struct perf_hpp_fmt *format) +void perf_hpp__set_elide(int idx, bool elide) { - if (perf_hpp__is_sort_entry(format)) { - struct hpp_sort_entry *hse; + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; - hse = container_of(format, struct hpp_sort_entry, hpp); - return hse->se->elide; + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_width_idx == idx) { + fmt->elide = elide; + break; + } } - return false; } -static void sort_entry__setup_elide(struct sort_entry *se, - struct strlist *list, - const char *list_name, FILE *fp) +static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp) { if (list && strlist__nr_entries(list) == 1) { if (fp != NULL) fprintf(fp, "# %s: %s\n", list_name, strlist__entry(list, 0)->s); - se->elide = true; + return true; + } + return false; +} + +static bool get_elide(int idx, FILE *output) +{ + switch (idx) { + case HISTC_SYMBOL: + return __get_elide(symbol_conf.sym_list, "symbol", output); + case HISTC_DSO: + return __get_elide(symbol_conf.dso_list, "dso", output); + case HISTC_COMM: + return __get_elide(symbol_conf.comm_list, "comm", output); + default: + break; } + + if (sort__mode != SORT_MODE__BRANCH) + return false; + + switch (idx) { + case HISTC_SYMBOL_FROM: + return __get_elide(symbol_conf.sym_from_list, "sym_from", output); + case HISTC_SYMBOL_TO: + return __get_elide(symbol_conf.sym_to_list, "sym_to", output); + case HISTC_DSO_FROM: + return __get_elide(symbol_conf.dso_from_list, "dso_from", output); + case HISTC_DSO_TO: + return __get_elide(symbol_conf.dso_to_list, "dso_to", output); + default: + break; + } + + return false; } void sort__setup_elide(FILE *output) @@ -1392,26 +1430,12 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "dso", output); - 
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, - "comm", output); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, - "symbol", output); - - if (sort__mode == SORT_MODE__BRANCH) { - sort_entry__setup_elide(&sort_dso_from, - symbol_conf.dso_from_list, - "dso_from", output); - sort_entry__setup_elide(&sort_dso_to, - symbol_conf.dso_to_list, - "dso_to", output); - sort_entry__setup_elide(&sort_sym_from, - symbol_conf.sym_from_list, - "sym_from", output); - sort_entry__setup_elide(&sort_sym_to, - symbol_conf.sym_to_list, - "sym_to", output); + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + fmt->elide = get_elide(hse->se->se_width_idx, output); } /* @@ -1422,8 +1446,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - if (!hse->se->elide) + if (!fmt->elide) return; } @@ -1431,8 +1454,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - hse->se->elide = false; + fmt->elide = false; } } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 426b873e16ff..5bf0098d6b06 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -202,7 +202,6 @@ struct sort_entry { int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); u8 se_width_idx; - bool elide; }; extern struct sort_entry sort_thread; @@ -213,6 +212,7 @@ int setup_output_field(void); void reset_output_field(void); extern int sort_dimension__add(const char *); void sort__setup_elide(FILE *fp); +void perf_hpp__set_elide(int idx, bool elide); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); -- cgit v1.2.3 From a515114fa3cff8f1da10cd68914d55c10879c3e0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Jun 2014 13:44:23 -0400 Subject: perf 
record: Fix poll return value propagation If the perf record command is interrupted in record__mmap_read_all function, the 'done' is set and err has the latest poll return value, which is most likely positive number (= number of pollfds ready to read). This 'positive err' is then propagated to the exit code, resulting in not finishing the perf.data header properly, causing following error in report: # perf record -F 50000 -a --- make the system real busy, so there's more chance to interrupt perf in event writing code --- ^C[ perf record: Woken up 16 times to write data ] [ perf record: Captured and wrote 30.292 MB perf.data (~1323468 samples) ] # perf report --stdio > /dev/null WARNING: The perf.data file's data size field is 0 which is unexpected. Was the 'perf record' command properly terminated? Fixing this by checking for positive poll return value and setting err to 0. Acked-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1401732126-19465-1-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa --- tools/perf/builtin-record.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e4c85b8f46c2..ce2cfec5c764 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -454,7 +454,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (done) break; err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); - if (err < 0 && errno == EINTR) + /* + * Propagate error, only if there's any. Ignore positive + * number of returned events and interrupt error. 
+ */ + if (err > 0 || (err < 0 && errno == EINTR)) err = 0; waking++; } -- cgit v1.2.3 From fc9cabeabf42d76854059e7bce81a02645e7e5ca Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Tue, 3 Jun 2014 00:44:34 +0800 Subject: perf tools: Fix 'make help' message error Currently the 'make help' message has this hint: use "make prefix=<path> <install target>" to install to a particular path like make prefix=/usr/local install install-doc But this is misleading: when I specify "prefix=/usr/local", it is not respected at all. This is because "DESTDIR" is considered first. In this case, "DESTDIR" has an empty value, so "prefix" is honored. However, "prefix" is unconditionally assigned to $HOME, regardless of what it is set to from the command line. So our "prefix" setting is not respected and the actual destination falls back to $HOME. This patch fixes this issue and corrects the help message. Signed-off-by: Jianyu Zhan Acked-by: Namhyung Kim Link: http://lkml.kernel.org/r/1401727474-19370-1-git-send-email-nasa4836@gmail.com Signed-off-by: Jiri Olsa --- tools/perf/Makefile.perf | 4 ++-- tools/perf/config/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6286e13adc2e..ae20edfcc3f7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -789,8 +789,8 @@ help: @echo '' @echo 'Perf install targets:' @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' - @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular' - @echo ' path like make prefix=/usr/local install install-doc' + @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular' + @echo ' path like "make prefix=/usr/local install install-doc"' @echo ' install - install compiled binaries' @echo ' install-doc - install *all* documentation' @echo ' install-man - install manpage documentation' diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index
319426f632fc..4f100b54ba8b 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -600,7 +600,7 @@ endif # Make the path relative to DESTDIR, not to prefix ifndef DESTDIR -prefix = $(HOME) +prefix ?= $(HOME) endif bindir_relative = bin bindir = $(prefix)/$(bindir_relative) -- cgit v1.2.3 From 0fffa5df4cf3e22be4f40f6698ab9e49f3ffd6fa Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 May 2014 12:50:08 +0530 Subject: perf/tool: Add conditional branch filter 'cond' to perf record Adding perf record support for new branch stack filter criteria PERF_SAMPLE_BRANCH_COND. Signed-off-by: Anshuman Khandual Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1400743210-32289-2-git-send-email-khandual@linux.vnet.ibm.com Cc: mpe@ellerman.id.au Cc: benh@kernel.crashing.org Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ce2cfec5c764..378b85b731a7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -548,6 +548,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), + BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_END }; -- cgit v1.2.3 From 3e39db4ae2a92ae9e338e8066411b694b0edcb31 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 22 May 2014 12:50:10 +0530 Subject: perf/documentation: Add description for conditional branch filter Signed-off-by: Anshuman Khandual Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Signed-off-by: Peter Zijlstra Cc: mpe@ellerman.id.au Cc: benh@kernel.crashing.org Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Link: 
http://lkml.kernel.org/r/1400743210-32289-4-git-send-email-khandual@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-record.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index c71b0f36d9e8..d460049cae8e 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -184,9 +184,10 @@ following filters are defined: - in_tx: only when the target is in a hardware transaction - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort + - cond: conditional branches + -The option requires at least one branch type among any, any_call, any_ret, ind_call. +The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. The privilege levels may be omitted, in which case, the privilege levels of the associated event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to permissions. When sampling on multiple events, branch stack sampling -- cgit v1.2.3