diff options
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 32 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/Build | 1 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/group.c | 27 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 119 | ||||
-rw-r--r-- | tools/perf/util/group.h | 7 | ||||
-rw-r--r-- | tools/perf/util/parse-events.l | 1 |
6 files changed, 184 insertions, 3 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04f23b404bbc..d96ccd4844df 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements. --no-aggr:: Do not aggregate counts across all monitored CPUs. +--topdown:: +Print top down level 1 metrics if supported by the CPU. This allows one to +determine bottlenecks in the CPU pipeline for CPU bound workloads, +by breaking the cycles consumed down into frontend bound, backend bound, +bad speculation and retiring. + +Frontend bound means that the CPU cannot fetch and decode instructions fast +enough. Backend bound means that computation or memory access is the +bottleneck. Bad Speculation means that the CPU wasted cycles due to branch +mispredictions and similar issues. Retiring means that the CPU computed without +an apparent bottleneck. The bottleneck is only the real bottleneck +if the workload is actually bound by the CPU and not by something else. + +For best results it is usually a good idea to use it with interval +mode like -I 1000, as the bottleneck of workloads can change often. + +The top down metrics are collected per core instead of per +CPU thread. Per core mode is automatically enabled +and -a (global monitoring) is needed, requiring root rights or +kernel.perf_event_paranoid=-1. + +Topdown uses the full Performance Monitoring Unit, and needs +disabling of the NMI watchdog (as root): +echo 0 > /proc/sys/kernel/nmi_watchdog +for best results. Otherwise the bottlenecks may be inconsistent +on workloads with changing phases. + +This enables --metric-only, unless overridden with --no-metric-only. + +To interpret the results it is usually necessary to know on which +CPUs the workload runs. If needed the CPUs can be forced using +taskset. 
EXAMPLES -------- diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 465970370f3e..4cd8a16b1b7b 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -3,6 +3,7 @@ libperf-y += tsc.o libperf-y += pmu.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += group.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c new file mode 100644 index 000000000000..37f92aa39a5d --- /dev/null +++ b/tools/perf/arch/x86/util/group.c @@ -0,0 +1,27 @@ +#include <stdio.h> +#include "api/fs/fs.h" +#include "util/group.h" + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. 
May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee7ada78d86f..fd76bb0b18d1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,10 +59,13 @@ #include "util/thread.h" #include "util/thread_map.h" #include "util/counts.h" +#include "util/group.h" #include "util/session.h" #include "util/tool.h" +#include "util/group.h" #include "asm/bug.h" +#include <api/fs/fs.h> #include <stdlib.h> #include <sys/prctl.h> #include <locale.h> @@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = { "}" }; +static const char * topdown_attrs[] = { + "topdown-total-slots", + "topdown-slots-retired", + "topdown-recovery-bubbles", + "topdown-fetch-bubbles", + "topdown-slots-issued", + NULL, +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool transaction_run; +static bool topdown_run = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -124,6 +137,7 @@ static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; +static bool force_metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int enable_metric_only(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) +{ + force_metric_only = true; + metric_only = !unset; + return 0; +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1578,8 +1600,10 @@ static const struct option stat_options[] = { "aggregate 
counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), - OPT_BOOLEAN(0, "metric-only", &metric_only, - "Only print computed metrics. No raw values"), + OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + "Only print computed metrics. No raw values", enable_metric_only), + OPT_BOOLEAN(0, "topdown", &topdown_run, + "measure topdown level 1 statistics"), OPT_END() }; @@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } +static int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} + /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ static int add_default_attributes(void) { + int err; struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, @@ -1896,7 +1970,6 @@ static int add_default_attributes(void) return 0; if (transaction_run) { - int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, NULL); @@ -1909,6 +1982,46 @@ static int add_default_attributes(void) return 0; } + if (topdown_run) { + char *str = NULL; + bool warn = false; + + if 
(stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("top down event configuration requires --per-core mode\n"); + return -1; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("top down event configuration requires system-wide mode (-a)\n"); + return -1; + } + + if (!force_metric_only) + metric_only = true; + if (topdown_filter_events(topdown_attrs, &str, + arch_topdown_check_group(&warn)) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_attrs[0] && str) { + if (warn) + arch_topdown_group_warn(); + err = parse_events(evsel_list, str, NULL); + if (err) { + fprintf(stderr, + "Cannot set up top down events %s: %d\n", + str, err); + free(str); + return -1; + } + } else { + fprintf(stderr, "System does not support topdown\n"); + return -1; + } + free(str); + } + if (!evsel_list->nr_entries) { if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h new file mode 100644 index 000000000000..116debe7a995 --- /dev/null +++ b/tools/perf/util/group.h @@ -0,0 +1,7 @@ +#ifndef GROUP_H +#define GROUP_H 1 + +bool arch_topdown_check_group(bool *warn); +void arch_topdown_group_warn(void); + +#endif diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 01af1ee90a27..3c15b33b2e84 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } +topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction | |