summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/perf-stat.txt32
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/group.c27
-rw-r--r--tools/perf/builtin-stat.c119
-rw-r--r--tools/perf/util/group.h7
-rw-r--r--tools/perf/util/parse-events.l1
6 files changed, 184 insertions, 3 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04f23b404bbc..d96ccd4844df 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
--no-aggr::
Do not aggregate counts across all monitored CPUs.
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This allows to
+determine bottle necks in the CPU pipeline for CPU bound workloads,
+by breaking the cycles consumed down into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the bottle
+neck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparently bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+perf.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workload with changing phases.
+
+This enables --metric-only, unless overriden with --no-metric-only.
+
+To interpret the results it is usually needed to know on which
+CPUs the workload runs on. If needed the CPUs can be forced using
+taskset.
EXAMPLES
--------
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 465970370f3e..4cd8a16b1b7b 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -3,6 +3,7 @@ libperf-y += tsc.o
libperf-y += pmu.o
libperf-y += kvm-stat.o
libperf-y += perf_regs.o
+libperf-y += group.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644
index 000000000000..37f92aa39a5d
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group may get bad results due to multiplexing.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ee7ada78d86f..fd76bb0b18d1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,10 +59,13 @@
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
+#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/group.h"
#include "asm/bug.h"
+#include <api/fs/fs.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>
@@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = {
"}"
};
+static const char * topdown_attrs[] = {
+ "topdown-total-slots",
+ "topdown-slots-retired",
+ "topdown-recovery-bubbles",
+ "topdown-fetch-bubbles",
+ "topdown-slots-issued",
+ NULL,
+};
+
static struct perf_evlist *evsel_list;
static struct target target = {
@@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
static bool transaction_run;
+static bool topdown_run = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
@@ -124,6 +137,7 @@ static unsigned int initial_delay = 0;
static unsigned int unit_width = 4; /* strlen("unit") */
static bool forever = false;
static bool metric_only = false;
+static bool force_metric_only = false;
static struct timespec ref_time;
static struct cpu_map *aggr_map;
static aggr_get_id_t aggr_get_id;
@@ -1520,6 +1534,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
return 0;
}
+static int enable_metric_only(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused, int unset)
+{
+ force_metric_only = true;
+ metric_only = !unset;
+ return 0;
+}
+
static const struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1578,8 +1600,10 @@ static const struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
- OPT_BOOLEAN(0, "metric-only", &metric_only,
- "Only print computed metrics. No raw values"),
+ OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+ "Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "topdown", &topdown_run,
+ "measure topdown level 1 statistics"),
OPT_END()
};
@@ -1772,12 +1796,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
return 0;
}
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
*/
static int add_default_attributes(void)
{
+ int err;
struct perf_event_attr default_attrs0[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -1896,7 +1970,6 @@ static int add_default_attributes(void)
return 0;
if (transaction_run) {
- int err;
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
err = parse_events(evsel_list, transaction_attrs, NULL);
@@ -1909,6 +1982,46 @@ static int add_default_attributes(void)
return 0;
}
+ if (topdown_run) {
+ char *str = NULL;
+ bool warn = false;
+
+ if (stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_CORE) {
+ pr_err("top down event configuration requires --per-core mode\n");
+ return -1;
+ }
+ stat_config.aggr_mode = AGGR_CORE;
+ if (nr_cgroups || !target__has_cpu(&target)) {
+ pr_err("top down event configuration requires system-wide mode (-a)\n");
+ return -1;
+ }
+
+ if (!force_metric_only)
+ metric_only = true;
+ if (topdown_filter_events(topdown_attrs, &str,
+ arch_topdown_check_group(&warn)) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_attrs[0] && str) {
+ if (warn)
+ arch_topdown_group_warn();
+ err = parse_events(evsel_list, str, NULL);
+ if (err) {
+ fprintf(stderr,
+ "Cannot set up top down events %s: %d\n",
+ str, err);
+ free(str);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "System does not support topdown\n");
+ return -1;
+ }
+ free(str);
+ }
+
if (!evsel_list->nr_entries) {
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 000000000000..116debe7a995
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 01af1ee90a27..3c15b33b2e84 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |