diff options
author | Ingo Molnar <mingo@kernel.org> | 2015-06-30 06:47:32 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-06-30 06:47:58 +0200 |
commit | 2d6dac2fcc796a9a2917d69bcab66f6b157fe51b (patch) | |
tree | ca03e7f2fa83e2401af8f1c773c297baae2115b7 | |
parent | 6eedf416429a32e0216f61b8b690d25577b2b91e (diff) | |
parent | 36c8bb56a9f718a9a5f35d1834ca9dcec95deb4a (diff) |
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User-visible changes:
- Validate syscall list passed via -e argument to 'perf trace'. (Arnaldo Carvalho de Melo)
- Introduce 'perf stat --per-thread'. (Jiri Olsa)
- Check access permission for --kallsyms and --vmlinux. (Li Zhang)
Infrastructure changes:
- Move stuff out of 'perf stat' and into the lib for further use. (Jiri Olsa)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | tools/perf/Documentation/perf-stat.txt | 4 |
-rw-r--r-- | tools/perf/builtin-report.c | 11 |
-rw-r--r-- | tools/perf/builtin-stat.c | 402 |
-rw-r--r-- | tools/perf/builtin-trace.c | 32 |
-rw-r--r-- | tools/perf/tests/Build | 1 |
-rw-r--r-- | tools/perf/tests/builtin-test.c | 4 |
-rw-r--r-- | tools/perf/tests/openat-syscall-all-cpus.c | 6 |
-rw-r--r-- | tools/perf/tests/openat-syscall.c | 4 |
-rw-r--r-- | tools/perf/tests/tests.h | 1 |
-rw-r--r-- | tools/perf/tests/thread-map.c | 38 |
-rw-r--r-- | tools/perf/util/evlist.h | 1 |
-rw-r--r-- | tools/perf/util/evsel.c | 24 |
-rw-r--r-- | tools/perf/util/evsel.h | 28 |
-rw-r--r-- | tools/perf/util/python-ext-sources | 1 |
-rw-r--r-- | tools/perf/util/stat.c | 132 |
-rw-r--r-- | tools/perf/util/stat.h | 47 |
-rw-r--r-- | tools/perf/util/symbol.c | 5 |
-rw-r--r-- | tools/perf/util/thread_map.c | 76 |
-rw-r--r-- | tools/perf/util/thread_map.h | 8 |
19 files changed, 570 insertions, 255 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod use --per-core in addition to -a. (system-wide). The output includes the core number and the number of online logical processors on that physical processor. +--per-thread:: +Aggregate counts per monitored threads, when monitoring threads (-t option) +or processes (-p option). + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring. This is useful to diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 32626ea3e227..348bed4a2abf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -742,6 +742,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, report_usage, 0); + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + if (report.use_stdio) use_browser = 0; else if (report.use_tui) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e1636cae76b..37e301a32f43 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,10 +67,7 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" -static void print_stat(int argc, const char **argv); -static void print_counter_aggr(struct perf_evsel *counter, char *prefix); -static void print_counter(struct perf_evsel *counter, char *prefix); -static void print_aggr(char *prefix); +static void print_counters(struct timespec *ts, int 
argc, const char **argv); /* Default events used for perf stat -T */ static const char *transaction_attrs = { @@ -141,86 +138,9 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +static void perf_stat__reset_stats(void) { - int i; - struct perf_stat *ps = evsel->priv; - - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); - - perf_stat_evsel_id_init(evsel); -} - -static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) -{ - evsel->priv = zalloc(sizeof(struct perf_stat)); - if (evsel->priv == NULL) - return -ENOMEM; - perf_evsel__reset_stat_priv(evsel); - return 0; -} - -static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) -{ - zfree(&evsel->priv); -} - -static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) -{ - struct perf_counts *counts; - - counts = perf_counts__new(perf_evsel__nr_cpus(evsel)); - if (counts) - evsel->prev_raw_counts = counts; - - return counts ? 
0 : -ENOMEM; -} - -static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) -{ - perf_counts__delete(evsel->prev_raw_counts); - evsel->prev_raw_counts = NULL; -} - -static void perf_evlist__free_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__free_stat_priv(evsel); - perf_evsel__free_counts(evsel); - perf_evsel__free_prev_raw_counts(evsel); - } -} - -static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) - goto out_free; - } - - return 0; - -out_free: - perf_evlist__free_stats(evlist); - return -1; -} - -static void perf_stat__reset_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); - } - + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); } @@ -294,8 +214,9 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) return 0; } -static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, - struct perf_counts_values *count) +static int +process_counter_values(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; @@ -310,13 +231,13 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, count = &zero; switch (aggr_mode) { + case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, count); + perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, 
NULL); - evsel->counts->cpu[cpu] = *count; if (aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->values, cpu); break; @@ -333,26 +254,48 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, return 0; } -static int read_counter(struct perf_evsel *counter); +static int process_counter_maps(struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; -/* - * Read out the results of a single counter: - * aggregate counts across CPUs in system-wide mode - */ -static int read_counter_aggr(struct perf_evsel *counter) + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +static int process_counter(struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; u64 *count = counter->counts->aggr.values; - int i; + int i, ret; aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); - if (read_counter(counter)) - return -1; + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(counter); + if (ret) + return ret; + + if (aggr_mode != AGGR_GLOBAL) + return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, aggr); + perf_evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -387,12 +330,12 @@ static int read_counter(struct perf_evsel *counter) if (counter->system_wide) nthreads = 1; - if (counter->per_pkg) - zero_per_pkg(counter); - for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) + struct perf_counts_values *count; + + count = 
perf_counts(counter->counts, cpu, thread); + if (perf_evsel__read(counter, cpu, thread, count)) return -1; } } @@ -400,68 +343,34 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void print_interval(void) +static void read_counters(bool close) { - static int num_print_interval; struct perf_evsel *counter; - struct perf_stat *ps; - struct timespec ts, rs; - char prefix[64]; - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter_aggr(counter); - } - } else { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter(counter); - } - } + evlist__for_each(evsel_list, counter) { + if (read_counter(counter)) + pr_warning("failed to read counter %s\n", counter->name); - clock_gettime(CLOCK_MONOTONIC, &ts); - diff_timespec(&rs, &ts, &ref_time); - sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); + if (process_counter(counter)) + pr_warning("failed to process counter %s\n", counter->name); - if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + if (close) { + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), + thread_map__nr(evsel_list->threads)); } } +} - if (++num_print_interval == 25) - num_print_interval = 0; +static void process_interval(void) +{ + struct timespec ts, rs; - switch (aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(prefix); - break; - case AGGR_NONE: - evlist__for_each(evsel_list, counter) 
- print_counter(counter, prefix); - break; - case AGGR_GLOBAL: - default: - evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, prefix); - } + read_counters(false); - fflush(output); + clock_gettime(CLOCK_MONOTONIC, &ts); + diff_timespec(&rs, &ts, &ref_time); + + print_counters(&rs, 0, NULL); } static void handle_initial_delay(void) @@ -576,7 +485,7 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); - print_interval(); + process_interval(); } } wait(&status); @@ -594,7 +503,7 @@ static int __run_perf_stat(int argc, const char **argv) while (!done) { nanosleep(&ts, NULL); if (interval) - print_interval(); + process_interval(); } } @@ -602,18 +511,7 @@ static int __run_perf_stat(int argc, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - read_counter_aggr(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } - } else { - evlist__for_each(evsel_list, counter) { - read_counter(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); - } - } + read_counters(true); return WEXITSTATUS(status); } @@ -705,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; + case AGGR_THREAD: + fprintf(output, "%*s-%*d%s", + csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + csv_output ? 
0 : -8, + thread_map__pid(evsel->threads, id), + csv_sep); + break; case AGGR_GLOBAL: default: break; @@ -805,9 +711,9 @@ static void print_aggr(char *prefix) s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; - val += counter->counts->cpu[cpu].val; - ena += counter->counts->cpu[cpu].ena; - run += counter->counts->cpu[cpu].run; + val += perf_counts(counter->counts, cpu, 0)->val; + ena += perf_counts(counter->counts, cpu, 0)->ena; + run += perf_counts(counter->counts, cpu, 0)->run; nr++; } if (prefix) @@ -853,6 +759,40 @@ static void print_aggr(char *prefix) } } +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int cpu, thread; + double uval; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + + if (nsec_counter(counter)) + nsec_printout(thread, 0, counter, uval); + else + abs_printout(thread, 0, counter, uval); + + if (!csv_output) + print_noise(counter, 1.0); + + print_running(run, ena); + fputc('\n', output); + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -915,9 +855,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = counter->counts->cpu[cpu].val; - ena = counter->counts->cpu[cpu].ena; - run = counter->counts->cpu[cpu].run; + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; if (prefix) fprintf(output, "%s", prefix); @@ -962,9 +902,38 @@ static void 
print_counter(struct perf_evsel *counter, char *prefix) } } -static void print_stat(int argc, const char **argv) +static void print_interval(char *prefix, struct timespec *ts) +{ + static int num_print_interval; + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + + if (num_print_interval == 0 && !csv_output) { + switch (aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + } + } + + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(int argc, const char **argv) { - struct perf_evsel *counter; int i; fflush(stdout); @@ -990,36 +959,57 @@ static void print_stat(int argc, const char **argv) fprintf(output, " (%d runs)", run_count); fprintf(output, ":\n\n"); } +} + +static void print_footer(void) +{ + if (!null_run) + fprintf(output, "\n"); + fprintf(output, " %17.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(output, " "); + print_noise_pct(stddev_stats(&walltime_nsecs_stats), + avg_stats(&walltime_nsecs_stats)); + } + fprintf(output, "\n\n"); +} + +static void print_counters(struct timespec *ts, int argc, const char **argv) +{ + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(prefix = buf, ts); + else + print_header(argc, argv); switch (aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(NULL); + print_aggr(prefix); + break; + case AGGR_THREAD: + evlist__for_each(evsel_list, counter) + print_aggr_thread(counter, prefix); break; 
case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, NULL); + print_counter_aggr(counter, prefix); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) - print_counter(counter, NULL); + print_counter(counter, prefix); break; default: break; } - if (!csv_output) { - if (!null_run) - fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); - } - fprintf(output, "\n\n"); - } + if (!interval && !csv_output) + print_footer(); + + fflush(output); } static volatile int signr = -1; @@ -1091,6 +1081,7 @@ static int perf_stat_init_aggr_mode(void) break; case AGGR_NONE: case AGGR_GLOBAL: + case AGGR_THREAD: default: break; } @@ -1315,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-thread", &aggr_mode, + "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), OPT_END() @@ -1406,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - /* no_aggr, cgroup are for system-wide only */ - if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && + if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + fprintf(stderr, "The --per-thread option is only available " + "when monitoring via -p -t options.\n"); + parse_options_usage(NULL, options, "p", 1); + parse_options_usage(NULL, options, "t", 1); + goto out; + } + + /* + * no_aggr, cgroup are for system-wide only + * --per-thread is aggregated per thread, we dont mix it with cpu mode + */ + if (((aggr_mode != AGGR_GLOBAL && aggr_mode != 
AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1435,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } goto out; } + + /* + * Initialize thread_map with comm names, + * so we could print it out on output. + */ + if (aggr_mode == AGGR_THREAD) + thread_map__read_comms(evsel_list->threads); + if (interval && interval < 100) { pr_err("print interval must be >= 100ms\n"); parse_options_usage(stat_usage, options, "I", 1); @@ -1468,13 +1480,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); if (forever && status != -1) { - print_stat(argc, argv); - perf_stat__reset_stats(evsel_list); + print_counters(NULL, argc, argv); + perf_stat__reset_stats(); } } if (!forever && status != -1 && !interval) - print_stat(argc, argv); + print_counters(NULL, argc, argv); perf_evlist__free_stats(evsel_list); out: diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2bf2ca771ca5..39ad4d0ca884 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1617,6 +1617,34 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +static int trace__validate_ev_qualifier(struct trace *trace) +{ + int err = 0; + struct str_node *pos; + + strlist__for_each(pos, trace->ev_qualifier) { + const char *sc = pos->s; + + if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { + if (err == 0) { + fputs("Error:\tInvalid syscall ", trace->output); + err = -EINVAL; + } else { + fputs(", ", trace->output); + } + + fputs(sc, trace->output); + } + } + + if (err < 0) { + fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" + "\nHint:\tand: 'man syscalls'\n", trace->output); + } + + return err; +} + /* * args is to be interpreted as a series of longs but we need to handle * 8-byte unaligned accesses. 
args points to raw_data within the event @@ -2862,6 +2890,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) err = -ENOMEM; goto out_close; } + + err = trace__validate_ev_qualifier(&trace); + if (err) + goto out_close; } err = target__validate(&trace.opts.target); diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index ee41e705b2eb..d20d6e6ab65b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,6 +31,7 @@ perf-y += code-reading.o perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o +perf-y += thread-map.o perf-$(CONFIG_X86) += perf-time-to-tsc.o diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 87b9961646e4..c1dde733c3a6 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -171,6 +171,10 @@ static struct test { .func = test__kmod_path__parse, }, { + .desc = "Test thread map", + .func = test__thread_map, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index b8d552b13950..a572f87e9c8d 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -78,7 +78,7 @@ int test__openat_syscall_event_on_all_cpus(void) * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. 
*/ - if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { + if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); goto out_close_fd; } @@ -98,9 +98,9 @@ int test__openat_syscall_event_on_all_cpus(void) } expected = nr_openat_calls + cpu; - if (evsel->counts->cpu[cpu].val != expected) { + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index bdfa1f446681..c9a37bc6b33a 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -44,9 +44,9 @@ int test__openat_syscall_event(void) goto out_close_fd; } - if (evsel->counts->cpu[0].val != nr_openat_calls) { + if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", - nr_openat_calls, evsel->counts->cpu[0].val); + nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8e5038b48ba8..ebb47d96bc0b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -61,6 +61,7 @@ int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); int test__kmod_path__parse(void); +int test__thread_map(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c new file mode 100644 index 000000000000..5acf000939ea --- /dev/null +++ b/tools/perf/tests/thread-map.c @@ -0,0 +1,38 @@ +#include <sys/types.h> +#include <unistd.h> +#include "tests.h" +#include 
"thread_map.h" +#include "debug.h" + +int test__thread_map(void) +{ + struct thread_map *map; + + /* test map on current pid */ + map = thread_map__new_by_pid(getpid()); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", + thread_map__pid(map, 0) == getpid()); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "perf")); + thread_map__put(map); + + /* test dummy pid */ + map = thread_map__new_dummy(); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "dummy")); + thread_map__put(map); + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8489b9d2812..037633c1da9d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -289,5 +289,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1b2f480a3e82..2936b3080722 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -898,7 +898,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -910,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = evsel->prev_raw_counts->cpu[cpu]; - evsel->prev_raw_counts->cpu[cpu] = *count; + tmp = 
*perf_counts(evsel->prev_raw_counts, cpu, thread); + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } count->val = count->val - tmp.val; @@ -939,20 +939,18 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb) +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { - struct perf_counts_values count; - - memset(&count, 0, sizeof(count)); + memset(count, 0, sizeof(*count)); if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) return -errno; - return cb(evsel, cpu, thread, &count); + return 0; } int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, @@ -964,15 +962,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, &count); + perf_evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); - evsel->counts->cpu[cpu] = count; + *perf_counts(evsel->counts, cpu, thread) = count; return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4dbf32d94dfb..4a7ed5656cf0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -9,23 +9,7 @@ #include "xyarray.h" #include "symbol.h" #include "cpumap.h" - -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; -}; - -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct perf_counts_values cpu[]; -}; +#include 
"stat.h" struct perf_evsel; @@ -128,7 +112,7 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, @@ -245,12 +229,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) -typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count); - -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb); +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 5925fec90562..e23ded40c79e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -20,3 +20,4 @@ util/stat.c util/strlist.c util/trace-event.c ../../lib/rbtree.c +util/string.c diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4014b709f956..f2a0d1521e26 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,6 +1,8 @@ #include <math.h> #include "stat.h" +#include "evlist.h" #include "evsel.h" +#include "thread_map.h" void update_stats(struct stats *stats, u64 val) { @@ -95,33 +97,46 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -struct perf_counts *perf_counts__new(int ncpus) +struct perf_counts *perf_counts__new(int ncpus, int nthreads) { - int size = sizeof(struct perf_counts) + - ncpus * sizeof(struct perf_counts_values); + struct perf_counts *counts = zalloc(sizeof(*counts)); - return 
zalloc(size); + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; } void perf_counts__delete(struct perf_counts *counts) { - free(counts); + if (counts) { + xyarray__delete(counts->values); + free(counts); + } } -static void perf_counts__reset(struct perf_counts *counts, int ncpus) +static void perf_counts__reset(struct perf_counts *counts) { - memset(counts, 0, (sizeof(*counts) + - (ncpus * sizeof(struct perf_counts_values)))); + xyarray__reset(counts->values); } -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus) +void perf_evsel__reset_counts(struct perf_evsel *evsel) { - perf_counts__reset(evsel->counts, ncpus); + perf_counts__reset(evsel->counts); } -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) { - evsel->counts = perf_counts__new(ncpus); + evsel->counts = perf_counts__new(ncpus, nthreads); return evsel->counts != NULL ? 
0 : -ENOMEM; } @@ -130,3 +145,96 @@ void perf_evsel__free_counts(struct perf_evsel *evsel) perf_counts__delete(evsel->counts); evsel->counts = NULL; } + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +{ + int i; + struct perf_stat *ps = evsel->priv; + + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + + perf_stat_evsel_id_init(evsel); +} + +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +{ + evsel->priv = zalloc(sizeof(struct perf_stat)); + if (evsel->priv == NULL) + return -ENOMEM; + perf_evsel__reset_stat_priv(evsel); + return 0; +} + +void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +{ + zfree(&evsel->priv); +} + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) +{ + struct perf_counts *counts; + + counts = perf_counts__new(ncpus, nthreads); + if (counts) + evsel->prev_raw_counts = counts; + + return counts ? 0 : -ENOMEM; +} + +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->prev_raw_counts); + evsel->prev_raw_counts = NULL; +} + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +{ + int ncpus = perf_evsel__nr_cpus(evsel); + int nthreads = thread_map__nr(evsel->threads); + + if (perf_evsel__alloc_stat_priv(evsel) < 0 || + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + return -ENOMEM; + + return 0; +} + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (perf_evsel__alloc_stats(evsel, alloc_raw)) + goto out_free; + } + + return 0; + +out_free: + perf_evlist__free_stats(evlist); + return -1; +} + +void perf_evlist__free_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__free_stat_priv(evsel); + perf_evsel__free_counts(evsel); + 
perf_evsel__free_prev_raw_counts(evsel); + } +} + +void perf_evlist__reset_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__reset_stat_priv(evsel); + perf_evsel__reset_counts(evsel); + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 093dc3cb28dd..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <stdio.h> +#include "xyarray.h" struct stats { @@ -29,8 +30,32 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, + AGGR_THREAD, }; +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -46,6 +71,8 @@ static inline void init_stats(struct stats *stats) } struct perf_evsel; +struct perf_evlist; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); @@ -62,10 +89,24 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu, enum aggr_mode aggr); -struct perf_counts *perf_counts__new(int ncpus); +struct perf_counts *perf_counts__new(int ncpus, int nthreads); void perf_counts__delete(struct perf_counts *counts); -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); void 
perf_evsel__free_counts(struct perf_evsel *evsel); + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); +void perf_evsel__free_stat_priv(struct perf_evsel *evsel); + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads); +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); +void perf_evlist__free_stats(struct perf_evlist *evlist); +void perf_evlist__reset_stats(struct perf_evlist *evlist); #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 504f2d73b7ee..48b588c6951a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1132,8 +1132,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); - if (fd < 0) + if (fd < 0) { + pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; + } /* Read new maps into temporary lists */ err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 368cc58c6892..da7646d767fe 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -8,9 +8,11 @@ #include <unistd.h> #include "strlist.h" #include <string.h> +#include <api/fs/fs.h> #include "asm/bug.h" #include "thread_map.h" #include "util.h" +#include "debug.h" /* Skip "." and ".." 
directories */ static int filter(const struct dirent *dir) @@ -21,11 +23,26 @@ static int filter(const struct dirent *dir) return 1; } +static void thread_map__reset(struct thread_map *map, int start, int nr) +{ + size_t size = (nr - start) * sizeof(map->map[0]); + + memset(&map->map[start], 0, size); +} + static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) { size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; + int start = map ? map->nr : 0; - return realloc(map, size); + map = realloc(map, size); + /* + * We only realloc to add more items, let's reset new items. + */ + if (map) + thread_map__reset(map, start, nr); + + return map; } #define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr) @@ -304,8 +321,12 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, static void thread_map__delete(struct thread_map *threads) { if (threads) { + int i; + WARN_ONCE(atomic_read(&threads->refcnt) != 0, "thread map refcnt unbalanced\n"); + for (i = 0; i < threads->nr; i++) + free(thread_map__comm(threads, i)); free(threads); } } @@ -333,3 +354,56 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) return printed + fprintf(fp, "\n"); } + +static int get_comm(char **comm, pid_t pid) +{ + char *path; + size_t size; + int err; + + if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1) + return -ENOMEM; + + err = filename__read_str(path, comm, &size); + if (!err) { + /* + * We're reading 16 bytes, while filename__read_str + * allocates data per BUFSIZ bytes, so we can safely + * mark the end of the string. 
+ */ + (*comm)[size] = 0; + rtrim(*comm); + } + + free(path); + return err; +} + +static void comm_init(struct thread_map *map, int i) +{ + pid_t pid = thread_map__pid(map, i); + char *comm = NULL; + + /* dummy pid comm initialization */ + if (pid == -1) { + map->map[i].comm = strdup("dummy"); + return; + } + + /* + * The comm name is like extra bonus ;-), + * so just warn if we fail for any reason. + */ + if (get_comm(&comm, pid)) + pr_warning("Couldn't resolve comm name for pid %d\n", pid); + + map->map[i].comm = comm; +} + +void thread_map__read_comms(struct thread_map *threads) +{ + int i; + + for (i = 0; i < threads->nr; ++i) + comm_init(threads, i); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 6b0cd2dc006b..af679d8a50f8 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -7,6 +7,7 @@ struct thread_map_data { pid_t pid; + char *comm; }; struct thread_map { @@ -44,4 +45,11 @@ thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) { map->map[thread].pid = pid; } + +static inline char *thread_map__comm(struct thread_map *map, int thread) +{ + return map->map[thread].comm; +} + +void thread_map__read_comms(struct thread_map *threads); #endif /* __PERF_THREAD_MAP_H */ |