diff options
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 6 | ||||
-rw-r--r-- | tools/perf/util/event.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 4 | ||||
-rw-r--r-- | tools/perf/util/hist.c | 12 | ||||
-rw-r--r-- | tools/perf/util/hist.h | 2 | ||||
-rw-r--r-- | tools/perf/util/intel-pt.c | 5 | ||||
-rw-r--r-- | tools/perf/util/session.c | 8 | ||||
-rw-r--r-- | tools/perf/util/sort.c | 47 | ||||
-rw-r--r-- | tools/perf/util/sort.h | 3 | ||||
-rw-r--r-- | tools/perf/util/synthetic-events.c | 4 |
10 files changed, 81 insertions, 11 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index b9686a131ed8..f546b5e9db05 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -109,6 +109,9 @@ OPTIONS - time: Separate the samples by time stamp with the resolution specified by --time-quantum (default 100ms). Specify with overhead and before it. - code_page_size: the code page size of sampled code address (ip) + - ins_lat: Instruction latency in core cycles. This is the global instruction + latency + - local_ins_lat: Local instruction latency version By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -155,7 +158,8 @@ OPTIONS - blocked: reason of blocked load access for the data at the time of the sample And the default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, see '--mem-mode'. + symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat, + see '--mem-mode'. If the data file has tracepoint event(s), following (dynamic) sort keys are also available: diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2afea7247dd3..60752e4c9727 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -142,6 +142,7 @@ struct perf_sample { u16 insn_len; u8 cpumode; u16 misc; + u16 ins_lat; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fa49d15edc35..844aebd9c306 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2352,8 +2352,10 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, weight.full = *array; if (type & PERF_SAMPLE_WEIGHT) data->weight = weight.full; - else + else { data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + } array++; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4038b086cb80..c82f5fc26af8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -209,6 +209,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); + hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else @@ -287,12 +289,13 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight) + u64 weight, u64 ins_lat) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; + he_stat->ins_lat += ins_lat; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -304,6 +307,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; dest->weight += src->weight; + dest->ins_lat += src->ins_lat; } static void he_stat__decay(struct he_stat *he_stat) @@ -592,6 +596,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, int64_t cmp; u64 period = entry->stat.period; u64 weight = entry->stat.weight; + u64 ins_lat = entry->stat.ins_lat; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -610,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight); + he_stat__add_period(&he->stat, period, weight, ins_lat); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight); + he_stat__add_period(he->stat_acc, period, weight, ins_lat); /* * This mem info was allocated from sample__resolve_mem @@ -725,6 +730,7 @@ __hists__add_entry(struct hists *hists, .nr_events = 1, .period = sample->period, .weight = sample->weight, + .ins_lat = sample->ins_lat, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3788391c50c7..3c537232294b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -73,6 +73,8 @@ enum hist_column { HISTC_DSO_SIZE, HISTC_SYMBOL_IPC, HISTC_MEM_BLOCKED, + HISTC_LOCAL_INS_LAT, + HISTC_GLOBAL_INS_LAT, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a929f6dbdf43..c9477d021687 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1871,9 +1871,10 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) * cycles. Use latency >> 32 to distinguish the * different format of the mem access latency field. */ - if (weight > 0) + if (weight > 0) { sample.weight = weight & 0xffff; - else + sample.ins_lat = items->mem_access_latency & 0xffff; + } else sample.weight = items->mem_access_latency; } if (!sample.weight && items->has_tsx_aux_info) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 053c08c8c850..f4aeb1af05d8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1300,8 +1300,12 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) - printf("... weight: %" PRIu64 "\n", sample->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { + printf("... weight: %" PRIu64 "", sample->weight); + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + printf(",0x%"PRIx16"", sample->ins_lat); + printf("\n"); + } if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e29a24b41b67..0d5ad42812b9 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -36,7 +36,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -1365,6 +1365,49 @@ struct sort_entry sort_global_weight = { .se_width_idx = HISTC_GLOBAL_WEIGHT, }; +static u64 he_ins_lat(struct hist_entry *he) +{ + return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; +} + +static int64_t +sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return he_ins_lat(left) - he_ins_lat(right); +} + +static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); +} + +struct sort_entry sort_local_ins_lat = { + .se_header = "Local INSTR Latency", + .se_cmp = sort__local_ins_lat_cmp, + .se_snprintf = hist_entry__local_ins_lat_snprintf, + .se_width_idx = HISTC_LOCAL_INS_LAT, +}; + +static int64_t +sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->stat.ins_lat - right->stat.ins_lat; +} + +static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); +} + +struct sort_entry sort_global_ins_lat = { + .se_header = "INSTR Latency", + .se_cmp = sort__global_ins_lat_cmp, + .se_snprintf = hist_entry__global_ins_lat_snprintf, + .se_width_idx = HISTC_GLOBAL_INS_LAT, +}; + struct sort_entry sort_mem_daddr_sym = { .se_header = "Data Symbol", .se_cmp = sort__daddr_cmp, @@ -1796,6 +1839,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), DIM(SORT_TIME, "time", sort_time), DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), + DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), + DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 984e54533ae1..63f67a3f3630 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -50,6 +50,7 @@ struct he_stat { u64 period_guest_sys; u64 period_guest_us; u64 weight; + u64 ins_lat; u32 nr_events; }; @@ -231,6 +232,8 @@ enum sort_type { SORT_SYM_IPC_NULL, SORT_TIME, SORT_CODE_PAGE_SIZE, + SORT_LOCAL_INS_LAT, + SORT_GLOBAL_INS_LAT, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 4e9266f75175..c6f9db3faf83 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1644,8 +1644,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo if (type & PERF_SAMPLE_WEIGHT_TYPE) { *array = sample->weight; - if (type & PERF_SAMPLE_WEIGHT_STRUCT) + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { *array &= 0xffffffff; + *array |= ((u64)sample->ins_lat << 32); + } array++; } |