Diffstat (limited to 'tools/perf')
-rw-r--r--  tools/perf/util/branch.h     |  19
-rw-r--r--  tools/perf/util/callchain.h  |   5
-rw-r--r--  tools/perf/util/intel-pt.c   |  17
-rw-r--r--  tools/perf/util/machine.c    | 139
-rw-r--r--  tools/perf/util/thread.c     |  22
-rw-r--r--  tools/perf/util/thread.h     |  14
6 files changed, 188 insertions, 28 deletions
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 154a05cd03af..4d3f02fa223d 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -15,13 +15,18 @@
#include "event.h"
struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+ };
+ };
};
struct branch_info {
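The union wrapper keeps the existing bitfield layout but adds a u64 view of the whole flags word, which is what lets has_stitched_lbr() further down compare two sets of branch flags in a single operation. A minimal standalone sketch of the pattern (the helper name is illustrative, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t u64;

struct branch_flags {
	union {
		u64 value;		/* whole-word view */
		struct {		/* per-flag view */
			u64 mispred:1;
			u64 predicted:1;
			u64 in_tx:1;
			u64 abort:1;
			u64 cycles:16;
			u64 type:4;
			u64 reserved:40;
		};
	};
};

/* One 64-bit compare instead of one compare per bitfield. */
static bool branch_flags_equal(const struct branch_flags *a,
			       const struct branch_flags *b)
{
	return a->value == b->value;
}
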
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index cb33cd42ff43..8f668ee29f25 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -154,6 +154,11 @@ struct callchain_cursor_node {
struct callchain_cursor_node *next;
};
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
struct callchain_cursor {
u64 nr;
struct callchain_cursor_node *first;
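struct stitch_list is an intrusive list node: the list_head sits next to a by-value copy of a callchain_cursor_node, so saved entries can be chained without a separate link allocation. A sketch of how such a node gets filled and queued (hypothetical helper; assumes compilation inside the perf tree, where callchain.h and tools/include/linux/list.h are available):

#include <linux/list.h>
#include <string.h>

static void stitch_save_cursor_node(struct list_head *lists,
				    struct stitch_list *node,
				    const struct callchain_cursor_node *src)
{
	/* Copy by value: the source cursor node may be overwritten later. */
	memcpy(&node->cursor, src, sizeof(node->cursor));
	list_add_tail(&node->node, lists);
}
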
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index a659b4a1b3f2..4be7634dccf5 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1717,15 +1717,14 @@ static u64 intel_pt_lbr_flags(u64 info)
union {
struct branch_flags flags;
u64 result;
- } u = {
- .flags = {
- .mispred = !!(info & LBR_INFO_MISPRED),
- .predicted = !(info & LBR_INFO_MISPRED),
- .in_tx = !!(info & LBR_INFO_IN_TX),
- .abort = !!(info & LBR_INFO_ABORT),
- .cycles = info & LBR_INFO_CYCLES,
- }
- };
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
return u.result;
}
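With branch_flags now containing an anonymous union, the old designated initializer through .flags becomes awkward: some older compilers mishandle designated initialization of anonymous members, and the rewrite makes it explicit that every bit of the 64-bit word, reserved bits included, is deterministically zero before the fields are set. That matters once flags.value is compared verbatim in has_stitched_lbr(). A sketch of the zero-then-set pattern (illustrative helper, reusing struct branch_flags from branch.h):

/* Sketch only; assumes struct branch_flags from tools/perf/util/branch.h. */
static u64 pack_lbr_flags(bool mispred, u64 cycles)
{
	union {
		struct branch_flags flags;
		u64 result;
	} u;

	u.result = 0;			/* no indeterminate or reserved bits left over */
	u.flags.mispred = mispred;
	u.flags.predicted = !mispred;
	u.flags.cycles = cycles;	/* truncated to the 16-bit field */

	return u.result;
}
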
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 737dee723a57..5ac32cabe4e6 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2348,6 +2348,119 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
return 0;
}
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for the current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+ /* The previous sample has a shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+ * Identical LBRs must have the same from, to and flags values. Also,
+ * they have to be saved in the same LBR registers (same physical
+ * index).
+ *
+ * The check starts from the base-of-stack of the current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of the previous sample
+ * and the base-of-stack of the current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
{
if (thread->lbr_stitch)
@@ -2361,6 +2474,9 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
if (!thread->lbr_stitch->prev_lbr_cursor)
goto free_lbr_stitch;
+ INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+ INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
return true;
free_lbr_stitch:
@@ -2386,9 +2502,11 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
int max_stack,
unsigned int max_lbr)
{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
u64 branch_from = 0;
int err;
@@ -2405,10 +2523,18 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
(max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
lbr_stitch = thread->lbr_stitch;
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
+
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
}
- if (callchain_param.order == ORDER_CALLEE) {
+ if (callee) {
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
@@ -2421,7 +2547,18 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
if (err)
goto error;
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
} else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
root_al, &branch_from, false);
if (err)
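The ring arithmetic in has_stitched_lbr() is easiest to check with concrete numbers. hw_idx is the physical index of the most recent LBR entry, so the oldest entry of the current sample sits at hw_idx - nr + 1 (mod max_lbr); cur_base carries that value offset into [1, 2*max_lbr - 1] so the distance subtraction never goes negative. A standalone worked example (all values are illustrative):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

int main(void)
{
	unsigned int max_lbr = 32;		/* size of the LBR ring */
	u64 cur_nr = 8, cur_hw_idx = 10;	/* current sample: 8 entries, newest at 10 */
	u64 prev_hw_idx = 12;			/* previous sample's newest entry */

	/* Oldest entry of the current sample: 10 - 8 + 1 = 3, stored as 35. */
	u64 cur_base = max_lbr - cur_nr + cur_hw_idx + 1;

	/* Ring distance from the previous newest entry back to that base: 9. */
	u64 distance = (prev_hw_idx > cur_base) ? (prev_hw_idx - cur_base) :
			(max_lbr + prev_hw_idx - cur_base);

	/* prev_entries[9] shares a physical register with cur's oldest entry,
	 * so the identity walk compares i = 9..0 against j = cur_nr-1..0, and
	 * valid prev entries at i > 9 become stitch candidates.
	 */
	printf("cur_base=%llu distance=%llu\n",
	       (unsigned long long)cur_base, (unsigned long long)distance);
	return 0;
}
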
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 8d0da260c84c..665e5c0618ed 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -454,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ zfree(&thread->lbr_stitch);
+}
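Freeing nodes while walking a list requires the _safe iterator, which loads the next node into tmp before the loop body runs, so free(pos) cannot invalidate the traversal. A compact sketch of the same teardown (hypothetical helper, assumes the perf tree headers):

#include <linux/list.h>
#include <stdlib.h>

static void stitch_lists_free(struct list_head *head)
{
	struct stitch_list *pos, *tmp;

	/* tmp already points at the next node when free(pos) runs. */
	list_for_each_entry_safe(pos, tmp, head, node) {
		list_del_init(&pos->node);
		free(pos);
	}
}
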
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 8456174a52c5..b066fb30d203 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -5,7 +5,6 @@
#include <linux/refcount.h>
#include <linux/rbtree.h>
#include <linux/list.h>
-#include <linux/zalloc.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
@@ -24,6 +23,8 @@ struct thread_stack;
struct unwind_libunwind_ops;
struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
struct perf_sample prev_sample;
struct callchain_cursor_node *prev_lbr_cursor;
};
@@ -154,15 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread)
return false;
}
-static inline void thread__free_stitch_list(struct thread *thread)
-{
- struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
-
- if (!lbr_stitch)
- return;
-
- zfree(&lbr_stitch->prev_lbr_cursor);
- zfree(&thread->lbr_stitch);
-}
+void thread__free_stitch_list(struct thread *thread);
#endif /* __PERF_THREAD_H */
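
Taken together, the two list heads in struct lbr_stitch form a small allocation cache: lists holds the nodes stitched into the current callchain, and free_lists holds retired nodes that get_stitch_node() reuses before falling back to malloc(); when a sample cannot be stitched, list_replace_init() retires the active list in one O(1) splice instead of freeing it node by node. A sketch of that recycling scheme (hypothetical helper names, assuming the perf tree headers):

#include <linux/list.h>
#include <stdlib.h>

static struct stitch_list *stitch_node_get(struct lbr_stitch *ls)
{
	struct stitch_list *node;

	if (!list_empty(&ls->free_lists)) {	/* reuse a retired node */
		node = list_first_entry(&ls->free_lists,
					struct stitch_list, node);
		list_del(&node->node);
		return node;
	}
	return malloc(sizeof(*node));		/* otherwise allocate fresh */
}

static void stitch_nodes_retire(struct lbr_stitch *ls)
{
	/* list_replace_init() moves the nodes of ls->lists under
	 * ls->free_lists and re-empties ls->lists.
	 */
	if (!list_empty(&ls->lists))
		list_replace_init(&ls->lists, &ls->free_lists);
}

The consumer side that switches this machinery on (the --stitch-lbr option in perf report and related tools) is wired up by follow-up patches in the same series.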