-rw-r--r-- | arch/nds32/kernel/perf_event_cpu.c | 299
1 files changed, 299 insertions, 0 deletions
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
index a6e723d0fdbc..5e00ce54d0ff 100644
--- a/arch/nds32/kernel/perf_event_cpu.c
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -1193,6 +1193,305 @@ static int __init register_pmu_driver(void)
 
 device_initcall(register_pmu_driver);
 
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+        int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+        /* 0x3 means misalignment */
+        if (!kstack_end((void *)frame->fp) &&
+            !((unsigned long)frame->fp & 0x3) &&
+            ((unsigned long)frame->fp >= TASK_SIZE)) {
+                /*
+                 * The array index is based on the ABI; the graph below
+                 * illustrates the reasons.
+                 * Function call procedure: "smw" and "lmw" will always
+                 * update SP and FP for you automatically.
+                 *
+                 * Stack                                      Relative Address
+                 * |  |                                              0
+                 * ----
+                 * |LP| <-- SP(before smw) <-- FP(after smw)        -1
+                 * ----
+                 * |FP|                                             -2
+                 * ----
+                 * |  | <-- SP(after smw)                           -3
+                 */
+                frame->lp = ((unsigned long *)frame->fp)[-1];
+                frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+                /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+                if (__kernel_text_address(frame->lp))
+                        frame->lp = ftrace_graph_ret_addr
+                                        (NULL, &graph, frame->lp, NULL);
+
+                return 0;
+        } else {
+                return -EPERM;
+        }
+#else
+        /*
+         * You can refer to arch/nds32/kernel/traps.c:__dump().
+         * Treat "sp" as "fp", but note that "sp" is one frame ahead of "fp",
+         * and the value at "sp" is not always correct.
+         *
+         * Stack                          Relative Address
+         * |  |                                  0
+         * ----
+         * |LP| <-- SP(before smw)              -1
+         * ----
+         * |  | <-- SP(after smw)               -2
+         * ----
+         */
+        if (!kstack_end((void *)frame->sp)) {
+                frame->lp = ((unsigned long *)frame->sp)[1];
+                /* TODO: How to deal with the case where the value at the
+                 * first "sp" is not correct?
+                 */
+                if (__kernel_text_address(frame->lp))
+                        frame->lp = ftrace_graph_ret_addr
+                                        (NULL, &graph, frame->lp, NULL);
+
+                frame->sp = ((unsigned long *)frame->sp) + 1;
+
+                return 0;
+        } else {
+                return -EPERM;
+        }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+                int (*fn_record)(struct stackframe *, void *),
+                void *data)
+{
+        while (1) {
+                int ret;
+
+                if (fn_record(frame, data))
+                        break;
+
+                ret = unwind_frame_kernel(frame);
+                if (ret < 0)
+                        break;
+        }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return, so we use
+ * the LP.
+ */
+static int callchain_trace(struct stackframe *fr, void *data)
+{
+        struct perf_callchain_entry_ctx *entry = data;
+
+        perf_callchain_store(entry, fr->lp);
+        return 0;
+}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+        struct frame_tail buftail;
+        unsigned long lp = 0;
+        unsigned long *user_frame_tail =
+                (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+        /* Check accessibility of one struct frame_tail beyond */
+        if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+                return 0;
+        if (__copy_from_user_inatomic
+                (&buftail, user_frame_tail, sizeof(buftail)))
+                return 0;
+
+        /*
+         * Refer to unwind_frame_kernel() for more illustration
+         */
+        lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+        fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+        perf_callchain_store(entry, lp);
+        return fp;
+}
+
+static unsigned long
+user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
+                        unsigned long fp)
+{
+        struct frame_tail_opt_size buftail;
+        unsigned long lp = 0;
+
+        unsigned long *user_frame_tail =
+                (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+        /* Check accessibility of one struct frame_tail beyond */
+        if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+                return 0;
+        if (__copy_from_user_inatomic
+                (&buftail, user_frame_tail, sizeof(buftail)))
+                return 0;
+
+        /*
+         * Refer to unwind_frame_kernel() for more illustration
+         */
+        lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+        fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+
+        perf_callchain_store(entry, lp);
+        return fp;
+}
+
+/*
+ * This will be called when the target is in user mode.
+ * This function will only be called when we use
+ * "PERF_SAMPLE_CALLCHAIN" in
+ * kernel/events/core.c:perf_prepare_sample().
+ *
+ * How to trigger perf_callchain_[user/kernel]:
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ */
+unsigned long leaf_fp;
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+                    struct pt_regs *regs)
+{
+        unsigned long fp = 0;
+        unsigned long gp = 0;
+        unsigned long lp = 0;
+        unsigned long sp = 0;
+        unsigned long *user_frame_tail;
+
+        leaf_fp = 0;
+
+        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+                /* We don't support guest OS callchains now */
+                return;
+        }
+
+        perf_callchain_store(entry, regs->ipc);
+        fp = regs->fp;
+        gp = regs->gp;
+        lp = regs->lp;
+        sp = regs->sp;
+        if (entry->nr < PERF_MAX_STACK_DEPTH &&
+            (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
+                user_frame_tail =
+                        (unsigned long *)(fp - (unsigned long)sizeof(fp));
+
+                if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(fp)))
+                        return;
+
+                if (__copy_from_user_inatomic
+                        (&leaf_fp, user_frame_tail, sizeof(fp)))
+                        return;
+
+                if (leaf_fp == lp) {
+                        /*
+                         * Maybe this is a non-leaf function
+                         * built with optimize-for-size,
+                         * or maybe this is a function
+                         * built with optimize-for-size.
+                         */
+                        struct frame_tail buftail;
+
+                        user_frame_tail =
+                                (unsigned long *)(fp -
+                                        (unsigned long)sizeof(buftail));
+
+                        if (!access_ok
+                                (VERIFY_READ, user_frame_tail, sizeof(buftail)))
+                                return;
+
+                        if (__copy_from_user_inatomic
+                                (&buftail, user_frame_tail, sizeof(buftail)))
+                                return;
+
+                        if (buftail.stack_fp == gp) {
+                                /* non-leaf function built with
+                                 * optimize-for-size
+                                 */
+                                struct frame_tail_opt_size buftail_opt_size;
+
+                                user_frame_tail =
+                                        (unsigned long *)(fp - (unsigned long)
+                                                sizeof(buftail_opt_size));
+
+                                if (!access_ok(VERIFY_READ, user_frame_tail,
+                                               sizeof(buftail_opt_size)))
+                                        return;
+
+                                if (__copy_from_user_inatomic
+                                        (&buftail_opt_size, user_frame_tail,
+                                         sizeof(buftail_opt_size)))
+                                        return;
+
+                                perf_callchain_store(entry, lp);
+                                fp = buftail_opt_size.stack_fp;
+
+                                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                                       (unsigned long)fp &&
+                                       !((unsigned long)fp & 0x7) &&
+                                       fp > sp) {
+                                        sp = fp;
+                                        fp = user_backtrace_opt_size(entry, fp);
+                                }
+
+                        } else {
+                                /* this is a function built
+                                 * without optimize-for-size
+                                 */
+                                fp = buftail.stack_fp;
+                                perf_callchain_store(entry, lp);
+                                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                                       (unsigned long)fp &&
+                                       !((unsigned long)fp & 0x7) &&
+                                       fp > sp) {
+                                        sp = fp;
+                                        fp = user_backtrace(entry, fp);
+                                }
+                        }
+                } else {
+                        /* this is a leaf function */
+                        fp = leaf_fp;
+                        perf_callchain_store(entry, lp);
+
+                        /* previous function callchain */
+                        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                               (unsigned long)fp &&
+                               !((unsigned long)fp & 0x7) && fp > sp) {
+                                sp = fp;
+                                fp = user_backtrace(entry, fp);
+                        }
+                }
+                return;
+        }
+}
+
+/* This will be called when the target is in kernel mode */
+void
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+                      struct pt_regs *regs)
+{
+        struct stackframe fr;
+
+        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+                /* We don't support guest OS callchains now */
+                return;
+        }
+        fr.fp = regs->fp;
+        fr.lp = regs->lp;
+        fr.sp = regs->sp;
+        walk_stackframe(&fr, callchain_trace, entry);
+}
+
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 	/* However, NDS32 does not support virtualization */
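
The sketch below is a minimal, stand-alone illustration of the frame-pointer unwind step the patch relies on, assuming the smw-style layout drawn in the unwind_frame_kernel() comment: the saved LP sits at fp[-1] and the caller's FP at fp[FP_OFFSET], with FP_OFFSET taken to be -2 per that diagram. It builds a fake two-frame stack in an ordinary array and walks it with the same two loads; the addresses and LP values are invented for the example, and this is not the kernel code itself.

/*
 * Illustration only: fake FP-chain walk mirroring unwind_frame_kernel().
 * FP_OFFSET == -2 is an assumption taken from the layout comment above.
 */
#include <stdio.h>

#define FP_OFFSET (-2)

struct stackframe {             /* same fields the patch uses */
        unsigned long fp;
        unsigned long sp;
        unsigned long lp;
};

int main(void)
{
        /*
         * Fake stack, growing downward, with two frames laid out as in the
         * diagram: saved LP at fp[-1], saved (caller) FP at fp[-2].
         */
        unsigned long stack[8] = { 0 };

        unsigned long *frame_a = &stack[6];     /* FP of the outer frame   */
        frame_a[-1] = 0x1111;                   /* outer saved LP          */
        frame_a[FP_OFFSET] = 0;                 /* no caller: chain ends   */

        unsigned long *frame_b = &stack[3];     /* FP of the inner frame   */
        frame_b[-1] = 0x2222;                   /* inner saved LP          */
        frame_b[FP_OFFSET] = (unsigned long)frame_a;

        struct stackframe frame = {
                .fp = (unsigned long)frame_b,
                .sp = (unsigned long)&stack[1],
                .lp = 0x3333,                   /* "current" return address */
        };

        /* The unwind loop: the same two loads as unwind_frame_kernel(). */
        printf("lp = %#lx\n", frame.lp);
        while (frame.fp) {
                frame.lp = ((unsigned long *)frame.fp)[-1];
                frame.fp = ((unsigned long *)frame.fp)[FP_OFFSET];
                printf("lp = %#lx\n", frame.lp);
        }
        return 0;
}

In the patch itself the same pair of loads drives both sides: unwind_frame_kernel() dereferences the kernel FP directly, while user_backtrace() and user_backtrace_opt_size() fetch the pair through a struct frame_tail copied with __copy_from_user_inatomic() because the frame lives in user memory.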