summaryrefslogtreecommitdiff
path: root/kernel/trace/trace_stack.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace_stack.c')
-rw-r--r--kernel/trace/trace_stack.c112
1 files changed, 112 insertions, 0 deletions
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 5d16f73898db..ec9a34a97129 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -53,6 +53,104 @@ static void print_max_stack(void)
}
}
+/*
+ * The stack tracer looks for a maximum stack at each call from a function. It
+ * registers a callback from ftrace, and in that callback it examines the stack
+ * size. It determines the stack size from the variable passed in, which is the
+ * address of a local variable in the stack_trace_call() callback function.
+ * The stack size is calculated by the address of the local variable to the top
+ * of the current stack. If that size is smaller than the currently saved max
+ * stack size, nothing more is done.
+ *
+ * If the size of the stack is greater than the maximum recorded size, then the
+ * following algorithm takes place.
+ *
+ * For architectures (like x86) that store the function's return address before
+ * saving the function's local variables, the stack will look something like
+ * this:
+ *
+ * [ top of stack ]
+ * 0: sys call entry frame
+ * 10: return addr to entry code
+ * 11: start of sys_foo frame
+ * 20: return addr to sys_foo
+ * 21: start of kernel_func_bar frame
+ * 30: return addr to kernel_func_bar
+ * 31: [ do trace stack here ]
+ *
+ * The save_stack_trace() is called returning all the functions it finds in the
+ * current stack. Which would be (from the bottom of the stack to the top):
+ *
+ * return addr to kernel_func_bar
+ * return addr to sys_foo
+ * return addr to entry code
+ *
+ * Now to figure out how much each of these functions' local variable size is,
+ * a search of the stack is made to find these values. When a match is made, it
+ * is added to the stack_dump_trace[] array. The offset into the stack is saved
+ * in the stack_trace_index[] array. The above example would show:
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 30
+ * return addr to sys_foo | 20
+ * return addr to entry | 10
+ *
+ * The print_max_stack() function above, uses these values to print the size of
+ * each function's portion of the stack.
+ *
+ * for (i = 0; i < nr_entries; i++) {
+ * size = i == nr_entries - 1 ? stack_trace_index[i] :
+ * stack_trace_index[i] - stack_trace_index[i+1]
+ * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]);
+ * }
+ *
+ * The above shows
+ *
+ * depth size location
+ * ----- ---- --------
+ * 0 30 10 kernel_func_bar
+ * 1 20 10 sys_foo
+ * 2 10 10 entry code
+ *
+ * Now for architectures that might save the return address after the functions
+ * local variables (saving the link register before calling nested functions),
+ * this will cause the stack to look a little different:
+ *
+ * [ top of stack ]
+ * 0: sys call entry frame
+ * 10: start of sys_foo_frame
+ * 19: return addr to entry code << lr saved before calling kernel_func_bar
+ * 20: start of kernel_func_bar frame
+ * 29: return addr to sys_foo_frame << lr saved before calling next function
+ * 30: [ do trace stack here ]
+ *
+ * Although the functions returned by save_stack_trace() may be the same, the
+ * placement in the stack will be different. Using the same algorithm as above
+ * would yield:
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 30
+ * return addr to sys_foo | 29
+ * return addr to entry | 19
+ *
+ * Where the mapping is off by one:
+ *
+ * kernel_func_bar stack frame size is 29 - 19 not 30 - 29!
+ *
+ * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS the
+ * values in stack_trace_index[] are shifted by one to and the number of
+ * stack trace entries is decremented by one.
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 29
+ * return addr to sys_foo | 19
+ *
+ * Although the entry function is not displayed, the first function (sys_foo)
+ * will still include the stack size of it.
+ */
static void check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags; unsigned long *p, *top, *start;
@@ -158,6 +256,20 @@ static void check_stack(unsigned long ip, unsigned long *stack)
i++;
}
+#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER
+ /*
+ * Some archs will store the link register before calling
+ * nested functions. This means the saved return address
+ * comes after the local storage, and we need to shift
+ * for that.
+ */
+ if (x > 1) {
+ memmove(&stack_trace_index[0], &stack_trace_index[1],
+ sizeof(stack_trace_index[0]) * (x - 1));
+ x--;
+ }
+#endif
+
stack_trace_nr_entries = x;
if (task_stack_end_corrupted(current)) {