diff options
author | Rik van Riel <riel@surriel.com> | 2018-12-21 14:30:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-21 14:51:18 -0800 |
commit | 5eed6f1dff87bfb5e545935def3843edf42800f2 (patch) | |
tree | 0394ab2c5298d47a74ac0eb2b3ee3ebd119402dc /kernel/fork.c | |
parent | 2e83ee1d8694a61d0d95a5b694f2e61e8dde8627 (diff) |
fork,memcg: fix crash in free_thread_stack on memcg charge fail
Commit 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting") will
result in fork failing if allocating a kernel stack for a task in
dup_task_struct exceeds the kernel memory allowance for that cgroup.
Unfortunately, it also results in a crash.
This is due to the code jumping to free_stack and calling
free_thread_stack when the memcg kernel stack charge fails, but without
tsk->stack pointing at the freshly allocated stack.
This in turn results in the vfree_atomic in free_thread_stack oopsing
with a backtrace like this:
#5 [ffffc900244efc88] die at ffffffff8101f0ab
#6 [ffffc900244efcb8] do_general_protection at ffffffff8101cb86
#7 [ffffc900244efce0] general_protection at ffffffff818ff082
[exception RIP: llist_add_batch+7]
RIP: ffffffff8150d487 RSP: ffffc900244efd98 RFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff88085ef55980 RCX: 0000000000000000
RDX: ffff88085ef55980 RSI: 343834343531203a RDI: 343834343531203a
RBP: ffffc900244efd98 R8: 0000000000000001 R9: ffff8808578c3600
R10: 0000000000000000 R11: 0000000000000001 R12: ffff88029f6c21c0
R13: 0000000000000286 R14: ffff880147759b00 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#8 [ffffc900244efda0] vfree_atomic at ffffffff811df2c7
#9 [ffffc900244efdb8] copy_process at ffffffff81086e37
#10 [ffffc900244efe98] _do_fork at ffffffff810884e0
#11 [ffffc900244eff10] sys_vfork at ffffffff810887ff
#12 [ffffc900244eff20] do_syscall_64 at ffffffff81002a43
RIP: 000000000049b948 RSP: 00007ffcdb307830 RFLAGS: 00000246
RAX: ffffffffffffffda RBX: 0000000000896030 RCX: 000000000049b948
RDX: 0000000000000000 RSI: 00007ffcdb307790 RDI: 00000000005d7421
RBP: 000000000067370f R8: 00007ffcdb3077b0 R9: 000000000001ed00
R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000040
R13: 000000000000000f R14: 0000000000000000 R15: 000000000088d018
ORIG_RAX: 000000000000003a CS: 0033 SS: 002b
The simplest fix is to assign tsk->stack right where it is allocated.
Link: http://lkml.kernel.org/r/20181214231726.7ee4843c@imladris.surriel.com
Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 07cddff89c7b..e2a5156bc9c3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -240,8 +240,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) * free_thread_stack() can be called in interrupt context, * so cache the vm_struct. */ - if (stack) + if (stack) { tsk->stack_vm_area = find_vm_area(stack); + tsk->stack = stack; + } return stack; #else struct page *page = alloc_pages_node(node, THREADINFO_GFP, @@ -288,7 +290,10 @@ static struct kmem_cache *thread_stack_cache; static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); + unsigned long *stack; + stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); + tsk->stack = stack; + return stack; } static void free_thread_stack(struct task_struct *tsk) |