From 3fe81ce206f3805e0eb5d886aabbf91064655144 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sun, 15 Dec 2013 12:43:58 +0000 Subject: Btrfs: fix deadlock when iterating inode refs and running delayed inodes While running btrfs/004 from xfstests, after 503 iterations, dmesg reported a deadlock between tasks iterating inode refs and tasks running delayed inodes (during a transaction commit). It turns out that iterating inode refs implies doing one tree search and release all nodes in the path except the leaf node, and then passing that leaf node to btrfs_ref_to_path(), which in turn does another tree search without releasing the lock on the leaf node it received as parameter. This is a problem when other task wants to write to the btree as well and ends up updating the leaf that is read locked - the writer task locks the parent of the leaf and then blocks waiting for the leaf's lock to be released - at the same time, the task executing btrfs_ref_to_path() does a second tree search, without releasing the lock on the first leaf, and wants to access a leaf (the same or another one) that is a child of the same parent, resulting in a deadlock. The trace reported by lockdep follows. [84314.936373] INFO: task fsstress:11930 blocked for more than 120 seconds. [84314.936381] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936383] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936386] fsstress D ffff8806e1bf8000 0 11930 11926 0x00000000 [84314.936393] ffff8804d6d89b78 0000000000000046 ffff8804d6d89b18 ffffffff810bd8bd [84314.936399] ffff8806e1bf8000 ffff8804d6d89fd8 ffff8804d6d89fd8 ffff8804d6d89fd8 [84314.936405] ffff880806308000 ffff8806e1bf8000 ffff8804d6d89c08 ffff8804deb8f190 [84314.936410] Call Trace: [84314.936421] [] ? trace_hardirqs_on+0xd/0x10 [84314.936428] [] schedule+0x29/0x70 [84314.936451] [] btrfs_tree_lock+0x75/0x270 [btrfs] [84314.936457] [] ? __init_waitqueue_head+0x60/0x60 [84314.936470] [] btrfs_search_slot+0x7f1/0x930 [btrfs] [84314.936489] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936504] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [84314.936510] [] ? trace_hardirqs_on_caller+0x1f/0x1e0 [84314.936528] [] __btrfs_update_delayed_inode+0x4c/0x1d0 [btrfs] [84314.936543] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936558] [] ? __btrfs_run_delayed_items+0x13a/0x1e0 [btrfs] [84314.936573] [] __btrfs_run_delayed_items+0x192/0x1e0 [btrfs] [84314.936589] [] btrfs_run_delayed_items+0x13/0x20 [btrfs] [84314.936604] [] btrfs_flush_all_pending_stuffs+0x24/0x80 [btrfs] [84314.936620] [] btrfs_commit_transaction+0x223/0xa20 [btrfs] [84314.936630] [] btrfs_sync_fs+0x6e/0x110 [btrfs] [84314.936635] [] ? __sync_filesystem+0x60/0x60 [84314.936639] [] ? __sync_filesystem+0x60/0x60 [84314.936643] [] sync_fs_one_sb+0x20/0x30 [84314.936648] [] iterate_supers+0xf1/0x100 [84314.936652] [] sys_sync+0x55/0x90 [84314.936658] [] system_call_fastpath+0x16/0x1b [84314.936660] INFO: lockdep is turned off. [84314.936663] INFO: task btrfs:11955 blocked for more than 120 seconds. [84314.936666] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84314.936668] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84314.936670] btrfs D ffff880541729a88 0 11955 11608 0x00000000 [84314.936674] ffff880541729a38 0000000000000046 ffff8805417299d8 ffffffff810bd8bd [84314.936680] ffff88075430c8a0 ffff880541729fd8 ffff880541729fd8 ffff880541729fd8 [84314.936685] ffffffff81c104e0 ffff88075430c8a0 ffff8804de8b00b8 ffff8804de8b0000 [84314.936690] Call Trace: [84314.936695] [] ? trace_hardirqs_on+0xd/0x10 [84314.936700] [] schedule+0x29/0x70 [84314.936717] [] btrfs_tree_read_lock+0xd5/0x140 [btrfs] [84314.936721] [] ? __init_waitqueue_head+0x60/0x60 [84314.936733] [] btrfs_search_slot+0x7c1/0x930 [btrfs] [84314.936746] [] btrfs_find_item+0x55/0x160 [btrfs] [84314.936763] [] ? free_extent_buffer+0x49/0xc0 [btrfs] [84314.936780] [] btrfs_ref_to_path+0xba/0x1e0 [btrfs] [84314.936797] [] ? release_extent_buffer+0xb9/0xe0 [btrfs] [84314.936813] [] ? free_extent_buffer+0x49/0xc0 [btrfs] [84314.936830] [] inode_to_path+0x60/0xd0 [btrfs] [84314.936846] [] paths_from_inode+0x115/0x3c0 [btrfs] [84314.936851] [] ? kmem_cache_alloc_trace+0x114/0x200 [84314.936868] [] btrfs_ioctl+0xf14/0x2030 [btrfs] [84314.936873] [] ? _raw_spin_unlock+0x2b/0x50 [84314.936877] [] ? handle_mm_fault+0x34f/0xb00 [84314.936882] [] ? up_read+0x23/0x40 [84314.936886] [] ? __do_page_fault+0x20c/0x5a0 [84314.936892] [] do_vfs_ioctl+0x96/0x570 [84314.936896] [] ? error_sti+0x5/0x6 [84314.936901] [] ? trace_hardirqs_off_caller+0x28/0xd0 [84314.936906] [] ? retint_swapgs+0xe/0x13 [84314.936910] [] SyS_ioctl+0x91/0xb0 [84314.936915] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [84314.936920] [] system_call_fastpath+0x16/0x1b [84314.936922] INFO: lockdep is turned off. [84434.866873] INFO: task btrfs-transacti:11921 blocked for more than 120 seconds. [84434.866881] Tainted: G W O 3.12.0-fdm-btrfs-next-16+ #70 [84434.866883] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84434.866886] btrfs-transacti D ffff880755b6a478 0 11921 2 0x00000000 [84434.866893] ffff8800735b9ce8 0000000000000046 ffff8800735b9c88 ffffffff810bd8bd [84434.866899] ffff8805a1b848a0 ffff8800735b9fd8 ffff8800735b9fd8 ffff8800735b9fd8 [84434.866904] ffffffff81c104e0 ffff8805a1b848a0 ffff880755b6a478 ffff8804cece78f0 [84434.866910] Call Trace: [84434.866920] [] ? trace_hardirqs_on+0xd/0x10 [84434.866927] [] schedule+0x29/0x70 [84434.866948] [] wait_current_trans.isra.33+0xbf/0x120 [btrfs] [84434.866954] [] ? __init_waitqueue_head+0x60/0x60 [84434.866970] [] start_transaction+0x388/0x5a0 [btrfs] [84434.866985] [] ? transaction_kthread+0xb5/0x280 [btrfs] [84434.866999] [] btrfs_attach_transaction+0x17/0x20 [btrfs] [84434.867012] [] transaction_kthread+0x19e/0x280 [btrfs] [84434.867026] [] ? open_ctree+0x2260/0x2260 [btrfs] [84434.867030] [] kthread+0xed/0x100 [84434.867035] [] ? flush_kthread_worker+0x190/0x190 [84434.867040] [] ret_from_fork+0x7c/0xb0 [84434.867044] [] ? flush_kthread_worker+0x190/0x190 Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/backref.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6a3f7f50ad37..835b6c9a26a8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1569,7 +1569,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, struct btrfs_key found_key; while (!ret) { - path->leave_spinning = 1; ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, &found_key); if (ret < 0) @@ -1582,9 +1581,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, parent = found_key.offset; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); @@ -1642,9 +1644,12 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, ++found; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); -- cgit v1.2.3