diff options
author | Qu Wenruo <wqu@suse.com> | 2020-06-16 10:17:36 +0800 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2020-07-27 12:55:38 +0200 |
commit | 2dfb1e43f57dd3aeaa66f7cf05d068db2d4c8788 (patch) | |
tree | e05cf5303cdb46912405f2f165e24f1f54bf1980 /fs/btrfs/disk-io.c | |
parent | 082b6c970f02fefd278c7833880cda29691a5f34 (diff) |
btrfs: preallocate anon block device at first phase of snapshot creation
[BUG]
When the anonymous block device pool is exhausted, subvolume/snapshot
creation fails with EMFILE (Too many files open). This has been reported
by a user. The allocation happens in the second phase during transaction
commit where it's only way out is to abort the transaction
BTRFS: Transaction aborted (error -24)
WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
Call Trace:
create_pending_snapshots+0x82/0xa0 [btrfs]
btrfs_commit_transaction+0x275/0x8c0 [btrfs]
btrfs_mksubvol+0x4b9/0x500 [btrfs]
btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
btrfs_ioctl+0x11a4/0x2da0 [btrfs]
do_vfs_ioctl+0xa9/0x640
ksys_ioctl+0x67/0x90
__x64_sys_ioctl+0x1a/0x20
do_syscall_64+0x5a/0x110
entry_SYSCALL_64_after_hwframe+0x44/0xa9
---[ end trace 33f2f83f3d5250e9 ]---
BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
BTRFS info (device sda1): forced readonly
BTRFS warning (device sda1): Skipping commit of aborted transaction.
BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown
[CAUSE]
When the global anonymous block device pool is exhausted, the following
call chain will fail, and lead to transaction abort:
btrfs_ioctl_snap_create_v2()
|- btrfs_ioctl_snap_create_transid()
|- btrfs_mksubvol()
|- btrfs_commit_transaction()
|- create_pending_snapshot()
|- btrfs_get_fs_root()
|- btrfs_init_fs_root()
|- get_anon_bdev()
[FIX]
Although we can't enlarge the anonymous block device pool, at least we
can preallocate anon_dev for subvolume/snapshot in the first phase,
outside of transaction context and exactly at the moment the user calls
the creation ioctl.
Reported-by: Greed Rong <greedrong@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 71 |
1 files changed, 64 insertions, 7 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c90edf04a9db..9b307034d32c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1391,7 +1391,12 @@ alloc_fail: goto out; } -static int btrfs_init_fs_root(struct btrfs_root *root) +/* + * Initialize subvolume root in-memory structure + * + * @anon_dev: anonymous device to attach to the root, if zero, allocate new + */ +static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) { int ret; unsigned int nofs_flag; @@ -1430,9 +1435,13 @@ static int btrfs_init_fs_root(struct btrfs_root *root) */ if (is_fstree(root->root_key.objectid) && btrfs_root_refs(&root->root_item) > 0) { - ret = get_anon_bdev(&root->anon_dev); - if (ret) - goto fail; + if (!anon_dev) { + ret = get_anon_bdev(&root->anon_dev); + if (ret) + goto fail; + } else { + root->anon_dev = anon_dev; + } } mutex_lock(&root->objectid_mutex); @@ -1537,8 +1546,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) } -struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, bool check_ref) +/* + * Get an in-memory reference of a root structure. + * + * For essential trees like root/extent tree, we grab it from fs_info directly. + * For subvolume trees, we check the cached filesystem roots first. If not + * found, then read it from disk and add it to cached fs roots. + * + * Caller should release the root by calling btrfs_put_root() after the usage. + * + * NOTE: Reloc and log trees can't be read by this function as they share the + * same root objectid. + * + * @objectid: root id + * @anon_dev: preallocated anonymous block device number for new roots, + * pass 0 for new allocation. + * @check_ref: whether to check root item references, If true, return -ENOENT + * for orphan roots + */ +static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, + u64 objectid, dev_t anon_dev, + bool check_ref) { struct btrfs_root *root; struct btrfs_path *path; @@ -1567,6 +1595,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { + /* Shouldn't get preallocated anon_dev for cached roots */ + ASSERT(!anon_dev); if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); @@ -1586,7 +1616,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root); + ret = btrfs_init_fs_root(root, anon_dev); if (ret) goto fail; @@ -1619,6 +1649,33 @@ fail: return ERR_PTR(ret); } +/* + * Get in-memory reference of a root structure + * + * @objectid: tree objectid + * @check_ref: if set, verify that the tree exists and the item has at least + * one reference + */ +struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, + u64 objectid, bool check_ref) +{ + return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); +} + +/* + * Get in-memory reference of a root structure, created as new, optionally pass + * the anonymous block device id + * + * @objectid: tree objectid + * @anon_dev: if zero, allocate a new anonymous block device or use the + * parameter value + */ +struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, + u64 objectid, dev_t anon_dev) +{ + return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); +} + static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; |