7 files changed, 51 insertions, 93 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 786849fcc319..47f66c6a7d7f 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3370,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 			    space_info->bytes_reserved > 0 ||
 			    space_info->bytes_may_use > 0))
 			btrfs_dump_space_info(info, space_info, 0, 0);
+		WARN_ON(space_info->reclaim_size > 0);
 		list_del(&space_info->list);
 		btrfs_sysfs_remove_space_info(space_info);
 	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8a144f9cb7ac..719e68ab552c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2098,6 +2098,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	atomic_inc(&root->log_batch);
 
 	/*
+	 * If the inode needs a full sync, make sure we use a full range to
+	 * avoid log tree corruption, due to hole detection racing with ordered
+	 * extent completion for adjacent ranges and races between logging and
+	 * completion of ordered extents for adjancent ranges - both races
+	 * could lead to file extent items in the log with overlapping ranges.
+	 * Do this while holding the inode lock, to avoid races with other
+	 * tasks.
+	 */
+	if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+		     &BTRFS_I(inode)->runtime_flags)) {
+		start = 0;
+		end = LLONG_MAX;
+	}
+
+	/*
 	 * Before we acquired the inode's lock, someone may have dirtied more
 	 * pages in the target range. We need to make sure that writeback for
 	 * any such pages does not start while we are logging the inode, because
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index d1973141d3bb..040009d1cc31 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -264,6 +264,7 @@ copy_inline_extent:
 			    size);
 	inode_add_bytes(dst, datal);
 	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
 		/*
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f65595602aa8..7e362a6935fd 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -611,8 +611,8 @@ static int should_ignore_root(struct btrfs_root *root)
 	if (!reloc_root)
 		return 0;
 
-	if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
-	    root->fs_info->running_transaction->transid - 1)
+	if (btrfs_header_generation(reloc_root->commit_root) ==
+	    root->fs_info->running_transaction->transid)
 		return 0;
 	/*
 	 * if there is reloc tree and it was created in previous
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 8b0fe053a25d..ff17a4420358 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -361,6 +361,16 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+static void remove_ticket(struct btrfs_space_info *space_info,
+			  struct reserve_ticket *ticket)
+{
+	if (!list_empty(&ticket->list)) {
+		list_del_init(&ticket->list);
+		ASSERT(space_info->reclaim_size >= ticket->bytes);
+		space_info->reclaim_size -= ticket->bytes;
+	}
+}
+
 /*
  * This is for space we already have accounted in space_info->bytes_may_use, so
  * basically when we're returning space from block_rsv's.
@@ -388,9 +398,7 @@ again:
 			btrfs_space_info_update_bytes_may_use(fs_info,
 							      space_info,
 							      ticket->bytes);
-			list_del_init(&ticket->list);
-			ASSERT(space_info->reclaim_size >= ticket->bytes);
-			space_info->reclaim_size -= ticket->bytes;
+			remove_ticket(space_info, ticket);
 			ticket->bytes = 0;
 			space_info->tickets_id++;
 			wake_up(&ticket->wait);
@@ -899,7 +907,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 			btrfs_info(fs_info, "failing ticket with %llu bytes",
 				   ticket->bytes);
 
-		list_del_init(&ticket->list);
+		remove_ticket(space_info, ticket);
 		ticket->error = -ENOSPC;
 		wake_up(&ticket->wait);
 
@@ -1063,7 +1071,7 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
 			 * despite getting an error, resulting in a space leak
 			 * (bytes_may_use counter of our space_info).
 			 */
-			list_del_init(&ticket->list);
+			remove_ticket(space_info, ticket);
 			ticket->error = -EINTR;
 			break;
 		}
@@ -1121,7 +1129,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 		 * either the async reclaim job deletes the ticket from the list
 		 * or we delete it ourselves at wait_reserve_ticket().
 		 */
-		list_del_init(&ticket->list);
+		remove_ticket(space_info, ticket);
 		if (!ret)
 			ret = -ENOSPC;
 	}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 58c111474ba5..ec36a7c6ba3d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -96,8 +96,8 @@ enum {
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, struct btrfs_inode *inode,
 			   int inode_only,
-			   u64 start,
-			   u64 end,
+			   const loff_t start,
+			   const loff_t end,
 			   struct btrfs_log_ctx *ctx);
 static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
@@ -4533,15 +4533,13 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 static int btrfs_log_holes(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_inode *inode,
-			   struct btrfs_path *path,
-			   const u64 start,
-			   const u64 end)
+			   struct btrfs_path *path)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_key key;
 	const u64 ino = btrfs_ino(inode);
 	const u64 i_size = i_size_read(&inode->vfs_inode);
-	u64 prev_extent_end = start;
+	u64 prev_extent_end = 0;
 	int ret;
 
 	if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
@@ -4549,21 +4547,14 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
 
 	key.objectid = ino;
 	key.type = BTRFS_EXTENT_DATA_KEY;
-	key.offset = start;
+	key.offset = 0;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
 
-	if (ret > 0 && path->slots[0] > 0) {
-		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
-		if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
-			path->slots[0]--;
-	}
-
 	while (true) {
 		struct extent_buffer *leaf = path->nodes[0];
-		u64 extent_end;
 
 		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
 			ret = btrfs_next_leaf(root, path);
@@ -4580,18 +4571,9 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
 		if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
 			break;
 
-		extent_end = btrfs_file_extent_end(path);
-		if (extent_end <= start)
-			goto next_slot;
-
 		/* We have a hole, log it. */
 		if (prev_extent_end < key.offset) {
-			u64 hole_len;
-
-			if (key.offset >= end)
-				hole_len = end - prev_extent_end;
-			else
-				hole_len = key.offset - prev_extent_end;
+			const u64 hole_len = key.offset - prev_extent_end;
 
 			/*
 			 * Release the path to avoid deadlocks with other code
@@ -4621,20 +4603,16 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
 			leaf = path->nodes[0];
 		}
 
-		prev_extent_end = min(extent_end, end);
-		if (extent_end >= end)
-			break;
-next_slot:
+		prev_extent_end = btrfs_file_extent_end(path);
 		path->slots[0]++;
 		cond_resched();
 	}
 
-	if (prev_extent_end < end && prev_extent_end < i_size) {
+	if (prev_extent_end < i_size) {
 		u64 hole_len;
 
 		btrfs_release_path(path);
-		hole_len = min(ALIGN(i_size, fs_info->sectorsize), end);
-		hole_len -= prev_extent_end;
+		hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
 		ret = btrfs_insert_file_extent(trans, root->log_root,
 					       ino, prev_extent_end, 0, 0,
 					       hole_len, 0, hole_len,
@@ -4971,8 +4949,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
 				   const u64 logged_isize,
 				   const bool recursive_logging,
 				   const int inode_only,
-				   const u64 start,
-				   const u64 end,
 				   struct btrfs_log_ctx *ctx,
 				   bool *need_log_inode_item)
 {
@@ -4981,21 +4957,6 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
 	int ins_nr = 0;
 	int ret;
 
-	/*
-	 * We must make sure we don't copy extent items that are entirely out of
-	 * the range [start, end - 1]. This is not just an optimization to avoid
-	 * copying but also needed to avoid a corruption where we end up with
-	 * file extent items in the log tree that have overlapping ranges - this
-	 * can happen if we race with ordered extent completion for ranges that
-	 * are outside our target range. For example we copy an extent item and
-	 * when we move to the next leaf, that extent was trimmed and a new one
-	 * covering a subrange of it, but with a higher key, was inserted - we
-	 * would then copy this other extent too, resulting in a log tree with
-	 * 2 extent items that represent overlapping ranges.
-	 *
-	 * We can copy the entire extents at the range bondaries however, even
-	 * if they cover an area outside the target range. That's ok.
-	 */
 	while (1) {
 		ret = btrfs_search_forward(root, min_key, path, trans->transid);
 		if (ret < 0)
@@ -5063,29 +5024,6 @@ again:
 			goto next_slot;
 		}
 
-		if (min_key->type == BTRFS_EXTENT_DATA_KEY) {
-			const u64 extent_end = btrfs_file_extent_end(path);
-
-			if (extent_end <= start) {
-				if (ins_nr > 0) {
-					ret = copy_items(trans, inode, dst_path,
-							 path, ins_start_slot,
-							 ins_nr, inode_only,
-							 logged_isize);
-					if (ret < 0)
-						return ret;
-					ins_nr = 0;
-				}
-				goto next_slot;
-			}
-			if (extent_end >= end) {
-				ins_nr++;
-				if (ins_nr == 1)
-					ins_start_slot = path->slots[0];
-				break;
-			}
-		}
-
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
 			goto next_slot;
@@ -5151,8 +5089,8 @@ next_key:
 static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, struct btrfs_inode *inode,
 			   int inode_only,
-			   u64 start,
-			   u64 end,
+			   const loff_t start,
+			   const loff_t end,
 			   struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5180,9 +5118,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 	}
 
-	start = ALIGN_DOWN(start, fs_info->sectorsize);
-	end = ALIGN(end, fs_info->sectorsize);
-
 	min_key.objectid = ino;
 	min_key.type = BTRFS_INODE_ITEM_KEY;
 	min_key.offset = 0;
@@ -5298,8 +5233,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
 	err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
 				      path, dst_path, logged_isize,
-				      recursive_logging, inode_only,
-				      start, end, ctx, &need_log_inode_item);
+				      recursive_logging, inode_only, ctx,
+				      &need_log_inode_item);
 	if (err)
 		goto out_unlock;
 
@@ -5312,7 +5247,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
 		btrfs_release_path(path);
 		btrfs_release_path(dst_path);
-		err = btrfs_log_holes(trans, root, inode, path, start, end);
+		err = btrfs_log_holes(trans, root, inode, path);
 		if (err)
 			goto out_unlock;
 	}
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 8134924cfc17..e6b6cb0f8bc6 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -36,12 +36,10 @@ struct btrfs_ioctl_vol_args {
 #define BTRFS_DEVICE_PATH_NAME_MAX	1024
 #define BTRFS_SUBVOL_NAME_MAX 		4039
 
-/*
- * Deprecated since 5.7:
- *
- * BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
- */
-
+#ifndef __KERNEL__
+/* Deprecated since 5.7 */
+# define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
+#endif
 #define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
 #define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)