aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2021-01-15 16:26:17 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-02-03 23:28:40 +0100
commit2175bf57dc9522c58d93dcd474758434a3f05c57 (patch)
treef200ae2b37066181df9ebc2efc951e3b19c0780e
parentf343bf1aaf5517683d77be1f78da0d7117770464 (diff)
downloadkernel_replicant_linux-2175bf57dc9522c58d93dcd474758434a3f05c57.tar.gz
kernel_replicant_linux-2175bf57dc9522c58d93dcd474758434a3f05c57.tar.bz2
kernel_replicant_linux-2175bf57dc9522c58d93dcd474758434a3f05c57.zip
btrfs: fix possible free space tree corruption with online conversion
commit 2f96e40212d435b328459ba6b3956395eed8fa9f upstream. While running btrfs/011 in a loop I would often ASSERT() while trying to add a new free space entry that already existed, or get an EEXIST while adding a new block to the extent tree, which is another indication of double allocation. This occurs because when we do the free space tree population, we create the new root and then populate the tree and commit the transaction. The problem is when you create a new root, the root node and commit root node are the same. During this initial transaction commit we will run all of the delayed refs that were paused during the free space tree generation, and thus begin to cache block groups. While caching block groups the caching thread will be reading from the main root for the free space tree, so as we make allocations we'll be changing the free space tree, which can cause us to add the same range twice which results in either the ASSERT(ret != -EEXIST); in __btrfs_add_free_space, or in a variety of different errors when running delayed refs because of a double allocation. Fix this by marking the fs_info as unsafe to load the free space tree, and fall back on the old slow method. We could be smarter than this, for example caching the block group while we're populating the free space tree, but since this is a serious problem I've opted for the simplest solution. CC: stable@vger.kernel.org # 4.9+ Fixes: a5ed91828518 ("Btrfs: implement the free space B-tree") Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--fs/btrfs/block-group.c10
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/free-space-tree.c10
3 files changed, 21 insertions, 2 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index cef2f080fdcd..a2111eab614f 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -639,7 +639,15 @@ static noinline void caching_thread(struct btrfs_work *work)
mutex_lock(&caching_ctl->mutex);
down_read(&fs_info->commit_root_sem);
- if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+ /*
+ * If we are in the transaction that populated the free space tree we
+ * can't actually cache from the free space tree as our commit root and
+ * real root are the same, so we could change the contents of the blocks
+ * while caching. Instead do the slow caching in this case, and after
+ * the transaction has committed we will be safe.
+ */
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
ret = load_free_space_tree(caching_ctl);
else
ret = load_extent_tree_free(caching_ctl);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e01545538e07..30ea9780725f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -146,6 +146,9 @@ enum {
BTRFS_FS_STATE_DEV_REPLACING,
/* The btrfs_fs_info created for self-tests */
BTRFS_FS_STATE_DUMMY_FS_INFO,
+
+ /* Indicate that we can't trust the free space tree for caching yet */
+ BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,
};
#define BTRFS_BACKREF_REV_MAX 256
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 6b9faf3b0e96..6cf2f7bb30c2 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1152,6 +1152,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
return PTR_ERR(trans);
set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
free_space_root = btrfs_create_tree(trans,
BTRFS_FREE_SPACE_TREE_OBJECTID);
if (IS_ERR(free_space_root)) {
@@ -1173,11 +1174,18 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ ret = btrfs_commit_transaction(trans);
- return btrfs_commit_transaction(trans);
+ /*
+ * Now that we've committed the transaction any reading of our commit
+ * root will be safe, so we can cache from the free space tree now.
+ */
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
abort:
clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;