aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h9
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/reflink.c15
-rw-r--r--fs/btrfs/space-info.c2
7 files changed, 40 insertions, 8 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c974f7fe614a..8de4bf8edb9c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -42,6 +42,15 @@ enum {
* to an inode.
*/
BTRFS_INODE_NO_XATTRS,
+ /*
+ * Set when we are in a context where we need to start a transaction and
+ * have dirty pages with the respective file range locked. This is to
+ * ensure that when reserving space for the transaction, if we are low
+ * on available space and need to flush delalloc, we will not flush
+ * delalloc for this inode, because that could result in a deadlock (on
+ * the file range, inode's io_tree).
+ */
+ BTRFS_INODE_NO_DELALLOC_FLUSH,
};
/* in memory btrfs inode */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 62461239600f..e01545538e07 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3001,7 +3001,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 10638537b9ef..d29780463182 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -703,7 +703,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+ ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7e8d8169779d..07479250221d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9389,7 +9389,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot)
+static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
+ bool in_reclaim_context)
{
struct btrfs_inode *binode;
struct inode *inode;
@@ -9410,6 +9411,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
list_move_tail(&binode->delalloc_inodes,
&root->delalloc_inodes);
+
+ if (in_reclaim_context &&
+ test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
+ continue;
+
inode = igrab(&binode->vfs_inode);
if (!inode) {
cond_resched_lock(&root->delalloc_lock);
@@ -9463,10 +9469,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- return start_delalloc_inodes(root, &nr, true);
+ return start_delalloc_inodes(root, &nr, true, false);
}
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+ bool in_reclaim_context)
{
struct btrfs_root *root;
struct list_head splice;
@@ -9489,7 +9496,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = start_delalloc_inodes(root, &nr, false);
+ ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
btrfs_put_root(root);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e8ca229a216b..bd46e107f955 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4940,7 +4940,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC: {
int ret;
- ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+ ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
if (ret)
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 99aa87c08912..a646af95dd10 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
if (ret)
goto out_unlock;
+ /*
+ * After dirtying the page our caller will need to start a transaction,
+ * and if we are low on metadata free space, that can cause flushing of
+ * delalloc for all inodes in order to get metadata space released.
+ * However we are holding the range locked for the whole duration of
+ * the clone/dedupe operation, so we may deadlock if that happens and no
+ * other task releases enough space. So mark this inode as not being
+ * possible to flush to avoid such deadlock. We will clear that flag
+ * when we finish cloning all extents, since a transaction is started
+ * after finding each extent to clone.
+ */
+ set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
+
if (comp_type == BTRFS_COMPRESS_NONE) {
char *map;
@@ -547,6 +560,8 @@ process_slot:
out:
btrfs_free_path(path);
kvfree(buf);
+ clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
+
return ret;
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 64099565ab8f..67e55c5479b8 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
loops = 0;
while ((delalloc_bytes || dio_bytes) && loops < 3) {
- btrfs_start_delalloc_roots(fs_info, items);
+ btrfs_start_delalloc_roots(fs_info, items, true);
loops++;
if (wait_ordered && !trans) {