]> git.baikalelectronics.ru Git - kernel.git/commitdiff
btrfs: ensure relocation never runs while we have send operations running
author: Filipe Manana <fdmanana@suse.com>
Mon, 21 Jun 2021 10:10:38 +0000 (11:10 +0100)
committer: David Sterba <dsterba@suse.com>
Tue, 22 Jun 2021 12:11:58 +0000 (14:11 +0200)
Relocation and send do not play well together because while send is
running a block group can be relocated, a transaction committed and
the respective disk extents get re-allocated and written to or discarded
while send is about to do something with the extents.

This was explained in commit 9e967495e0e0ae ("Btrfs: prevent send failures
and crashes due to concurrent relocation"), which prevented balance and
send from running in parallel but it did not address one remaining case
where chunk relocation can happen: shrinking a device (and device deletion
which shrinks a device's size to 0 before deleting the device).

We now also have one more case where relocation is triggered: on zoned
filesystems, partially used block groups get relocated by a background
thread, introduced in commit 18bb8bbf13c183 ("btrfs: zoned: automatically
reclaim zones").

So make sure that instead of preventing balance from running when there
are ongoing send operations, we prevent relocation from happening.
This uses the infrastructure recently added by a patch that has the
subject: "btrfs: add cancellable chunk relocation support".

It also adds a spinlock dedicated to the mutual exclusion between send
and relocation. Previously fs_info->balance_mutex was used, which made
an attempt to run send block waiting for balance to finish, and that
can take a lot of time on large filesystems.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-group.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/volumes.c

index c42b6528552ffb50171fabdc42152f7d92f2f1c0..024a1c6e5b4022703d8078e05691200ec9dc1618 100644 (file)
@@ -1491,7 +1491,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
                container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
        struct btrfs_block_group *bg;
        struct btrfs_space_info *space_info;
-       int ret;
+       LIST_HEAD(again_list);
 
        if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
                return;
@@ -1502,6 +1502,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
        mutex_lock(&fs_info->reclaim_bgs_lock);
        spin_lock(&fs_info->unused_bgs_lock);
        while (!list_empty(&fs_info->reclaim_bgs)) {
+               int ret = 0;
+
                bg = list_first_entry(&fs_info->reclaim_bgs,
                                      struct btrfs_block_group,
                                      bg_list);
@@ -1547,9 +1549,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
                                  bg->start);
 
 next:
-               btrfs_put_block_group(bg);
                spin_lock(&fs_info->unused_bgs_lock);
+               if (ret == -EAGAIN && list_empty(&bg->bg_list))
+                       list_add_tail(&bg->bg_list, &again_list);
+               else
+                       btrfs_put_block_group(bg);
        }
+       list_splice_tail(&again_list, &fs_info->reclaim_bgs);
        spin_unlock(&fs_info->unused_bgs_lock);
        mutex_unlock(&fs_info->reclaim_bgs_lock);
        btrfs_exclop_finish(fs_info);
index c80d3be148a53b7b65645334053fd5d1eee7a52e..15d17e12c5de33146d8a4ca7fa71157c5ab74b39 100644 (file)
@@ -561,13 +561,13 @@ enum {
        /*
         * Indicate that balance has been set up from the ioctl and is in the
         * main phase. The fs_info::balance_ctl is initialized.
-        * Set and cleared while holding fs_info::balance_mutex.
         */
        BTRFS_FS_BALANCE_RUNNING,
 
        /*
         * Indicate that relocation of a chunk has started, it's set per chunk
         * and is toggled between chunks.
+        * Set, tested and cleared while holding fs_info::send_reloc_lock.
         */
        BTRFS_FS_RELOC_RUNNING,
 
@@ -995,9 +995,10 @@ struct btrfs_fs_info {
 
        struct crypto_shash *csum_shash;
 
+       spinlock_t send_reloc_lock;
        /*
         * Number of send operations in progress.
-        * Updated while holding fs_info::balance_mutex.
+        * Updated while holding fs_info::send_reloc_lock.
         */
        int send_in_progress;
 
index 6eb0010f9c7e1f3b43de19943c0a5a76d54aa3f4..4621120b6bc721688bf860e3467fcb8644251f9f 100644 (file)
@@ -2999,6 +2999,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
        spin_lock_init(&fs_info->swapfile_pins_lock);
        fs_info->swapfile_pins = RB_ROOT;
 
+       spin_lock_init(&fs_info->send_reloc_lock);
        fs_info->send_in_progress = 0;
 
        fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
index 420a89869889549511d79bad38bb65742ec9ccef..fc831597cb22e8f786913edc3fa8005defa99466 100644 (file)
@@ -3789,14 +3789,25 @@ out:
  *   0             success
  *   -EINPROGRESS  operation is already in progress, that's probably a bug
  *   -ECANCELED    cancellation request was set before the operation started
+ *   -EAGAIN       can not start because there are ongoing send operations
  */
 static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
 {
+       spin_lock(&fs_info->send_reloc_lock);
+       if (fs_info->send_in_progress) {
+               btrfs_warn_rl(fs_info,
+"cannot run relocation while send operations are in progress (%d in progress)",
+                             fs_info->send_in_progress);
+               spin_unlock(&fs_info->send_reloc_lock);
+               return -EAGAIN;
+       }
        if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
                /* This should not happen */
+               spin_unlock(&fs_info->send_reloc_lock);
                btrfs_err(fs_info, "reloc already running, cannot start");
                return -EINPROGRESS;
        }
+       spin_unlock(&fs_info->send_reloc_lock);
 
        if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
                btrfs_info(fs_info, "chunk relocation canceled on start");
@@ -3818,7 +3829,9 @@ static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
        /* Requested after start, clear bit first so any waiters can continue */
        if (atomic_read(&fs_info->reloc_cancel_req) > 0)
                btrfs_info(fs_info, "chunk relocation canceled during operation");
+       spin_lock(&fs_info->send_reloc_lock);
        clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
+       spin_unlock(&fs_info->send_reloc_lock);
        atomic_set(&fs_info->reloc_cancel_req, 0);
 }
 
index 6e69302828ef2889f884b01365d5436a232891ff..37e502b09a80b435bc5c319b45afa3f328eef5d9 100644 (file)
@@ -7416,23 +7416,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
        if (ret)
                goto out;
 
-       mutex_lock(&fs_info->balance_mutex);
-       if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
-               mutex_unlock(&fs_info->balance_mutex);
+       spin_lock(&fs_info->send_reloc_lock);
+       if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+               spin_unlock(&fs_info->send_reloc_lock);
                btrfs_warn_rl(fs_info,
-               "cannot run send because a balance operation is in progress");
+               "cannot run send because a relocation operation is in progress");
                ret = -EAGAIN;
                goto out;
        }
        fs_info->send_in_progress++;
-       mutex_unlock(&fs_info->balance_mutex);
+       spin_unlock(&fs_info->send_reloc_lock);
 
        current->journal_info = BTRFS_SEND_TRANS_STUB;
        ret = send_subvol(sctx);
        current->journal_info = NULL;
-       mutex_lock(&fs_info->balance_mutex);
+       spin_lock(&fs_info->send_reloc_lock);
        fs_info->send_in_progress--;
-       mutex_unlock(&fs_info->balance_mutex);
+       spin_unlock(&fs_info->send_reloc_lock);
        if (ret < 0)
                goto out;
 
index 582695cee9d133e3b485baf3d86893f304895dfe..782e16795bc40c7bb3afa9e435cf750eb69441c5 100644 (file)
@@ -4217,14 +4217,6 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
                                btrfs_bg_type_to_raid_name(data_target));
        }
 
-       if (fs_info->send_in_progress) {
-               btrfs_warn_rl(fs_info,
-"cannot run balance while send operations are in progress (%d in progress)",
-                             fs_info->send_in_progress);
-               ret = -EAGAIN;
-               goto out;
-       }
-
        ret = insert_balance_item(fs_info, bctl);
        if (ret && ret != -EEXIST)
                goto out;