btrfs: wait on async extents when flushing delalloc

author Josef Bacik <josef@toxicpanda.com>

Wed, 14 Jul 2021 18:47:21 +0000 (14:47 -0400)

committer David Sterba <dsterba@suse.com>

Mon, 23 Aug 2021 11:19:07 +0000 (13:19 +0200)
author Josef Bacik <josef@toxicpanda.com>
Wed, 14 Jul 2021 18:47:21 +0000 (14:47 -0400)
committer David Sterba <dsterba@suse.com>
Mon, 23 Aug 2021 11:19:07 +0000 (13:19 +0200)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 73b6413cc9aff9f0f5d658d41004fc8645db8a44..25eb214f56ac735312e18282d6b2298329d3399d 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9879,10 +9879,6 @@ static int start_delalloc_inodes(struct btrfs_root *root,
                                          &work->work);
                 } else {
                         ret = sync_inode(inode, wbc);
-                       if (!ret &&
-                           test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-                                    &BTRFS_I(inode)->runtime_flags))
-                               ret = sync_inode(inode, wbc);
                         btrfs_add_delayed_iput(inode);
                         if (ret || wbc->nr_to_write <= 0)
                                 goto out;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c

index eb90a262563f68707a705817509d5901357ebec5..d9c8d738678f0d91a00c90424f1733e75627c2fc 100644 (file)
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -532,9 +532,49 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
         while ((delalloc_bytes || ordered_bytes) && loops < 3) {
                 u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
                 long nr_pages = min_t(u64, temp, LONG_MAX);
+               int async_pages;
  
                 btrfs_start_delalloc_roots(fs_info, nr_pages, true);
  
+               /*
+                * We need to make sure any outstanding async pages are now
+                * processed before we continue.  This is because things like
+                * sync_inode() try to be smart and skip writing if the inode is
+                * marked clean.  We don't use filemap_fdatawrite() for flushing
+                * because we want to control how many pages we write out at a
+                * time, thus this is the only safe way to make sure we've
+                * waited for outstanding compressed workers to have started
+                * their jobs and thus have ordered extents set up properly.
+                *
+                * This exists because we do not want to wait for each
+                * individual inode to finish its async work, we simply want to
+                * start the IO on everybody, and then come back here and wait
+                * for all of the async work to catch up.  Once we're done with
+                * that we know we'll have ordered extents for everything and we
+                * can decide if we wait for that or not.
+                *
+                * If we choose to replace this in the future, make absolutely
+                * sure that the proper waiting is being done in the async case,
+                * as there have been bugs in that area before.
+                */
+               async_pages = atomic_read(&fs_info->async_delalloc_pages);
+               if (!async_pages)
+                       goto skip_async;
+
+               /*
+                * We don't want to wait forever.  If we wrote fewer pages in
+                * this loop than we have outstanding, only wait for that number
+                * of pages; otherwise we can wait for all async pages to finish
+                * before continuing.
+                */
+               if (async_pages > nr_pages)
+                       async_pages -= nr_pages;
+               else
+                       async_pages = 0;
+               wait_event(fs_info->async_submit_wait,
+                          atomic_read(&fs_info->async_delalloc_pages) <=
+                          async_pages);
+skip_async:
                 loops++;
                 if (wait_ordered && !trans) {
                         btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
author	Josef Bacik <josef@toxicpanda.com>
	Wed, 14 Jul 2021 18:47:21 +0000 (14:47 -0400)
committer	David Sterba <dsterba@suse.com>
	Mon, 23 Aug 2021 11:19:07 +0000 (13:19 +0200)
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/space-info.c		patch \| blob \| history