]> git.baikalelectronics.ru Git - kernel.git/commitdiff
btrfs: replace cleaner_delayed_iput_mutex with a waitqueue
author Josef Bacik <josef@toxicpanda.com>
Mon, 3 Dec 2018 16:06:52 +0000 (11:06 -0500)
committer David Sterba <dsterba@suse.com>
Mon, 25 Feb 2019 13:13:29 +0000 (14:13 +0100)
The throttle path doesn't take cleaner_delayed_iput_mutex, which means
we could think we're done flushing iputs in the data space reservation
path when we could have a throttler doing an iput.  There's no real
reason to serialize the delayed iput flushing, so instead of taking the
cleaner_delayed_iput_mutex whenever we flush the delayed iputs just
replace it with an atomic counter and a waitqueue.  This removes the
short (or long depending on how big the inode is) window where we think
there are no more pending iputs when there really are some.

The waiting is killable as it could be indirectly called from user
operations like fallocate or zero-range. Such call sites should handle
the error; elsewhere that is not necessary, e.g. flush_space just needs
to attempt to make space by waiting on iputs.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
[ add killable comment and changelog parts ]
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c

index 2399f56d8a544f6891716d0599b94c730fffc00c..6e0fd98c6bd9e72508071d22a40f417412f64f74 100644 (file)
@@ -934,7 +934,8 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
-       struct mutex cleaner_delayed_iput_mutex;
+       atomic_t nr_delayed_iputs;
+       wait_queue_head_t delayed_iputs_wait;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3282,6 +3283,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
                              u64 start, u64 num_bytes, u64 min_size,
                              loff_t actual_len, u64 *alloc_hint);
index 4047867473e16ead5445ed2589525687b229aec2..8c0038de73ee2ebdabf3d890159088a5c0a3608e 100644 (file)
@@ -1717,9 +1717,7 @@ static int cleaner_kthread(void *arg)
                        goto sleep;
                }
 
-               mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(fs_info);
-               mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
 
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&fs_info->cleaner_mutex);
@@ -2676,7 +2674,6 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
-       mutex_init(&fs_info->cleaner_delayed_iput_mutex);
        seqlock_init(&fs_info->profiles_lock);
 
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2698,6 +2695,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->defrag_running, 0);
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
+       atomic_set(&fs_info->nr_delayed_iputs, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2775,6 +2773,7 @@ int open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->transaction_wait);
        init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
+       init_waitqueue_head(&fs_info->delayed_iputs_wait);
 
        INIT_LIST_HEAD(&fs_info->pinned_chunks);
 
index 994c71c9eb7051a947a404558a3b7ecc95fc4b3c..f72935646fb154903abb0fd5f6863ad98194e117 100644 (file)
@@ -4279,10 +4279,14 @@ commit_trans:
                                /*
                                 * The cleaner kthread might still be doing iput
                                 * operations. Wait for it to finish so that
-                                * more space is released.
+                                * more space is released.  We don't need to
+                                * explicitly run the delayed iputs here because
+                                * the commit_transaction would have woken up
+                                * the cleaner.
                                 */
-                               mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
-                               mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+                               ret = btrfs_wait_on_delayed_iputs(fs_info);
+                               if (ret)
+                                       return ret;
                                goto again;
                        } else {
                                btrfs_end_transaction(trans);
@@ -4967,9 +4971,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                 * bunch of pinned space, so make sure we run the iputs before
                 * we do our pinned bytes check below.
                 */
-               mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
                btrfs_run_delayed_iputs(fs_info);
-               mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+               btrfs_wait_on_delayed_iputs(fs_info);
 
                ret = may_commit_transaction(fs_info, space_info);
                break;
index 4a0da2d7758b96bf9c0027d2281b9ef35eb1c119..a443645cf8153f57f902e4f7ff686b97f54682ec 100644 (file)
@@ -3256,6 +3256,7 @@ void btrfs_add_delayed_iput(struct inode *inode)
        if (atomic_add_unless(&inode->i_count, -1, 1))
                return;
 
+       atomic_inc(&fs_info->nr_delayed_iputs);
        spin_lock(&fs_info->delayed_iput_lock);
        ASSERT(list_empty(&binode->delayed_iput));
        list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
@@ -3276,11 +3277,32 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
                list_del_init(&inode->delayed_iput);
                spin_unlock(&fs_info->delayed_iput_lock);
                iput(&inode->vfs_inode);
+               if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
+                       wake_up(&fs_info->delayed_iputs_wait);
                spin_lock(&fs_info->delayed_iput_lock);
        }
        spin_unlock(&fs_info->delayed_iput_lock);
 }
 
+/**
+ * btrfs_wait_on_delayed_iputs - wait for all pending delayed iputs to finish
+ * @fs_info: the fs_info for this fs
+ *
+ * Sleeps killably on delayed_iputs_wait until nr_delayed_iputs reads zero, so
+ * user operations like fallocate that end up here can still be killed while
+ * blocked on the iputs.
+ *
+ * Return: 0 once nothing is pending, -EINTR if we were killed while waiting.
+ */
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
+{
+       int ret = wait_event_killable(fs_info->delayed_iputs_wait,
+                       atomic_read(&fs_info->nr_delayed_iputs) == 0);
+       if (ret)
+               return -EINTR;
+       return 0;
+}
+
 /*
  * This creates an orphan entry for the given inode in case something goes wrong
  * in the middle of an unlink.