ext4: fix deadlock due to mbcache entry corruption
author Jan Kara <jack@suse.cz>
Wed, 23 Nov 2022 19:39:50 +0000 (20:39 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 18 Jan 2023 10:41:56 +0000 (11:41 +0100)
[ Upstream commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd ]

When manipulating xattr blocks, we can deadlock by looping forever
inside ext4_xattr_block_set(): the lookup keeps finding an xattr block
for reuse in mbcache, but we are unable to reuse it because its
reference count is too big. This happens because the cache entry for
the xattr block is marked as reusable (e_reusable set) even though its
reference count has already reached the reuse limit. Once this
inconsistency arises it is never corrected, and so
ext4_xattr_block_set() keeps retrying forever.
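
To make the failure mode concrete, below is a minimal userspace sketch
of the retry loop. It is a model only: find_reusable() and
REFCOUNT_MAX are illustrative stand-ins for the mbcache lookup and
EXT4_XATTR_REFCOUNT_MAX, not the real kernel API.

  /* livelock_sketch.c - model of the stuck retry loop described above */
  #include <stdio.h>
  #include <stdbool.h>

  #define REFCOUNT_MAX 1024      /* stand-in for EXT4_XATTR_REFCOUNT_MAX */

  struct cache_entry {
          int  refcount;
          bool reusable;         /* stuck set: the inconsistency above */
  };

  /* Models the mbcache lookup: only "reusable" entries are returned. */
  static struct cache_entry *find_reusable(struct cache_entry *e)
  {
          return e->reusable ? e : NULL;
  }

  int main(void)
  {
          struct cache_entry stuck = {
                  .refcount = REFCOUNT_MAX, .reusable = true
          };
          struct cache_entry *ce;
          long retries = 0;

          while ((ce = find_reusable(&stuck)) != NULL) {
                  if (ce->refcount >= REFCOUNT_MAX) {
                          /* Cannot take another reference; retry the
                           * lookup. A consistent entry would have
                           * reusable == false, the lookup would return
                           * NULL, and the caller would allocate a
                           * fresh block instead. */
                          if (++retries == 5) {  /* cut the demo short */
                                  printf("still spinning after %ld retries\n",
                                         retries);
                                  break;
                          }
                          continue;
                  }
                  ce->refcount++;        /* reuse path: never reached here */
                  break;
          }
          return 0;
  }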

The inconsistent state is caused by a non-atomic update of the
e_reusable bit. e_reusable is part of a bitfield, so updating it
compiles to a read-modify-write of the whole containing word; that can
race with an update of the e_referenced bit in the same bitfield, and
one of the two updates is then lost. Fix the problem by using atomic
bitops instead.
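
The lost update is easy to reproduce outside the kernel. The sketch
below is a userspace analogue, not kernel code: it races bitfield
stores to two neighbouring one-bit fields from two threads. Both bits
live in the same memory word, so each store is a plain
read-modify-write of that word and a concurrent update can be wiped
out. It is deliberately a data race, so the observed counts vary
between runs and machines:

  /* bitfield_race.c - lost-update race between adjacent bitfields.
   * Build: cc -O2 -pthread bitfield_race.c
   * volatile keeps the compiler from caching the word; the stores
   * still compile to non-atomic read-modify-write of the whole word.
   */
  #include <pthread.h>
  #include <stdio.h>

  struct entry {
          unsigned int referenced:1;     /* like e_referenced */
          unsigned int reusable:1;       /* like e_reusable */
  };

  static volatile struct entry e = { .reusable = 1 };

  /* Hammers the referenced bit, as mb_cache_entry_touch() does. */
  static void *toucher(void *arg)
  {
          (void)arg;
          for (long i = 0; i < 10000000; i++) {
                  e.referenced = 1;
                  e.referenced = 0;
          }
          return NULL;
  }

  int main(void)
  {
          pthread_t t;
          long lost = 0;

          pthread_create(&t, NULL, toucher, NULL);
          for (long i = 0; i < 10000000; i++) {
                  e.reusable = 0;  /* like clearing e_reusable at max */
                  if (e.reusable)  /* racing RMW resurrected the bit */
                          lost++;
                  e.reusable = 1;  /* rearm for the next iteration */
          }
          pthread_join(t, NULL);
          printf("lost updates observed: %ld\n", lost);
          return 0;
  }

With the fix, both flags live in an unsigned long and are updated with
set_bit()/clear_bit(), whose atomic read-modify-write cannot lose a
concurrent update to a neighbouring bit.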

This bug has been around for many years, but it became *much* easier
to hit after commit c36112767922 ("ext4: fix race when reusing xattr
blocks").

Cc: stable@vger.kernel.org
Fixes: 61d17f2f0093 ("mbcache: add reusable flag to cache entries")
Fixes: c36112767922 ("ext4: fix race when reusing xattr blocks")
Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/ext4/xattr.c
fs/mbcache.c
include/linux/mbcache.h

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 131de3fcd2bef1d904cb8b2b4f4ae2992bfea6b7..78df2d65998e659f76c575d4becb26e00f714e67 100644
@@ -1293,7 +1293,7 @@ retry_ref:
                                ce = mb_cache_entry_get(ea_block_cache, hash,
                                                        bh->b_blocknr);
                                if (ce) {
-                                       ce->e_reusable = 1;
+                                       set_bit(MBE_REUSABLE_B, &ce->e_flags);
                                        mb_cache_entry_put(ea_block_cache, ce);
                                }
                        }
@@ -2054,7 +2054,7 @@ inserted:
                                }
                                BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
                                if (ref == EXT4_XATTR_REFCOUNT_MAX)
-                                       ce->e_reusable = 0;
+                                       clear_bit(MBE_REUSABLE_B, &ce->e_flags);
                                ea_bdebug(new_bh, "reusing; refcount now=%d",
                                          ref);
                                ext4_xattr_block_csum_set(inode, new_bh);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 950f1829a7fd0d774d0d4f2e82a048531a0941fc..7a12ae87c80637e3923ecf5d125d0520a2a9f038 100644
@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
        atomic_set(&entry->e_refcnt, 1);
        entry->e_key = key;
        entry->e_value = value;
-       entry->e_reusable = reusable;
-       entry->e_referenced = 0;
+       entry->e_flags = 0;
+       if (reusable)
+               set_bit(MBE_REUSABLE_B, &entry->e_flags);
        head = mb_cache_entry_head(cache, key);
        hlist_bl_lock(head);
        hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -162,7 +163,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
        while (node) {
                entry = hlist_bl_entry(node, struct mb_cache_entry,
                                       e_hash_list);
-               if (entry->e_key == key && entry->e_reusable &&
+               if (entry->e_key == key &&
+                   test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
                    atomic_inc_not_zero(&entry->e_refcnt))
                        goto out;
                node = node->next;
@@ -318,7 +320,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
 void mb_cache_entry_touch(struct mb_cache *cache,
                          struct mb_cache_entry *entry)
 {
-       entry->e_referenced = 1;
+       set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
 
@@ -343,9 +345,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
                entry = list_first_entry(&cache->c_list,
                                         struct mb_cache_entry, e_list);
                /* Drop initial hash reference if there is no user */
-               if (entry->e_referenced ||
+               if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
                    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
-                       entry->e_referenced = 0;
+                       clear_bit(MBE_REFERENCED_B, &entry->e_flags);
                        list_move_tail(&entry->e_list, &cache->c_list);
                        continue;
                }
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index e9d5ece8779492260097ba637d487fb362292dfa..591bc4cefe1d6b916ba7e1107be82d623146e3a9 100644
@@ -10,6 +10,12 @@
 
 struct mb_cache;
 
+/* Cache entry flags */
+enum {
+       MBE_REFERENCED_B = 0,
+       MBE_REUSABLE_B
+};
+
 struct mb_cache_entry {
        /* List of entries in cache - protected by cache->c_list_lock */
        struct list_head        e_list;
@@ -26,8 +32,7 @@ struct mb_cache_entry {
        atomic_t                e_refcnt;
        /* Key in hash - stable during lifetime of the entry */
        u32                     e_key;
-       u32                     e_referenced:1;
-       u32                     e_reusable:1;
+       unsigned long           e_flags;
        /* User provided value - stable during lifetime of the entry */
        u64                     e_value;
 };